GIT b668e28b74558b3a8277b5fde363ae5f803b36f0 git+ssh://master.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86.git#mm

commit b668e28b74558b3a8277b5fde363ae5f803b36f0
Author: Jeremy Fitzhardinge
Date: Tue Dec 11 16:54:33 2007 +0100

x86: clean up mm/init_32.c

Some code reformatting in init_32.c. No functional change.

Signed-off-by: Jeremy Fitzhardinge
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 2ab64049a40b1ae65540037a43c6c0259f009f07
Author: Jeremy Fitzhardinge
Date: Tue Dec 11 16:54:33 2007 +0100

x86: kill mk_pte_huge

It only has a single use, which can be trivially replaced.

Signed-off-by: Jeremy Fitzhardinge
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit a035f564b662e930896e87d2f823a0cc9971a9a4
Author: Ingo Molnar
Date: Tue Dec 11 16:54:33 2007 +0100

x86: clean up drivers/char/rtc.c

Tons of style cleanup in drivers/char/rtc.c - no code changed:

   text    data     bss     dec     hex filename
   6400     384      32    6816    1aa0 rtc.o.before
   6400     384      32    6816    1aa0 rtc.o.after

Since we seem to have a number of open breakages in this code, we might as well start with making the code more readable and maintainable.

Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit d84a5ed46f1a3425ffb1c831e98ee3585477cc6c
Author: Randy Dunlap
Date: Tue Dec 11 16:54:33 2007 +0100

x64/page.h: convert some macros to inlines

Convert the clear_page/copy_page macros to inline functions for type-checking. Andrew wants to extirpate these ugly macros. (Ingo too. Thomas as well. Please send us more "kill ugly macros" patches! :-)

Signed-off-by: Randy Dunlap
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 3a272d8ac5d35b3c53ada731569ca054d282856c
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:33 2007 +0100

remove arch specific segment headers

This patch puts the remainder of the arch-specific segment headers in segment.h.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 477c361fce3f41b051ad2f5cbff69f3a5e9779f3
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:32 2007 +0100

unify common parts of segment.h

Although segment handling in i386 and x86_64 is very different, there's a common part. Put it in segment.h instead of the arch-specific headers.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 50c7fb5a81d2d38e6d0853c2953441407ebb88a5
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:32 2007 +0100

put get_kernel_rpl in a common location

This macro is useful for both i386 and x86_64, so put it in a common location, where both arches can grab it.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit f0e3b6b3f2b54458b5c86ae35cc6bf2f24ae9bb2
Author: Markus Metzger
Date: Tue Dec 11 16:54:32 2007 +0100

x86, ptrace: support for branch trace store (BTS)

Resend using a different mail client. Changes to the last version:
- split implementation into two layers: ds/bts and ptrace
- renamed TIFs
- save/restore the ds save area msr in __switch_to_xtra()
- make block-stepping only look at the BTF bit

Signed-off-by: Markus Metzger
Signed-off-by: Suresh Siddha
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
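[ Editor's sketch for the get_kernel_rpl entry above -- assumed form, not the verbatim kernel code: a native kernel always runs at ring 0, while paravirt builds override the macro because e.g. a 32-bit Xen guest kernel runs at ring 1. ]

	/* Sketch (assumed): native kernels run at RPL 0; CONFIG_PARAVIRT
	 * builds supply their own definition with the real privilege level. */
	#ifndef CONFIG_PARAVIRT
	#define get_kernel_rpl()	0
	#endif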
commit c6bf19608e39e36e97e8ec218a43afec1861a0b2
Author: Jeff Dike
Date: Tue Dec 11 16:54:32 2007 +0100

UML - change sigcontext fields to match x86

git-x86, in commit 70aa1bd3839e3ec74ce65316528a82570e8de666, changed a lot of the sigcontext field names. This patch changes UML usage to match. I also changed includes of generic headers from "" to <>.

Signed-off-by: Jeff Dike
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit e183a2382f0916fd64a7c54065b4091beb78729a
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:32 2007 +0100

unify system.h

This patch finishes the unification of the system.h file. i386 needs a constant to be defined, and it is defined inside an ifdef. Other than that, pretty much nothing but includes is left in the arch-specific headers, and they are deleted.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit cf7ccddb46c08f9e0b87d7e714d160c3983e88c2
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:31 2007 +0100

move switch_to macro to system.h

This patch moves the switch_to() macro to system.h. As those macros are fundamentally different between i386 and x86_64, they are enclosed in an ifdef.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 0a31e630c297a429638bb39efe411f6ac9d5cce3
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:31 2007 +0100

unify smp parts of system.h

The memory barrier parts of system.h are not very different between i386 and x86_64, the main difference being the availability of instructions, which we handle with the use of ifdefs. They are consolidated in the system.h file, and then removed from the arch-specific headers.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 6dec9cb6283e3ab5b7fef63da3eb8f8c5602c480
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:31 2007 +0100

remove unused macro

Mr. Grep says warn_if_not_ulong() is not used anymore anywhere in the code. So, we remove it.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit aeba0eab1d788785e21681ca9c617b503d5c7ea1
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:31 2007 +0100

unify paravirt parts of system.h

This patch moves the i386 control register manipulation functions, wbinvd, and clts functions to system.h. They are essentially the same as in x86_64. With this, system.h paravirt comes for free in x86_64.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit dc7114ea029fad39cb20ffa9c42710c55b3ba312
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:31 2007 +0100

remove references to cr8 register

As pointed out by Andi, Linux never really uses this register, so saving and restoring it is not really necessary. This patch removes all references to it.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit d86153abbfbc808a811021e0c88901efdd852d24
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:30 2007 +0100

unify load_segment macro

This patch unifies the load_segment() macro, making it equal in the x86_64 and i386 architectures. The common version goes to system.h, and the old ones are deleted.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit bf1205cf81b8c5ad594b0cc8357616de5fb9f127
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:30 2007 +0100

put together equal pieces of system.h

This patch puts together the pieces of system_{32,64}.h that look the same. It's the first step towards the integration of this file.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
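[ Editor's sketch for the "unify smp parts of system.h" entry above -- a hedged illustration of the ifdef pattern, close to but not necessarily verbatim the unified header: x86_64 can always use mfence, while i386 may have to fall back to a serializing locked stack operation when SSE2 is absent. ]

	/* Sketch (assumed) of the ifdef-based barrier unification. */
	#ifdef CONFIG_X86_32
	#define mb()	alternative("lock; addl $0,0(%%esp)", "mfence", \
				    X86_FEATURE_XMM2)
	#else
	#define mb()	asm volatile("mfence" ::: "memory")
	#endif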
commit 380c5f157b71776d02ee31254583f8c2f32315fe
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:30 2007 +0100

remove volatile keyword from clflush.

The p parameter is an explicit memory reference, and is enough to prevent gcc from being nasty here. The volatile seems not needed at all.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 8374dfdc4590a63608f2e99059d1a280c95f543a
Author: Joerg Roedel
Date: Tue Dec 11 16:54:30 2007 +0100

x86_64: some whitespace cleanups in paging code

This patch does some whitespace cleanups in the paging code to fix some checkpatch.pl warnings of my formerly merged cleanup patches.

Signed-off-by: Joerg Roedel
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 4c095886b945d59e0a39f6e25ff637a72b80371e
Author: Andrew Morton
Date: Tue Dec 11 16:54:30 2007 +0100

pie-executable-randomization-uninlining

Cc: "Luck, Tony"
Cc: Arjan van de Ven
Cc: Jakub Jelinek
Cc: Jiri Kosina
Cc: KAMEZAWA Hiroyuki
Cc: Roland McGrath
Signed-off-by: Andrew Morton
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
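[ Editor's sketch for the clflush entry above -- assumed shape, not the verbatim kernel code: with the "+m" constraint making the memory reference explicit to gcc, neither the parameter nor the asm needs a volatile qualifier. ]

	/* Sketch (assumed): the explicit "+m" operand already ties the asm
	 * to the memory access, so no volatile qualifier is required. */
	static inline void clflush(void *p)
	{
		asm("clflush %0" : "+m" (*(char *)p));
	}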
commit d4407e0b0ed7acac4e6714ec3f91d7aa0587b5d7
Author: Andrew Morton
Date: Tue Dec 11 16:54:30 2007 +0100

pie-executable-randomization-checkpatch-fixes

#39: FILE: arch/ia64/ia32/binfmt_elf32.c:229:
+elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type, unsigned long unused)

WARNING: no space between function name and open parenthesis '('
#39: FILE: arch/ia64/ia32/binfmt_elf32.c:229:
+elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type, unsigned long unused)

WARNING: line over 80 characters
#67: FILE: arch/x86/kernel/sys_x86_64.c:80:
+ new_begin = randomize_range(*begin, *begin + 0x02000000, 0);

ERROR: use tabs not spaces
#110: FILE: arch/x86/kernel/sys_x86_64.c:185:
+ ^I mm->cached_hole_size = 0;$

ERROR: use tabs not spaces
#111: FILE: arch/x86/kernel/sys_x86_64.c:186:
+ ^I^Imm->free_area_cache = mm->mmap_base;$

ERROR: use tabs not spaces
#112: FILE: arch/x86/kernel/sys_x86_64.c:187:
+ ^I}$

ERROR: use tabs not spaces
#141: FILE: arch/x86/kernel/sys_x86_64.c:216:
+ ^I^I/* remember the largest hole we saw so far */$

ERROR: use tabs not spaces
#142: FILE: arch/x86/kernel/sys_x86_64.c:217:
+ ^I^Iif (addr + mm->cached_hole_size < vma->vm_start)$

ERROR: use tabs not spaces
#143: FILE: arch/x86/kernel/sys_x86_64.c:218:
+ ^I^I mm->cached_hole_size = vma->vm_start - addr;$

ERROR: use tabs not spaces
#157: FILE: arch/x86/kernel/sys_x86_64.c:232:
+ ^Imm->free_area_cache = TASK_UNMAPPED_BASE;$

ERROR: need a space before the open parenthesis '('
#291: FILE: arch/x86/mm/mmap_64.c:101:
+ } else if(mmap_is_legacy()) {

WARNING: braces {} are not necessary for single statement blocks
#302: FILE: arch/x86/mm/mmap_64.c:112:
+ if (current->flags & PF_RANDOMIZE) {
+	mm->mmap_base += ((long)rnd) << PAGE_SHIFT;
+ }

WARNING: line over 80 characters
#314: FILE: fs/binfmt_elf.c:48:
+static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int, unsigned long);

WARNING: no space between function name and open parenthesis '('
#314: FILE: fs/binfmt_elf.c:48:
+static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int, unsigned long);

WARNING: line over 80 characters
#429: FILE: fs/binfmt_elf.c:438:
+ eppnt, elf_prot, elf_type, total_size);

ERROR: need space after that ',' (ctx:VxV)
#480: FILE: fs/binfmt_elf.c:939:
+ elf_prot, elf_flags,0);
                     ^

total: 9 errors, 7 warnings, 461 lines checked

Your patch has style problems, please review. If any of these errors are false positives, report them to the maintainer; see CHECKPATCH in MAINTAINERS.

Please run checkpatch prior to sending patches.

Cc: "Luck, Tony"
Cc: Arjan van de Ven
Cc: Jakub Jelinek
Cc: Jiri Kosina
Cc: KAMEZAWA Hiroyuki
Cc: Roland McGrath
Signed-off-by: Andrew Morton
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 111261328cf6e564e5320132a51131dc16ec4908
Author: Jiri Kosina
Date: Tue Dec 11 16:54:29 2007 +0100

PIE executable randomization

Load the main executable of (specially compiled/linked -pie/-fpie) ET_DYN binaries onto a random address (in cases in which mmap() is allowed to perform a randomization). The code has been extracted from Ingo's exec-shield patch http://people.redhat.com/mingo/exec-shield/

[akpm@linux-foundation.org: fix used-uninitialised warning]
[kamezawa.hiroyu@jp.fujitsu.com: fixed ia32 ELF on x86_64 handling]

Signed-off-by: Jiri Kosina
Cc: KAMEZAWA Hiroyuki
Cc: Arjan van de Ven
Cc: Roland McGrath
Cc: Jakub Jelinek
Cc: "Luck, Tony"
Signed-off-by: Andrew Morton
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 692121ec573be347b3cf2db5528f578b78610260
Author: Harvey Harrison
Date: Tue Dec 11 16:54:29 2007 +0100

x86: Unify include/asm-x86/linkage_[32|64].h

Remove the definitions of FASTCALL/fastcall from linkage_32: we have compiled with -regparm=3 by default since 2.6.20, so they should no longer be needed. CONFIG_X86_64 and CONFIG_X86_ALIGNMENT_16 are mutually exclusive, as found in Kconfig.cpu, so it should be fine to test them separately.

Signed-off-by: Harvey Harrison
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 23fec1ce1909c1badf17edc033dd4dda3edc9933
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:29 2007 +0100

integrate i386 and x86_64 code in msr.h

This patch proceeds with the integration of msr.h, making the code unified instead of having a version for each architecture. We stick with the native_* functions, and then paravirt comes for free.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit de70111229d0a34c519b59e846ad2480cfa482a8
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:29 2007 +0100

make fixups wordsize agnostic

This patch uses the _ASM_ALIGN and _ASM_PTR macros to make the fixups in native_read/write_msr_safe look the same for x86_64 and i386. Besides using these macros, we also have to take the explicit instruction suffixes out. That is okay because all these instructions use registers, and can be sized by them.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 3e05c0dae575ab8ea168c64ab66b8dfc3d82bf22
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:29 2007 +0100

change write msr functions interface

This patch changes the native_write_msr() and friends interface to explicitly take two 32-bit registers instead of a 64-bit value. The change will ease the merge with 64-bit code. As the 64-bit value would be passed as two registers anyway on i386, the PVOP_CALL interface has to account for that and use low/high parameters; otherwise it would force the x86_64 version to be different. The change does not make the generated i386 code less efficient; as said above, it would get the values from two registers anyway.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
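[ Editor's sketch for the write-msr entry above -- assumed form, not necessarily the verbatim kernel code: the 64-bit MSR value is passed as two explicit 32-bit halves, matching how wrmsr consumes them in eax/edx. ]

	/* Sketch (assumed): wrmsr takes the MSR index in ecx and the
	 * value split across eax (low) and edx (high). */
	static inline void native_write_msr(unsigned int msr,
					    unsigned int low, unsigned int high)
	{
		asm volatile("wrmsr"
			     : /* no outputs */
			     : "c" (msr), "a" (low), "d" (high)
			     : "memory");
	}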
commit fcf12c80f63da42a6e6132557a80bc52f3a1bcfe
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:29 2007 +0100

change rdpmc interface

The rdpmc instruction takes a counter argument in rcx. However, the i386 version was ignoring it. To make the x86_64 and i386 versions the same, as well as to comply with the instruction semantics, this parameter is added to the i386 version.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 2df8317a8a487b3108149ad09566af28d723b619
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:28 2007 +0100

introduce native_read_tscp

Targeting paravirt, this patch introduces native_read_tscp in place of the rdtscp() macro. When in a paravirt guest, this will involve a function call, and thus cannot be done in the vdso area. Those users then have to call the native version directly.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 2902a275d21c96e3510f7a37444286274cee8832
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:28 2007 +0100

unify cpuid functions

cpuid is not very different between i386 and x86_64. We move the x86_64 version away from msr.h, and unify them in processor.h, where they belong. cpuid() paravirt then comes for free.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 8b3e6fa25d42b746b93f04efe674e45aa456ca81
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:28 2007 +0100

split get_cycles_sync

This patch splits get_cycles_sync() into __get_cycles_sync() and the rdtscll part. Paravirt guests cannot issue rdtscll directly, as it involves a function call in the vdso area. So, using the __get_cycles_sync() base, we introduce vget_cycles_sync(), which then calls the native version of rdtscll. Ideally, however, a guest should define its own clocksource, together with a vread function.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 00306c35382881c96591c35c2f6a1f551201a24a
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:28 2007 +0100

allow sched clock to be overridden by paravirt

This patch turns sched_clock() into native_sched_clock(). sched_clock becomes a weak symbol, which can then give its place to a paravirt definition.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 3364171443a6d8d904ca1db3a46fb4eb0ac9f84c
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:27 2007 +0100

unify msr smp funcs

The functions under #ifdef CONFIG_SMP in msr.h are the same for both x86_64 and i386, and this patch removes one of the copies, putting the functions in a single location.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 340640451d0562f45a75f40171a0cedda3d09f30
Author: Glauber de Oliveira Costa
Date: Tue Dec 11 16:54:27 2007 +0100

Wipe out traditional opt from x86_64 Makefile

Among other things, using -traditional as a gcc option stops us from using macro token pasting, which is a feature we heavily rely on. There was still a use of -traditional in arch/x86/kernel/Makefile_64, which this patch removes. I don't see any problems building kernels on my x86_64 box without -traditional.

Signed-off-by: Glauber de Oliveira Costa
Signed-off-by: Steven Rostedt
Acked-by: Jeremy Fitzhardinge
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
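[ Editor's note on the -traditional entry above: traditional (pre-ANSI) preprocessing has no ## operator, so token pasting of the kind the kernel uses everywhere fails under it. A toy example, with a hypothetical macro purely for illustration: ]

	/* Token pasting: rejected under -traditional, ubiquitous in the kernel. */
	#define DEFINE_EVENT_HANDLER(name)	void name##_handler(void)

	DEFINE_EVENT_HANDLER(timer);	/* expands to: void timer_handler(void); */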
commit be8aa9e34284902ea9019f7c9f993c818b0e2f8b
Author: Harvey Harrison
Date: Tue Dec 11 16:54:27 2007 +0100

x86: Use def_bool where possible in Kconfig.cpu

Change occurrences of:

	bool
	default X

to:

	def_bool X

Signed-off-by: Harvey Harrison
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 77b6265a6c56695e0b22e7528a5d4fdd82d55eec
Author: Hiroshi Shimamoto
Date: Tue Dec 11 16:54:27 2007 +0100

x86: clean up process_32/64.c

Whitespace and coding style cleanup. Make process_32/64.c similar.

Signed-off-by: Hiroshi Shimamoto
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 710ce369a1d9b9332d48848c0fc1b0b2d929a409
Author: Harvey Harrison
Date: Tue Dec 11 16:54:27 2007 +0100

x86: Use def_bool where possible

Change occurrences of:

	bool
	default X

to:

	def_bool X

Change occurrences of:

	bool "Foo"
	default X

to:

	def_bool X
	prompt "Foo"

Purely mechanical changes, applies on top of your mm lineup. Shows no difference in the generated config for allmodconfig/allyesconfig. If you aren't interested in this kind of patch, just let me know.

Signed-off-by: Harvey Harrison
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit ae673837f855f4cae9296f43228acb076daa3340
Author: Joerg Roedel
Date: Tue Dec 11 16:54:26 2007 +0100

x86_64: use __PAGE_KERNEL_EXEC in ioremap_64.c

This patch replaces the manual permission setup for pages in ioremap_64.c with the pre-defined __PAGE_KERNEL_EXEC value.

Signed-off-by: Joerg Roedel
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 00fa370b8b89e3182011d30465fae4c061772580
Author: Joerg Roedel
Date: Tue Dec 11 16:54:26 2007 +0100

x86_64: use __PAGE_KERNEL* instead of _KERNPG_TABLE

This minor cleanup replaces _KERNPG_TABLE with __PAGE_KERNEL* for 2MB PTEs in the x86_64 memory initialization code. The __PAGE_KERNEL* defines are more appropriate for PTEs.

Signed-off-by: Joerg Roedel
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit af7ef54581f1960477b23168091a8dfea6c041ea
Author: Joerg Roedel
Date: Tue Dec 11 16:54:26 2007 +0100

x86_64: define all _PAGE_* in terms of _PAGE_BIT_*

This patch defines the _PAGE_* paging attributes in pgtable_64.h in terms of the previously defined _PAGE_BIT_* values.

Signed-off-by: Joerg Roedel
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
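[ Editor's sketch for the _PAGE_BIT_* entry above -- an illustrative subset; the bit positions shown are the standard x86 PTE bits, but the exact macro list in the patch is assumed: ]

	/* Define attribute masks in terms of named bit positions. */
	#define _PAGE_BIT_PRESENT	0
	#define _PAGE_BIT_RW		1
	#define _PAGE_BIT_USER		2

	#define _PAGE_PRESENT	(1UL << _PAGE_BIT_PRESENT)
	#define _PAGE_RW	(1UL << _PAGE_BIT_RW)
	#define _PAGE_USER	(1UL << _PAGE_BIT_USER)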
commit b65ef7c08c817ca7355a389d3e23055c24ed639c
Author: Len Brown
Date: Tue Dec 11 16:54:26 2007 +0100

x86: 32-bit IOAPIC: de-fang IRQ compression

commit c434b7a6aedfe428ad17cd61b21b125a7b7a29ce (x86: avoid wasting IRQs for PCI devices) created a concept of "IRQ compression" on i386 to conserve IRQ numbers on systems with many sparsely populated IO APICs. The same scheme was also added to x86_64, but later removed when x86_64 received an IRQ overhaul that made it unnecessary -- including per-CPU IRQ vectors that greatly increased the IRQ capacity of the machine.

i386 has not received the analogous overhaul, and thus a previous attempt to delete IRQ compression from i386 was rejected on the theory that there may exist machines that actually need it. The fact is that the author of the IRQ compression patch was unable to confirm the actual existence of such a system. As a result, all i386 kernels with IOAPIC support pay the following:

1. confusion

IRQ compression renames the traditional IOAPIC pin numbers (aka ACPI GSIs) into sequential IRQ #s:

ACPI: PCI Interrupt 0000:00:1c.0[A] -> GSI 20 (level, low) -> IRQ 16
ACPI: PCI Interrupt 0000:00:1c.1[B] -> GSI 21 (level, low) -> IRQ 17
ACPI: PCI Interrupt 0000:00:1c.2[C] -> GSI 22 (level, low) -> IRQ 18
ACPI: PCI Interrupt 0000:00:1c.3[D] -> GSI 23 (level, low) -> IRQ 19
ACPI: PCI Interrupt 0000:00:1c.4[A] -> GSI 20 (level, low) -> IRQ 16

This makes /proc/interrupts look different depending on system configuration and device probe order. It is also different from the x86_64 kernel running on the exact same system. As a result, programmers get confused when comparing systems.

2. complexity

The IRQ code in Linux is already overly complex, and IRQ compression makes it worse. There have already been two bug workarounds related to IRQ compression -- the IRQ0 timer workaround and the VIA PCI IRQ workaround.

3. size

All i386 kernels with IOAPIC support contain an int[4096] -- a 4-page array to contain the renamed IRQs.

So while the IRQ compression code on i386 should really be deleted -- even before merging the x86_64 irq overhaul -- this patch simply disables it on all high-volume systems to avoid problems #1 and #2 on almost all i386 systems. A large system with pin numbers >= 64 will still have compression to conserve limited IRQ numbers for sparse IOAPICs. However, the vast majority of the planet, those with only pin numbers < 64, will use an identity GSI -> IRQ mapping.

Signed-off-by: Len Brown
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
Acked-by: "Eric W. Biederman"

commit 03eff7f1930e74f3d3b1281d2920d67de5a64a82
Author: akpm@linux-foundation.org
Date: Tue Dec 11 16:54:26 2007 +0100

x86: fix typo in ptrace.c

> arch/x86/kernel/ptrace.c: In function 'set_segment_reg':
> arch/x86/kernel/ptrace.c:226: error: label at end of compound statement

Signed-off-by: Ingo Molnar

commit 86bbaad9a3e50b15ad4e9c8b7f4a424f00f83db2
Author: Roland McGrath
Date: Tue Dec 11 16:54:26 2007 +0100

x86 ptrace getreg/putreg merge

invalid_selector() didn't need to be implemented as a macro, hence it shouldn't have been.

Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit c1e38cfa06022e4b2cd9d2c64149b33ee3a4147e
Author: H. Peter Anvin
Date: Tue Dec 11 16:54:24 2007 +0100

x86: use generic register name in the thread and tss structures

This changes size-specific register names (eip/rip, esp/rsp, etc.) to generic names in the thread and tss structures.

Signed-off-by: H. Peter Anvin
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit e3877c4af0e489a33b10d602e29f22ef20590b33
Author: Roland McGrath
Date: Tue Dec 11 16:54:24 2007 +0100

x86 ptrace merge removals

This removes the old separate 64-bit and ia32 ptrace source files. They are no longer used.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 1f9b5b3a6f82ff771f950ab765b927a6e72c5a16
Author: Roland McGrath
Date: Tue Dec 11 16:54:23 2007 +0100

x86 ptrace merge complete

This switches the 64-bit build over to using the shared ptrace code, instead of the old ptrace_64.c and arch/x86/ia32/ptrace32.c code.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
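[ Editor's sketch for the getreg/putreg merge entry above -- a hedged guess at invalid_selector() as an inline rather than a macro: a selector written from userland must be the null selector or carry the user requested-privilege level. ]

	/* Sketch (assumed): reject selectors that are neither null nor RPL 3. */
	static inline bool invalid_selector(u16 value)
	{
		return unlikely(value != 0 &&
				(value & SEGMENT_RPL_MASK) != USER_RPL);
	}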
commit 9541fd39d2ee3c4a62ae729024c89106916edf15
Author: Roland McGrath
Date: Tue Dec 11 16:54:23 2007 +0100

x86 ia32 ptrace arch merge

This moves the sys32_ptrace code into arch/x86/kernel/ptrace.c, verbatim except for a few hard-coded sizes replaced with sizeof. Here this code can use the shared local functions in this file.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 02d965fed890b602a2361935fa4b1e3dfb215868
Author: Roland McGrath
Date: Tue Dec 11 16:54:23 2007 +0100

x86 ia32 ptrace getreg/putreg merge

This reimplements the 64-bit IA32-emulation register access functions in arch/x86/kernel/ptrace.c, where they can share some guts with the native access functions directly. These functions are not used yet, but this paves the way to move IA32 ptrace support into this file to share its local functions.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit b3f59cbe2216398b36772968d59acdae3d413d6a
Author: Roland McGrath
Date: Tue Dec 11 16:54:23 2007 +0100

x86 ptrace merge syscall trace

This moves the 64-bit syscall tracing functions into ptrace.c, so that ptrace_64.c becomes entirely obsolete.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 1a97b472842a9d49443f7622a8342f723ec0f51b
Author: Roland McGrath
Date: Tue Dec 11 16:54:23 2007 +0100

x86 ptrace arch merge

This adds 64-bit support to arch_ptrace in arch/x86/kernel/ptrace.c, so this function can be used for native ptrace on both 32 and 64.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 02762584f35832e3e7a182a062ea6f819c678c3a
Author: Roland McGrath
Date: Tue Dec 11 16:54:22 2007 +0100

x86 ptrace getreg/putreg merge

This merges 64-bit support into the low-level register access functions in arch/x86/kernel/ptrace.c, paving the way to share this file between 32-bit and 64-bit builds.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 8bd3cc7fe800063d71c27e8f45c6d5469cdc12d5
Author: Roland McGrath
Date: Tue Dec 11 16:54:22 2007 +0100

x86 ptrace getreg/putreg cleanup

This cleans up the getreg/putreg functions to move the special cases (segment registers and eflags) out into their own subroutines.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 2609bfe8c77c3ffa3052ea3c23d525db6a764b1f
Author: Roland McGrath
Date: Tue Dec 11 16:54:22 2007 +0100

x86: ptrace FLAG_MASK cleanup

This cleans up the FLAG_MASK macro to use symbolic constants instead of a magic number.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 9771bf0be289b9c36ae4af4f4f1c7851d2194ae2
Author: Roland McGrath
Date: Tue Dec 11 16:54:22 2007 +0100

x86: ptrace_32 renamed

This renames ptrace_32.c back to ptrace.c, in preparation for merging the 32/64 versions of these files.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit b125d644f09ffd34a6568de3de165b34d54ff2ba
Author: Roland McGrath
Date: Tue Dec 11 16:54:21 2007 +0100

x86-32 thread_struct.debugreg

This replaces the debugreg[7] member of thread_struct with individual members debugreg0, etc. This saves two words for the dummies 4 and 5, and harmonizes the code between 32 and 64.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
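[ Editor's sketch for the FLAG_MASK entry above -- the bit set shown reproduces the traditional i386 magic number 0x00050dd5; the exact composition in the merged file is assumed, not confirmed: ]

	/* Sketch (assumed): the eflags bits userland may modify via ptrace,
	 * spelled with symbolic constants instead of 0x00050dd5. */
	#define FLAG_MASK (X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | \
			   X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_TF | \
			   X86_EFLAGS_DF | X86_EFLAGS_OF | X86_EFLAGS_RF | \
			   X86_EFLAGS_AC)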
commit f09f41cf27019fe6df77d9477b864beb56dd488c
Author: Roland McGrath
Date: Tue Dec 11 16:54:21 2007 +0100

x86-64 ia32 ptrace get/putreg32 current task

This generalizes the getreg32 and putreg32 functions so they can be used on the current task, as well as on a task stopped in TASK_TRACED and switched off. This lays the groundwork to share this code for all kinds of user-mode machine state access, not just ptrace.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 114aa8deff27e2260c63d25704d16dc82a4c05a1
Author: Roland McGrath
Date: Tue Dec 11 16:54:21 2007 +0100

x86-32 ptrace get/putreg current task

This generalizes the getreg and putreg functions so they can be used on the current task, as well as on a task stopped in TASK_TRACED and switched off. This lays the groundwork to share this code for all kinds of user-mode machine state access, not just ptrace.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit dc4362eaf692c2b0ad2e9cf5b7c7ca15677eb64e
Author: Roland McGrath
Date: Tue Dec 11 16:54:21 2007 +0100

x86-64 ptrace get/putreg current task

This generalizes the getreg and putreg functions so they can be used on the current task, as well as on a task stopped in TASK_TRACED and switched off. This lays the groundwork to share this code for all kinds of user-mode machine state access, not just ptrace.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit a1472754f2631796dbf25f0a1160032a8bca770b
Author: Roland McGrath
Date: Tue Dec 11 16:54:18 2007 +0100

x86-32 ptrace whitespace

This canonicalizes the indentation in the getreg and putreg functions.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit d80612809ef4011b31a0dbd57279d426c567bea4
Author: Roland McGrath
Date: Tue Dec 11 16:54:18 2007 +0100

x86-64 ptrace whitespace

This canonicalizes the indentation in the getreg and putreg functions.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit deaca215e8b988de04ee5ed06deec462bf8d0750
Author: Roland McGrath
Date: Tue Dec 11 16:54:18 2007 +0100

x86-64 ia32 ptrace pt_regs cleanup

This cleans up the getreg32/putreg32 functions to use struct pt_regs in a straightforward fashion, instead of equivalent ugly pointer arithmetic.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit c1f4ddc330974b7ef2256e14218481030dc4c9d4
Author: Roland McGrath
Date: Tue Dec 11 16:54:17 2007 +0100

x86: eflags enum

This removes the EF_* enum. It is no longer used, and duplicates the X86_EFLAGS_* constants.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit b554c65c53644917d15576f013aed91169a5f49e
Author: Roland McGrath
Date: Tue Dec 11 16:54:17 2007 +0100

x86: setup64 eflags constants

This cleans up arch/x86/kernel/setup64.c to use the X86_EFLAGS_* constants instead of the EF_* enum.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 1aa68ff48cd51a75b723902cd18ff8ded8cf802e
Author: H. Peter Anvin
Date: Tue Dec 11 16:54:17 2007 +0100

x86: use generic register names in struct sigcontext

Switch struct sigcontext to using register names without e- or r-prefixes for both 32- and 64-bit x86. This is intended as a preliminary step in unifying this code between architectures.

Signed-off-by: H. Peter Anvin
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
commit 83511c4323ed2bd253b66ff8b64b5ebd451ad153
Author: H. Peter Anvin
Date: Tue Dec 11 16:54:17 2007 +0100

x86: Use generic register names in struct user_regs_struct

Switch struct user_regs_struct (which is no longer exported to userspace) to using register names without e- or r-prefixes for both 32- and 64-bit x86. This is intended as a preliminary step in unifying this code between architectures. Also, be a bit more strict in truncating 32-bit "extended" segment register values to 16 bits.

Signed-off-by: H. Peter Anvin
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 8c4ce71c400d8d334881856c86d2a982ae0d91d2
Author: H. Peter Anvin
Date: Tue Dec 11 16:54:16 2007 +0100

x86: rename the struct pt_regs members for 32/64-bit consistency

We have a lot of code which differs only by the naming of specific members of structures that contain registers. In order to enable additional unifications, this patch drops the e- or r- size prefix from the register names in struct pt_regs, and drops the x- prefixes for segment registers on the 32-bit side. This patch also performs the equivalent renames in some additional places that might be candidates for unification in the future.

Signed-off-by: H. Peter Anvin
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit b4329d1a6d54e23d321e2290bc0fadb74137bd25
Author: Jeremy Fitzhardinge
Date: Tue Dec 11 16:54:16 2007 +0100

x86: add set/clear_cpu_cap operations

The patch to suppress bitops-related warnings added a pile of ugly casts. Many of these were related to the management of x86 CPU capabilities. Clean these up by adding specific set/clear_cpu_cap macros, and use them consistently.

Signed-off-by: Jeremy Fitzhardinge
Cc: Andi Kleen
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit f55d0d6da3274a5179a65f7fd4daabf7fca09f29
Author: Jeremy Fitzhardinge
Date: Tue Dec 11 16:54:16 2007 +0100

x86: clean up bitops-related warnings

Add casts to appropriate places to silence spurious bitops warnings.

Signed-off-by: Jeremy Fitzhardinge
Cc: Andi Kleen
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 489b5c202be14c42d578897c2ac7b602730efa36
Author: Jeremy Fitzhardinge
Date: Tue Dec 11 16:54:15 2007 +0100

x86: partial unification of asm-x86/bitops.h

This unifies the set/clear/test bit functions of asm/bitops.h. I have not attempted to merge the bit-finding functions, since they rely on the machine word size and can't be easily restructured to work generically without a lot of #ifdefs. In particular, the 64-bit code can assume the presence of conditional move instructions, whereas 32-bit needs to be more careful.

The inline assembly for the bit operations has been changed to remove explicit sizing hints on the instructions, so the assembler will pick the appropriate instruction forms depending on the architecture and the context.

Signed-off-by: Jeremy Fitzhardinge
Cc: Andi Kleen
Cc: Linus Torvalds
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 1f356cf6f80490978f92bac029b7884fa7e1f925
Author: Pavel Machek
Date: Tue Dec 11 16:54:15 2007 +0100

time: more timer related cleanups

I was confused by the FSEC = 10^15 NSEC statement, plus small whitespace fixes. When there's a copyright, there should be a GPL.

Signed-off-by: Pavel Machek
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit e63707dd20ec07856d45eaea0161ce0242dec306
Author: Pavel Machek
Date: Tue Dec 11 16:54:15 2007 +0100

time: timer cleanups

Small cleanups to tick-related code. A wrong preempt count is followed by BUG(), so it is hardly KERN_WARNING.

Signed-off-by: Pavel Machek
Cc: john stultz
Signed-off-by: Andrew Morton
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
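[ Editor's sketch for the set/clear_cpu_cap entry above -- assumed form: the helpers confine the bitmask cast to one place instead of scattering it at every call site. ]

	/* Sketch (assumed): wrap the capability-bitmask casts once, here. */
	#define set_cpu_cap(c, bit)	\
		set_bit(bit, (unsigned long *)((c)->x86_capability))
	#define clear_cpu_cap(c, bit)	\
		clear_bit(bit, (unsigned long *)((c)->x86_capability))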
commit 7b0245c43f8c1e8fa28124404f6aac716ac873e8
Author: Pavel Machek
Date: Tue Dec 11 16:54:15 2007 +0100

time: clean hungarian notation from timers

Clean up Hungarian notation from the timer code.

Signed-off-by: Pavel Machek
Cc: john stultz
Signed-off-by: Andrew Morton
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 93b26d0a613b0bf501eef2f6b13ec9a20ebc4635
Author: akpm@linux-foundation.org
Date: Tue Dec 11 16:54:14 2007 +0100

+ mm-prevent-dereferencing-non-allocated-per_cpu-variables-fix.patch added to -mm tree

The patch titled mm-prevent-dereferencing-non-allocated-per_cpu-variables-fix has been added to the -mm tree. Its filename is mm-prevent-dereferencing-non-allocated-per_cpu-variables-fix.patch

Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit a6d13e672f1c33629c1fa4240a611d5ae965ab50
Author: akpm@linux-foundation.org
Date: Tue Dec 11 16:54:14 2007 +0100

+ mm-prevent-dereferencing-non-allocated-per_cpu-variables.patch added to -mm tree

The patch titled prevent dereferencing non-allocated per_cpu variables has been added to the -mm tree. Its filename is mm-prevent-dereferencing-non-allocated-per_cpu-variables.patch

Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit d245af85060a948155590cf0c288348ea58b26b9
Author: Roland McGrath
Date: Tue Dec 11 16:54:14 2007 +0100

x86: PTRACE_SINGLEBLOCK

This adds the PTRACE_SINGLEBLOCK request on x86, matching the ia64 feature. The implementation comes from the generic ptrace code and relies on the low-level machine support provided by arch_has_block_step() et al.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 92eb690b04898c1bec42695892920d26e83c7fc9
Author: Roland McGrath
Date: Tue Dec 11 16:54:14 2007 +0100

x86: debugctlmsr kprobes

This adjusts the x86 kprobes implementation to cope with per-thread MSR_IA32_DEBUGCTLMSR being set for user mode. I haven't delved deep enough into the kprobes code to be really sure this covers all the cases where the user-mode BTF setting needs to be cleared or restored. It looks about right to me.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit fd5d8365b642017879b2a1105c2dda009b3c8991
Author: Roland McGrath
Date: Tue Dec 11 16:54:14 2007 +0100

x86: debugctlmsr arch_has_block_step

This implements user-mode step-until-branch on x86 using the BTF bit in MSR_IA32_DEBUGCTLMSR. It's just like single-step, only less so.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 96e26b23b22fd46b3ee13bf4d87dab26ec35198a
Author: Roland McGrath
Date: Tue Dec 11 16:54:14 2007 +0100

x86: debugctlmsr context switch

This adds low-level support for a per-thread value of MSR_IA32_DEBUGCTLMSR. The per-thread value is switched in when TIF_DEBUGCTLMSR is set.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
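[ Editor's sketch for the debugctlmsr context-switch entry above -- assumed and simplified; the helper name is hypothetical, and the real code must also clear a stale value when only the previous task had the flag set: ]

	/* Sketch (assumed, simplified): install the incoming thread's
	 * MSR_IA32_DEBUGCTLMSR value, or clear any leftover one. */
	static void switch_debugctlmsr(struct task_struct *next_p,
				       struct thread_struct *next)
	{
		if (test_tsk_thread_flag(next_p, TIF_DEBUGCTLMSR))
			wrmsrl(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr);
		else
			wrmsrl(MSR_IA32_DEBUGCTLMSR, 0);
	}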
commit 69faa0fd17ef9a151852b56e8e6d1aebcc0cadd1
Author: Roland McGrath
Date: Tue Dec 11 16:54:13 2007 +0100

x86: debugctlmsr kconfig

This adds the (internal) Kconfig macro CONFIG_X86_DEBUGCTLMSR, to be defined when configuring to support only hardware that definitely supports MSR_IA32_DEBUGCTLMSR with the BTF flag. The Intel documentation says "P6 family" and later processors all have it. I think the Kconfig dependencies are right to have it set for those and unset for others (i.e., when 586 and earlier are supported).

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 33f593caf53381efaac5b43cff38a5064512cf14
Author: Roland McGrath
Date: Tue Dec 11 16:54:13 2007 +0100

x86: debugctlmsr constants

This adds constant macros for a few of the bits in MSR_IA32_DEBUGCTLMSR.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit b89e3f35d9060ef7a8a77677d64ec34e6c038b36
Author: Roland McGrath
Date: Tue Dec 11 16:54:12 2007 +0100

ptrace: generic PTRACE_SINGLEBLOCK

This makes ptrace_request handle PTRACE_SINGLEBLOCK along with PTRACE_CONT et al. The new generic code makes use of the arch_has_block_step macro and generic entry points on machines that define them.

[ mingo@elte.hu: bugfix ]

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 41101245dfd53fafa194ad4eed8680b64d03103d
Author: Roland McGrath
Date: Tue Dec 11 16:54:12 2007 +0100

ptrace: arch_has_block_step

This defines the new macro arch_has_block_step() in linux/ptrace.h, a default for when asm/ptrace.h does not define it. This is the analog of arch_has_single_step() for step-until-branch on machines that have it. It declares the new user_enable_block_step function, which goes with the existing user_enable_single_step and user_disable_single_step. This is not used yet, but paves the way to harmonize on this interface for the arch-specific calls on all machines.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 2d8f989feff799d6de20ca37b062c3f71524e79c
Author: Roland McGrath
Date: Tue Dec 11 16:54:11 2007 +0100

x86-32 ptrace debugreg cleanup

This cleans up the 32-bit ptrace code to separate the guts of the debug register access from the implementation of PTRACE_PEEKUSR and PTRACE_POKEUSR. The new functions ptrace_[gs]et_debugreg match the new 64-bit entry points for parity, but they don't need to be global.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit eb4bb86d0d31cea6975f6439bd4a680be8895143
Author: Roland McGrath
Date: Tue Dec 11 16:54:11 2007 +0100

x86-64 ia32 ptrace debugreg cleanup

This cleans up the ia32 compat ptrace code to use shared code from native ptrace for the implementation guts of debug register access.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 747e89f1391c9a97ef0e9f990fdf898c41c3ff80
Author: Roland McGrath
Date: Tue Dec 11 16:54:11 2007 +0100

x86-64 ptrace debugreg cleanup

This cleans up the 64-bit ptrace code to separate the guts of the debug register access from the implementation of PTRACE_PEEKUSR and PTRACE_POKEUSR. The new functions ptrace_[gs]et_debugreg are made global so that the ia32 code can later be changed to call them too.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 47fff8a43947aa6054f3bcf4ae5a46d391ad5f71
Author: Roland McGrath
Date: Tue Dec 11 16:54:11 2007 +0100

x86-64 ptrace: use task_pt_regs

This cleans up the 64-bit ptrace code to use task_pt_regs instead of its own redundant code that does the same thing a different way.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
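[ Editor's sketch for the debugctlmsr constants entry above -- the two bits most relevant to this series; the positions are the architecturally documented ones, the macro names are what the patch plausibly used: ]

	/* Low bits of MSR_IA32_DEBUGCTLMSR (architectural positions). */
	#define DEBUGCTLMSR_LBR		(1UL << 0)	/* last-branch recording */
	#define DEBUGCTLMSR_BTF		(1UL << 1)	/* single-step on branches */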
commit 82fb9c9ddb179e46e863595034b145b7eaa4d77d
Author: Roland McGrath
Date: Tue Dec 11 16:54:11 2007 +0100

x86-32 ptrace: use task_pt_regs

This cleans up the 32-bit ptrace code to use task_pt_regs instead of its own redundant code that does the same thing a different way.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 08d23aa27b7c953715286abf07cbe05b550c721a
Author: Roland McGrath
Date: Tue Dec 11 16:54:11 2007 +0100

powerpc: ptrace generic resume

This removes the handling for PTRACE_CONT et al from the powerpc ptrace code, so it uses the new generic code via ptrace_request.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 1ab20c0e2b72c2004c1a546ecdc4ee55f52e8d04
Author: Roland McGrath
Date: Tue Dec 11 16:54:10 2007 +0100

powerpc: arch_has_single_step

This defines the new standard arch_has_single_step macro. It makes the existing set_single_step and clear_single_step entry points global, and renames them to the new standard names user_enable_single_step and user_disable_single_step, respectively.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 76e8fd637c7ce50cf1fa91e9ac8c3b7353005f24
Author: Roland McGrath
Date: Tue Dec 11 16:54:10 2007 +0100

x86-32: ptrace generic resume

This removes the handling for PTRACE_CONT et al from the 32-bit ptrace code, so it uses the new generic code via ptrace_request.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 233bdc3ec059d63daa19390f1e78fceddfe714a0
Author: Roland McGrath
Date: Tue Dec 11 16:54:10 2007 +0100

x86-64: ptrace generic resume

This removes the handling for PTRACE_CONT et al from the 64-bit ptrace code, so it uses the new generic code via ptrace_request.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 83f267a31a3a023d4fa2a3d26386c0fc033a9356
Author: Roland McGrath
Date: Tue Dec 11 16:54:10 2007 +0100

ptrace: generic resume

This makes ptrace_request handle all the ptrace requests that wake up the traced task. These do low-level ptrace implementation magic that is not arch-specific and should be kept out of arch code. The implementations on each arch usually do the same thing. The new generic code makes use of the arch_has_single_step macro and generic entry points to handle PTRACE_SINGLESTEP.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 1650bed63b5ef49533f1a1a12eb1077eac85e095
Author: Roland McGrath
Date: Tue Dec 11 16:54:09 2007 +0100

x86 single_step: TIF_FORCED_TF

This changes the single-step support to use a new thread_info flag TIF_FORCED_TF instead of the PT_DTRACE flag in task_struct.ptrace. This keeps arch implementation uses out of this non-arch field.

This changes the ptrace access to eflags to mask TF and maintain the TIF_FORCED_TF flag directly if userland sets TF, instead of relying on ptrace_signal_deliver. The 64-bit and 32-bit kernels are harmonized on this same behavior. The ptrace_signal_deliver approach works now, but this change makes the low-level register access code reliable when called from different contexts than a ptrace stop, which will be possible in the future.

The 64-bit do_debug exception handler is also changed not to clear TF from user-mode registers. This matches the 32-bit kernel's behavior.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
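[ Editor's sketch for the generic-resume entry above -- a condensed, hedged guess at the shared wake-up logic in kernel/ptrace.c; the real code also covers PTRACE_KILL and PTRACE_SINGLEBLOCK: ]

	/* Sketch (assumed, simplified) of the shared resume path. */
	static int ptrace_resume(struct task_struct *child, long request,
				 long data)
	{
		if (!valid_signal(data))
			return -EIO;

		if (request == PTRACE_SYSCALL)
			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
		else
			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);

		if (request == PTRACE_SINGLESTEP) {
			if (!arch_has_single_step())
				return -EIO;
			user_enable_single_step(child);
		} else {
			user_disable_single_step(child);
		}

		child->exit_code = data;
		wake_up_process(child);
		return 0;
	}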
commit eaadb751fe7d9f1f1d16b6ede0e83ae66ac464ad
Author: Roland McGrath
Date: Tue Dec 11 16:54:09 2007 +0100

x86: single_step: share code

This removes the single-step code from ptrace_32.c and uses the step.c code shared with the 64-bit kernel. The two versions of the code were nearly identical already, so the shared code has only a couple of simple #ifdefs.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit d68cb9478526855759132b722413052829c20660
Author: Roland McGrath
Date: Tue Dec 11 16:54:09 2007 +0100

x86: single_step 0xf0

This fixes the 64-bit single-step handling code's instruction decoder to grok the 0xf0 (lock) prefix, which the 32-bit code already does correctly.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit edb2f8c31be313ea6b70e0e16382f06391944879
Author: Roland McGrath
Date: Tue Dec 11 16:54:09 2007 +0100

x86: single_step segment macros

This cleans up the single-step code to use the asm/segment.h macros for segment selector magic bits, rather than its own constant.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 630e12f4ec239f179b3192422205442df883e8ff
Author: Roland McGrath
Date: Tue Dec 11 16:54:08 2007 +0100

x86: single_step moved

This moves the single-step support code from ptrace_64.c into a new file step.c, verbatim. This paves the way for consolidating this code between the 64-bit and 32-bit versions.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 555d38e80e6635116b3a0881c32c1e81e4e54963
Author: Roland McGrath
Date: Tue Dec 11 16:54:08 2007 +0100

x86: arch_has_single_step

This defines the new standard arch_has_single_step macro. It makes the existing set_singlestep and clear_singlestep entry points global, and renames them to the new standard names user_enable_single_step and user_disable_single_step, respectively.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 11f04959792821bdc5db219702a9f155a0a6b993
Author: Roland McGrath
Date: Tue Dec 11 16:54:08 2007 +0100

x86: remove TRAP_FLAG

This gets rid of the local constant macro TRAP_FLAG. It's redundant with the public constant macro X86_EFLAGS_TF.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 9c4efd8f2bbaec4736fbc1a1f0780ef14ff99590
Author: Roland McGrath
Date: Tue Dec 11 16:54:08 2007 +0100

x86: segment selector macros

This copies into asm-x86/segment_64.h some macros from asm-x86/segment_32.h for dissecting segment selectors. This lets other code use these macros uniformly on 32/64-bit rather than duplicating the constants elsewhere.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 525b2171bcc1a55a303175d18fbc4e13400dca76
Author: Roland McGrath
Date: Tue Dec 11 16:54:07 2007 +0100

ptrace: arch_has_single_step

This defines the new macro arch_has_single_step() in linux/ptrace.h, a default for when asm/ptrace.h does not define it. It declares the new user_enable_single_step and user_disable_single_step functions. This is not used yet, but paves the way to harmonize on this interface for the arch-specific calls on all machines.

Signed-off-by: Roland McGrath
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
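[ Editor's sketch for the segment selector macros entry above -- these reflect the architectural selector layout (bits 0-1 RPL, bit 2 table indicator, rest index); the names match the i386 header being copied: ]

	/* An x86 selector: | index (13 bits) | TI | RPL (2 bits) | */
	#define SEGMENT_RPL_MASK	0x3	/* requested privilege level */
	#define SEGMENT_TI_MASK		0x4	/* table indicator: 0=GDT, 1=LDT */
	#define USER_RPL		0x3	/* user selectors carry RPL 3 */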
commit 653baf26a3f08b87a11b05c35ac9236fa2cbc78d
Author: Andrew Morton
Date: Tue Dec 11 16:54:07 2007 +0100

x86: kmap_atomic() debugging

[ mingo@elte.hu: cleanups and made dependent on CONFIG_DEBUG_HIGHMEM. This caught a handful of bugs already, so let's apply it. If it gets things wrong we'll disable it. ]

Signed-off-by: Andrew Morton
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 78d59d75c6c1f7fb9ba7a3bf65cbe98ca7c25a3f
Author: Mathieu Desnoyers
Date: Tue Dec 11 16:54:07 2007 +0100

x86: fall back on interrupt disable in cmpxchg8b on 80386 and 80486

Actually, on 386, cmpxchg and cmpxchg_local fall back on cmpxchg_386_u8/16/32: it disables interrupts around non-atomic updates to mimic the cmpxchg behavior. The comment:

/* Poor man's cmpxchg for 386. Unsuitable for SMP */

already present in cmpxchg_386_u32 tells much about how this cmpxchg implementation should not be used in an SMP context. However, cmpxchg_local can perfectly well use this fallback, since it only needs to be atomic wrt the local cpu.

This patch adds a cmpxchg_486_u64 and uses it as a fallback for cmpxchg64 and cmpxchg64_local on 80386 and 80486.

Q: But why is it called cmpxchg_486 when the other functions are called cmpxchg_386?

A: Because the standard cmpxchg is missing only on 386, but cmpxchg8b is missing both on 386 and 486. Citing Intel's instruction set reference:

cmpxchg: This instruction is not supported on Intel processors earlier than the Intel486 processors.

cmpxchg8b: This instruction encoding is not supported on Intel processors earlier than the Pentium processors.

Q: What's the reason to have cmpxchg64_local on 32-bit architectures? Without that need all this would just be a few simple defines.

A: cmpxchg64_local on 32-bit architectures takes unsigned long long parameters, but cmpxchg_local only takes longs. Since we have cmpxchg8b to execute an 8-byte cmpxchg atomically on Pentium and up, it makes sense to provide a flavor of cmpxchg and cmpxchg_local using this instruction. Also, for 32-bit architectures lacking the 64-bit atomic cmpxchg, it makes sense _not_ to define cmpxchg64 while cmpxchg could still be available. Moreover, cmpxchg64_local will be emulated by disabling interrupts on all architectures where it is not supported atomically. Therefore, we *could* turn cmpxchg64_local into a cmpxchg_local, but it would make the 386/486 fallbacks ugly, make its design different from cmpxchg/cmpxchg64 (which really depend on atomic operations and cannot be emulated), and require __cmpxchg_local to be expressed as a macro rather than an inline function so the parameters would not be fixed to unsigned long long in every case. So I think cmpxchg64_local makes sense there, but I am open to suggestions.

Q: Are there any callers?

A: I am actually using it in LTTng in my timestamping code. I use it to work around CPUs with asynchronous TSCs. I need to update 64-bit values atomically on this 32-bit architecture.

Changelog:
- Ran through checkpatch.

Signed-off-by: Mathieu Desnoyers
Cc: Andi Kleen
Signed-off-by: Andrew Morton
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
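[ Editor's usage sketch for cmpxchg64_local as motivated in the entry above -- hypothetical names, for illustration: monotonically advance a 64-bit timestamp on a 32-bit CPU, atomically with respect to the local cpu only. ]

	/* Sketch: lock-free 64-bit update on a 32-bit CPU, local-cpu atomic. */
	static u64 last_tsc;

	static void update_last_tsc(u64 now)
	{
		u64 old;

		do {
			old = last_tsc;
			if (now <= old)
				return;	/* a newer value is already stored */
		} while (cmpxchg64_local(&last_tsc, old, now) != old);
	}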
commit 1f7faee233617ed96e0032088b27f424e4e92d70
Author: Ralf Baechle
Date: Tue Dec 11 16:54:07 2007 +0100

mips, x86: optimize the i8259 code a bit

The timer code always calls the clock_event_device set_next_event and set_mode methods with interrupts disabled, so there is no need to use spin_lock_irqsave / spin_unlock_irqrestore for those.

Signed-off-by: Ralf Baechle
Acked-by: Thomas Gleixner
Signed-off-by: Andrew Morton
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 27b82f4f1aeaf616ef852e958c7e1983c8ec91c2
Author: Christoph Lameter
Date: Tue Dec 11 16:54:07 2007 +0100

x86: 64-bit, make sparsemem vmemmap the only memory model

Use sparsemem as the only memory model for UP, SMP and NUMA. Measurements indicate that DISCONTIGMEM has a higher overhead than sparsemem. And FLATMEM's benefits are minimal. So I think it's best to simply standardize on sparsemem.

Results of page allocator tests (the test can be had via git from the slab git tree, branch tests). Measurements in cycle counts. 1000 allocations were performed and then the average cycle count was calculated.

Order  FlatMem  Discontig  SparseMem
    0      639        665        641
    1      567        647        593
    2      679        774        692
    3      763        967        781
    4      961       1501        962
    5     1356       2344       1392
    6     2224       3982       2336
    7     4869       7225       5074
    8    12500      14048      12732
    9    27926      28223      28165
   10    58578      58714      58682

(Note that FlatMem is an SMP config and the rest are NUMA configurations.)

Memory use:

SMP Sparsemem
-------------

Kernel size:

   text    data     bss     dec     hex filename
3849268  397739 1264856 5511863  541ab7 vmlinux

             total       used       free     shared    buffers     cached
Mem:       8242252      41164    8201088          0        352      11512
-/+ buffers/cache:      29300    8212952
Swap:      9775512          0    9775512

SMP Flatmem
-----------

Kernel size:

   text    data     bss     dec     hex filename
3844612  397739 1264536 5506887  540747 vmlinux

So 4.5k growth in text size vs. FLATMEM.

             total       used       free     shared    buffers     cached
Mem:       8244052      40544    8203508          0        352      11484
-/+ buffers/cache:      28708    8215344

2k growth in overall memory use after boot.

NUMA discontig:

   text    data     bss     dec     hex filename
3888124  470659 1276504 5635287  55fcd7 vmlinux

             total       used       free     shared    buffers     cached
Mem:       8256256      56908    8199348          0        352      11496
-/+ buffers/cache:      45060    8211196
Swap:      9775512          0    9775512

NUMA sparse:

   text    data     bss     dec     hex filename
3896428  470659 1276824 5643911  561e87 vmlinux

8k text growth. Given that we fully inline virt_to_page and friends now, that is rather good.

             total       used       free     shared    buffers     cached
Mem:       8264720      57240    8207480          0        352      11516
-/+ buffers/cache:      45372    8219348
Swap:      9775512          0    9775512

The total available memory is increased by 8k.

This patch makes sparsemem the default and removes discontig and flatmem support from x86.

Acked-by: Andi Kleen
Signed-off-by: Christoph Lameter
Signed-off-by: Andrew Morton
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit cf603b8758f6905610f6ae9231500a0a7502ec42
Author: Borislav Petkov
Date: Tue Dec 11 16:54:06 2007 +0100

x86: vmlinux_32.lds.S: remove repeated comment from the x86-32 linker script

Remove a repeated comment from the linker script for the x86-32 target.

Signed-off-by: Borislav Petkov
Signed-off-by: Andrew Morton
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
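[ Editor's sketch for the i8259 entry above -- illustrative shape, not the verbatim driver: since the clockevents core invokes these methods with interrupts already off, the plain lock suffices. ]

	/* Sketch (assumed, simplified): callers guarantee irqs are disabled,
	 * so the _irqsave/_irqrestore variants are unnecessary here. */
	static int pit_next_event(unsigned long delta,
				  struct clock_event_device *evt)
	{
		spin_lock(&i8253_lock);		/* was: spin_lock_irqsave() */
		outb_p(delta & 0xff, PIT_CH0);	/* LSB */
		outb_p(delta >> 8, PIT_CH0);	/* MSB */
		spin_unlock(&i8253_lock);	/* was: spin_unlock_irqrestore() */
		return 0;
	}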
commit e47d6fd89996144ef1c7ea8d06baf17bd8ab71ec
Author: Yinghai Lu
Date: Tue Dec 11 16:54:06 2007 +0100

x86: do not set boot cpu in cpu_online_map at x86_64_start_kernel()

In init/main.c, boot_cpu_init() does that later.

Signed-off-by: Yinghai Lu
Cc: Zachary Amsden
Cc: Andi Kleen
Signed-off-by: Andrew Morton
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 4f308d09ca0ea4a4f06057a8f07f5e1a7a08db14
Author: Yinghai Lu
Date: Tue Dec 11 16:54:06 2007 +0100

x86: set cpu_index to nr_cpus instead of 0

The same BIOS will support two/four dual-core/quad-core systems, and will get:

ACPI: LAPIC (acpi_id[0x01] lapic_id[0x00] enabled)
Processor #0 15:1 APIC version 16
ACPI: LAPIC (acpi_id[0x02] lapic_id[0x01] enabled)
Processor #1 15:1 APIC version 16
ACPI: LAPIC (acpi_id[0x03] lapic_id[0x02] enabled)
Processor #2 15:1 APIC version 16
ACPI: LAPIC (acpi_id[0x04] lapic_id[0x03] enabled)
Processor #3 15:1 APIC version 16
ACPI: LAPIC (acpi_id[0x05] lapic_id[0x84] disabled)
ACPI: LAPIC (acpi_id[0x06] lapic_id[0x85] disabled)
ACPI: LAPIC (acpi_id[0x07] lapic_id[0x86] disabled)
ACPI: LAPIC (acpi_id[0x08] lapic_id[0x87] disabled)
ACPI: LAPIC (acpi_id[0x09] lapic_id[0x88] disabled)
ACPI: LAPIC (acpi_id[0x0a] lapic_id[0x89] disabled)
ACPI: LAPIC (acpi_id[0x0b] lapic_id[0x8a] disabled)
ACPI: LAPIC (acpi_id[0x0c] lapic_id[0x8b] disabled)
ACPI: LAPIC (acpi_id[0x0d] lapic_id[0x8c] disabled)
ACPI: LAPIC (acpi_id[0x0e] lapic_id[0x8d] disabled)
ACPI: LAPIC (acpi_id[0x0f] lapic_id[0x8e] disabled)
ACPI: LAPIC (acpi_id[0x10] lapic_id[0x8f] disabled)
SMP: Allowing 16 CPUs, 12 hotplug CPUs

/proc/cpuinfo will then show a bunch of NULL CPUs with cpu_index=0, so assign an impossible cpu_index value initially instead of 0.

Signed-off-by: Yinghai Lu
Cc: Andi Kleen
Signed-off-by: Andrew Morton
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit d7d4be5c86af01e3812070da293d6f9e4d8822e1
Author: Jeremy Fitzhardinge
Date: Tue Dec 11 16:54:06 2007 +0100

xen-mask-_page_pcd-from-ptes

_PAGE_PCD maps a page with caching disabled, which is typically used for mapping hardware registers. Xen never allows it to be set on a mapping, and unprivileged guests never need it since they can't see the real underlying hardware. However, some uncached mappings are made early when probing the (non-existent) APIC, and it's OK to mask off the PCD flag in these cases. This became necessary because Xen started checking for this bit, rather than silently masking it off.

Signed-off-by: Jeremy Fitzhardinge
Signed-off-by: Andrew Morton
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 3f57d32e2b0d11b38b4ff4f2b93c2362323473cb
Author: Florian Fainelli
Date: Tue Dec 11 16:54:06 2007 +0100

x86: Add the RDC machine specific reboot fixup

The RDC R-321x SoC needs a reboot fixup which uses its internal hardware watchdog, set to reset the CPU on the next tick.

Signed-off-by: Florian Fainelli
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner

commit 2c4c11e772059439cf2726628bcacb77e1f0ffdc
Author: Florian Fainelli
Date: Tue Dec 11 16:54:06 2007 +0100

x86: Add support for the RDC R-321x SoC

This patch adds support for the RDC R-321x system-on-chip, also known as R-861x-(G). It uses the generic GPIO API and has support for the on-chip hardware watchdog.

Signed-off-by: Florian Fainelli
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
Signed-off-by: Florian Fainelli Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit d65ae8827318e6957c7f8118009e987aa024114e Author: Florian Fainelli Date: Tue Dec 11 16:54:05 2007 +0100 pci: Add PCI identifiers for the RDC devices This patch defines the PCI identifiers found in the RDC R-321x System-on-Chip. Signed-off-by: Florian Fainelli Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 18e7765c4e0b7e4a08416439b2deec2132963703 Author: Florian Fainelli Date: Tue Dec 11 16:54:05 2007 +0100 x86: Add generic GPIO support to x86 This patch adds the generic GPIO support to the x86 architecture. We do the same as for MIPS: we let the machine override the gpio callbacks and provide default ones in mach-generic. Signed-off-by: Florian Fainelli Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 1297a775e2ae4a8fe51f57910ab8e13d0ac4817b Author: Andres Salomon Date: Tue Dec 11 16:54:05 2007 +0100 x86: GEODE: update GPIO API to support setting multiple GPIOs at once The existing Geode GPIO API only allows for updating one GPIO at once. There are instances where users want to update multiple GPIOs at once. With the current API, they are given two choices; either ignore the GPIO API:

    outl(0xc000, gpio_base + GPIO_OUTPUT_VAL);
    outl(0xc000, gpio_base + GPIO_OUTPUT_ENABLE);

Alternatively, call each GPIO update separately:

    geode_gpio_set(14, GPIO_OUTPUT_VAL);
    geode_gpio_set(15, GPIO_OUTPUT_VAL);
    geode_gpio_set(14, GPIO_OUTPUT_ENABLE);
    geode_gpio_set(15, GPIO_OUTPUT_ENABLE);

Neither is desirable. This patch changes the GPIO API to allow for setting of multiple GPIOs at once; rather than being passed an integer, we pass a bitmask and provide a translation function. The above code would now look like this:

    geode_gpio_set(geode_gpio(14)|geode_gpio(15), GPIO_OUTPUT_VAL);
    geode_gpio_set(geode_gpio(14)|geode_gpio(15), GPIO_OUTPUT_ENABLE);

Since there are no upstream users of the GPIO API yet (afaik), best to change this now. This also adds a bit of sanity checking; it is no longer possible to use a GPIO above 28. Note that the semantics of geode_gpio_isset() have changed: geode_gpio_isset(geode_gpio(3)|geode_gpio(4), ...) will return true only if both GPIOs are set. Signed-off-by: Andres Salomon Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 5a2da98776a6017ee17713f95cee9d2ff0517b2d Author: Vladimir Berezniker Date: Tue Dec 11 16:54:05 2007 +0100 x86_64: sanitize user specified e820 memmap values Sanitize user-specified e820 memory ranges, using the same logic that is applied to the values returned by the BIOS. This ensures consistent handling regardless of the source of the memory mappings. Allows overriding portions of the memory map without specifying one in its entirety (memmap=exactmap). E.g. marking a range of bad RAM as reserved with memmap=48M$528M. The BIOS supplied range

    BIOS-e820: 0000000000100000 - 000000007fe80000 (usable)

becomes

    user: 0000000000100000 - 0000000021000000 (usable)
    user: 0000000021000000 - 0000000024000000 (reserved)
    user: 0000000024000000 - 000000007fe80000 (usable)

Previously this did not work, as the original BIOS range was left untouched while the user-defined range was appended to the end of the memory map.
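The translation function mentioned in the Geode GPIO commit (1297a775) is presumably just a number-to-bitmask helper plus the new <=28 sanity check; a sketch of that shape:

    /* Turn a GPIO number into the bitmask the new API expects. */
    static inline u32 geode_gpio(unsigned int nr)
    {
            BUG_ON(nr > 28);        /* the Geode has no GPIOs above 28 */
            return 1 << nr;
    }

With that, geode_gpio_set(geode_gpio(14) | geode_gpio(15), GPIO_OUTPUT_VAL) updates both pins with a single register write.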
[ tglx: arch/x86 adaptation ] Signed-off-by: Vladimir Berezniker Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit a23e502316719b665e4615b948921b9b17265891 Author: Roland McGrath Date: Tue Dec 11 16:54:05 2007 +0100 x86: TLS cleanup This consolidates the four different places that implemented the same encoding magic for the GDT-slot 32-bit TLS support. The old tls32.c was renamed and is now only slightly modified to be the shared implementation. Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Zachary Amsden Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 2541ad16396e1cb605ca57fed6cff2c0c4a538a4 Author: Roland McGrath Date: Tue Dec 11 16:54:04 2007 +0100 x86: tls32 moved This renames arch/x86/ia32/tls32.c to arch/x86/kernel/tls.c, which does nothing now but paves the way to consolidate this code for 32-bit too. Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Zachary Amsden Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 4f9cb81328f02d98f140849c2af36873f22b9e76 Author: Roland McGrath Date: Tue Dec 11 16:54:03 2007 +0100 x86: desc_empty This replaces the desc_empty macro with an inline. It now easily handles any of the four different types used between 32/64 code to refer to these 8 bytes. It's identical in both asm-x86/processor_64.h and asm-x86/processor_32.h, so if these files ever get merged this function can be in the common code. This also removes the desc_equal macro because nothing uses it. Signed-off-by: Roland McGrath Cc: Andrew Morton Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 35169a39ceb25760a7ae0fa221d76b8e98d52339 Author: Roland McGrath Date: Tue Dec 11 16:54:03 2007 +0100 x86: ptrace fs/gs_base The fs_base and gs_base fields are available in user_regs_struct. But reading these via ptrace (PTRACE_GETREGS or PTRACE_PEEKUSR) does not give a reliably useful value. The thread_struct fields are 0 when do_arch_prctl decided to use a GDT slot instead of MSR_FS_BASE, which it does for a value under 1<<32. This changes ptrace access to fs_base and gs_base to work like PTRACE_ARCH_PRCTL does. That is, it reads the base address that user-mode memory access using the fs/gs instruction prefixes will use, regardless of how it's being implemented in the kernel. The MSR vs. GDT choice is an implementation detail that is pretty much hidden from userland in actual use, and there is no reason that ptrace should give the internal implementation picture rather than the user-mode semantic picture. In the case of setting the value, this can implicitly change the fsindex/gsindex value (also separately in user_regs_struct), which is what happens when the thread calls arch_prctl itself. In a PTRACE_SETREGS, the fs_base change will come after the fsindex change due to the order of the struct, so a change the debugger made to fs_base will have the intended effect, though another part of the user_regs_struct will now differ from what the debugger wrote when read back. This makes PTRACE_ARCH_PRCTL obsolete. We could consider declaring it deprecated and removing it one day, though there is no hurry. For the foreseeable future, debuggers have to assume an old kernel that does not report reliable fs_base/gs_base values in user_regs_struct and stick to PTRACE_ARCH_PRCTL anyway.
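The read-side semantics of the fs/gs_base change (commit 35169a39) can be sketched as follows, assuming the era's thread fields: fsindex selects either a TLS GDT slot (for a sub-4GB base) or MSR_FS_BASE. Field and constant names follow the old x86_64 code but should be treated as illustrative:

    /*
     * Report the base the fs prefix will actually use, hiding the
     * GDT-slot vs. MSR_FS_BASE implementation detail from userland.
     */
    static unsigned long ptrace_get_fs_base(struct task_struct *task)
    {
            if (task->thread.fsindex == FS_TLS_SEL)
                    return get_desc_base(&task->thread.tls_array[FS_TLS]);
            return task->thread.fs;         /* base as set via the MSR */
    }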
Signed-off-by: Roland McGrath Cc: Andrew Morton Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit af843fff5d8ceba41596649fed0d19ac1efcfa8b Author: Roland McGrath Date: Tue Dec 11 16:54:02 2007 +0100 x86: use get_desc_base This changes a couple of places to use the get_desc_base function. They were duplicating the same calculation with different equivalent code. Signed-off-by: Roland McGrath Cc: Andrew Morton Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit f3631ddf7100ea1aceff1d7051f3285e9ad7630e Author: Roland McGrath Date: Tue Dec 11 16:54:02 2007 +0100 x86: get_desc_base This defines the get_desc_base function in asm-x86/desc_64.h to match the one in desc_32.h. If these two files ever get merged together, this function could be the same in both. Signed-off-by: Roland McGrath Cc: Andrew Morton Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit fd2ab59395160127dc9491e78e791982aa218e9e Author: Roland McGrath Date: Tue Dec 11 16:54:02 2007 +0100 x86 vDSO: canonicalize sysenter .eh_frame Some assembler versions automagically optimize .eh_frame contents, changing their size. The CFI in sysenter.S was not using optimal formatting, so it would be changed by newer/smarter assemblers. This ran afoul of the wired constant for padding out the other vDSO images to match its size. This changes the original hand-coded source to use the optimal format encoding for its operations. That leaves nothing more for a fancy assembler to do, so the sizes will match the wired-in expected size regardless of the assembler version. Signed-off-by: Roland McGrath Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 3a45a9a26a624c451dd33b8c650253a9efdac940 Author: Roland McGrath Date: Tue Dec 11 16:54:02 2007 +0100 x86 vDSO: makefile cleanup This cleans up the arch/x86/vdso/Makefile rules for vdso.so to share more code with the vdso32-*.so rules and remove old cruft. Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 288543401b7baf2036cc3a5249aa4ef307457d66 Author: Roland McGrath Date: Tue Dec 11 16:54:02 2007 +0100 x86 vDSO: i386 vdso32 fix: > The .eh_frame sections are different in size, which bumps the sysenter > one into the next alignment datum. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 720c8688bd2634e4212d0af64e83eacfd74486ba Author: Roland McGrath Date: Tue Dec 11 16:54:01 2007 +0100 x86 vDSO: reorder vdso32 code This reorders the code in the 32-bit vDSO images to put the signal trampolines first and __kernel_vsyscall after them. The order does not matter to userland, it just uses what AT_SYSINFO or e_entry says. Since the signal trampolines are the same size in both versions of the vDSO, putting them first is the simplest way to get the addresses to line up. This makes it work to use a more compact layout for the vDSO. Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 702201ff3a59d27c21af546b2d50b65784893e92 Author: Roland McGrath Date: Tue Dec 11 16:54:01 2007 +0100 x86 vDSO: ia32 vsyscall removal This removes all the old vsyscall code from arch/x86/ia32/ that is no longer used because arch/x86/vdso/ code has replaced it. 
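The duplicated calculation that commits f3631ddf/af843fff fold into get_desc_base is the classic descriptor-base reassembly; roughly, as a sketch against the two 32-bit words of a segment descriptor:

    /*
     * A segment descriptor scatters its 32-bit base across bits 16-31
     * of the low word and bits 0-7 / 24-31 of the high word.
     */
    static inline unsigned long get_desc_base(const void *ptr)
    {
            const u32 *desc = ptr;

            return ((desc[0] >> 16) & 0x0000ffff) |
                   ((desc[1] << 16) & 0x00ff0000) |
                   (desc[1] & 0xff000000);
    }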
Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 43ccb7cf8e0bc6f99169d3acaf031d606ad4350f Author: Roland McGrath Date: Tue Dec 11 16:53:59 2007 +0100 x86 vDSO: consolidate vdso32 This makes x86_64's ia32 emulation support share the sources used in the 32-bit kernel for the 32-bit vDSO and much of its setup code. The 32-bit vDSO mapping now behaves the same on x86_64 as on native 32-bit. The abi.syscall32 sysctl on x86_64 now takes the same values that vm.vdso_enabled takes on the 32-bit kernel. That is, 1 means a randomized vDSO location, 2 means the fixed old address. The CONFIG_COMPAT_VDSO option is now available to make this the default setting, the same meaning it has for the 32-bit kernel. (This does not affect the 64-bit vDSO.) The argument vdso32=[012] can be used on both 32-bit and 64-bit kernels to set this parameter at boot time. The vdso=[012] argument still does this same thing on the 32-bit kernel. Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 3d3c0a45fb16ebf8584c978324e50f05c791496a Author: Roland McGrath Date: Tue Dec 11 16:53:58 2007 +0100 x86 vDSO: ia32 vdso32-syscall build This puts the syscall version of the 32-bit vDSO in arch/x86/vdso/vdso32/ for 64-bit IA32 support. This is not used yet, but it paves the way for consolidating the 32-bit vDSO source and build logic all in one place. Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit e68913d241715d77d0ce1f836a1b77eaf462081c Author: Roland McGrath Date: Tue Dec 11 16:53:57 2007 +0100 x86 vDSO: ia32 sysenter_return This changes the 64-bit kernel's support for the 32-bit sysenter instruction to use stored fields rather than constants for the user-mode return address, as the 32-bit kernel does. This adds a sysenter_return field to struct thread_info, as 32-bit has. There is no observable effect from this yet. It makes the assembly code independent of the 32-bit vDSO mapping address, paving the way for making the vDSO address vary as it does on the 32-bit kernel. [ akpm@linux-foundation.org: build fix on !CONFIG_IA32_EMULATION ] Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 5651cb78fb074337130f99116f404be1a05e5819 Author: Roland McGrath Date: Tue Dec 11 16:53:57 2007 +0100 x86 vDSO: ia32_sysenter_target This harmonizes the name for the entry point from the 32-bit sysenter instruction across 32-bit and 64-bit kernels. Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 3072269949a0a4c0bab140fe98691a88b2b74774 Author: Roland McGrath Date: Tue Dec 11 16:53:57 2007 +0100 x86 vDSO: vdso32 setup This moves arch/x86/kernel/sysenter_32.c to arch/x86/vdso/vdso32-setup.c, keeping all the code relating only to vDSO magic in the vdso/ subdirectory. This is a pure renaming, but it paves the way to consolidating the code for dealing with 32-bit vDSOs across CONFIG_X86_32 and CONFIG_IA32_EMULATION.
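The vdso32=[012] boot argument from the consolidation commit (43ccb7cf) maps naturally onto an early __setup hook; a minimal sketch, assuming a vdso_enabled integer that the mapping code consults (0 = off is an assumption; 1 = randomized, 2 = fixed old address per the message):

    static int __init vdso32_setup(char *s)
    {
            vdso_enabled = simple_strtoul(s, NULL, 0);
            return 1;
    }
    __setup("vdso32=", vdso32_setup);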
Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 0c7850b657141f1f7a052cec2a88d9bf47d63677 Author: Roland McGrath Date: Tue Dec 11 16:53:57 2007 +0100 x86 vDSO: i386 vdso32 install This enables 'make vdso_install' for i386 as on x86_64 and powerpc. Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit a0a874ad3b352e85914e652a40f825b4fccb1ae3 Author: Roland McGrath Date: Tue Dec 11 16:53:56 2007 +0100 x86 vDSO: absolute relocs This updates the exceptions for absolute relocs for the new symbol name convention used for symbols extracted from the vDSO images. Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit b758519642abc5bda20bc549812e2d2ba7ada16f Author: Roland McGrath Date: Tue Dec 11 16:53:56 2007 +0100 x86 vDSO: i386 vdso32 This makes the i386 kernel use the new vDSO build in arch/x86/vdso/vdso32/ to replace the old one from arch/x86/kernel/. Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 2cf119b0563cc1d23d295ce281923a63f02abd5b Author: Roland McGrath Date: Tue Dec 11 16:53:56 2007 +0100 x86 vDSO: vdso32 build This builds the 32-bit vDSO images in the arch/x86/vdso subdirectory. Nothing uses the images yet, but this paves the way for consolidating the vDSO build logic all in one place. The new images use a linker script sharing the layout parts from vdso-layout.lds.S with the 64-bit vDSO. A new vdso32-syms.lds is generated in the style of vdso-syms.lds. Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 6a3bc454c4a4a76f16f2f873db9f70c281310021 Author: Roland McGrath Date: Tue Dec 11 16:53:56 2007 +0100 x86 vDSO: arch/x86/vdso/vdso32 This moves the i386 vDSO sources into arch/x86/vdso/vdso32/, a new directory. This patch is a pure renaming, but paves the way for consolidating the vDSO build logic. Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 1f54b4df109aef5b5979813947d60545264cf95a Author: Roland McGrath Date: Tue Dec 11 16:53:56 2007 +0100 x86 vDSO: harmonize asm-offsets This change harmonizes the asm-offsets macros used in the 32-bit vDSO across 32-bit and 64-bit builds. It's a purely cosmetic change for now, but it paves the way for consolidating the 32-bit vDSO builds. Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 7756b7e5dfda5ff83e7fae4f5603163d80ffe85a Author: Roland McGrath Date: Tue Dec 11 16:53:56 2007 +0100 x86 vDSO: new layout This revamps the vDSO linker script to lay things out with the best packing of the data and good, separate alignment of the code. The rigid layout using VDSO_TEXT_OFFSET no longer matters to the kernel. I've moved the layout parts of the linker script into a new include file, vdso-layout.lds.S; this is in preparation for sharing the script for the 32-bit vDSO builds too. 
Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit fdd57efe707ee803aa6029945771ceae6700f3cd Author: Roland McGrath Date: Tue Dec 11 16:53:56 2007 +0100 x86 vDSO: remove vdso-syms.o Get rid of vdso-syms.o from the kernel link. We don't need it any more. Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit a5c0b67d1ccc33582f9c79f2fb4e0911eeb4ec4c Author: Roland McGrath Date: Tue Dec 11 16:53:56 2007 +0100 x86 vDSO: use vdso-syms.lds This patch changes the kernel's references to addresses in the vDSO image to be based on the symbols defined by vdso-syms.lds instead of the old vdso-syms.o symbols. This is all wrapped up in a macro defined by the new asm-x86/vdso.h header; that's the only place in the kernel source that has to know the details of the scheme for getting vDSO symbol values. Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit c22a6aada21108e00c8384e9c9bb24067d5b670c Author: Roland McGrath Date: Tue Dec 11 16:53:55 2007 +0100 x86 vDSO: generate vdso-syms.lds This patch adds a new way of extracting symbols from the built vDSO image. This is much simpler and less fragile than using ld -R; it removes the need to control the DSO layout quite so exactly. I was clearly unduly distracted by clever ld uses when I did the original vDSO implementation. Signed-off-by: Roland McGrath Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit db37947e8dadbd886f942540083b97f65d56dd32 Author: Jiri Kosina Date: Tue Dec 11 16:53:55 2007 +0100 x86: randomize brk Randomize the location of the heap (brk) for i386 and x86_64. The location is randomized in the range starting at the current brk location up to a 0x02000000 offset, for both architectures. This, together with pie-executable-randomization.patch and pie-executable-randomization-fix.patch, should make the address space randomization on i386 and x86_64 complete. Arjan says: This is known to break older versions of some emacs variants, whose dumper code assumed that the last variable declared in the program is equal to the start of the dynamically allocated memory region. (The dumper is the code where emacs effectively dumps core at the end of its compilation stage; this coredump is then loaded as the main program during normal use.) iirc this was 5 years or so ago; we found this way back when I was at RH and we first did the security stuff there (including this brk randomization). It wasn't all variants of emacs, and it got fixed as a result (I vaguely remember that emacs already had code to deal with it for other archs/oses, just ifdeffed wrongly). It's a rare and wrong assumption as a general thing, just on x86 it mostly happened to be true (but to be honest, it'll break too if gcc does something fancy or if the linker does a non-standard order). Still, it's something we should at least document. Note 2: afaik it only broke the emacs *build*. I'm not 100% sure about that (it IS 5 years ago) though.
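A sketch of the brk randomization (commit db37947e), assuming the kernel's randomize_range() helper picks a page-aligned value in the window; the exact fallback behavior is an assumption:

    unsigned long arch_randomize_brk(struct mm_struct *mm)
    {
            /* pick a heap start somewhere in [brk, brk + 32MB) */
            unsigned long range_end = mm->brk + 0x02000000;

            /* randomize_range() returns 0 on failure: keep the old brk then */
            return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
    }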
[akpm@linux-foundation.org: deuglification] Signed-off-by: Jiri Kosina Cc: Arjan van de Ven Cc: Roland McGrath Cc: Jakub Jelinek Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit ba98b4eebce01732cc469fb8007fa40d65e77c2d Author: Robert Richter Date: Tue Dec 11 16:53:54 2007 +0100 Extended interrupt LVT support for AMD Barcelona The macro definitions in apicdef.h have also been updated. The patch is relative to the x86/cleanup tree. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 4ac31d69ad754c3d4b126330b0dc49c3b6621686 Author: Christoph Lameter Date: Tue Dec 11 16:53:53 2007 +0100 x86: make stack size configurable Make the kernel stack size configurable. SGI NUMA configurations may need more stack because cpumasks and nodemasks are at times kept on the stack. This patch allows running with 16k or 32k kernel stacks. [tglx@linutronix.de: add range check and dependencies and fix the !NUMA case] Signed-off-by: Christoph Lameter Cc: Andi Kleen Cc: Mike Travis Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 354f2a7015a8b8a5870548399fe54cad8e370583 Author: Barry Kasindorf Date: Tue Dec 11 16:53:49 2007 +0100 oprofile: op_model_athlon.c support for AMD family 10h barcelona performance counters This patch is for controlling the upper 32 bits of the event ctrl MSRs. This includes the upper 4 bits of the event select and the Guest Only and Host Only bits. This patch is necessary to make Event Based Profiling work reliably on a Family 10h processor. [akpm@linux-foundation.org: checkpatch.pl fixes] Signed-off-by: Barry Kasindorf Signed-off-by: Robert Richter Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 812fab48d989a46f6bdc3495261c27013be79af1 Author: Yinghai Lu Date: Tue Dec 11 16:53:49 2007 +0100 x86: check and enable MMCONFIG for AMD Family 10h Opteron Check and enable MMCONFIG for the AMD Family 10h Opteron. [akpm@linux-foundation.org: section fix] Signed-off-by: Yinghai Lu Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 8b7714794f4f40a325790e2fdc5147fe9a14f2e9 Author: Yinghai Lu Date: Tue Dec 11 16:53:42 2007 +0100 x86: set cfg_size for AMD Family 10h in case MMCONFIG is used Reuse pci_cfg_space_size but skip checking the PCI Express and PCI-X CAP IDs. Signed-off-by: Yinghai Lu Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit edf795f9c0a083027616bb8555ffe4df0f5c791a Author: Yinghai Lu Date: Tue Dec 11 16:53:42 2007 +0100 x86: check MSR to get mmconfig for amd family 10h opterons So even if MCFG is not there, we can still use MMCONFIG. Signed-off-by: Yinghai Lu Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 1e04909e0421811b227f99f3867974c591b3f734 Author: Robert Hancock Date: Tue Dec 11 16:53:41 2007 +0100 x86: validate against ACPI motherboard resources This patch adds validation of the MMCONFIG table against the ACPI reserved motherboard resources. If the MMCONFIG table is found to be reserved in ACPI, we don't bother checking the E820 table. The PCI Express firmware spec apparently tells BIOS developers that reservation in ACPI is required and E820 reservation is optional, so checking against ACPI first makes sense. Many BIOSes don't reserve the MMCONFIG region in E820 even though it is perfectly functional; the existing check needlessly disables MMCONFIG in these cases.
In order to do this, MMCONFIG setup has been split into two phases. If PCI configuration type 1 is not available then MMCONFIG is enabled early as before. Otherwise, it is enabled later after the ACPI interpreter is enabled, since we need to be able to execute control methods in order to check the ACPI reserved resources. Presently this is just triggered off the end of ACPI interpreter initialization. There are a few other behavioral changes here:

- Validate all MMCONFIG configurations provided, not just the first one.
- Validate that the entire required length of each configuration, according to the provided ending bus number, is reserved, not just the minimum required allocation.
- Validate that the area is reserved even if we read it from the chipset directly and not from the MCFG table. This catches the case where the BIOS didn't set the location properly in the chipset and has mapped it over other things it shouldn't have.

This also cleans up the MMCONFIG initialization functions so that they simply do nothing if MMCONFIG is not compiled in. Based on an original patch by Rajesh Shah from Intel. [akpm@linux-foundation.org: many fixes and cleanups] Signed-off-by: Robert Hancock Signed-off-by: Andi Kleen Cc: Rajesh Shah Cc: Jesse Barnes Acked-by: Linus Torvalds Cc: Andi Kleen Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 4c733f233e790a62d2a704ec4a5961ba32803d0c Author: Andi Kleen Date: Tue Dec 11 16:53:41 2007 +0100 x86: untangle __init references between IO data An earlier patch added IO APIC setup into local APIC setup. This caused modpost warnings. Fix them by untangling setup_local_APIC() and splitting it into smaller functions. The IO APIC initialization is only called for the BP init. Also removed some outdated debugging code and did minor cleanup. [ tglx: arch/x86 adaptation ] Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit ea9a42e2ce5d4af9760d82d03fa201292b30ac31 Author: Yinghai Lu Date: Tue Dec 11 16:53:41 2007 +0100 x86: use core id bits for apicid_to_node initialization We should use core id bits instead of max cores, to handle later AMD quad-core Opterons with downed cores. [ tglx: arch/x86 adaptation ] Signed-off-by: Yinghai Lu Signed-off-by: Andi Kleen Cc: Christoph Lameter Cc: Len Brown Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 31c2254bd0a3edd5003a0c8037b644c273ffa2bd Author: Yinghai Lu Date: Tue Dec 11 16:53:41 2007 +0100 store core id bits in cpuinfo_x86 We need to store the core id bits in cpuinfo_x86 in early_identify_cpu. We then use it to create the apicid_to_node array in k8topology.c. Signed-off-by: Yinghai Lu Signed-off-by: Andi Kleen Cc: Christoph Lameter Cc: Andi Kleen Cc: Len Brown Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit ad9c624d8d83ae66f13faf90c9022c3f6cc364cd Author: Adrian Bunk Date: Tue Dec 11 16:53:41 2007 +0100 i386: remove -maccumulate-outgoing-args Contrary to the comment "newer gccs do it by default", newer gcc versions default to -maccumulate-outgoing-args only with CONFIG_CC_OPTIMIZE_FOR_SIZE=n, and then only with some CPU settings.
Measured with an i386 defconfig, gcc 4.2.1 and kernel 2.6.23-rc1 ("orig" is the plain kernel, "changed" is with -maccumulate-outgoing-args removed):

$ ls -la vmlinux*
-rwxrwxr-x 1 bunk bunk 6269713 2007-07-24 22:19 vmlinux.changed
-rwxrwxr-x 1 bunk bunk 6425361 2007-07-24 22:19 vmlinux.orig
$ size vmlinux.*
   text    data     bss     dec     hex filename
4493465  504108  614400 5611973  55a1c5 vmlinux.changed
4646160  504108  614400 5764668  57f63c vmlinux.orig
$

That's a 2.5% size increase that does for sure hurt small systems. If the stack unwinder ever comes back and needs this as indicated in the comment, adding it to the cflags when the user enables the unwinder would be a better option. [ tglx: arch/x86 adaptation ] Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit b811488d4a95279c3e551be09018b3e2c04599d9 Author: Yinghai Lu Date: Tue Dec 11 16:53:41 2007 +0100 x86: clear IO_APIC before enabling apic error vector Some apic id lifting systems (4-socket quad core, 8-socket quad core) will do apic id lifting for the BSP, but the io-apic regs for ExtINT still use 0 as the destination. So when we enable the apic error vector on the BSP, we will get one APIC error:

CPU: L1 I Cache: 64K (64 bytes/line), D cache 64K (64 bytes/line)
CPU: L2 Cache: 512K (64 bytes/line)
CPU 0/4 -> Node 0
CPU: Physical Processor ID: 1
CPU: Processor Core ID: 0
SMP alternatives: switching to UP code
ACPI: Core revision 20070126
enabled ExtINT on CPU#0
ESR value before enabling vector: 00000000, after: 0000000c
APIC error on CPU0: 0c(08)
ENABLING IO-APIC IRQs
Synchronizing Arb IDs.

So move enable_IO_APIC from setup_IO_APIC into setup_local_APIC and call it before enabling the apic error vector. [ tglx: arch/x86 adaptation ] Signed-off-by: Yinghai Lu Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit b9fac3d7296b07292f1aba467c23348aabce126c Author: Thomas Gleixner Date: Tue Dec 11 16:53:40 2007 +0100 x86: cleanup kernel/setup_64.c Clean it up before applying more patches to it. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit b0e41c5d046b935e864cfc49ad172e4c3ded67fc Author: Steven Rostedt Date: Tue Dec 11 16:53:40 2007 +0100 remove unused tsk_thread from asm-offsets_64.c So this patch simply removes the "thread" entry from asm-offsets.c since I can't find an owner for it. Signed-off-by: Steven Rostedt Cc: Andrew Morton Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 4e32cf56a10f14d0a5ee8e79c041c0822b9e0b7e Author: Dave Jones Date: Tue Dec 11 16:53:40 2007 +0100 Use CR0 defines. Signed-off-by: Dave Jones Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 46a23f9d0a59e78c610bff9ff3f726988c866364 Author: Thomas Gleixner Date: Tue Dec 11 16:53:40 2007 +0100 x86: merge resume-trace.h variants Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 2b613f8d41d41427edce2ac5bdb117abd6a274f9 Author: Thomas Gleixner Date: Tue Dec 11 16:53:40 2007 +0100 x86: merge topology.h variants Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit efa5e3f4f09c3871882f8332e4da13fbbcbaa056 Author: Thomas Gleixner Date: Tue Dec 11 16:53:40 2007 +0100 x86: consolidate topology_32/64.h Reorder defines and do white space / coding style cleanups to get a readable diff. Also convert the macros to inline functions.
Move the pci-related inlines to pci.h. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 6c41d3019a0444fd61cf84c6bcbe2ebdd223f1ac Author: Thomas Gleixner Date: Tue Dec 11 16:53:39 2007 +0100 x86: adjust numa 32 namespace Use the 64bit numa variable names for numa32 as well. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 57688d21b4f6a6ab42434f796ffe23c290ebe357 Author: Thomas Gleixner Date: Tue Dec 11 16:53:39 2007 +0100 x86: fixup numa 64 namespace Using a variable name which is the same as a macro name is not really smart. Change the variable names and fix up all users. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 83b839d1fe2a7408ea75d6763f0c0d71a0f02840 Author: Thomas Gleixner Date: Tue Dec 11 16:53:39 2007 +0100 x86: cleanup numa_64.c Clean it up before applying more patches. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 1afdb8e5de6bb5f9bda1d92db9e045c53c85aa87 Author: Thomas Gleixner Date: Tue Dec 11 16:53:39 2007 +0100 x86: merge include/asm-x86/sparsemem.h Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 7e3d37fd68be047af79579e25872d0589fcbe0eb Author: Thomas Gleixner Date: Tue Dec 11 16:53:39 2007 +0100 x86: merge include/asm-x86/sparsemem.h Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 1b3d75bb2ca87ffa8a166aadc55fb3398f74187d Author: Thomas Gleixner Date: Tue Dec 11 16:53:38 2007 +0100 x86: put all kern_addr_valid() incarnations to pgtable.h Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit a6dd54484fdf1f01548f1fcf9ae012f6a456b46a Author: Thomas Gleixner Date: Tue Dec 11 16:53:38 2007 +0100 x86: merge acpi_32/64.h Merge the files. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 91f377cd4d54ab0a90a09cf80beb4e4b43382b24 Author: Thomas Gleixner Date: Tue Dec 11 16:53:38 2007 +0100 x86: cleanup acpi_32/64.h Fix the coding style to get a readable diff. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit c5c25a33d564bf227adf2ff9487dcfa5dbe69f3c Author: Thomas Gleixner Date: Tue Dec 11 16:53:38 2007 +0100 x86: cleanup smp.h variants Bring the smp.h variants into sync to prepare merging and paravirt support. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit aa6dd8d0f7de627c5b28b476f40f8118d9f1246d Author: Thomas Gleixner Date: Tue Dec 11 16:53:38 2007 +0100 x86: merge mpspec variants The delta is now minimal. Merge them. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 4dfc11064b55c1787b556461f3124f4bc3be2cce Author: Thomas Gleixner Date: Tue Dec 11 16:53:38 2007 +0100 x86: cleanup mpspec variants Bring the mpspec variants into sync to prepare merging and paravirt support. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 032fcd450093ce0e5919a4deb9d4d027a5de84e3 Author: Thomas Gleixner Date: Tue Dec 11 16:53:38 2007 +0100 x86: merge tlbflush.h variants The delta is now minimal. Merge them. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 1f4ccf93fb5816bdf2acbfa8bdc7d5cdd11d0d50 Author: Thomas Gleixner Date: Tue Dec 11 16:53:37 2007 +0100 x86: cleanup tlbflush.h variants Bring the tlbflush.h variants into sync to prepare merging and paravirt support.
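The macro-to-inline conversion in the topology consolidation (commit efa5e3f4) is the same pattern used elsewhere in this series: trade textual substitution for type checking. A sketch, assuming the i386-style cpu_to_node_map lookup table:

    /* was: #define cpu_to_node(cpu) ((int)cpu_to_node_map[cpu]) */
    static inline int cpu_to_node(int cpu)
    {
            return cpu_to_node_map[cpu];    /* argument is now type-checked */
    }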
Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 5f1171d9d33ba0a2ccc59713308fa2af69572072 Author: Thomas Gleixner Date: Tue Dec 11 16:53:37 2007 +0100 x86: cleanup boot_ioremap_32.c Coding style cleanup before modifying the file. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit ea943a73f225c6bab2f273120ba55aa7b823fea9 Author: Thomas Gleixner Date: Tue Dec 11 16:53:37 2007 +0100 x86: merge spinlock.h variants Finally merge them together. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit d55f9a459c75c6a572245c73b12b6450a7ad6ed6 Author: Thomas Gleixner Date: Tue Dec 11 16:53:37 2007 +0100 x86: spinlock_32/64 substitute types and instructions Use _slock_t for the spinlock data types and replace the instructions by string defines, which makes the code of the 32/64 bit versions more or less identical. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 532f4ec02ca4be686cf45cb17701dfa341122392 Author: Thomas Gleixner Date: Tue Dec 11 16:53:37 2007 +0100 x86: spinlock_32/64 match the jump labels and symbols Match the jump labels in the 32/64 variants and switch the 64bit version to symbols, so the functions are now almost identical except for the operand size. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit a426341949a0a8a99a7c13d3108f41f3a4c8863b Author: Thomas Gleixner Date: Tue Dec 11 16:53:37 2007 +0100 x86: use immediates instead of RW_LOCK_BIAS_STR Use immediates instead of RW_LOCK_BIAS_STR. This makes the code more readable and gets rid of the string constant. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 228bf61a4c1d33ca93f0794e1966241c44382082 Author: Thomas Gleixner Date: Tue Dec 11 16:53:37 2007 +0100 x86: fix asm constraints in spinlock_32/64.h Use the correct constraints for the spinlock assembler functions: read (modify) write functions need "+m" instead of "=m". Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 4d8066104d6381f564c882a49ad02992ba327823 Author: Glauber de Oliveira Costa Date: Tue Dec 11 16:53:37 2007 +0100 x86: consolidate spinlock.h The cli and sti instructions need to be replaced by paravirt hooks. For the i386 architecture, this is already done. The code requirements aren't much different from the x86_64 POV, so this part is consolidated in the common header. Signed-off-by: Glauber de Oliveira Costa Signed-off-by: Steven Rostedt Acked-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 07564fbf852a5e25dc2e237e248e307b783dab9e Author: Glauber de Oliveira Costa Date: Tue Dec 11 16:53:37 2007 +0100 irqflags consolidation This patch consolidates the irqflags include files containing common paravirt definitions. The native definitions for interrupt handling, halt, and such are the same for 32 and 64 bit, and they are kept in irqflags.h. The differences are split into the arch-specific files. The syscall function irq_enable_sysexit has a very specific i386 naming, and its name is therefore changed to a more general one. Signed-off-by: Glauber de Oliveira Costa Signed-off-by: Steven Rostedt Acked-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit dfaaaf1d91ab90bac4098ed50e414fd420326574 Author: Hiroshi Shimamoto Date: Tue Dec 11 16:53:37 2007 +0100 x86: clean up nmi_32/64.c Clean up and make nmi_32/64.c more similar:
- white space and coding style cleanup.
- nmi_cpu_busy is available on CONFIG_SMP.
- move the functions __acpi_nmi_enable, acpi_nmi_enable, __acpi_nmi_disable and acpi_nmi_disable.
- make variable names more similar.

Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 8713da3b54cf47784f4b077508c27856fbf0cce2 Author: clameter@sgi.com Date: Tue Dec 11 16:53:36 2007 +0100 x86: clean up stack allocation and free Clean up the allocation and freeing of stacks a bit by using a __GFP_ZERO flag instead of memset. Signed-off-by: Christoph Lameter Cc: Andi Kleen Cc: Mike Travis Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit b17d43fd96b9cb614b887fe618139cd8bfc345b7 Author: Randy Dunlap Date: Tue Dec 11 16:53:36 2007 +0100 x86: bitops_32.h style cleanups Coding style cleanups in x86/bitops_32.h:
- drop space in "* addr"
- whitespace & indentation fixes
- spello fixes

Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 1bc3afafe06222f2edcafdadb6b6acd2a8109aab Author: Bernhard Walle Date: Tue Dec 11 16:53:36 2007 +0100 x86: remove extern declarations for code, data, bss resources This patch removes the extern struct resource declarations for data_resource, code_resource and bss_resource on x86 and declares these three structures static, as done on other architectures like IA64. On i386, these structures are moved to setup_32.c (from e820_32.c) because that code is not specific to e820 and is also required on EFI systems. That makes the "extern" reference superfluous. On x86_64, data_resource, code_resource and bss_resource are passed to e820_reserve_resources() as arguments, just as done on i386 and IA64. That also avoids the "extern" reference and makes it possible to declare them static. Signed-off-by: Bernhard Walle Cc: "Luck, Tony" Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit e3d2d8c06658f9697230b68702fdad2d6f90da7a Author: Cyrill Gorcunov Date: Tue Dec 11 16:53:36 2007 +0100 x86: remove dead code in ia32-emu Remove the useless second check of the fsave argument in the save_i387_ia32() routine. It's possible the compiler does the same, but it is much better to remove the dead code explicitly. Signed-off-by: Cyrill Gorcunov Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit dd21e3efb5f20b467ed8a552efe3be575adcd862 Author: Lucas Woods Date: Tue Dec 11 16:53:36 2007 +0100 x86: remove duplicate includes Signed-off-by: Lucas Woods Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 41a824ecf85b06c3b117096218201fce3927d603 Author: Paul Jimenez Date: Tue Dec 11 16:53:36 2007 +0100 x86: mtrr use type bool [RESEND AGAIN] This is a janitorish patch to 1) remove private TRUE/FALSE #def's in favor of using the standard enum from linux/stddef.h and 2) switch the variables holding those values to type 'bool' (from linux/types.h), since it both seems more appropriate and allows for potentially better optimization. As a truly minor aside, I removed a couple of comments documenting a 'do_safe' parameter that seems to no longer exist.
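The constraint fix in commit 228bf61a (a few entries up) matters because a locked decrement both reads and writes the lock word; with "=m" the compiler may assume the old value is dead. A simplified sketch of the corrected i386-style lock sequence:

    static inline void __my_spin_lock(raw_spinlock_t *lock)
    {
            asm volatile("1: lock; decb %0\n\t"     /* read-modify-write */
                         "jns 3f\n"
                         "2: rep; nop\n\t"          /* spin politely */
                         "cmpb $0, %0\n\t"
                         "jle 2b\n\t"
                         "jmp 1b\n"
                         "3:"
                         : "+m" (lock->slock)       /* "+m": input AND output */
                         :
                         : "memory");
    }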
Signed-off-by: Paul Jimenez Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 0cf978a67b0da5d17cfb88e804d80fdea9361222 Author: Adrian Bunk Date: Tue Dec 11 16:53:35 2007 +0100 x86: pci-dma_64.c: cleanups This patch contains the following cleanups: - make the needlessly global iommu_setup() static - remove the unused EXPORT_SYMBOL(iommu_merge) Signed-off-by: Adrian Bunk Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 9c748b9135ea9cea96d92fe43052c9bce882fca3 Author: Adrian Bunk Date: Tue Dec 11 16:53:35 2007 +0100 x86: pci-calgary_64.c: make a variable static "debugging" is a horrible name for a global variable - thankfully it can become static. Also put it out of __read_mostly so that gcc no longer has to emit it at all. Signed-off-by: Adrian Bunk Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 1a77820902a3e8c16f5266a761f362d582435ad4 Author: Adrian Bunk Date: Tue Dec 11 16:53:35 2007 +0100 x86: nmi_64.c: make code static This patch makes the following needlessly global code static: - panic_on_timeout - setup_nmi_watchdog() Signed-off-by: Adrian Bunk Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 4a4e2bef99d7aec9fd3405dc315c203cb6a9cadb Author: Adrian Bunk Date: Tue Dec 11 16:53:35 2007 +0100 x86 mce_64.c: make struct mcelog static This patch makes the needlessly global struct mcelog static. Signed-off-by: Adrian Bunk Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 90068863f1dac22ffe7a88fccb985404061daff4 Author: Hiroshi Shimamoto Date: Tue Dec 11 16:53:35 2007 +0100 x86: io_apic_64.c: remove unused config check CONFIG_IRQBALANCE doesn't exist on x86_64. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit d32359aabe8a4628d810d0733e148c9e2a241e35 Author: Adrian Bunk Date: Tue Dec 11 16:53:35 2007 +0100 x86 e820_64.c: make 2 functions static This patch makes the following needlessly global functions static: - e820_print_map() - early_panic() Signed-off-by: Adrian Bunk Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit d6fb0a3559c71332d7458285685fdd878bbc16a5 Author: H. Peter Anvin Date: Tue Dec 11 16:53:34 2007 +0100 x86: actually merge This actually merges into . Signed-off-by: H. Peter Anvin Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit b48faadb48661c6db6cbf102c0389a590a58b537 Author: H. Peter Anvin Date: Tue Dec 11 16:53:34 2007 +0100 x86: prepare merger of Prepare for merging by making the 32- and 64-bit versions textually identical. This involves: - removing arbitrary header inclusion differences - reorganizing the 32-bit version slightly to match the 64-bit version - using to unify the assembly code - renaming struct paravirt_patch to struct paravirt_patch_site in the 64-bit version to match the 32-bit version; there are no references to struct paravirt_patch elsewhere in the tree. Signed-off-by: H. Peter Anvin Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 2d9510ff82cb5c2c828170c6565bbc7ee68cf696 Author: Paul Jimenez Date: Tue Dec 11 16:53:34 2007 +0100 x86: Make i8259_64 more _32-like Howdy! 
Here's a simple janitorish patch for you: This patch mainly hinges around two includes and their ramifications: #include which provides cached_{slave,master}_mask #include which provides PIC_{MASTER,SLAVE}_{IMR,CMD} Adding these two includes and using those half dozen or so definitions removed 140+ lines of diffs between i8259_32.c and i8259_64.c, thus making it easier for the real substantive differences between them to show up, and hopefully therefore making it easier to eventually merge the two. All the warnings that checkpatch.pl throws (missing spaces after commas and >80 character lines) exist intentionally to match i8259_32.c. Signed-off-by: Paul Jimenez Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit a1e701939a29f9737e14aee3cbf0b4fb2fe3f6c7 Author: Thomas Gleixner Date: Tue Dec 11 16:53:34 2007 +0100 x86: move 8259 defines to i8259.h Move the i8259 defines and remove the now-unneeded io_ports.h. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 2fb990fa8c8c4d886ed00dbf61efeb43312d6b04 Author: Adrian Bunk Date: Tue Dec 11 16:53:33 2007 +0100 x86: unexport __{read,write}_lock_failed This patch removes the unused exports for __{read,write}_lock_failed. Signed-off-by: Adrian Bunk Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 0346c062e0dc562f0c407769977c115a89c1f146 Author: Dave Jones Date: Tue Dec 11 16:53:33 2007 +0100 Remove more bogus filenames in comments. Signed-off-by: Dave Jones Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit f9cc097cc2b8e6738a01f2f3d0ed3ec1c5044057 Author: Thomas Gleixner Date: Tue Dec 11 16:53:33 2007 +0100 x86: Nuke a ton of unused exports Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit c28b9c28fb97b6f408154a4d7d0883d919a2f039 Author: Thomas Gleixner Date: Tue Dec 11 16:53:33 2007 +0100 x86: Remove dead code and exports No users. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 114f366db1c28da70adaeb03130c8dda6ba94e25 Author: Thomas Gleixner Date: Tue Dec 11 16:53:33 2007 +0100 x86: nuke a ton of dead hpet code No users, just ballast. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 00e8c51ed76fa350cf21341d24f4172377f6a40a Author: Thomas Gleixner Date: Tue Dec 11 16:53:32 2007 +0100 x86: smp_64.c: Remove unused exports and cleanup while at it The exports are used nowhere; there is no reason why they were ever introduced. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit aa399dbf021253f685039b621bc545cf9a094b95 Author: Thomas Gleixner Date: Tue Dec 11 16:53:32 2007 +0100 x86: clean up arch/x86/kernel/time_64.c includes Reduce the "include everything" list to the minimum. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit b4e66a56fd23bb053e49d350a48c7cec8e5f571f Author: Thomas Gleixner Date: Tue Dec 11 16:53:32 2007 +0100 x86: share rtc code Remove the rtc code from time_64.c and add the extra bits to the i386 path. The ACPI century check is probably valid for i386 as well, but this is material for a separate patch. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 523ec58d78870efc5e0d8b6ab1bd6e4a7ff5bbd9 Author: Thomas Gleixner Date: Tue Dec 11 16:53:32 2007 +0100 x86: isolate the rtc code for sharing The inline code from mach-default/mach_time.h is moved to arch/x86/kernel/rtc.c and the header files are adjusted. Shrink the three dozen includes to the ones we really need.
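A sketch of the kind of substitution the i8259 unification (commit 2d9510ff above) enables; the PIC_* names are the i386 i8259 constants the message refers to, standing in for the standard 0x20/0x21/0xa0/0xa1 PIC ports:

    /* Mask every 8259A input, by name instead of magic I/O ports. */
    static void mask_both_8259A(void)
    {
            outb(0xff, PIC_MASTER_IMR);     /* was: outb(0xff, 0x21); */
            outb(0xff, PIC_SLAVE_IMR);      /* was: outb(0xff, 0xa1); */
    }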
Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 3eeb2c75f2f56f68be69eb5b094ededc11e57589 Author: Thomas Gleixner Date: Tue Dec 11 16:53:32 2007 +0100 x86: unify mc146818rtc.h - prepare for sharing rtc code Unify mc146818rtc.h by adding the rtc_cmos_read/write functions to time_64.c. This is a preparatory patch to finally share the rtc code, which is unsurprisingly similar. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit e527cfd1a50ebc38ffade4b5cc4d81fde3677a31 Author: Thomas Gleixner Date: Tue Dec 11 16:53:32 2007 +0100 x86: remove the duplicated arch/x86/ia32/mmap32.c Use mmap_32.c in arch/x86/mm instead. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 8f10e7ac416067ac5aca3b0aa0adf943b7add4ad Author: Thomas Gleixner Date: Tue Dec 11 16:53:31 2007 +0100 x86: clean up arch/x86/mm/mmap_32/64.c White space and coding style cleanup. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 535883e934be578d7708d7377349b9899d63224d Author: Thomas Gleixner Date: Tue Dec 11 16:53:31 2007 +0100 x86: clean up arch/x86/kernel/vsmp_64.c White space and coding style cleanup. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 6a7eafb41b6ba5fec5974e37afdf0f442f7db87f Author: Thomas Gleixner Date: Tue Dec 11 16:53:31 2007 +0100 x86: clean up ioport_32.c Remove unused variables, rename the "unused" argument to regp. It is used! Coding style fixes. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 2ca387a1b9edb596ac3c251db30762ae02c08032 Author: Thomas Gleixner Date: Tue Dec 11 16:53:31 2007 +0100 x86: simplify set_bitmap in ioport_32.c Simplify set_bitmap(). This is not in a hotpath and we really can use the straightforward loop through those bits. A similar implementation is used in the 64 bit code as well. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit c70129f7bdf2af67d8e1e1ffcab3d1595b0c2fd4 Author: Thomas Gleixner Date: Tue Dec 11 16:53:31 2007 +0100 x86: merge include/asm-x86/scatterlist.h Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 6444d8dee0dd7fb88ea3231ea06cb18c405772f1 Author: Thomas Gleixner Date: Tue Dec 11 16:53:31 2007 +0100 x86: merge include/asm-x86/dma.h Almost identical. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 9b1d1c3e0a62f44b15b2c523756cfe188d8e53bc Author: Thomas Gleixner Date: Tue Dec 11 16:53:31 2007 +0100 x86: merge futex_32/64.h Finally merge them together. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit ed20eec7adb4052d61b8148bc238cb511c5d75ba Author: Thomas Gleixner Date: Tue Dec 11 16:53:30 2007 +0100 x86: prepare merging futex_32/64.h Replace .quad/.long with a define and use the same asm syntax for i386 and x86_64. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 975680c983c97c6d7d02a2c2b5f8aad27927dd53 Author: Thomas Gleixner Date: Tue Dec 11 16:53:30 2007 +0100 x86: prepare merging arch/x86/kernel/apic_32/64.c Shuffle code around, so we get a readable diff. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit e7b10bbeaf5978bc472444e1fa79bd690df8a0a7 Author: Thomas Gleixner Date: Tue Dec 11 16:53:30 2007 +0100 x86: make smp_local_timer_interrupt() static Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 657bff2e787056f1a89597c844971c3f1c3250d5 Author: Thomas Gleixner Date: Tue Dec 11 16:53:30 2007 +0100 x86: move ack_bad_irq into irq code Match i386, where we have this in the irq code. It belongs there.
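The rtc_cmos_read/write accessors named in the mc146818rtc.h commit (3eeb2c75) follow the classic CMOS index/data port pattern; a minimal sketch with locking omitted for brevity:

    #define RTC_PORT(x)     (0x70 + (x))

    unsigned char rtc_cmos_read(unsigned char addr)
    {
            outb(addr, RTC_PORT(0));        /* select the CMOS register */
            return inb(RTC_PORT(1));        /* read its value */
    }

    void rtc_cmos_write(unsigned char val, unsigned char addr)
    {
            outb(addr, RTC_PORT(0));
            outb(val, RTC_PORT(1));
    }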
Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 5419baecd4e9d6bc9c04cf73e64cbad07c20d721 Author: Thomas Gleixner Date: Tue Dec 11 16:53:30 2007 +0100 x86: move ioapic code where it belongs The commit 399287229c775a8962a852a761d65dc9475dec7c hacked the ioapic resource mapping into apic.c for no good reason. Move the code into io_apic_64.c where it belongs. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 5cccb5f5d086838aab704bb156c3a8de7ff71fd9 Author: Thomas Gleixner Date: Tue Dec 11 16:53:29 2007 +0100 x86: remove obsolete declarations from proto.h Nuke duplicate and obsolete crap from this ugly dump bin. There are still some entries left which need to be sorted out, but I'm tired of that puzzle game right now. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 5dd811d94c6e8c790c95eb96c25c14356660cd8d Author: Thomas Gleixner Date: Tue Dec 11 16:53:29 2007 +0100 x86: remove duplicate start_kernel declaration start_kernel is already declared in a generic header file. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit a38479df13f5eec5b36ca9d4fa75a7d3c3af5cd4 Author: Thomas Gleixner Date: Tue Dec 11 16:53:29 2007 +0100 x86: remove obsolete nohpet declaration The lonely user is hpet.c, so there is no need to declare it elsewhere. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 4c3f54bbb37b26f7975415912b5679c0da11ff3e Author: Thomas Gleixner Date: Tue Dec 11 16:53:29 2007 +0100 x86: move pmtmr related declarations Move more stuff out of proto.h. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 98388ff78b4ed304b9f9336c3cd6208100dffe34 Author: Thomas Gleixner Date: Tue Dec 11 16:53:29 2007 +0100 x86: move tsc related declarations tsc also has its own header file. Nuke the stupid 64 bit ifdef while at it. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit e9916f03395e5405af18f02cbcd6b9ba3ba8ee13 Author: Thomas Gleixner Date: Tue Dec 11 16:53:29 2007 +0100 x86: move pda related declaration pda has its own header file as well. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 21c19aa4527741ea7288a5cfaf1ab5d2b98e4264 Author: Thomas Gleixner Date: Tue Dec 11 16:53:28 2007 +0100 x86: move page related declaration end_pfn is in page.h, so end_pfn_map has a place there as well. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 662b1d58c190838d3ca8280f010551bd44a11be0 Author: Thomas Gleixner Date: Tue Dec 11 16:53:28 2007 +0100 x86: move numa related declarations More stuff shuffled to the correct place. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit cb52e38f70a38dc719e688959c6a62fac62faf4d Author: Thomas Gleixner Date: Tue Dec 11 16:53:28 2007 +0100 x86: move mce related declarations Move the mce related declarations where they belong, fix the users and remove the 32bit dependency in mce.h. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 36d851ae14cfc29105ef537b128f5a78b6151e0d Author: Thomas Gleixner Date: Tue Dec 11 16:53:28 2007 +0100 x86: move debug related declarations to kdebug.h Move them and fix up some users.
Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit ac14081782d97904371c8f62c32afc62896b51e5 Author: Thomas Gleixner Date: Tue Dec 11 16:53:27 2007 +0100 x86: move k8 related declarations Move the k8 related declarations to k8.h and fix numa_64.c. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 4d4953088376641706f1a5efa5e9bcc1e059efca Author: Thomas Gleixner Date: Tue Dec 11 16:53:27 2007 +0100 x86: move idle related declarations Move the idle related declarations to processor_64.h, where the others are as well. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit ca707bc28fbfcf400da3280560082d7db44ea9d0 Author: Thomas Gleixner Date: Tue Dec 11 16:53:26 2007 +0100 x86: make early_identify_cpu static early_identify_cpu is only used in setup_64.c. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 21a3d4351e8ec26fbb3527f8b1327b1bbed02c95 Author: Thomas Gleixner Date: Tue Dec 11 16:53:26 2007 +0100 x86: move acpi and pci declarations Move the acpi/pci related declarations to the correct headers and remove the duplicates. Build fix from: Andrew Morton Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 55fc48e4b326ece056f783d7ddc10cf487f56351 Author: Thomas Gleixner Date: Tue Dec 11 16:53:26 2007 +0100 x86: remove duplicated declarations Remove declarations which are already made in the appropriate header file. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit a2656bd51048889e329f622345b3e93377095e4a Author: Thomas Gleixner Date: Tue Dec 11 16:53:26 2007 +0100 x86: merge apic_32/64.h Unify the apic.h variants. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 421b1e47b0471d36567091da486be7660a4471f0 Author: Thomas Gleixner Date: Tue Dec 11 16:53:25 2007 +0100 x86: use u32 for some lapic functions Use u32 so 32 and 64bit have the same interface. Andrew Morton: xen, lguest build fixes Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 4f63edd5109357a8f2f882e3b05df4782abe9f56 Author: Thomas Gleixner Date: Tue Dec 11 16:53:25 2007 +0100 x86: use u32 for safe_apic_wait_icr_idle() Preparatory patch for merging the apic headers. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 2f9a8c9d195b16c415238fa1876187578e9e8ec5 Author: Thomas Gleixner Date: Tue Dec 11 16:53:25 2007 +0100 x86: rename get_maxlvt to lapic_get_maxlvt Use the same name for the 32 and 64 bit variants. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 455a128fea154ceac45a9ccb1fed3c442ec83f01 Author: Thomas Gleixner Date: Tue Dec 11 16:53:25 2007 +0100 x86: prepare unification of include/asm-x86/apic_32/64.h White space and coding style cleanup. Move the K8 local apic defines to apicdef.h, where they belong. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit d87550ea47f829084ff838b3b8ad9b8bd6e65d10 Author: Thomas Gleixner Date: Tue Dec 11 16:53:25 2007 +0100 x86: Unify include/asm-x86/apicdef_32/64.h Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 57b1982e9c60dbd96c60fafcbed28e6689471dc2 Author: Thomas Gleixner Date: Tue Dec 11 16:53:25 2007 +0100 x86: merge arch/x86/kernel/ldt_32/64.c Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 43e0e60aaf7bff0877d686941f2526d71379a18a Author: Thomas Gleixner Date: Tue Dec 11 16:53:24 2007 +0100 x86: prepare arch/x86/kernel/ldt_32/64.c for merging White space and coding style cleanups. Change unsigned to int. There is no win when we compare mincount against pc->size, which is an int as well.
commit 2aaac59ab59b059546620f6d1b527f163091d4c1
Author: Thomas Gleixner
Date: Tue Dec 11 16:53:24 2007 +0100

    x86: clean up include/asm-x86/desc_64.h

    White space and coding style cleanup.

    Signed-off-by: Thomas Gleixner
    Signed-off-by: Ingo Molnar

commit f608891937ce0746576b91b89286dced37a7af30
Author: Thomas Gleixner
Date: Tue Dec 11 16:53:24 2007 +0100

    x86: clean up arch/x86/kernel/ldt_32/64.c

    White space and coding style cleanup.

    Signed-off-by: Thomas Gleixner
    Signed-off-by: Ingo Molnar

commit 3ec8dd0e600e15df2ca480cc09b15f0033744755
Author: Thomas Gleixner
Date: Tue Dec 11 16:53:24 2007 +0100

    x86: clean up arch/x86/kernel/e820_64.c

    White space and coding style cleanup.

    Signed-off-by: Thomas Gleixner
    Signed-off-by: Ingo Molnar

commit 0875985d89cd8319190eaf7df355350a478625b9
Author: Ingo Molnar
Date: Tue Dec 11 16:53:23 2007 +0100

    x86: code cleanups in arch/x86/kernel/pci-gart_64.c

    code cleanups:

                                       errors   lines of code   errors/KLOC
      arch/x86/kernel/pci-gart_64.c       183             748         244.6
      arch/x86/kernel/pci-gart_64.c         0             790             0

    Signed-off-by: Ingo Molnar
    Signed-off-by: Thomas Gleixner

commit ee3adcf65f4d75b797b1484d2aff4f670470d43e
Author: Ingo Molnar
Date: Tue Dec 11 16:53:23 2007 +0100

    x86: lindent arch/i386/math-emu, cleanup

    manually clean up some of the damage that lindent caused. (this is a
    separate commit so that in the unlikely case of a typo we can bisect
    it down to the manual edits.)

    Signed-off-by: Ingo Molnar
    Signed-off-by: Thomas Gleixner

commit 16207e842befe39d86f7e18e5af17d262e37755b
Author: Ingo Molnar
Date: Tue Dec 11 16:53:23 2007 +0100

    x86: lindent arch/i386/math-emu

    lindent these files:

                             errors   lines of code   errors/KLOC
      arch/x86/math-emu/       2236            9424         237.2
      arch/x86/math-emu/        128            8706          14.7

    no other changes. No code changed:

       text    data     bss      dec     hex filename
    5589802  612739 3833856 10036397  9924ad vmlinux.before
    5589802  612739 3833856 10036397  9924ad vmlinux.after

    the intent of this patch is to ease the automated tracking of kernel
    code quality - it's just much easier for us to maintain it if every
    file in arch/x86 is supposed to be clean.

    NOTE: it is a known problem of lindent that it causes some style
    damage of its own, but it's a safe tool (well, except for the gcc
    array range initializers extension), so we did the bulk of the
    changes via lindent, and did the manual fixups in a followup patch.

    the resulting math-emu code has been tested by Thomas Gleixner on a
    real 386 DX CPU as well, and it works fine.

    Signed-off-by: Ingo Molnar
    Signed-off-by: Thomas Gleixner

commit b5058c99c22af7424617711bd5edd285399270b6
Author: Ingo Molnar
Date: Tue Dec 11 16:53:22 2007 +0100

    x86: mach-voyager, lindent

    lindent the mach-voyager files to get rid of more than 300 style
    errors:

                                      errors   lines of code   errors/KLOC
      arch/x86/mach-voyager/ [old]       409            3729         109.6
      arch/x86/mach-voyager/ [new]        71            3678          19.3

    Signed-off-by: Ingo Molnar
    Signed-off-by: Thomas Gleixner

commit 64a8e03d8455a673eb3ea082cd0d1684a4f675eb
Author: Ingo Molnar
Date: Tue Dec 11 16:53:22 2007 +0100

    x86: clean up arch/x86/kernel/aperture_64.c printk()s

    clean up arch/x86/kernel/aperture_64.c printk()s.

    Signed-off-by: Ingo Molnar
    Signed-off-by: Thomas Gleixner

commit bd200e9855ad61f5fabdb2935b80b0e91e382195
Author: Ingo Molnar
Date: Tue Dec 11 16:53:22 2007 +0100

    x86: clean up arch/x86/kernel/aperture_64.c

    whitespace cleanup. No code changed:

       text    data     bss     dec     hex filename
       2080      76       4    2160     870 aperture_64.o.before
       2080      76       4    2160     870 aperture_64.o.after

                                       errors   lines of code   errors/KLOC
      arch/x86/kernel/aperture_64.c       114             299         381.2
      arch/x86/kernel/aperture_64.c         0             315             0

    Signed-off-by: Ingo Molnar
    Signed-off-by: Thomas Gleixner

commit d003974b4078236980a378fcc70b1db28b3870d5
Author: Thomas Gleixner
Date: Tue Dec 11 16:53:21 2007 +0100

    x86: clean up arch/x86/ia32/mmap32.c

    White space and coding style cleanup.

    Signed-off-by: Thomas Gleixner
    Signed-off-by: Ingo Molnar

commit 8c344235f323238dd1a2677938ffa01ccdcfc1cb
Author: Thomas Gleixner
Date: Tue Dec 11 16:53:21 2007 +0100

    x86: clean up arch/x86/ia32/syscall32.c

    White space and coding style cleanup.

    Signed-off-by: Thomas Gleixner
    Signed-off-by: Ingo Molnar

commit 0b0fe78c1595827974934bc2ba7bc82b2c0d12f7
Author: Thomas Gleixner
Date: Tue Dec 11 16:53:21 2007 +0100

    x86: clean up arch/x86/ia32/sys_ia32.c

    White space and coding style cleanup.

    Signed-off-by: Thomas Gleixner
    Signed-off-by: Ingo Molnar

commit dd2b690bd8fdcf70c67e89def4ad22e5e08a5998
Author: Thomas Gleixner
Date: Tue Dec 11 16:53:20 2007 +0100

    x86: clean up arch/x86/ia32/ptrace32.c

    White space and coding style cleanup.

    Signed-off-by: Thomas Gleixner
    Signed-off-by: Ingo Molnar

commit c75207562e721a90c04ceabebf103e189e922d7d
Author: Thomas Gleixner
Date: Tue Dec 11 16:53:20 2007 +0100

    x86: clean up arch/x86/ia32/ipc32.c

    White space and coding style cleanup.

    Signed-off-by: Thomas Gleixner
    Signed-off-by: Ingo Molnar

commit a1b5e358fde0ac896e52848ebb3a116fb3d10e5d
Author: Thomas Gleixner
Date: Tue Dec 11 16:53:20 2007 +0100

    x86: clean up arch/x86/ia32/ia32_signal.c

    White space and coding style cleanup.

    Signed-off-by: Thomas Gleixner
    Signed-off-by: Ingo Molnar

commit f13bfe03a99da73ea9de46a1fd6d14cb22cc1b72
Author: Thomas Gleixner
Date: Tue Dec 11 16:53:19 2007 +0100

    x86: clean up arch/x86/ia32/aout32.c

    White space and coding style cleanup.

    Signed-off-by: Thomas Gleixner
    Signed-off-by: Ingo Molnar

commit 22e39df6938224492fe494793845b95521b49ac4
Author: Thomas Gleixner
Date: Tue Dec 11 16:53:19 2007 +0100

    x86: clean up arch/x86/ia32/fpu32.c

    White space and coding style cleanup.

    Signed-off-by: Thomas Gleixner
    Signed-off-by: Ingo Molnar

commit 349bb06956f4bd15991298008ab2264c940465f0
Author: Ingo Molnar
Date: Tue Dec 11 16:53:19 2007 +0100

    x86: clean up arch/x86/mm/pageattr_64.c

    clean up arch/x86/mm/pageattr_64.c. no code changed:

       text    data     bss     dec     hex filename
       1751      16       0    1767     6e7 pageattr_64.o.before
       1751      16       0    1767     6e7 pageattr_64.o.after

    Signed-off-by: Ingo Molnar
    Signed-off-by: Thomas Gleixner

commit 3d9c40187c0b735a5b66556d32f18be4379be36f
Author: Ingo Molnar
Date: Tue Dec 11 16:53:18 2007 +0100

    x86: clean up arch/x86/mm/pageattr_32.c

    clean up arch/x86/mm/pageattr_32.c. no code changed:

       text    data     bss     dec     hex filename
       1255      40       0    1295     50f pageattr_32.o.before
       1255      40       0    1295     50f pageattr_32.o.after

    Signed-off-by: Ingo Molnar
    Signed-off-by: Thomas Gleixner

commit d1481d71638be875a66c245fb66e43ed8828f77b
Author: Thomas Gleixner
Date: Tue Dec 11 16:53:18 2007 +0100

    x86: unify arch/x86/crypto/twofish_32/64.c

    Get rid of another duplicate file.

    Signed-off-by: Thomas Gleixner
    Signed-off-by: Ingo Molnar

commit dc7cc1fe1b4e51f4ac74fc53a04009d73ef5b7c9
Author: H. Peter Anvin
Date: Tue Dec 11 16:53:18 2007 +0100

    x86: unify asm/cpufeature.h

    asm/cpufeature.h was already almost unified; this completes the job.

    Signed-off-by: H. Peter Anvin
    Signed-off-by: Ingo Molnar
    Signed-off-by: Thomas Gleixner

commit e4c92e98f9c9cc7e79c0fe2f7ad2eeed5f78e252
Author: H. Peter Anvin
Date: Tue Dec 11 16:53:17 2007 +0100

    x86: add <asm-x86/asm.h>

    Create <asm-x86/asm.h>, with common definitions suitable for assembly
    unification.

    Signed-off-by: H. Peter Anvin
    Signed-off-by: Ingo Molnar
    Signed-off-by: Thomas Gleixner
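The new header (include/asm-x86/asm.h in the diffstat below) carries definitions of roughly the following flavor, written from memory as a sketch of the idea rather than a quote of the new file: macros that expand to the right assembler directives for 32- vs 64-bit, so shared inline-asm snippets (exception tables, alternatives and the like) need no per-arch variants.

#ifndef _ASM_X86_ASM_H
#define _ASM_X86_ASM_H

#ifdef CONFIG_X86_32
/* 32 bits */
# define _ASM_PTR	" .long "	/* emit a pointer-sized word */
# define _ASM_ALIGN	" .balign 4"	/* align to pointer size */
#else
/* 64 bits */
# define _ASM_PTR	" .quad "
# define _ASM_ALIGN	" .balign 8"
#endif

#endif /* _ASM_X86_ASM_H */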
commit be7256616f023a686c814dd40167d831c69cd95f
Author: Roland McGrath
Date: Tue Dec 11 16:53:17 2007 +0100

    x86: protect against sigaltstack wraparound

    cf. http://lkml.org/lkml/2007/10/3/41

    To summarize: on Linux, SA_ONSTACK decides whether you are already on
    the signal stack based on the value of the SP at the time of a
    signal. If you are not already inside the range, you are not "on the
    signal stack" and so the new signal handler frame starts over at the
    base of the signal stack.

    sigaltstack (and sigstack before it) was invented in BSD. There, the
    SA_ONSTACK behavior has always been different. It uses a kernel state
    flag to decide, rather than the SP value. When you first take an
    SA_ONSTACK signal and switch to the alternate signal stack, it sets
    the SS_ONSTACK flag in the thread's sigaltstack state in the kernel.
    Thereafter you are "on the signal stack" and don't switch SP before
    pushing a handler frame no matter what the SP value is. Only when you
    sigreturn from the original handler context do you clear the
    SS_ONSTACK flag so that a new handler frame will start over at the
    base of the alternate signal stack.

    The undesirable effect of the Linux behavior is that an overflow of
    the alternate signal stack can not only go undetected, but lead to a
    ring buffer effect of clobbering the original handler frame at the
    base of the signal stack for each successive signal that comes just
    after the overflow. This is what Shi Weihua's test case demonstrates.
    Normally this does not come up because of the signal mask, but the
    test case uses SA_NODEFER for its SIGSEGV handler.

    The other subtle part of the existing Linux semantics is that a
    simple longjmp out of a signal handler serves to take you off the
    signal stack in a safe and reliable fashion without having used
    sigreturn (nor having just returned from the handler normally, which
    means the same). After the longjmp (or even informal stack switching
    not via any proper libc or kernel interface), the alternate signal
    stack stands ready to be used again.

    A paranoid program would allocate a PROT_NONE red zone around its
    alternate signal stack. Then a small overflow would trigger a SIGSEGV
    in handler setup, and be fatal (core dump) whether or not SIGSEGV is
    blocked. As with thread stack red zones, that cannot catch all
    overflows (or underflows). e.g., a local array as large as page size
    allocated in a function called from a handler, but not actually
    touched before more calls push more stack, could cause an overflow
    that silently pushes into some unrelated allocated pages.

    The BSD behavior does not do anything in particular about overflow.
    But it does at least avoid the wraparound or "ring buffer effect", so
    you'll just get a straightforward all-out overflow down your address
    space past the low end of the alternate signal stack. I don't know
    what the BSD behavior is for longjmp out of an SA_ONSTACK handler.

    The POSIX wording relating to sigaltstack is pretty minimal. I don't
    think it speaks to this issue one way or another. (The program that
    overflows its stack is clearly in undefined behavior territory of one
    sort or another anyhow.)

    Given the longjmp issue and the potential for highly subtle
    complications in existing programs relying on this in arcane ways
    deep in their code, I am very dubious about changing the behavior to
    the BSD style persistent flag. I think Shi Weihua's patches have a
    similar effect by tracking the SP used in the last handler setup.

    I think it would be sensible for the signal handler setup code to
    detect when it would itself be causing a stack overflow. Maybe
    something like the following patch (untested). This issue exists in
    the same way on all machines, so ideally they would all do a similar
    check. When it's the handler function itself or its callees that
    cause the overflow, rather than the signal handler frame setup alone
    crossing the boundary, this still won't help. But I don't see any way
    to distinguish that from the valid longjmp case.

    Signed-off-by: Ingo Molnar
    Signed-off-by: Thomas Gleixner
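The proposed check (the actual, untested patch is in the diff further below and may differ in detail) amounts to: while building a handler frame, if the old SP was on the alternate stack but the bottom of the new frame would not be, refuse and hand back an always-invalid address, so that delivery faults and the task dies with SIGSEGV instead of wrapping around the sigaltstack base. A sketch of that logic, using the existing on_sig_stack() helper from <linux/sched.h>:

/*
 * Sketch of the overflow check in get_sigframe()-style code: 'sp' is
 * where the frame would start, 'frame_size' how far it extends down.
 */
static void __user *check_sigframe(unsigned long sp, size_t frame_size)
{
	/*
	 * If we are on the alternate signal stack and would overflow
	 * it, don't. Return an always-poisoned address instead, so the
	 * signal delivery faults and the task gets a fatal SIGSEGV.
	 */
	if (on_sig_stack(sp) && !on_sig_stack(sp - frame_size))
		return (void __user *)-1L;

	/* otherwise place the frame below sp, 8-byte aligned */
	return (void __user *)((sp - frame_size) & -8UL);
}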
commit b5940c5307b3463762e72365cdd6de909b3a5b5c
Author: Ingo Molnar
Date: Tue Dec 11 16:53:17 2007 +0100

    x86: idle wakeup event in the HLT loop

    do a proper idle-wakeup event on HLT as well - some CPUs stop the TSC
    in HLT too, not just when going through the ACPI methods. (the ACPI
    idle code already does this.)

    [ update the 64-bit side too, as noticed by Jiri Slaby. ]

    Signed-off-by: Ingo Molnar

commit 3fb0214ace73b48fc2d77c31d2bb253d140f8486
Author: Guillaume Chazarain
Date: Tue Dec 11 16:53:17 2007 +0100

    x86: scale cyc_2_nsec according to CPU frequency

    scale the sched_clock() cyc_2_nsec scaling factor according to CPU
    frequency changes.

    [ mingo@elte.hu: simplified it and fixed it for SMP. ]

    Signed-off-by: Ingo Molnar
    Signed-off-by: Thomas Gleixner
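For reference, the cycles-to-nanoseconds conversion being rescaled is a fixed-point multiply of roughly this shape (names follow the pre-existing tsc code, from memory); the commit's contribution is recomputing the factor from the cpufreq transition notifier, per CPU, instead of treating it as a boot-time constant:

#define CYC2NS_SCALE_FACTOR 10	/* 2^10 fixed point */

static unsigned long cyc2ns_scale __read_mostly;

/* ns per cycle = 10^6 / cpu_khz, kept in 2^10 fixed point */
static inline void set_cyc2ns_scale(unsigned long cpu_khz)
{
	cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR) / cpu_khz;
}

static inline unsigned long long cycles_2_ns(unsigned long long cyc)
{
	return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
}

/*
 * If cpu_khz changes under a cpufreq transition and the scale is not
 * recomputed, sched_clock() silently speeds up or slows down; calling
 * set_cyc2ns_scale() from the frequency-change notifier keeps the
 * returned nanoseconds honest.
 */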
commit 7409d15e606e336bb6e19589e6f9702ba05e0cff
Author: Ingo Molnar
Date: Tue Dec 11 16:53:15 2007 +0100

    x86: fix get_cycles_sync() overhead

    get_cycles_sync() is causing massive overhead in KVM networking:

       http://lkml.org/lkml/2007/12/11/54

    remove the explicit CPUID serialization - it causes VM exits and is
    pointless: we care about GTOD coherency but that goes to user-space
    via a syscall, and syscalls are serialization points anyway.
Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Acked-by: Dor Laor Signed-off-by: Andrew Morton --- Documentation/kernel-parameters.txt | 5 arch/ia64/ia32/binfmt_elf32.c | 3 arch/mips/kernel/i8253.c | 12 arch/powerpc/kernel/ptrace.c | 52 arch/um/sys-i386/signal.c | 50 arch/um/sys-x86_64/signal.c | 70 arch/x86/Kconfig | 248 - arch/x86/Kconfig.cpu | 56 arch/x86/Makefile_32 | 18 arch/x86/Makefile_64 | 3 arch/x86/boot/compressed/relocs.c | 7 arch/x86/configs/x86_64_defconfig | 9 arch/x86/crypto/Makefile | 4 arch/x86/crypto/twofish.c | 101 arch/x86/crypto/twofish_32.c | 97 arch/x86/crypto/twofish_64.c | 97 arch/x86/ia32/Makefile | 42 arch/x86/ia32/fpu32.c | 132 arch/x86/ia32/ia32_aout.c | 244 - arch/x86/ia32/ia32_binfmt.c | 51 arch/x86/ia32/ia32_signal.c | 471 +- arch/x86/ia32/ia32entry.S | 11 arch/x86/ia32/ipc32.c | 30 arch/x86/ia32/mmap32.c | 79 arch/x86/ia32/ptrace32.c | 404 -- arch/x86/ia32/sys_ia32.c | 502 +-- arch/x86/ia32/syscall32.c | 83 arch/x86/ia32/syscall32_syscall.S | 17 arch/x86/ia32/tls32.c | 163 - arch/x86/ia32/vsyscall-sigreturn.S | 143 arch/x86/ia32/vsyscall-syscall.S | 69 arch/x86/ia32/vsyscall-sysenter.S | 95 arch/x86/ia32/vsyscall.lds | 80 arch/x86/kernel/Makefile_32 | 47 arch/x86/kernel/Makefile_64 | 10 arch/x86/kernel/acpi/boot.c | 3 arch/x86/kernel/acpi/wakeup_64.S | 32 arch/x86/kernel/alternative.c | 13 arch/x86/kernel/aperture_64.c | 280 - arch/x86/kernel/apic_32.c | 104 arch/x86/kernel/apic_64.c | 1474 ++++----- arch/x86/kernel/apm_32.c | 2 arch/x86/kernel/asm-offsets_32.c | 60 arch/x86/kernel/asm-offsets_64.c | 41 arch/x86/kernel/cpu/addon_cpuid_features.c | 2 arch/x86/kernel/cpu/bugs.c | 3 arch/x86/kernel/cpu/common.c | 4 arch/x86/kernel/cpu/cyrix.c | 6 arch/x86/kernel/cpu/intel.c | 22 arch/x86/kernel/cpu/mcheck/mce_64.c | 18 arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 10 arch/x86/kernel/cpu/mtrr/amd.c | 2 arch/x86/kernel/cpu/mtrr/cyrix.c | 3 arch/x86/kernel/cpu/mtrr/generic.c | 19 arch/x86/kernel/cpu/mtrr/if.c | 15 arch/x86/kernel/cpu/mtrr/main.c | 8 arch/x86/kernel/cpu/mtrr/mtrr.h | 6 arch/x86/kernel/cpu/mtrr/state.c | 3 arch/x86/kernel/cpu/perfctr-watchdog.c | 1 arch/x86/kernel/doublefault_32.c | 15 arch/x86/kernel/ds.c | 429 ++ arch/x86/kernel/e820_32.c | 110 arch/x86/kernel/e820_64.c | 330 +- arch/x86/kernel/entry_32.S | 24 arch/x86/kernel/geode_32.c | 48 arch/x86/kernel/head64.c | 6 arch/x86/kernel/hpet.c | 4 arch/x86/kernel/i386_ksyms_32.c | 7 arch/x86/kernel/i8253.c | 12 arch/x86/kernel/i8259_32.c | 4 arch/x86/kernel/i8259_64.c | 154 arch/x86/kernel/init_task.c | 1 arch/x86/kernel/io_apic_32.c | 2 arch/x86/kernel/io_apic_64.c | 103 arch/x86/kernel/ioport_32.c | 58 arch/x86/kernel/ioport_64.c | 6 arch/x86/kernel/irq_32.c | 20 arch/x86/kernel/irq_64.c | 30 arch/x86/kernel/kprobes_32.c | 109 arch/x86/kernel/kprobes_64.c | 103 arch/x86/kernel/ldt.c | 264 + arch/x86/kernel/ldt_32.c | 248 - arch/x86/kernel/ldt_64.c | 250 - arch/x86/kernel/machine_kexec_64.c | 5 arch/x86/kernel/mpparse_32.c | 29 arch/x86/kernel/nmi_32.c | 15 arch/x86/kernel/nmi_64.c | 99 arch/x86/kernel/paravirt_32.c | 12 arch/x86/kernel/pci-calgary_64.c | 5 arch/x86/kernel/pci-dma_64.c | 3 arch/x86/kernel/pci-gart_64.c | 506 +-- arch/x86/kernel/pci-swiotlb_64.c | 1 arch/x86/kernel/pmtimer_64.c | 4 arch/x86/kernel/process_32.c | 376 -- arch/x86/kernel/process_64.c | 307 + arch/x86/kernel/ptrace.c | 1349 ++++++++ arch/x86/kernel/ptrace_32.c | 717 ---- arch/x86/kernel/ptrace_64.c | 621 --- arch/x86/kernel/reboot_fixups_32.c | 14 arch/x86/kernel/rtc.c | 196 + arch/x86/kernel/setup64.c | 3 
arch/x86/kernel/setup_32.c | 111 arch/x86/kernel/setup_64.c | 399 +- arch/x86/kernel/signal_32.c | 220 - arch/x86/kernel/signal_64.c | 128 arch/x86/kernel/smp_32.c | 4 arch/x86/kernel/smp_64.c | 88 arch/x86/kernel/smpboot_32.c | 37 arch/x86/kernel/smpboot_64.c | 46 arch/x86/kernel/stacktrace.c | 1 arch/x86/kernel/step.c | 210 + arch/x86/kernel/suspend_64.c | 2 arch/x86/kernel/suspend_asm_64.S | 32 arch/x86/kernel/sys_x86_64.c | 98 arch/x86/kernel/sysenter_32.c | 346 -- arch/x86/kernel/time_32.c | 114 arch/x86/kernel/time_64.c | 170 - arch/x86/kernel/tls.c | 136 arch/x86/kernel/traps_32.c | 126 arch/x86/kernel/traps_64.c | 123 arch/x86/kernel/tsc_32.c | 43 arch/x86/kernel/tsc_64.c | 74 arch/x86/kernel/vm86_32.c | 110 arch/x86/kernel/vmi_32.c | 74 arch/x86/kernel/vmiclock_32.c | 1 arch/x86/kernel/vmlinux_32.lds.S | 6 arch/x86/kernel/vsmp_64.c | 11 arch/x86/kernel/vsyscall-int80_32.S | 53 arch/x86/kernel/vsyscall-note_32.S | 45 arch/x86/kernel/vsyscall-sigreturn_32.S | 143 arch/x86/kernel/vsyscall-sysenter_32.S | 122 arch/x86/kernel/vsyscall_32.S | 15 arch/x86/kernel/vsyscall_32.lds.S | 67 arch/x86/kernel/vsyscall_64.c | 6 arch/x86/kernel/x8664_ksyms_64.c | 7 arch/x86/lguest/boot.c | 32 arch/x86/mach-rdc321x/Makefile | 5 arch/x86/mach-rdc321x/gpio.c | 91 arch/x86/mach-rdc321x/platform.c | 68 arch/x86/mach-rdc321x/wdt.c | 275 + arch/x86/mach-visws/mpparse.c | 16 arch/x86/mach-voyager/setup.c | 32 arch/x86/mach-voyager/voyager_basic.c | 132 arch/x86/mach-voyager/voyager_cat.c | 601 +-- arch/x86/mach-voyager/voyager_smp.c | 684 +--- arch/x86/mach-voyager/voyager_thread.c | 52 arch/x86/math-emu/errors.c | 880 ++--- arch/x86/math-emu/exception.h | 9 arch/x86/math-emu/fpu_arith.c | 150 arch/x86/math-emu/fpu_asm.h | 1 arch/x86/math-emu/fpu_aux.c | 211 - arch/x86/math-emu/fpu_emu.h | 67 arch/x86/math-emu/fpu_entry.c | 1220 +++---- arch/x86/math-emu/fpu_etc.c | 185 - arch/x86/math-emu/fpu_proto.h | 28 arch/x86/math-emu/fpu_tags.c | 92 arch/x86/math-emu/fpu_trig.c | 2930 ++++++++---------- arch/x86/math-emu/get_address.c | 646 +-- arch/x86/math-emu/load_store.c | 452 +- arch/x86/math-emu/poly.h | 69 arch/x86/math-emu/poly_2xm1.c | 197 - arch/x86/math-emu/poly_atan.c | 347 +- arch/x86/math-emu/poly_l2.c | 386 +- arch/x86/math-emu/poly_sin.c | 643 +-- arch/x86/math-emu/poly_tan.c | 334 -- arch/x86/math-emu/reg_add_sub.c | 565 +-- arch/x86/math-emu/reg_compare.c | 573 +-- arch/x86/math-emu/reg_constant.c | 71 arch/x86/math-emu/reg_convert.c | 51 arch/x86/math-emu/reg_divide.c | 319 - arch/x86/math-emu/reg_ld_str.c | 2155 ++++++------- arch/x86/math-emu/reg_mul.c | 171 - arch/x86/math-emu/status_w.h | 8 arch/x86/mm/Makefile_64 | 2 arch/x86/mm/boot_ioremap_32.c | 24 arch/x86/mm/extable_32.c | 6 arch/x86/mm/fault_32.c | 38 arch/x86/mm/fault_64.c | 22 arch/x86/mm/highmem_32.c | 47 arch/x86/mm/init_32.c | 31 arch/x86/mm/init_64.c | 39 arch/x86/mm/ioremap_64.c | 20 arch/x86/mm/k8topology_64.c | 17 arch/x86/mm/mmap_32.c | 8 arch/x86/mm/mmap_64.c | 119 arch/x86/mm/numa_64.c | 246 - arch/x86/mm/pageattr_32.c | 151 arch/x86/mm/pageattr_64.c | 143 arch/x86/mm/srat_64.c | 57 arch/x86/oprofile/backtrace.c | 6 arch/x86/oprofile/op_model_athlon.c | 22 arch/x86/pci/fixup.c | 13 arch/x86/pci/init.c | 4 arch/x86/pci/mmconfig-shared.c | 210 + arch/x86/pci/pci.h | 1 arch/x86/power/cpu.c | 14 arch/x86/vdso/Makefile | 130 arch/x86/vdso/vclock_gettime.c | 1 arch/x86/vdso/vdso-layout.lds.S | 64 arch/x86/vdso/vdso-start.S | 2 arch/x86/vdso/vdso.lds.S | 94 arch/x86/vdso/vdso32-setup.c | 411 ++ arch/x86/vdso/vdso32.S | 19 
arch/x86/vdso/vdso32/int80.S | 56 arch/x86/vdso/vdso32/note.S | 44 arch/x86/vdso/vdso32/sigreturn.S | 144 arch/x86/vdso/vdso32/syscall.S | 77 arch/x86/vdso/vdso32/sysenter.S | 116 arch/x86/vdso/vdso32/vdso32.lds.S | 37 arch/x86/vdso/vgetcpu.c | 4 arch/x86/vdso/vma.c | 18 arch/x86/vdso/voffset.h | 1 arch/x86/xen/enlighten.c | 30 arch/x86/xen/events.c | 2 arch/x86/xen/mmu.c | 4 arch/x86/xen/setup.c | 7 arch/x86/xen/smp.c | 8 drivers/acpi/bus.c | 2 drivers/char/hpet.c | 75 drivers/char/rtc.c | 238 - drivers/lguest/x86/core.c | 4 drivers/pci/probe.c | 11 fs/binfmt_elf.c | 114 include/asm-powerpc/ptrace.h | 7 include/asm-x86/Kbuild | 1 include/asm-x86/acpi.h | 146 include/asm-x86/acpi_32.h | 143 include/asm-x86/acpi_64.h | 153 include/asm-x86/alternative.h | 162 include/asm-x86/alternative_32.h | 154 include/asm-x86/alternative_64.h | 159 include/asm-x86/apic.h | 141 include/asm-x86/apic_32.h | 127 include/asm-x86/apic_64.h | 102 include/asm-x86/apicdef.h | 412 ++ include/asm-x86/apicdef_32.h | 375 -- include/asm-x86/apicdef_64.h | 392 -- include/asm-x86/arch_hooks.h | 5 include/asm-x86/asm.h | 20 include/asm-x86/bitops.h | 315 + include/asm-x86/bitops_32.h | 324 - include/asm-x86/bitops_64.h | 297 - include/asm-x86/bug.h | 3 include/asm-x86/checksum_64.h | 2 include/asm-x86/cmpxchg_32.h | 122 include/asm-x86/compat.h | 2 include/asm-x86/cpufeature.h | 201 + include/asm-x86/cpufeature_32.h | 176 - include/asm-x86/cpufeature_64.h | 30 include/asm-x86/desc_64.h | 114 include/asm-x86/dma.h | 318 + include/asm-x86/dma_32.h | 297 - include/asm-x86/dma_64.h | 304 - include/asm-x86/ds.h | 65 include/asm-x86/e820_32.h | 6 include/asm-x86/e820_64.h | 6 include/asm-x86/elf.h | 101 include/asm-x86/futex.h | 138 include/asm-x86/futex_32.h | 135 include/asm-x86/futex_64.h | 125 include/asm-x86/geode.h | 12 include/asm-x86/gpio.h | 6 include/asm-x86/hw_irq_64.h | 1 include/asm-x86/i387_32.h | 2 include/asm-x86/i387_64.h | 2 include/asm-x86/i8259.h | 17 include/asm-x86/ia32.h | 6 include/asm-x86/ia32_unistd.h | 2 include/asm-x86/ide.h | 2 include/asm-x86/idle.h | 1 include/asm-x86/io_apic.h | 158 include/asm-x86/io_apic_32.h | 155 include/asm-x86/io_apic_64.h | 138 include/asm-x86/irqflags.h | 246 + include/asm-x86/irqflags_32.h | 197 - include/asm-x86/irqflags_64.h | 176 - include/asm-x86/k8.h | 1 include/asm-x86/kdebug.h | 5 include/asm-x86/kexec_32.h | 36 include/asm-x86/kexec_64.h | 20 include/asm-x86/kprobes_32.h | 2 include/asm-x86/kprobes_64.h | 2 include/asm-x86/linkage.h | 21 include/asm-x86/linkage_32.h | 15 include/asm-x86/linkage_64.h | 6 include/asm-x86/mach-bigsmp/mach_apic.h | 12 include/asm-x86/mach-default/apm.h | 2 include/asm-x86/mach-default/io_ports.h | 25 include/asm-x86/mach-default/mach_apic.h | 16 include/asm-x86/mach-default/mach_time.h | 111 include/asm-x86/mach-default/mach_timer.h | 2 include/asm-x86/mach-default/mach_traps.h | 2 include/asm-x86/mach-es7000/mach_apic.h | 10 include/asm-x86/mach-generic/gpio.h | 15 include/asm-x86/mach-numaq/mach_apic.h | 10 include/asm-x86/mach-rdc321x/gpio.h | 56 include/asm-x86/mach-rdc321x/rdc321x_defs.h | 6 include/asm-x86/mach-summit/mach_apic.h | 16 include/asm-x86/mc146818rtc.h | 101 include/asm-x86/mc146818rtc_32.h | 97 include/asm-x86/mc146818rtc_64.h | 29 include/asm-x86/mce.h | 18 include/asm-x86/mmu_context_64.h | 2 include/asm-x86/mmzone_32.h | 3 include/asm-x86/mmzone_64.h | 6 include/asm-x86/mpspec.h | 116 include/asm-x86/mpspec_32.h | 81 include/asm-x86/mpspec_64.h | 233 - include/asm-x86/mpspec_def.h | 87 
include/asm-x86/msr-index.h | 7 include/asm-x86/msr.h | 284 - include/asm-x86/mtrr.h | 8 include/asm-x86/nmi_32.h | 3 include/asm-x86/nmi_64.h | 5 include/asm-x86/numa_64.h | 8 include/asm-x86/page_32.h | 39 include/asm-x86/page_64.h | 30 include/asm-x86/paravirt.h | 31 include/asm-x86/pci.h | 17 include/asm-x86/pci_64.h | 1 include/asm-x86/pda.h | 1 include/asm-x86/pgtable_32.h | 8 include/asm-x86/pgtable_64.h | 31 include/asm-x86/processor.h | 78 include/asm-x86/processor_32.h | 150 include/asm-x86/processor_64.h | 50 include/asm-x86/proto.h | 67 include/asm-x86/ptrace-abi.h | 54 include/asm-x86/ptrace.h | 138 include/asm-x86/resume-trace.h | 23 include/asm-x86/resume-trace_32.h | 13 include/asm-x86/resume-trace_64.h | 13 include/asm-x86/rio.h | 4 include/asm-x86/rwlock.h | 1 include/asm-x86/rwsem.h | 2 include/asm-x86/scatterlist.h | 34 include/asm-x86/scatterlist_32.h | 28 include/asm-x86/scatterlist_64.h | 29 include/asm-x86/segment.h | 202 + include/asm-x86/segment_32.h | 147 include/asm-x86/segment_64.h | 53 include/asm-x86/sigcontext.h | 42 include/asm-x86/sigcontext32.h | 22 include/asm-x86/signal.h | 11 include/asm-x86/smp_32.h | 117 include/asm-x86/smp_64.h | 133 include/asm-x86/sparsemem.h | 35 include/asm-x86/sparsemem_32.h | 31 include/asm-x86/sparsemem_64.h | 26 include/asm-x86/spinlock.h | 225 + include/asm-x86/spinlock_32.h | 221 - include/asm-x86/spinlock_64.h | 167 - include/asm-x86/suspend_64.h | 2 include/asm-x86/system.h | 376 ++ include/asm-x86/system_32.h | 320 - include/asm-x86/system_64.h | 178 - include/asm-x86/thread_info_32.h | 16 include/asm-x86/thread_info_64.h | 33 include/asm-x86/time.h | 2 include/asm-x86/timer.h | 23 include/asm-x86/timex.h | 2 include/asm-x86/tlbflush.h | 157 include/asm-x86/tlbflush_32.h | 168 - include/asm-x86/tlbflush_64.h | 100 include/asm-x86/topology.h | 143 include/asm-x86/topology_32.h | 121 include/asm-x86/topology_64.h | 71 include/asm-x86/tsc.h | 48 include/asm-x86/user_32.h | 24 include/asm-x86/user_64.h | 41 include/asm-x86/vdso.h | 28 include/asm-x86/vsyscall32.h | 20 include/asm-x86/xor_32.h | 2 include/asm-x86/xor_64.h | 2 include/linux/acpi_pmtmr.h | 2 include/linux/compat.h | 4 include/linux/hpet.h | 3 include/linux/ioport.h | 2 include/linux/pci.h | 9 include/linux/pci_ids.h | 7 include/linux/ptrace.h | 75 include/linux/thread_info.h | 10 include/linux/timer.h | 6 kernel/ptrace.c | 74 kernel/signal.c | 4 kernel/softirq.c | 4 kernel/time/tick-sched.c | 6 kernel/time/timer_stats.c | 2 kernel/timer.c | 82 mm/mmap.c | 3 390 files changed, 21617 insertions(+), 24111 deletions(-) diff -puN Documentation/kernel-parameters.txt~git-x86 Documentation/kernel-parameters.txt --- a/Documentation/kernel-parameters.txt~git-x86 +++ a/Documentation/kernel-parameters.txt @@ -1964,6 +1964,11 @@ and is between 256 and 4096 characters. 
vdso=1: enable VDSO (default) vdso=0: disable VDSO mapping + vdso32= [X86-32,X86-64] + vdso32=2: enable compat VDSO (default with COMPAT_VDSO) + vdso32=1: enable 32-bit VDSO (default) + vdso32=0: disable 32-bit VDSO mapping + vector= [IA-64,SMP] vector=percpu: enable percpu vector domain diff -puN arch/ia64/ia32/binfmt_elf32.c~git-x86 arch/ia64/ia32/binfmt_elf32.c --- a/arch/ia64/ia32/binfmt_elf32.c~git-x86 +++ a/arch/ia64/ia32/binfmt_elf32.c @@ -222,7 +222,8 @@ elf32_set_personality (void) } static unsigned long -elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type) +elf32_map(struct file *filep, unsigned long addr, struct elf_phdr *eppnt, + int prot, int type, unsigned long unused) { unsigned long pgoff = (eppnt->p_vaddr) & ~IA32_PAGE_MASK; diff -puN arch/mips/kernel/i8253.c~git-x86 arch/mips/kernel/i8253.c --- a/arch/mips/kernel/i8253.c~git-x86 +++ a/arch/mips/kernel/i8253.c @@ -24,9 +24,7 @@ DEFINE_SPINLOCK(i8253_lock); static void init_pit_timer(enum clock_event_mode mode, struct clock_event_device *evt) { - unsigned long flags; - - spin_lock_irqsave(&i8253_lock, flags); + spin_lock(&i8253_lock); switch(mode) { case CLOCK_EVT_MODE_PERIODIC: @@ -55,7 +53,7 @@ static void init_pit_timer(enum clock_ev /* Nothing to do here */ break; } - spin_unlock_irqrestore(&i8253_lock, flags); + spin_unlock(&i8253_lock); } /* @@ -65,12 +63,10 @@ static void init_pit_timer(enum clock_ev */ static int pit_next_event(unsigned long delta, struct clock_event_device *evt) { - unsigned long flags; - - spin_lock_irqsave(&i8253_lock, flags); + spin_lock(&i8253_lock); outb_p(delta & 0xff , PIT_CH0); /* LSB */ outb(delta >> 8 , PIT_CH0); /* MSB */ - spin_unlock_irqrestore(&i8253_lock, flags); + spin_unlock(&i8253_lock); return 0; } diff -puN arch/powerpc/kernel/ptrace.c~git-x86 arch/powerpc/kernel/ptrace.c --- a/arch/powerpc/kernel/ptrace.c~git-x86 +++ a/arch/powerpc/kernel/ptrace.c @@ -256,7 +256,7 @@ static int set_evrregs(struct task_struc #endif /* CONFIG_SPE */ -static void set_single_step(struct task_struct *task) +void user_enable_single_step(struct task_struct *task) { struct pt_regs *regs = task->thread.regs; @@ -271,7 +271,7 @@ static void set_single_step(struct task_ set_tsk_thread_flag(task, TIF_SINGLESTEP); } -static void clear_single_step(struct task_struct *task) +void user_disable_single_step(struct task_struct *task) { struct pt_regs *regs = task->thread.regs; @@ -313,7 +313,7 @@ static int ptrace_set_debugreg(struct ta void ptrace_disable(struct task_struct *child) { /* make sure the single step bit is not set. */ - clear_single_step(child); + user_disable_single_step(child); } /* @@ -445,52 +445,6 @@ long arch_ptrace(struct task_struct *chi break; } - case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ - case PTRACE_CONT: { /* restart after signal. */ - ret = -EIO; - if (!valid_signal(data)) - break; - if (request == PTRACE_SYSCALL) - set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - else - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - child->exit_code = data; - /* make sure the single step bit is not set. */ - clear_single_step(child); - wake_up_process(child); - ret = 0; - break; - } - -/* - * make the child exit. Best I can do is send it a sigkill. - * perhaps it should be put in the status that it wants to - * exit. - */ - case PTRACE_KILL: { - ret = 0; - if (child->exit_state == EXIT_ZOMBIE) /* already dead */ - break; - child->exit_code = SIGKILL; - /* make sure the single step bit is not set. 
*/ - clear_single_step(child); - wake_up_process(child); - break; - } - - case PTRACE_SINGLESTEP: { /* set the trap flag. */ - ret = -EIO; - if (!valid_signal(data)) - break; - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - set_single_step(child); - child->exit_code = data; - /* give it a chance to run. */ - wake_up_process(child); - ret = 0; - break; - } - case PTRACE_GET_DEBUGREG: { ret = -EINVAL; /* We only support one DABR and no IABRS at the moment */ diff -puN arch/um/sys-i386/signal.c~git-x86 arch/um/sys-i386/signal.c --- a/arch/um/sys-i386/signal.c~git-x86 +++ a/arch/um/sys-i386/signal.c @@ -3,10 +3,10 @@ * Licensed under the GPL */ -#include "linux/ptrace.h" -#include "asm/unistd.h" -#include "asm/uaccess.h" -#include "asm/ucontext.h" +#include +#include +#include +#include #include "frame_kern.h" #include "skas.h" @@ -18,17 +18,17 @@ void copy_sc(struct uml_pt_regs *regs, v REGS_FS(regs->gp) = sc->fs; REGS_ES(regs->gp) = sc->es; REGS_DS(regs->gp) = sc->ds; - REGS_EDI(regs->gp) = sc->edi; - REGS_ESI(regs->gp) = sc->esi; - REGS_EBP(regs->gp) = sc->ebp; - REGS_SP(regs->gp) = sc->esp; - REGS_EBX(regs->gp) = sc->ebx; - REGS_EDX(regs->gp) = sc->edx; - REGS_ECX(regs->gp) = sc->ecx; - REGS_EAX(regs->gp) = sc->eax; - REGS_IP(regs->gp) = sc->eip; + REGS_EDI(regs->gp) = sc->di; + REGS_ESI(regs->gp) = sc->si; + REGS_EBP(regs->gp) = sc->bp; + REGS_SP(regs->gp) = sc->sp; + REGS_EBX(regs->gp) = sc->bx; + REGS_EDX(regs->gp) = sc->dx; + REGS_ECX(regs->gp) = sc->cx; + REGS_EAX(regs->gp) = sc->ax; + REGS_IP(regs->gp) = sc->ip; REGS_CS(regs->gp) = sc->cs; - REGS_EFLAGS(regs->gp) = sc->eflags; + REGS_EFLAGS(regs->gp) = sc->flags; REGS_SS(regs->gp) = sc->ss; } @@ -229,18 +229,18 @@ static int copy_sc_to_user(struct sigcon sc.fs = REGS_FS(regs->regs.gp); sc.es = REGS_ES(regs->regs.gp); sc.ds = REGS_DS(regs->regs.gp); - sc.edi = REGS_EDI(regs->regs.gp); - sc.esi = REGS_ESI(regs->regs.gp); - sc.ebp = REGS_EBP(regs->regs.gp); - sc.esp = sp; - sc.ebx = REGS_EBX(regs->regs.gp); - sc.edx = REGS_EDX(regs->regs.gp); - sc.ecx = REGS_ECX(regs->regs.gp); - sc.eax = REGS_EAX(regs->regs.gp); - sc.eip = REGS_IP(regs->regs.gp); + sc.di = REGS_EDI(regs->regs.gp); + sc.si = REGS_ESI(regs->regs.gp); + sc.bp = REGS_EBP(regs->regs.gp); + sc.sp = sp; + sc.bx = REGS_EBX(regs->regs.gp); + sc.dx = REGS_EDX(regs->regs.gp); + sc.cx = REGS_ECX(regs->regs.gp); + sc.ax = REGS_EAX(regs->regs.gp); + sc.ip = REGS_IP(regs->regs.gp); sc.cs = REGS_CS(regs->regs.gp); - sc.eflags = REGS_EFLAGS(regs->regs.gp); - sc.esp_at_signal = regs->regs.gp[UESP]; + sc.flags = REGS_EFLAGS(regs->regs.gp); + sc.sp_at_signal = regs->regs.gp[UESP]; sc.ss = regs->regs.gp[SS]; sc.cr2 = fi->cr2; sc.err = fi->error_code; diff -puN arch/um/sys-x86_64/signal.c~git-x86 arch/um/sys-x86_64/signal.c --- a/arch/um/sys-x86_64/signal.c~git-x86 +++ a/arch/um/sys-x86_64/signal.c @@ -4,11 +4,11 @@ * Licensed under the GPL */ -#include "linux/personality.h" -#include "linux/ptrace.h" -#include "asm/unistd.h" -#include "asm/uaccess.h" -#include "asm/ucontext.h" +#include +#include +#include +#include +#include #include "frame_kern.h" #include "skas.h" @@ -27,16 +27,16 @@ void copy_sc(struct uml_pt_regs *regs, v GETREG(regs, R13, sc, r13); GETREG(regs, R14, sc, r14); GETREG(regs, R15, sc, r15); - GETREG(regs, RDI, sc, rdi); - GETREG(regs, RSI, sc, rsi); - GETREG(regs, RBP, sc, rbp); - GETREG(regs, RBX, sc, rbx); - GETREG(regs, RDX, sc, rdx); - GETREG(regs, RAX, sc, rax); - GETREG(regs, RCX, sc, rcx); - GETREG(regs, RSP, sc, rsp); - GETREG(regs, RIP, sc, rip); - 
GETREG(regs, EFLAGS, sc, eflags); + GETREG(regs, RDI, sc, di); + GETREG(regs, RSI, sc, si); + GETREG(regs, RBP, sc, bp); + GETREG(regs, RBX, sc, bx); + GETREG(regs, RDX, sc, dx); + GETREG(regs, RAX, sc, ax); + GETREG(regs, RCX, sc, cx); + GETREG(regs, RSP, sc, sp); + GETREG(regs, RIP, sc, ip); + GETREG(regs, EFLAGS, sc, flags); GETREG(regs, CS, sc, cs); #undef GETREG @@ -61,16 +61,16 @@ static int copy_sc_from_user(struct pt_r err |= GETREG(regs, R13, from, r13); err |= GETREG(regs, R14, from, r14); err |= GETREG(regs, R15, from, r15); - err |= GETREG(regs, RDI, from, rdi); - err |= GETREG(regs, RSI, from, rsi); - err |= GETREG(regs, RBP, from, rbp); - err |= GETREG(regs, RBX, from, rbx); - err |= GETREG(regs, RDX, from, rdx); - err |= GETREG(regs, RAX, from, rax); - err |= GETREG(regs, RCX, from, rcx); - err |= GETREG(regs, RSP, from, rsp); - err |= GETREG(regs, RIP, from, rip); - err |= GETREG(regs, EFLAGS, from, eflags); + err |= GETREG(regs, RDI, from, di); + err |= GETREG(regs, RSI, from, si); + err |= GETREG(regs, RBP, from, bp); + err |= GETREG(regs, RBX, from, bx); + err |= GETREG(regs, RDX, from, dx); + err |= GETREG(regs, RAX, from, ax); + err |= GETREG(regs, RCX, from, cx); + err |= GETREG(regs, RSP, from, sp); + err |= GETREG(regs, RIP, from, ip); + err |= GETREG(regs, EFLAGS, from, flags); err |= GETREG(regs, CS, from, cs); if (err) return 1; @@ -108,19 +108,19 @@ static int copy_sc_to_user(struct sigcon __put_user((regs)->regs.gp[(regno) / sizeof(unsigned long)], \ &(sc)->regname) - err |= PUTREG(regs, RDI, to, rdi); - err |= PUTREG(regs, RSI, to, rsi); - err |= PUTREG(regs, RBP, to, rbp); + err |= PUTREG(regs, RDI, to, di); + err |= PUTREG(regs, RSI, to, si); + err |= PUTREG(regs, RBP, to, bp); /* * Must use orignal RSP, which is passed in, rather than what's in * the pt_regs, because that's already been updated to point at the * signal frame. 
*/ - err |= __put_user(sp, &to->rsp); - err |= PUTREG(regs, RBX, to, rbx); - err |= PUTREG(regs, RDX, to, rdx); - err |= PUTREG(regs, RCX, to, rcx); - err |= PUTREG(regs, RAX, to, rax); + err |= __put_user(sp, &to->sp); + err |= PUTREG(regs, RBX, to, bx); + err |= PUTREG(regs, RDX, to, dx); + err |= PUTREG(regs, RCX, to, cx); + err |= PUTREG(regs, RAX, to, ax); err |= PUTREG(regs, R8, to, r8); err |= PUTREG(regs, R9, to, r9); err |= PUTREG(regs, R10, to, r10); @@ -135,8 +135,8 @@ static int copy_sc_to_user(struct sigcon err |= __put_user(fi->error_code, &to->err); err |= __put_user(fi->trap_no, &to->trapno); - err |= PUTREG(regs, RIP, to, rip); - err |= PUTREG(regs, EFLAGS, to, eflags); + err |= PUTREG(regs, RIP, to, ip); + err |= PUTREG(regs, EFLAGS, to, flags); #undef PUTREG err |= __put_user(mask, &to->oldmask); diff -puN arch/x86/Kconfig~git-x86 arch/x86/Kconfig --- a/arch/x86/Kconfig~git-x86 +++ a/arch/x86/Kconfig @@ -17,81 +17,66 @@ config X86_64 ### Arch settings config X86 - bool - default y + def_bool y config GENERIC_TIME - bool - default y + def_bool y config GENERIC_CMOS_UPDATE - bool - default y + def_bool y config CLOCKSOURCE_WATCHDOG - bool - default y + def_bool y config GENERIC_CLOCKEVENTS - bool - default y + def_bool y config GENERIC_CLOCKEVENTS_BROADCAST - bool - default y + def_bool y depends on X86_64 || (X86_32 && X86_LOCAL_APIC) config LOCKDEP_SUPPORT - bool - default y + def_bool y config STACKTRACE_SUPPORT - bool - default y + def_bool y config SEMAPHORE_SLEEPERS - bool - default y + def_bool y config MMU - bool - default y + def_bool y config ZONE_DMA - bool - default y + def_bool y config QUICKLIST - bool - default X86_32 + def_bool X86_32 config SBUS bool config GENERIC_ISA_DMA - bool - default y + def_bool y config GENERIC_IOMAP - bool - default y + def_bool y config GENERIC_BUG - bool - default y + def_bool y depends on BUG +config GENERIC_GPIO + def_bool n + config GENERIC_HWEIGHT - bool - default y + def_bool y config ARCH_MAY_HAVE_PC_FDC - bool - default y + def_bool y config DMI - bool - default y + def_bool y config RWSEM_GENERIC_SPINLOCK def_bool !X86_XADD @@ -298,6 +283,17 @@ config X86_ES7000 Only choose this option if you have such a system, otherwise you should say N here. +config X86_RDC321X + bool "RDC R-321x SoC" + select M486 + select X86_REBOOTFIXUPS + select GENERIC_GPIO + select LEDS_GPIO + help + This option is needed for RDC R-321x system-on-chip, also known + as R-8610-(G). + If you don't have one of these chips, you should say N here. + config X86_VSMP bool "Support for ScaleMP vSMP" depends on X86_64 && PCI @@ -309,8 +305,8 @@ config X86_VSMP endchoice config SCHED_NO_NO_OMIT_FRAME_POINTER - bool "Single-depth WCHAN output" - default y + def_bool y + prompt "Single-depth WCHAN output" depends on X86_32 help Calculate simpler /proc//wchan values. 
If this option @@ -357,37 +353,31 @@ source "arch/x86/lguest/Kconfig" endif config ACPI_SRAT - bool - default y + def_bool y depends on X86_32 && ACPI && NUMA && (X86_SUMMIT || X86_GENERICARCH) select ACPI_NUMA config HAVE_ARCH_PARSE_SRAT - bool - default y - depends on ACPI_SRAT + def_bool y + depends on ACPI_SRAT config X86_SUMMIT_NUMA - bool - default y + def_bool y depends on X86_32 && NUMA && (X86_SUMMIT || X86_GENERICARCH) config X86_CYCLONE_TIMER - bool - default y + def_bool y depends on X86_32 && X86_SUMMIT || X86_GENERICARCH config ES7000_CLUSTERED_APIC - bool - default y + def_bool y depends on SMP && X86_ES7000 && MPENTIUMIII source "arch/x86/Kconfig.cpu" config HPET_TIMER - bool + def_bool X86_64 prompt "HPET Timer Support" if X86_32 - default X86_64 help Use the IA-PC HPET (High Precision Event Timer) to manage time in preference to the PIT and RTC, if a HPET is @@ -405,9 +395,8 @@ config HPET_TIMER Choose N to continue using the legacy 8254 timer. config HPET_EMULATE_RTC - bool + def_bool y depends on HPET_TIMER && RTC=y - default y # Mark as embedded because too many people got it wrong. # The code disables itself when not needed. @@ -447,8 +436,8 @@ config CALGARY_IOMMU If unsure, say Y. config CALGARY_IOMMU_ENABLED_BY_DEFAULT - bool "Should Calgary be enabled by default?" - default y + def_bool y + prompt "Should Calgary be enabled by default?" depends on CALGARY_IOMMU help Should Calgary be enabled by default? if you choose 'y', Calgary @@ -496,9 +485,9 @@ config SCHED_SMT N here. config SCHED_MC - bool "Multi-core scheduler support" + def_bool y + prompt "Multi-core scheduler support" depends on (X86_64 && SMP) || (X86_32 && X86_HT) - default y help Multi-core scheduler support improves the CPU scheduler's decision making when dealing with multi-core CPU chips at a cost of slightly @@ -532,19 +521,16 @@ config X86_UP_IOAPIC an IO-APIC, then the kernel will still run with no slowdown at all. config X86_LOCAL_APIC - bool + def_bool y depends on X86_64 || (X86_32 && (X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER) || X86_GENERICARCH)) - default y config X86_IO_APIC - bool + def_bool y depends on X86_64 || (X86_32 && (X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER)) || X86_GENERICARCH)) - default y config X86_VISWS_APIC - bool + def_bool y depends on X86_32 && X86_VISWS - default y config X86_MCE bool "Machine Check Exception" @@ -564,17 +550,17 @@ config X86_MCE the 386 and 486, so nearly everyone can say Y here. config X86_MCE_INTEL - bool "Intel MCE features" + def_bool y + prompt "Intel MCE features" depends on X86_64 && X86_MCE && X86_LOCAL_APIC - default y help Additional support for intel specific MCE features such as the thermal monitor. config X86_MCE_AMD - bool "AMD MCE features" + def_bool y + prompt "AMD MCE features" depends on X86_64 && X86_MCE && X86_LOCAL_APIC - default y help Additional support for AMD specific MCE features such as the DRAM Error Threshold. @@ -647,9 +633,9 @@ config I8K Say N otherwise. config X86_REBOOTFIXUPS - bool "Enable X86 board specific fixups for reboot" + def_bool n + prompt "Enable X86 board specific fixups for reboot" depends on X86_32 && X86 - default n ---help--- This enables chipset and/or board specific fixups to be done in order to get reboot to work correctly. This is only needed on @@ -658,7 +644,7 @@ config X86_REBOOTFIXUPS system. Currently, the only fixup is for the Geode machines using - CS5530A and CS5536 chipsets. + CS5530A and CS5536 chipsets and the RDC R-321x SoC. Say Y if you want to enable the fixup. 
Currently, it's safe to enable this option even if you don't need it. @@ -682,9 +668,8 @@ config MICROCODE module will be called microcode. config MICROCODE_OLD_INTERFACE - bool + def_bool y depends on MICROCODE - default y config X86_MSR tristate "/dev/cpu/*/msr - Model-specific register support" @@ -808,13 +793,12 @@ config PAGE_OFFSET depends on X86_32 config HIGHMEM - bool + def_bool y depends on X86_32 && (HIGHMEM64G || HIGHMEM4G) - default y config X86_PAE - bool "PAE (Physical Address Extension) Support" - default n + def_bool n + prompt "PAE (Physical Address Extension) Support" depends on X86_32 && !HIGHMEM4G select RESOURCES_64BIT help @@ -846,10 +830,10 @@ comment "NUMA (Summit) requires SMP, 64G depends on X86_32 && X86_SUMMIT && (!HIGHMEM64G || !ACPI) config K8_NUMA - bool "Old style AMD Opteron NUMA detection" - depends on X86_64 && NUMA && PCI - default y - help + def_bool y + prompt "Old style AMD Opteron NUMA detection" + depends on X86_64 && NUMA && PCI + help Enable K8 NUMA node topology detection. You should say Y here if you have a multi processor AMD K8 system. This uses an old method to read the NUMA configuration directly from the builtin @@ -857,13 +841,21 @@ config K8_NUMA instead, which also takes priority if both are compiled in. config X86_64_ACPI_NUMA - bool "ACPI NUMA detection" + def_bool y + prompt "ACPI NUMA detection" depends on X86_64 && NUMA && ACPI && PCI select ACPI_NUMA - default y help Enable ACPI SRAT based node topology detection. +config THREAD_ORDER + int "Kernel stack size (in page order)" + range 1 3 + default "1" + depends on X86_64 && NUMA + help + Page order for the thread stack. + config NUMA_EMU bool "NUMA emulation" depends on X86_64 && NUMA @@ -880,46 +872,46 @@ config NODES_SHIFT depends on NEED_MULTIPLE_NODES config HAVE_ARCH_BOOTMEM_NODE - bool + def_bool y depends on X86_32 && NUMA - default y config ARCH_HAVE_MEMORY_PRESENT - bool + def_bool y depends on X86_32 && DISCONTIGMEM - default y config NEED_NODE_MEMMAP_SIZE - bool + def_bool y depends on X86_32 && (DISCONTIGMEM || SPARSEMEM) - default y config HAVE_ARCH_ALLOC_REMAP - bool + def_bool y depends on X86_32 && NUMA - default y config ARCH_FLATMEM_ENABLE def_bool y - depends on (X86_32 && ARCH_SELECT_MEMORY_MODEL && X86_PC) || (X86_64 && !NUMA) + depends on X86_32 && ARCH_SELECT_MEMORY_MODEL && X86_PC config ARCH_DISCONTIGMEM_ENABLE def_bool y - depends on NUMA + depends on NUMA && X86_32 config ARCH_DISCONTIGMEM_DEFAULT def_bool y - depends on NUMA + depends on NUMA && X86_32 + +config ARCH_SPARSEMEM_DEFAULT + def_bool y + depends on X86_64 config ARCH_SPARSEMEM_ENABLE def_bool y - depends on NUMA || (EXPERIMENTAL && (X86_PC || X86_64)) + depends on X86_64 || NUMA || (EXPERIMENTAL && X86_PC) select SPARSEMEM_STATIC if X86_32 select SPARSEMEM_VMEMMAP_ENABLE if X86_64 config ARCH_SELECT_MEMORY_MODEL def_bool y - depends on X86_32 && ARCH_SPARSEMEM_ENABLE + depends on ARCH_SPARSEMEM_ENABLE config ARCH_MEMORY_PROBE def_bool X86_64 @@ -997,9 +989,9 @@ config MTRR See for more information. config EFI - bool "Boot from EFI support" + def_bool n + prompt "Boot from EFI support" depends on X86_32 && ACPI - default n ---help--- This enables the kernel to boot on EFI platforms using system configuration information passed to it from the firmware. @@ -1015,9 +1007,9 @@ config EFI kernel should continue to boot on existing non-EFI platforms. 
config IRQBALANCE - bool "Enable kernel irq balancing" + def_bool y + prompt "Enable kernel irq balancing" depends on X86_32 && SMP && X86_IO_APIC - default y help The default yes will allow the kernel to do irq load balancing. Saying no will keep the kernel from doing irq load balancing. @@ -1025,14 +1017,13 @@ config IRQBALANCE # turning this on wastes a bunch of space. # Summit needs it only when NUMA is on config BOOT_IOREMAP - bool + def_bool y depends on X86_32 && (((X86_SUMMIT || X86_GENERICARCH) && NUMA) || (X86 && EFI)) - default y config SECCOMP - bool "Enable seccomp to safely compute untrusted bytecode" + def_bool y + prompt "Enable seccomp to safely compute untrusted bytecode" depends on PROC_FS - default y help This kernel feature is useful for number crunching applications that may need to compute untrusted bytecode during their @@ -1199,11 +1190,11 @@ config HOTPLUG_CPU suspend. config COMPAT_VDSO - bool "Compat VDSO support" - default y - depends on X86_32 + def_bool y + prompt "Compat VDSO support" + depends on X86_32 || IA32_EMULATION help - Map the VDSO to the predictable old-style address too. + Map the 32-bit VDSO to the predictable old-style address too. ---help--- Say N here if you are running a sufficiently recent glibc version (2.3.3 or later), to remove the high-mapped @@ -1217,25 +1208,16 @@ config ARCH_ENABLE_MEMORY_HOTPLUG def_bool y depends on X86_64 || (X86_32 && HIGHMEM) -config MEMORY_HOTPLUG_RESERVE - def_bool X86_64 - depends on (MEMORY_HOTPLUG && DISCONTIGMEM) - config HAVE_ARCH_EARLY_PFN_TO_NID def_bool X86_64 depends on NUMA -config OUT_OF_LINE_PFN_TO_PAGE - def_bool X86_64 - depends on DISCONTIGMEM - menu "Power management options" depends on !X86_VOYAGER config ARCH_HIBERNATION_HEADER - bool + def_bool y depends on X86_64 && HIBERNATION - default y source "kernel/power/Kconfig" @@ -1428,25 +1410,21 @@ config PCI_GOANY endchoice config PCI_BIOS - bool + def_bool y depends on X86_32 && !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY) - default y # x86-64 doesn't support PCI BIOS access from long mode so always go direct. config PCI_DIRECT - bool + def_bool y depends on PCI && (X86_64 || (PCI_GODIRECT || PCI_GOANY) || X86_VISWS) - default y config PCI_MMCONFIG - bool + def_bool y depends on X86_32 && PCI && ACPI && (PCI_GOMMCONFIG || PCI_GOANY) - default y config PCI_DOMAINS - bool + def_bool y depends on PCI - default y config PCI_MMCONFIG bool "Support mmconfig PCI config space access" @@ -1463,9 +1441,9 @@ config DMAR remapping devices. config DMAR_GFX_WA - bool "Support for Graphics workaround" + def_bool y + prompt "Support for Graphics workaround" depends on DMAR - default y help Current Graphics drivers tend to use physical address for DMA and avoid using DMA APIs. Setting this config @@ -1474,9 +1452,8 @@ config DMAR_GFX_WA to use physical addresses for DMA. config DMAR_FLOPPY_WA - bool + def_bool y depends on DMAR - default y help Floppy disk drivers are know to bypass DMA API calls thereby failing to work when IOMMU is enabled. This @@ -1489,8 +1466,7 @@ source "drivers/pci/Kconfig" # x86_64 have no ISA slots, but do have ISA-style DMA. config ISA_DMA_API - bool - default y + def_bool y if X86_32 @@ -1556,9 +1532,9 @@ config SCx200HR_TIMER other workaround is idle=poll boot option. 
config GEODE_MFGPT_TIMER - bool "Geode Multi-Function General Purpose Timer (MFGPT) events" + def_bool y + prompt "Geode Multi-Function General Purpose Timer (MFGPT) events" depends on MGEODE_LX && GENERIC_TIME && GENERIC_CLOCKEVENTS - default y help This driver provides a clock event source based on the MFGPT timer(s) in the CS5535 and CS5536 companion chip for the geode. @@ -1597,18 +1573,16 @@ config IA32_AOUT Support old a.out binaries in the 32bit emulation. config COMPAT - bool + def_bool y depends on IA32_EMULATION - default y config COMPAT_FOR_U64_ALIGNMENT def_bool COMPAT depends on X86_64 config SYSVIPC_COMPAT - bool + def_bool y depends on X86_64 && COMPAT && SYSVIPC - default y endmenu diff -puN arch/x86/Kconfig.cpu~git-x86 arch/x86/Kconfig.cpu --- a/arch/x86/Kconfig.cpu~git-x86 +++ a/arch/x86/Kconfig.cpu @@ -219,10 +219,10 @@ config MGEODEGX1 Select this for a Geode GX1 (Cyrix MediaGX) chip. config MGEODE_LX - bool "Geode GX/LX" + bool "Geode GX/LX" depends on X86_32 - help - Select this for AMD Geode GX and LX processors. + help + Select this for AMD Geode GX and LX processors. config MCYRIXIII bool "CyrixIII/VIA-C3" @@ -258,7 +258,7 @@ config MPSC Optimize for Intel Pentium 4, Pentium D and older Nocona/Dempsey Xeon CPUs with Intel 64bit which is compatible with x86-64. Note that the latest Xeons (Xeon 51xx and 53xx) are not based on the - Netburst core and shouldn't use this option. You can distinguish them + Netburst core and shouldn't use this option. You can distinguish them using the cpu family field in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one. @@ -317,81 +317,66 @@ config X86_L1_CACHE_SHIFT default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7 config X86_XADD - bool + def_bool y depends on X86_32 && !M386 - default y config X86_PPRO_FENCE - bool + def_bool y depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 || MGEODEGX1 - default y config X86_F00F_BUG - bool + def_bool y depends on M586MMX || M586TSC || M586 || M486 || M386 - default y config X86_WP_WORKS_OK - bool + def_bool y depends on X86_32 && !M386 - default y config X86_INVLPG - bool + def_bool y depends on X86_32 && !M386 - default y config X86_BSWAP - bool + def_bool y depends on X86_32 && !M386 - default y config X86_POPAD_OK - bool + def_bool y depends on X86_32 && !M386 - default y config X86_ALIGNMENT_16 - bool + def_bool y depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 - default y config X86_GOOD_APIC - bool + def_bool y depends on MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 || MEFFICEON || MCORE2 || MVIAC7 || X86_64 - default y config X86_INTEL_USERCOPY - bool + def_bool y depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 - default y config X86_USE_PPRO_CHECKSUM - bool + def_bool y depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX || MCORE2 - default y config X86_USE_3DNOW - bool + def_bool y depends on (MCYRIXIII || MK7 || MGEODE_LX) && !UML - default y config X86_OOSTORE - bool + def_bool y depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6) && MTRR - default y config X86_TSC - bool + def_bool y depends on ((MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII 
|| MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ) || X86_64 - default y # this should be set for all -march=.. options where the compiler # generates cmov. config X86_CMOV - bool + def_bool y depends on (MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7) - default y config X86_MINIMUM_CPU_FAMILY int @@ -399,3 +384,6 @@ config X86_MINIMUM_CPU_FAMILY default "4" if X86_32 && (X86_XADD || X86_CMPXCHG || X86_BSWAP || X86_WP_WORKS_OK) default "3" +config X86_DEBUGCTLMSR + def_bool y + depends on !(M586MMX || M586TSC || M586 || M486 || M386) diff -puN arch/x86/Makefile_32~git-x86 arch/x86/Makefile_32 --- a/arch/x86/Makefile_32~git-x86 +++ a/arch/x86/Makefile_32 @@ -48,10 +48,6 @@ include $(srctree)/arch/x86/Makefile_32. # temporary until string.h is fixed cflags-y += -ffreestanding -# this works around some issues with generating unwind tables in older gccs -# newer gccs do it by default -cflags-y += -maccumulate-outgoing-args - # Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use # a lot more stack due to the lack of sharing of stacklots: KBUILD_CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then echo $(call cc-option,-fno-unit-at-a-time); fi ;) @@ -85,6 +81,11 @@ mcore-$(CONFIG_X86_NUMAQ) := arch/x86/ma mflags-$(CONFIG_X86_BIGSMP) := -Iinclude/asm-x86/mach-bigsmp mcore-$(CONFIG_X86_BIGSMP) := arch/x86/mach-default +# RDC R-321x subarch support +mflags-$(CONFIG_X86_RDC321X) := -Iinclude/asm-x86/mach-rdc321x +mcore-$(CONFIG_X86_RDC321X) := arch/x86/mach-default +core-$(CONFIG_X86_RDC321X) += arch/x86/mach-rdc321x/ + #Summit subarch support mflags-$(CONFIG_X86_SUMMIT) := -Iinclude/asm-x86/mach-summit mcore-$(CONFIG_X86_SUMMIT) := arch/x86/mach-default @@ -114,7 +115,8 @@ libs-y += arch/x86/lib/ core-y += arch/x86/kernel/ \ arch/x86/mm/ \ $(mcore-y)/ \ - arch/x86/crypto/ + arch/x86/crypto/ \ + arch/x86/vdso/ drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/ drivers-$(CONFIG_PCI) += arch/x86/pci/ # must be linked after kernel/ @@ -152,9 +154,13 @@ zdisk bzdisk: vmlinux fdimage fdimage144 fdimage288 isoimage: vmlinux $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) $@ -install: +install: vdso_install $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) install +PHONY += vdso_install +vdso_install: + $(Q)$(MAKE) $(build)=arch/x86/vdso $@ + archclean: $(Q)rm -rf $(objtree)/arch/i386/boot $(Q)$(MAKE) $(clean)=arch/x86/boot diff -puN arch/x86/Makefile_64~git-x86 arch/x86/Makefile_64 --- a/arch/x86/Makefile_64~git-x86 +++ a/arch/x86/Makefile_64 @@ -117,9 +117,6 @@ install: vdso_install $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) $@ vdso_install: -ifeq ($(CONFIG_IA32_EMULATION),y) - $(Q)$(MAKE) $(build)=arch/x86/ia32 $@ -endif $(Q)$(MAKE) $(build)=arch/x86/vdso $@ archclean: diff -puN arch/x86/boot/compressed/relocs.c~git-x86 arch/x86/boot/compressed/relocs.c --- a/arch/x86/boot/compressed/relocs.c~git-x86 +++ a/arch/x86/boot/compressed/relocs.c @@ -27,11 +27,6 @@ static unsigned long *relocs; * absolute relocations present w.r.t these symbols. 
*/ static const char* safe_abs_relocs[] = { - "__kernel_vsyscall", - "__kernel_rt_sigreturn", - "__kernel_sigreturn", - "SYSENTER_RETURN", - "VDSO_NOTE_MASK", "xen_irq_disable_direct_reloc", "xen_save_fl_direct_reloc", }; @@ -45,6 +40,8 @@ static int is_safe_abs_reloc(const char* /* Match found */ return 1; } + if (strncmp(sym_name, "VDSO", 4) == 0) + return 1; if (strncmp(sym_name, "__crc_", 6) == 0) return 1; return 0; diff -puN arch/x86/configs/x86_64_defconfig~git-x86 arch/x86/configs/x86_64_defconfig --- a/arch/x86/configs/x86_64_defconfig~git-x86 +++ a/arch/x86/configs/x86_64_defconfig @@ -145,15 +145,6 @@ CONFIG_K8_NUMA=y CONFIG_NODES_SHIFT=6 CONFIG_X86_64_ACPI_NUMA=y CONFIG_NUMA_EMU=y -CONFIG_ARCH_DISCONTIGMEM_ENABLE=y -CONFIG_ARCH_DISCONTIGMEM_DEFAULT=y -CONFIG_ARCH_SPARSEMEM_ENABLE=y -CONFIG_SELECT_MEMORY_MODEL=y -# CONFIG_FLATMEM_MANUAL is not set -CONFIG_DISCONTIGMEM_MANUAL=y -# CONFIG_SPARSEMEM_MANUAL is not set -CONFIG_DISCONTIGMEM=y -CONFIG_FLAT_NODE_MEM_MAP=y CONFIG_NEED_MULTIPLE_NODES=y # CONFIG_SPARSEMEM_STATIC is not set CONFIG_SPLIT_PTLOCK_CPUS=4 diff -puN arch/x86/crypto/Makefile~git-x86 arch/x86/crypto/Makefile --- a/arch/x86/crypto/Makefile~git-x86 +++ a/arch/x86/crypto/Makefile @@ -9,7 +9,7 @@ obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o aes-i586-y := aes-i586-asm_32.o aes_32.o -twofish-i586-y := twofish-i586-asm_32.o twofish_32.o +twofish-i586-y := twofish-i586-asm_32.o twofish.o aes-x86_64-y := aes-x86_64-asm_64.o aes_64.o -twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_64.o +twofish-x86_64-y := twofish-x86_64-asm_64.o twofish.o diff -puN /dev/null arch/x86/crypto/twofish.c --- /dev/null +++ a/arch/x86/crypto/twofish.c @@ -0,0 +1,101 @@ +/* + * Glue Code for optimized x86 assembler version of TWOFISH + * + * Originally Twofish for GPG + * By Matthew Skala , July 26, 1998 + * 256-bit key length added March 20, 1999 + * Some modifications to reduce the text size by Werner Koch, April, 1998 + * Ported to the kerneli patch by Marc Mutz + * Ported to CryptoAPI by Colin Slater + * + * The original author has disclaimed all copyright interest in this + * code and thus put it in the public domain. The subsequent authors + * have put this under the GNU General Public License. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + * This code is a "clean room" implementation, written from the paper + * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey, + * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available + * through http://www.counterpane.com/twofish.html + * + * For background information on multiplication in finite fields, used for + * the matrix operations in the key schedule, see the book _Contemporary + * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the + * Third Edition. 
+ */ + +#include +#include +#include +#include +#include + + +asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); +asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); + +static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + twofish_enc_blk(tfm, dst, src); +} + +static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + twofish_dec_blk(tfm, dst, src); +} + +static struct crypto_alg alg = { + .cra_name = "twofish", +#ifdef CONFIG_X86_32 + .cra_driver_name = "twofish-i586", +#else + .cra_driver_name = "twofish-x86_64", +#endif + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = TF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct twofish_ctx), + .cra_alignmask = 3, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(alg.cra_list), + .cra_u = { + .cipher = { + .cia_min_keysize = TF_MIN_KEY_SIZE, + .cia_max_keysize = TF_MAX_KEY_SIZE, + .cia_setkey = twofish_setkey, + .cia_encrypt = twofish_encrypt, + .cia_decrypt = twofish_decrypt + } + } +}; + +static int __init init(void) +{ + return crypto_register_alg(&alg); +} + +static void __exit fini(void) +{ + crypto_unregister_alg(&alg); +} + +module_init(init); +module_exit(fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Twofish Cipher Algorithm, x86 asm optimized"); +MODULE_ALIAS("twofish"); diff -puN arch/x86/crypto/twofish_32.c~git-x86 /dev/null --- a/arch/x86/crypto/twofish_32.c +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Glue Code for optimized 586 assembler version of TWOFISH - * - * Originally Twofish for GPG - * By Matthew Skala , July 26, 1998 - * 256-bit key length added March 20, 1999 - * Some modifications to reduce the text size by Werner Koch, April, 1998 - * Ported to the kerneli patch by Marc Mutz - * Ported to CryptoAPI by Colin Slater - * - * The original author has disclaimed all copyright interest in this - * code and thus put it in the public domain. The subsequent authors - * have put this under the GNU General Public License. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 - * USA - * - * This code is a "clean room" implementation, written from the paper - * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey, - * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available - * through http://www.counterpane.com/twofish.html - * - * For background information on multiplication in finite fields, used for - * the matrix operations in the key schedule, see the book _Contemporary - * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the - * Third Edition. 
- */ - -#include -#include -#include -#include -#include - - -asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); -asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); - -static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - twofish_enc_blk(tfm, dst, src); -} - -static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - twofish_dec_blk(tfm, dst, src); -} - -static struct crypto_alg alg = { - .cra_name = "twofish", - .cra_driver_name = "twofish-i586", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_CIPHER, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 3, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(alg.cra_list), - .cra_u = { - .cipher = { - .cia_min_keysize = TF_MIN_KEY_SIZE, - .cia_max_keysize = TF_MAX_KEY_SIZE, - .cia_setkey = twofish_setkey, - .cia_encrypt = twofish_encrypt, - .cia_decrypt = twofish_decrypt - } - } -}; - -static int __init init(void) -{ - return crypto_register_alg(&alg); -} - -static void __exit fini(void) -{ - crypto_unregister_alg(&alg); -} - -module_init(init); -module_exit(fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION ("Twofish Cipher Algorithm, i586 asm optimized"); -MODULE_ALIAS("twofish"); diff -puN arch/x86/crypto/twofish_64.c~git-x86 /dev/null --- a/arch/x86/crypto/twofish_64.c +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Glue Code for optimized x86_64 assembler version of TWOFISH - * - * Originally Twofish for GPG - * By Matthew Skala , July 26, 1998 - * 256-bit key length added March 20, 1999 - * Some modifications to reduce the text size by Werner Koch, April, 1998 - * Ported to the kerneli patch by Marc Mutz - * Ported to CryptoAPI by Colin Slater - * - * The original author has disclaimed all copyright interest in this - * code and thus put it in the public domain. The subsequent authors - * have put this under the GNU General Public License. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 - * USA - * - * This code is a "clean room" implementation, written from the paper - * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey, - * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available - * through http://www.counterpane.com/twofish.html - * - * For background information on multiplication in finite fields, used for - * the matrix operations in the key schedule, see the book _Contemporary - * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the - * Third Edition. 
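/*
 * twofish_32.c and twofish_64.c essentially differ only in the driver
 * name string (and the module description); the new
 * arch/x86/crypto/twofish.c keeps a single copy of the glue code and
 * selects the name at compile time. The essential pattern, in
 * isolation (TWOFISH_DRIVER_NAME is an illustrative macro; the patch
 * open-codes the #ifdef inside the crypto_alg initializer):
 */
#ifdef CONFIG_X86_32
# define TWOFISH_DRIVER_NAME	"twofish-i586"
#else
# define TWOFISH_DRIVER_NAME	"twofish-x86_64"
#endif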
- */ - -#include -#include -#include -#include -#include -#include - -asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); -asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); - -static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - twofish_enc_blk(tfm, dst, src); -} - -static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - twofish_dec_blk(tfm, dst, src); -} - -static struct crypto_alg alg = { - .cra_name = "twofish", - .cra_driver_name = "twofish-x86_64", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_CIPHER, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 3, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(alg.cra_list), - .cra_u = { - .cipher = { - .cia_min_keysize = TF_MIN_KEY_SIZE, - .cia_max_keysize = TF_MAX_KEY_SIZE, - .cia_setkey = twofish_setkey, - .cia_encrypt = twofish_encrypt, - .cia_decrypt = twofish_decrypt - } - } -}; - -static int __init init(void) -{ - return crypto_register_alg(&alg); -} - -static void __exit fini(void) -{ - crypto_unregister_alg(&alg); -} - -module_init(init); -module_exit(fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION ("Twofish Cipher Algorithm, x86_64 asm optimized"); -MODULE_ALIAS("twofish"); diff -puN arch/x86/ia32/Makefile~git-x86 arch/x86/ia32/Makefile --- a/arch/x86/ia32/Makefile~git-x86 +++ a/arch/x86/ia32/Makefile @@ -2,9 +2,8 @@ # Makefile for the ia32 kernel emulation subsystem. # -obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o tls32.o \ - ia32_binfmt.o fpu32.o ptrace32.o syscall32.o syscall32_syscall.o \ - mmap32.o +obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o \ + ia32_binfmt.o fpu32.o sysv-$(CONFIG_SYSVIPC) := ipc32.o obj-$(CONFIG_IA32_EMULATION) += $(sysv-y) @@ -13,40 +12,3 @@ obj-$(CONFIG_IA32_AOUT) += ia32_aout.o audit-class-$(CONFIG_AUDIT) := audit.o obj-$(CONFIG_IA32_EMULATION) += $(audit-class-y) - -$(obj)/syscall32_syscall.o: \ - $(foreach F,sysenter syscall,$(obj)/vsyscall-$F.so) - -# Teach kbuild about targets -targets := $(foreach F,$(addprefix vsyscall-,sysenter syscall),\ - $F.o $F.so $F.so.dbg) - -# The DSO images are built using a special linker script -quiet_cmd_syscall = SYSCALL $@ - cmd_syscall = $(CC) -m32 -nostdlib -shared \ - $(call ld-option, -Wl$(comma)--hash-style=sysv) \ - -Wl,-soname=linux-gate.so.1 -o $@ \ - -Wl,-T,$(filter-out FORCE,$^) - -$(obj)/%.so: OBJCOPYFLAGS := -S -$(obj)/%.so: $(obj)/%.so.dbg FORCE - $(call if_changed,objcopy) - -$(obj)/vsyscall-sysenter.so.dbg $(obj)/vsyscall-syscall.so.dbg: \ -$(obj)/vsyscall-%.so.dbg: $(src)/vsyscall.lds $(obj)/vsyscall-%.o FORCE - $(call if_changed,syscall) - -AFLAGS_vsyscall-sysenter.o = -m32 -Wa,-32 -AFLAGS_vsyscall-syscall.o = -m32 -Wa,-32 - -vdsos := vdso32-sysenter.so vdso32-syscall.so - -quiet_cmd_vdso_install = INSTALL $@ - cmd_vdso_install = cp $(@:vdso32-%.so=$(obj)/vsyscall-%.so.dbg) \ - $(MODLIB)/vdso/$@ - -$(vdsos): - @mkdir -p $(MODLIB)/vdso - $(call cmd,vdso_install) - -vdso_install: $(vdsos) diff -puN arch/x86/ia32/fpu32.c~git-x86 arch/x86/ia32/fpu32.c --- a/arch/x86/ia32/fpu32.c~git-x86 +++ a/arch/x86/ia32/fpu32.c @@ -1,8 +1,8 @@ -/* +/* * Copyright 2002 Andi Kleen, SuSE Labs. * FXSAVE<->i387 conversion support. Based on code by Gareth Hughes. * This is used for ptrace, signals and coredumps in 32bit emulation. 
- */ + */ #include #include @@ -13,96 +13,97 @@ static inline unsigned short twd_i387_to_fxsr(unsigned short twd) { unsigned int tmp; /* to avoid 16 bit prefixes in the code */ - + /* Transform each pair of bits into 01 (valid) or 00 (empty) */ - tmp = ~twd; - tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ - /* and move the valid bits to the lower byte. */ - tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ - tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ - tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ - return tmp; + tmp = ~twd; + tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ + /* and move the valid bits to the lower byte. */ + tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ + tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ + tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ + return tmp; } +#define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16); +#define FP_EXP_TAG_VALID 0 +#define FP_EXP_TAG_ZERO 1 +#define FP_EXP_TAG_SPECIAL 2 +#define FP_EXP_TAG_EMPTY 3 + static inline unsigned long twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave) { - struct _fpxreg *st = NULL; + struct _fpxreg *st; unsigned long tos = (fxsave->swd >> 11) & 7; unsigned long twd = (unsigned long) fxsave->twd; unsigned long tag; unsigned long ret = 0xffff0000; int i; -#define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16); - - for (i = 0 ; i < 8 ; i++) { + for (i = 0; i < 8; i++, twd >>= 1) { if (twd & 0x1) { - st = FPREG_ADDR( fxsave, (i - tos) & 7 ); + st = FPREG_ADDR(fxsave, (i - tos) & 7); switch (st->exponent & 0x7fff) { case 0x7fff: - tag = 2; /* Special */ + tag = FP_EXP_TAG_SPECIAL; break; case 0x0000: - if ( !st->significand[0] && - !st->significand[1] && - !st->significand[2] && - !st->significand[3] ) { - tag = 1; /* Zero */ - } else { - tag = 2; /* Special */ - } + if (!st->significand[0] && + !st->significand[1] && + !st->significand[2] && + !st->significand[3]) + tag = FP_EXP_TAG_ZERO; + else + tag = FP_EXP_TAG_SPECIAL; break; default: - if (st->significand[3] & 0x8000) { - tag = 0; /* Valid */ - } else { - tag = 2; /* Special */ - } + if (st->significand[3] & 0x8000) + tag = FP_EXP_TAG_VALID; + else + tag = FP_EXP_TAG_SPECIAL; break; } } else { - tag = 3; /* Empty */ + tag = FP_EXP_TAG_EMPTY; } - ret |= (tag << (2 * i)); - twd = twd >> 1; + ret |= tag << (2 * i); } return ret; } +#define G(num, val) err |= __get_user(val, num + (u32 __user *)buf) static inline int convert_fxsr_from_user(struct i387_fxsave_struct *fxsave, struct _fpstate_ia32 __user *buf) { struct _fpxreg *to; struct _fpreg __user *from; - int i; + int i, err = 0; u32 v; - int err = 0; -#define G(num,val) err |= __get_user(val, num + (u32 __user *)buf) G(0, fxsave->cwd); G(1, fxsave->swd); G(2, fxsave->twd); fxsave->twd = twd_i387_to_fxsr(fxsave->twd); G(3, fxsave->rip); G(4, v); - fxsave->fop = v>>16; /* cs ignored */ + /* cs ignored */ + fxsave->fop = v>>16; G(5, fxsave->rdp); /* 6: ds ignored */ -#undef G - if (err) - return -1; + if (err) + return -1; to = (struct _fpxreg *)&fxsave->st_space[0]; from = &buf->_st[0]; - for (i = 0 ; i < 8 ; i++, to++, from++) { + for (i = 0; i < 8; i++, to++, from++) { if (__copy_from_user(to, from, sizeof(*from))) return -1; } return 0; } +#define P(num, val) err |= __put_user(val, num + (u32 __user *)buf) static inline int convert_fxsr_to_user(struct _fpstate_ia32 __user *buf, struct i387_fxsave_struct *fxsave, @@ -111,60 +112,59 @@ static inline int convert_fxsr_to_user(s { struct _fpreg __user *to; struct 
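/*
 * twd_i387_to_fxsr() above compacts the i387 tag word (2 bits per
 * register: 00 valid, 01 zero, 10 special, 11 empty) into the FXSR
 * form (1 bit per register: 1 = in use). ~twd maps "empty" pairs to 00
 * and everything else to a nonzero pair; the or-shift-mask cascade
 * then folds each pair to a single bit and packs the 8 bits into the
 * low byte. A userspace sketch with a worked example (only st(0) in
 * use):
 */
#include <stdio.h>

static unsigned short twd_i387_to_fxsr(unsigned short twd)
{
	unsigned int tmp = ~twd;

	tmp = (tmp | (tmp >> 1)) & 0x5555;	/* 0V0V0V0V0V0V0V0V */
	tmp = (tmp | (tmp >> 1)) & 0x3333;	/* 00VV00VV00VV00VV */
	tmp = (tmp | (tmp >> 2)) & 0x0f0f;	/* 0000VVVV0000VVVV */
	tmp = (tmp | (tmp >> 4)) & 0x00ff;	/* 00000000VVVVVVVV */
	return tmp;
}

int main(void)
{
	/* st(0) tagged valid (00), st(1)..st(7) empty (11): */
	printf("%#x\n", twd_i387_to_fxsr(0xfffc));	/* prints 0x1 */
	return 0;
}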
_fpxreg *from; - int i; - u16 cs,ds; - int err = 0; + int i, err = 0; + u16 cs, ds; if (tsk == current) { - /* should be actually ds/cs at fpu exception time, - but that information is not available in 64bit mode. */ - asm("movw %%ds,%0 " : "=r" (ds)); - asm("movw %%cs,%0 " : "=r" (cs)); - } else { /* ptrace. task has stopped. */ + /* + * should be actually ds/cs at fpu exception time, but + * that information is not available in 64bit mode. + */ + asm("movw %%ds,%0 " : "=r" (ds)); + asm("movw %%cs,%0 " : "=r" (cs)); + } else { + /* ptrace. task has stopped. */ ds = tsk->thread.ds; cs = regs->cs; - } + } -#define P(num,val) err |= __put_user(val, num + (u32 __user *)buf) P(0, (u32)fxsave->cwd | 0xffff0000); P(1, (u32)fxsave->swd | 0xffff0000); P(2, twd_fxsr_to_i387(fxsave)); P(3, (u32)fxsave->rip); - P(4, cs | ((u32)fxsave->fop) << 16); + P(4, cs | ((u32)fxsave->fop) << 16); P(5, fxsave->rdp); P(6, 0xffff0000 | ds); -#undef P - if (err) - return -1; + if (err) + return -1; to = &buf->_st[0]; from = (struct _fpxreg *) &fxsave->st_space[0]; - for ( i = 0 ; i < 8 ; i++, to++, from++ ) { + for (i = 0; i < 8; i++, to++, from++) { if (__copy_to_user(to, from, sizeof(*to))) return -1; } return 0; } -int restore_i387_ia32(struct task_struct *tsk, struct _fpstate_ia32 __user *buf, int fsave) -{ +int restore_i387_ia32(struct task_struct *tsk, + struct _fpstate_ia32 __user *buf, int fsave) +{ clear_fpu(tsk); - if (!fsave) { - if (__copy_from_user(&tsk->thread.i387.fxsave, + if (!fsave) { + if (__copy_from_user(&tsk->thread.i387.fxsave, &buf->_fxsr_env[0], sizeof(struct i387_fxsave_struct))) return -1; tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask; set_stopped_child_used_math(tsk); - } + } return convert_fxsr_from_user(&tsk->thread.i387.fxsave, buf); -} +} -int save_i387_ia32(struct task_struct *tsk, - struct _fpstate_ia32 __user *buf, - struct pt_regs *regs, - int fsave) +int save_i387_ia32(struct task_struct *tsk, struct _fpstate_ia32 __user *buf, + struct pt_regs *regs, int fsave) { int err = 0; @@ -174,8 +174,6 @@ int save_i387_ia32(struct task_struct *t if (fsave) return 0; err |= __put_user(tsk->thread.i387.fxsave.swd, &buf->status); - if (fsave) - return err ? -1 : 1; err |= __put_user(X86_FXSR_MAGIC, &buf->magic); err |= __copy_to_user(&buf->_fxsr_env[0], &tsk->thread.i387.fxsave, sizeof(struct i387_fxsave_struct)); diff -puN arch/x86/ia32/ia32_aout.c~git-x86 arch/x86/ia32/ia32_aout.c --- a/arch/x86/ia32/ia32_aout.c~git-x86 +++ a/arch/x86/ia32/ia32_aout.c @@ -36,61 +36,67 @@ #undef WARN_OLD #undef CORE_DUMP /* probably broken */ -static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs); -static int load_aout_library(struct file*); +static int load_aout_binary(struct linux_binprm *, struct pt_regs *regs); +static int load_aout_library(struct file *); #ifdef CORE_DUMP -static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit); +static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, + unsigned long limit); /* * fill in the user structure for a core dump.. */ -static void dump_thread32(struct pt_regs * regs, struct user32 * dump) +static void dump_thread32(struct pt_regs *regs, struct user32 *dump) { - u32 fs,gs; + u32 fs, gs; /* changed the size calculations - should hopefully work better. 
lbt */ dump->magic = CMAGIC; dump->start_code = 0; - dump->start_stack = regs->rsp & ~(PAGE_SIZE - 1); + dump->start_stack = regs->sp & ~(PAGE_SIZE - 1); dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT; - dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT; + dump->u_dsize = ((unsigned long) + (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT; dump->u_dsize -= dump->u_tsize; dump->u_ssize = 0; - dump->u_debugreg[0] = current->thread.debugreg0; - dump->u_debugreg[1] = current->thread.debugreg1; - dump->u_debugreg[2] = current->thread.debugreg2; - dump->u_debugreg[3] = current->thread.debugreg3; - dump->u_debugreg[4] = 0; - dump->u_debugreg[5] = 0; - dump->u_debugreg[6] = current->thread.debugreg6; - dump->u_debugreg[7] = current->thread.debugreg7; - - if (dump->start_stack < 0xc0000000) - dump->u_ssize = ((unsigned long) (0xc0000000 - dump->start_stack)) >> PAGE_SHIFT; - - dump->regs.ebx = regs->rbx; - dump->regs.ecx = regs->rcx; - dump->regs.edx = regs->rdx; - dump->regs.esi = regs->rsi; - dump->regs.edi = regs->rdi; - dump->regs.ebp = regs->rbp; - dump->regs.eax = regs->rax; + dump->u_debugreg[0] = current->thread.debugreg0; + dump->u_debugreg[1] = current->thread.debugreg1; + dump->u_debugreg[2] = current->thread.debugreg2; + dump->u_debugreg[3] = current->thread.debugreg3; + dump->u_debugreg[4] = 0; + dump->u_debugreg[5] = 0; + dump->u_debugreg[6] = current->thread.debugreg6; + dump->u_debugreg[7] = current->thread.debugreg7; + + if (dump->start_stack < 0xc0000000) { + unsigned long tmp; + + tmp = (unsigned long) (0xc0000000 - dump->start_stack); + dump->u_ssize = tmp >> PAGE_SHIFT; + } + + dump->regs.bx = regs->bx; + dump->regs.cx = regs->cx; + dump->regs.dx = regs->dx; + dump->regs.si = regs->si; + dump->regs.di = regs->di; + dump->regs.bp = regs->bp; + dump->regs.ax = regs->ax; dump->regs.ds = current->thread.ds; dump->regs.es = current->thread.es; asm("movl %%fs,%0" : "=r" (fs)); dump->regs.fs = fs; - asm("movl %%gs,%0" : "=r" (gs)); dump->regs.gs = gs; - dump->regs.orig_eax = regs->orig_rax; - dump->regs.eip = regs->rip; + asm("movl %%gs,%0" : "=r" (gs)); dump->regs.gs = gs; + dump->regs.orig_ax = regs->orig_ax; + dump->regs.ip = regs->ip; dump->regs.cs = regs->cs; - dump->regs.eflags = regs->eflags; - dump->regs.esp = regs->rsp; + dump->regs.flags = regs->flags; + dump->regs.sp = regs->sp; dump->regs.ss = regs->ss; #if 1 /* FIXME */ dump->u_fpvalid = 0; #else - dump->u_fpvalid = dump_fpu (regs, &dump->i387); + dump->u_fpvalid = dump_fpu(regs, &dump->i387); #endif } @@ -128,15 +134,19 @@ static int dump_write(struct file *file, return file->f_op->write(file, addr, nr, &file->f_pos) == nr; } -#define DUMP_WRITE(addr, nr) \ +#define DUMP_WRITE(addr, nr) \ if (!dump_write(file, (void *)(addr), (nr))) \ goto end_coredump; -#define DUMP_SEEK(offset) \ -if (file->f_op->llseek) { \ - if (file->f_op->llseek(file,(offset),0) != (offset)) \ - goto end_coredump; \ -} else file->f_pos = (offset) +#define DUMP_SEEK(offset) \ + if (file->f_op->llseek) { \ + if (file->f_op->llseek(file, (offset), 0) != (offset)) \ + goto end_coredump; \ + } else \ + file->f_pos = (offset) + +#define START_DATA() (u.u_tsize << PAGE_SHIFT) +#define START_STACK(u) (u.start_stack) /* * Routine writes a core dump image in the current directory. @@ -148,62 +158,70 @@ if (file->f_op->llseek) { \ * dumping of the process results in another error.. 
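/*
 * In dump_thread32() above, the stack size recorded in the a.out core
 * header is simply the distance from the page-rounded user stack
 * pointer up to the 3 GB boundary, in pages. For example, with 4 KB
 * pages and start_stack = 0xbf800000:
 *
 *	tmp     = 0xc0000000 - 0xbf800000 = 0x00800000   (8 MB)
 *	u_ssize = 0x00800000 >> 12        = 0x800        (2048 pages)
 */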
*/ -static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit) +static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, + unsigned long limit) { mm_segment_t fs; int has_dumped = 0; unsigned long dump_start, dump_size; struct user32 dump; -# define START_DATA(u) (u.u_tsize << PAGE_SHIFT) -# define START_STACK(u) (u.start_stack) fs = get_fs(); set_fs(KERNEL_DS); has_dumped = 1; current->flags |= PF_DUMPCORE; - strncpy(dump.u_comm, current->comm, sizeof(current->comm)); - dump.u_ar0 = (u32)(((unsigned long)(&dump.regs)) - ((unsigned long)(&dump))); + strncpy(dump.u_comm, current->comm, sizeof(current->comm)); + dump.u_ar0 = (u32)(((unsigned long)(&dump.regs)) - + ((unsigned long)(&dump))); dump.signal = signr; dump_thread32(regs, &dump); -/* If the size of the dump file exceeds the rlimit, then see what would happen - if we wrote the stack, but not the data area. */ + /* + * If the size of the dump file exceeds the rlimit, then see + * what would happen if we wrote the stack, but not the data + * area. + */ if ((dump.u_dsize + dump.u_ssize + 1) * PAGE_SIZE > limit) dump.u_dsize = 0; -/* Make sure we have enough room to write the stack and data areas. */ + /* Make sure we have enough room to write the stack and data areas. */ if ((dump.u_ssize + 1) * PAGE_SIZE > limit) dump.u_ssize = 0; -/* make sure we actually have a data and stack area to dump */ + /* make sure we actually have a data and stack area to dump */ set_fs(USER_DS); - if (!access_ok(VERIFY_READ, (void *) (unsigned long)START_DATA(dump), dump.u_dsize << PAGE_SHIFT)) + if (!access_ok(VERIFY_READ, (void *) (unsigned long)START_DATA(dump), + dump.u_dsize << PAGE_SHIFT)) dump.u_dsize = 0; - if (!access_ok(VERIFY_READ, (void *) (unsigned long)START_STACK(dump), dump.u_ssize << PAGE_SHIFT)) + if (!access_ok(VERIFY_READ, (void *) (unsigned long)START_STACK(dump), + dump.u_ssize << PAGE_SHIFT)) dump.u_ssize = 0; set_fs(KERNEL_DS); -/* struct user */ - DUMP_WRITE(&dump,sizeof(dump)); -/* Now dump all of the user data. Include malloced stuff as well */ + /* struct user */ + DUMP_WRITE(&dump, sizeof(dump)); + /* Now dump all of the user data. Include malloced stuff as well */ DUMP_SEEK(PAGE_SIZE); -/* now we start writing out the user space info */ + /* now we start writing out the user space info */ set_fs(USER_DS); -/* Dump the data area */ + /* Dump the data area */ if (dump.u_dsize != 0) { dump_start = START_DATA(dump); dump_size = dump.u_dsize << PAGE_SHIFT; - DUMP_WRITE(dump_start,dump_size); + DUMP_WRITE(dump_start, dump_size); } -/* Now prepare to dump the stack area */ + /* Now prepare to dump the stack area */ if (dump.u_ssize != 0) { dump_start = START_STACK(dump); dump_size = dump.u_ssize << PAGE_SHIFT; - DUMP_WRITE(dump_start,dump_size); + DUMP_WRITE(dump_start, dump_size); } -/* Finally dump the task struct. Not be used by gdb, but could be useful */ + /* + * Finally dump the task struct. 
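/*
 * The two rlimit checks in aout_core_dump() above degrade the core
 * file gracefully rather than failing outright: first the data area is
 * dropped if header + data + stack would exceed the limit, then the
 * stack too if header + stack alone still exceed it. E.g. with a 1 MB
 * limit (256 pages of 4 KB), u_dsize = 300 and u_ssize = 100:
 *
 *	300 + 100 + 1 > 256   ->  u_dsize = 0 (data area dropped)
 *	100 + 1      <= 256   ->  stack area is still dumped
 */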
Not be used by gdb, but + * could be useful + */ set_fs(KERNEL_DS); - DUMP_WRITE(current,sizeof(*current)); + DUMP_WRITE(current, sizeof(*current)); end_coredump: set_fs(fs); return has_dumped; @@ -217,35 +235,34 @@ end_coredump: */ static u32 __user *create_aout_tables(char __user *p, struct linux_binprm *bprm) { - u32 __user *argv; - u32 __user *envp; - u32 __user *sp; - int argc = bprm->argc; - int envc = bprm->envc; + u32 __user *argv, *envp, *sp; + int argc = bprm->argc, envc = bprm->envc; sp = (u32 __user *) ((-(unsigned long)sizeof(u32)) & (unsigned long) p); sp -= envc+1; envp = sp; sp -= argc+1; argv = sp; - put_user((unsigned long) envp,--sp); - put_user((unsigned long) argv,--sp); - put_user(argc,--sp); + put_user((unsigned long) envp, --sp); + put_user((unsigned long) argv, --sp); + put_user(argc, --sp); current->mm->arg_start = (unsigned long) p; - while (argc-->0) { + while (argc-- > 0) { char c; - put_user((u32)(unsigned long)p,argv++); + + put_user((u32)(unsigned long)p, argv++); do { - get_user(c,p++); + get_user(c, p++); } while (c); } put_user(0, argv); current->mm->arg_end = current->mm->env_start = (unsigned long) p; - while (envc-->0) { + while (envc-- > 0) { char c; - put_user((u32)(unsigned long)p,envp++); + + put_user((u32)(unsigned long)p, envp++); do { - get_user(c,p++); + get_user(c, p++); } while (c); } put_user(0, envp); @@ -257,20 +274,18 @@ static u32 __user *create_aout_tables(ch * These are the functions used to load a.out style executables and shared * libraries. There is no binary dependent code anywhere else. */ - -static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) +static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) { + unsigned long error, fd_offset, rlim; struct exec ex; - unsigned long error; - unsigned long fd_offset; - unsigned long rlim; int retval; ex = *((struct exec *) bprm->buf); /* exec-header */ if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC && N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) || N_TRSIZE(ex) || N_DRSIZE(ex) || - i_size_read(bprm->file->f_path.dentry->d_inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { + i_size_read(bprm->file->f_path.dentry->d_inode) < + ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { return -ENOEXEC; } @@ -291,13 +306,13 @@ static int load_aout_binary(struct linux if (retval) return retval; - regs->cs = __USER32_CS; + regs->cs = __USER32_CS; regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0; /* OK, This is the point of no return */ set_personality(PER_LINUX); - set_thread_flag(TIF_IA32); + set_thread_flag(TIF_IA32); clear_thread_flag(TIF_ABI_PENDING); current->mm->end_code = ex.a_text + @@ -311,7 +326,7 @@ static int load_aout_binary(struct linux current->mm->mmap = NULL; compute_creds(bprm); - current->flags &= ~PF_FORKNOEXEC; + current->flags &= ~PF_FORKNOEXEC; if (N_MAGIC(ex) == OMAGIC) { unsigned long text_addr, map_size; @@ -338,30 +353,30 @@ static int load_aout_binary(struct linux send_sig(SIGKILL, current, 0); return error; } - + flush_icache_range(text_addr, text_addr+ex.a_text+ex.a_data); } else { #ifdef WARN_OLD static unsigned long error_time, error_time2; if ((ex.a_text & 0xfff || ex.a_data & 0xfff) && - (N_MAGIC(ex) != NMAGIC) && (jiffies-error_time2) > 5*HZ) - { + (N_MAGIC(ex) != NMAGIC) && (jiffies-error_time2) > 5*HZ) { printk(KERN_NOTICE "executable not page aligned\n"); error_time2 = jiffies; } if ((fd_offset & ~PAGE_MASK) != 0 && - (jiffies-error_time) > 5*HZ) - { - 
printk(KERN_WARNING - "fd_offset is not page aligned. Please convert program: %s\n", + (jiffies - error_time) > 5*HZ) { + printk(KERN_WARNING + "fd_offset is not page aligned. Please convert " + "program: %s\n", bprm->file->f_path.dentry->d_name.name); error_time = jiffies; } #endif - if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) { + if (!bprm->file->f_op->mmap || (fd_offset & ~PAGE_MASK) != 0) { loff_t pos = fd_offset; + down_write(¤t->mm->mmap_sem); do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data); up_write(¤t->mm->mmap_sem); @@ -376,9 +391,10 @@ static int load_aout_binary(struct linux down_write(¤t->mm->mmap_sem); error = do_mmap(bprm->file, N_TXTADDR(ex), ex.a_text, - PROT_READ | PROT_EXEC, - MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE | MAP_32BIT, - fd_offset); + PROT_READ | PROT_EXEC, + MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | + MAP_EXECUTABLE | MAP_32BIT, + fd_offset); up_write(¤t->mm->mmap_sem); if (error != N_TXTADDR(ex)) { @@ -387,9 +403,10 @@ static int load_aout_binary(struct linux } down_write(¤t->mm->mmap_sem); - error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data, + error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data, PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE | MAP_32BIT, + MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | + MAP_EXECUTABLE | MAP_32BIT, fd_offset + ex.a_text); up_write(¤t->mm->mmap_sem); if (error != N_DATADDR(ex)) { @@ -403,9 +420,9 @@ beyond_if: set_brk(current->mm->start_brk, current->mm->brk); retval = setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT); - if (retval < 0) { - /* Someone check-me: is this error path enough? */ - send_sig(SIGKILL, current, 0); + if (retval < 0) { + /* Someone check-me: is this error path enough? */ + send_sig(SIGKILL, current, 0); return retval; } @@ -414,10 +431,10 @@ beyond_if: /* start thread */ asm volatile("movl %0,%%fs" :: "r" (0)); \ asm volatile("movl %0,%%es; movl %0,%%ds": :"r" (__USER32_DS)); - load_gs_index(0); - (regs)->rip = ex.a_entry; - (regs)->rsp = current->mm->start_stack; - (regs)->eflags = 0x200; + load_gs_index(0); + (regs)->ip = ex.a_entry; + (regs)->sp = current->mm->start_stack; + (regs)->flags = 0x200; (regs)->cs = __USER32_CS; (regs)->ss = __USER32_DS; regs->r8 = regs->r9 = regs->r10 = regs->r11 = @@ -425,7 +442,7 @@ beyond_if: set_fs(USER_DS); if (unlikely(current->ptrace & PT_PTRACED)) { if (current->ptrace & PT_TRACE_EXEC) - ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP); + ptrace_notify((PTRACE_EVENT_EXEC << 8) | SIGTRAP); else send_sig(SIGTRAP, current, 0); } @@ -434,9 +451,8 @@ beyond_if: static int load_aout_library(struct file *file) { - struct inode * inode; - unsigned long bss, start_addr, len; - unsigned long error; + struct inode *inode; + unsigned long bss, start_addr, len, error; int retval; struct exec ex; @@ -450,7 +466,8 @@ static int load_aout_library(struct file /* We come in here for the regular a.out style of shared libraries */ if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != QMAGIC) || N_TRSIZE(ex) || N_DRSIZE(ex) || ((ex.a_entry & 0xfff) && N_MAGIC(ex) == ZMAGIC) || - i_size_read(inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { + i_size_read(inode) < + ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { goto out; } @@ -467,10 +484,10 @@ static int load_aout_library(struct file #ifdef WARN_OLD static unsigned long error_time; - if ((jiffies-error_time) > 5*HZ) - { - printk(KERN_WARNING - "N_TXTOFF is not page aligned. 
Please convert library: %s\n", + if ((jiffies-error_time) > 5*HZ) { + printk(KERN_WARNING + "N_TXTOFF is not page aligned. Please convert " + "library: %s\n", file->f_path.dentry->d_name.name); error_time = jiffies; } @@ -478,11 +495,12 @@ static int load_aout_library(struct file down_write(¤t->mm->mmap_sem); do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); up_write(¤t->mm->mmap_sem); - + file->f_op->read(file, (char __user *)start_addr, ex.a_text + ex.a_data, &pos); flush_icache_range((unsigned long) start_addr, - (unsigned long) start_addr + ex.a_text + ex.a_data); + (unsigned long) start_addr + ex.a_text + + ex.a_data); retval = 0; goto out; diff -puN arch/x86/ia32/ia32_binfmt.c~git-x86 arch/x86/ia32/ia32_binfmt.c --- a/arch/x86/ia32/ia32_binfmt.c~git-x86 +++ a/arch/x86/ia32/ia32_binfmt.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #undef ELF_ARCH #undef ELF_CLASS @@ -47,14 +47,13 @@ #define AT_SYSINFO 32 #define AT_SYSINFO_EHDR 33 -int sysctl_vsyscall32 = 1; +extern int sysctl_vsyscall32; #undef ARCH_DLINFO #define ARCH_DLINFO do { \ if (sysctl_vsyscall32) { \ - current->mm->context.vdso = (void *)VSYSCALL32_BASE; \ - NEW_AUX_ENT(AT_SYSINFO, (u32)(u64)VSYSCALL32_VSYSCALL); \ - NEW_AUX_ENT(AT_SYSINFO_EHDR, VSYSCALL32_BASE); \ + NEW_AUX_ENT(AT_SYSINFO, (u32)VDSO_ENTRY); \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, (u32)VDSO_CURRENT_BASE); \ } \ } while(0) @@ -74,22 +73,22 @@ struct file; /* Assumes current==process to be dumped */ #undef ELF_CORE_COPY_REGS #define ELF_CORE_COPY_REGS(pr_reg, regs) \ - pr_reg[0] = regs->rbx; \ - pr_reg[1] = regs->rcx; \ - pr_reg[2] = regs->rdx; \ - pr_reg[3] = regs->rsi; \ - pr_reg[4] = regs->rdi; \ - pr_reg[5] = regs->rbp; \ - pr_reg[6] = regs->rax; \ + pr_reg[0] = regs->bx; \ + pr_reg[1] = regs->cx; \ + pr_reg[2] = regs->dx; \ + pr_reg[3] = regs->si; \ + pr_reg[4] = regs->di; \ + pr_reg[5] = regs->bp; \ + pr_reg[6] = regs->ax; \ pr_reg[7] = _GET_SEG(ds); \ pr_reg[8] = _GET_SEG(es); \ pr_reg[9] = _GET_SEG(fs); \ pr_reg[10] = _GET_SEG(gs); \ - pr_reg[11] = regs->orig_rax; \ - pr_reg[12] = regs->rip; \ + pr_reg[11] = regs->orig_ax; \ + pr_reg[12] = regs->ip; \ pr_reg[13] = regs->cs; \ - pr_reg[14] = regs->eflags; \ - pr_reg[15] = regs->rsp; \ + pr_reg[14] = regs->flags; \ + pr_reg[15] = regs->sp; \ pr_reg[16] = regs->ss; @@ -206,9 +205,9 @@ do { \ asm volatile("movl %0,%%fs" :: "r" (0)); \ asm volatile("movl %0,%%es; movl %0,%%ds": :"r" (__USER32_DS)); \ load_gs_index(0); \ - (regs)->rip = (new_rip); \ - (regs)->rsp = (new_rsp); \ - (regs)->eflags = 0x200; \ + (regs)->ip = (new_rip); \ + (regs)->sp = (new_rsp); \ + (regs)->flags = X86_EFLAGS_IF; \ (regs)->cs = __USER32_CS; \ (regs)->ss = __USER32_DS; \ set_fs(USER_DS); \ @@ -234,13 +233,13 @@ extern int syscall32_setup_pages(struct static void elf32_init(struct pt_regs *regs) { struct task_struct *me = current; - regs->rdi = 0; - regs->rsi = 0; - regs->rdx = 0; - regs->rcx = 0; - regs->rax = 0; - regs->rbx = 0; - regs->rbp = 0; + regs->di = 0; + regs->si = 0; + regs->dx = 0; + regs->cx = 0; + regs->ax = 0; + regs->bx = 0; + regs->bp = 0; regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0; me->thread.fs = 0; diff -puN arch/x86/ia32/ia32_signal.c~git-x86 arch/x86/ia32/ia32_signal.c --- a/arch/x86/ia32/ia32_signal.c~git-x86 +++ a/arch/x86/ia32/ia32_signal.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #define DEBUG_SIG 0 @@ -43,7 +43,8 @@ void signal_fault(struct pt_regs *regs, int copy_siginfo_to_user32(compat_siginfo_t __user *to, 
siginfo_t *from) { int err; - if (!access_ok (VERIFY_WRITE, to, sizeof(compat_siginfo_t))) + + if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t))) return -EFAULT; /* If you change siginfo_t structure, please make sure that @@ -53,16 +54,19 @@ int copy_siginfo_to_user32(compat_siginf 3 ints plus the relevant union member. */ err = __put_user(from->si_signo, &to->si_signo); err |= __put_user(from->si_errno, &to->si_errno); - err |= __put_user((short)from->si_code, &to->si_code); + err |= __put_user((short)from->si_code, &to->si_code); if (from->si_code < 0) { err |= __put_user(from->si_pid, &to->si_pid); - err |= __put_user(from->si_uid, &to->si_uid); - err |= __put_user(ptr_to_compat(from->si_ptr), &to->si_ptr); + err |= __put_user(from->si_uid, &to->si_uid); + err |= __put_user(ptr_to_compat(from->si_ptr), &to->si_ptr); } else { - /* First 32bits of unions are always present: - * si_pid === si_band === si_tid === si_addr(LS half) */ - err |= __put_user(from->_sifields._pad[0], &to->_sifields._pad[0]); + /* + * First 32bits of unions are always present: + * si_pid === si_band === si_tid === si_addr(LS half) + */ + err |= __put_user(from->_sifields._pad[0], + &to->_sifields._pad[0]); switch (from->si_code >> 16) { case __SI_FAULT >> 16: break; @@ -76,14 +80,15 @@ int copy_siginfo_to_user32(compat_siginf err |= __put_user(from->si_uid, &to->si_uid); break; case __SI_POLL >> 16: - err |= __put_user(from->si_fd, &to->si_fd); + err |= __put_user(from->si_fd, &to->si_fd); break; case __SI_TIMER >> 16: - err |= __put_user(from->si_overrun, &to->si_overrun); + err |= __put_user(from->si_overrun, &to->si_overrun); err |= __put_user(ptr_to_compat(from->si_ptr), - &to->si_ptr); + &to->si_ptr); break; - case __SI_RT >> 16: /* This is not generated by the kernel as of now. */ + /* This is not generated by the kernel as of now. */ + case __SI_RT >> 16: case __SI_MESGQ >> 16: err |= __put_user(from->si_uid, &to->si_uid); err |= __put_user(from->si_int, &to->si_int); @@ -97,7 +102,8 @@ int copy_siginfo_from_user32(siginfo_t * { int err; u32 ptr32; - if (!access_ok (VERIFY_READ, from, sizeof(compat_siginfo_t))) + + if (!access_ok(VERIFY_READ, from, sizeof(compat_siginfo_t))) return -EFAULT; err = __get_user(to->si_signo, &from->si_signo); @@ -112,8 +118,7 @@ int copy_siginfo_from_user32(siginfo_t * return err; } -asmlinkage long -sys32_sigsuspend(int history0, int history1, old_sigset_t mask) +asmlinkage long sys32_sigsuspend(int history0, int history1, old_sigset_t mask) { mask &= _BLOCKABLE; spin_lock_irq(¤t->sighand->siglock); @@ -128,36 +133,37 @@ sys32_sigsuspend(int history0, int histo return -ERESTARTNOHAND; } -asmlinkage long -sys32_sigaltstack(const stack_ia32_t __user *uss_ptr, - stack_ia32_t __user *uoss_ptr, - struct pt_regs *regs) +asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr, + stack_ia32_t __user *uoss_ptr, + struct pt_regs *regs) { - stack_t uss,uoss; + stack_t uss, uoss; int ret; - mm_segment_t seg; - if (uss_ptr) { + mm_segment_t seg; + + if (uss_ptr) { u32 ptr; - memset(&uss,0,sizeof(stack_t)); - if (!access_ok(VERIFY_READ,uss_ptr,sizeof(stack_ia32_t)) || + + memset(&uss, 0, sizeof(stack_t)); + if (!access_ok(VERIFY_READ, uss_ptr, sizeof(stack_ia32_t)) || __get_user(ptr, &uss_ptr->ss_sp) || __get_user(uss.ss_flags, &uss_ptr->ss_flags) || __get_user(uss.ss_size, &uss_ptr->ss_size)) return -EFAULT; uss.ss_sp = compat_ptr(ptr); } - seg = get_fs(); - set_fs(KERNEL_DS); - ret = do_sigaltstack(uss_ptr ? 
&uss : NULL, &uoss, regs->rsp); - set_fs(seg); + seg = get_fs(); + set_fs(KERNEL_DS); + ret = do_sigaltstack(uss_ptr ? &uss : NULL, &uoss, regs->sp); + set_fs(seg); if (ret >= 0 && uoss_ptr) { - if (!access_ok(VERIFY_WRITE,uoss_ptr,sizeof(stack_ia32_t)) || + if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(stack_ia32_t)) || __put_user(ptr_to_compat(uoss.ss_sp), &uoss_ptr->ss_sp) || __put_user(uoss.ss_flags, &uoss_ptr->ss_flags) || __put_user(uoss.ss_size, &uoss_ptr->ss_size)) ret = -EFAULT; - } - return ret; + } + return ret; } /* @@ -186,87 +192,85 @@ struct rt_sigframe char retcode[8]; }; -static int -ia32_restore_sigcontext(struct pt_regs *regs, struct sigcontext_ia32 __user *sc, unsigned int *peax) -{ - unsigned int err = 0; - +#define COPY(x) { \ + unsigned int reg; \ + err |= __get_user(reg, &sc->x); \ + regs->x = reg; \ +} + +#define RELOAD_SEG(seg,mask) \ + { unsigned int cur; \ + unsigned short pre; \ + err |= __get_user(pre, &sc->seg); \ + asm volatile("movl %%" #seg ",%0" : "=r" (cur)); \ + pre |= mask; \ + if (pre != cur) loadsegment(seg, pre); } + +static int ia32_restore_sigcontext(struct pt_regs *regs, + struct sigcontext_ia32 __user *sc, + unsigned int *peax) +{ + unsigned int tmpflags, gs, oldgs, err = 0; + struct _fpstate_ia32 __user *buf; + u32 tmp; + /* Always make any pending restarted system calls return -EINTR */ current_thread_info()->restart_block.fn = do_no_restart_syscall; #if DEBUG_SIG - printk("SIG restore_sigcontext: sc=%p err(%x) eip(%x) cs(%x) flg(%x)\n", - sc, sc->err, sc->eip, sc->cs, sc->eflags); + printk(KERN_DEBUG "SIG restore_sigcontext: " + "sc=%p err(%x) eip(%x) cs(%x) flg(%x)\n", + sc, sc->err, sc->ip, sc->cs, sc->flags); #endif -#define COPY(x) { \ - unsigned int reg; \ - err |= __get_user(reg, &sc->e ##x); \ - regs->r ## x = reg; \ -} -#define RELOAD_SEG(seg,mask) \ - { unsigned int cur; \ - unsigned short pre; \ - err |= __get_user(pre, &sc->seg); \ - asm volatile("movl %%" #seg ",%0" : "=r" (cur)); \ - pre |= mask; \ - if (pre != cur) loadsegment(seg,pre); } - - /* Reload fs and gs if they have changed in the signal handler. - This does not handle long fs/gs base changes in the handler, but - does not clobber them at least in the normal case. */ - - { - unsigned gs, oldgs; - err |= __get_user(gs, &sc->gs); - gs |= 3; - asm("movl %%gs,%0" : "=r" (oldgs)); - if (gs != oldgs) - load_gs_index(gs); - } - RELOAD_SEG(fs,3); - RELOAD_SEG(ds,3); - RELOAD_SEG(es,3); + /* + * Reload fs and gs if they have changed in the signal + * handler. This does not handle long fs/gs base changes in + * the handler, but does not clobber them at least in the + * normal case. 
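/*
 * The hoisted COPY() macro above no longer needs token pasting: after
 * the struct sigcontext_ia32 / struct pt_regs field renames (eip -> ip,
 * rip -> ip, edi/rdi -> di, and so on) both sides share one field
 * name, so
 *
 *	COPY(di);
 *
 * expands (roughly) to:
 *
 *	{
 *		unsigned int reg;
 *		err |= __get_user(reg, &sc->di);
 *		regs->di = reg;
 *	}
 *
 * whereas the old macro had to paste the "e" and "r" prefixes onto the
 * two sides separately.
 */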
+ */ + err |= __get_user(gs, &sc->gs); + gs |= 3; + asm("movl %%gs,%0" : "=r" (oldgs)); + if (gs != oldgs) + load_gs_index(gs); + + RELOAD_SEG(fs, 3); + RELOAD_SEG(ds, 3); + RELOAD_SEG(es, 3); COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); COPY(dx); COPY(cx); COPY(ip); - /* Don't touch extended registers */ - - err |= __get_user(regs->cs, &sc->cs); - regs->cs |= 3; - err |= __get_user(regs->ss, &sc->ss); - regs->ss |= 3; - - { - unsigned int tmpflags; - err |= __get_user(tmpflags, &sc->eflags); - regs->eflags = (regs->eflags & ~0x40DD5) | (tmpflags & 0x40DD5); - regs->orig_rax = -1; /* disable syscall checks */ - } + /* Don't touch extended registers */ + + err |= __get_user(regs->cs, &sc->cs); + regs->cs |= 3; + err |= __get_user(regs->ss, &sc->ss); + regs->ss |= 3; + + err |= __get_user(tmpflags, &sc->flags); + regs->flags = (regs->flags & ~0x40DD5) | (tmpflags & 0x40DD5); + /* disable syscall checks */ + regs->orig_ax = -1; + + err |= __get_user(tmp, &sc->fpstate); + buf = compat_ptr(tmp); + if (buf) { + if (!access_ok(VERIFY_READ, buf, sizeof(*buf))) + goto badframe; + err |= restore_i387_ia32(current, buf, 0); + } else { + struct task_struct *me = current; - { - u32 tmp; - struct _fpstate_ia32 __user * buf; - err |= __get_user(tmp, &sc->fpstate); - buf = compat_ptr(tmp); - if (buf) { - if (!access_ok(VERIFY_READ, buf, sizeof(*buf))) - goto badframe; - err |= restore_i387_ia32(current, buf, 0); - } else { - struct task_struct *me = current; - if (used_math()) { - clear_fpu(me); - clear_used_math(); - } + if (used_math()) { + clear_fpu(me); + clear_used_math(); } } - { - u32 tmp; - err |= __get_user(tmp, &sc->eax); - *peax = tmp; - } + err |= __get_user(tmp, &sc->ax); + *peax = tmp; + return err; badframe: @@ -275,15 +279,16 @@ badframe: asmlinkage long sys32_sigreturn(struct pt_regs *regs) { - struct sigframe __user *frame = (struct sigframe __user *)(regs->rsp-8); + struct sigframe __user *frame = (struct sigframe __user *)(regs->sp-8); sigset_t set; - unsigned int eax; + unsigned int ax; if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) goto badframe; if (__get_user(set.sig[0], &frame->sc.oldmask) || (_COMPAT_NSIG_WORDS > 1 - && __copy_from_user((((char *) &set.sig) + 4), &frame->extramask, + && __copy_from_user((((char *) &set.sig) + 4), + &frame->extramask, sizeof(frame->extramask)))) goto badframe; @@ -292,24 +297,24 @@ asmlinkage long sys32_sigreturn(struct p current->blocked = set; recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); - - if (ia32_restore_sigcontext(regs, &frame->sc, &eax)) + + if (ia32_restore_sigcontext(regs, &frame->sc, &ax)) goto badframe; - return eax; + return ax; badframe: signal_fault(regs, frame, "32bit sigreturn"); return 0; -} +} asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs) { struct rt_sigframe __user *frame; sigset_t set; - unsigned int eax; + unsigned int ax; struct pt_regs tregs; - frame = (struct rt_sigframe __user *)(regs->rsp - 4); + frame = (struct rt_sigframe __user *)(regs->sp - 4); if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) goto badframe; @@ -321,28 +326,28 @@ asmlinkage long sys32_rt_sigreturn(struc current->blocked = set; recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); - - if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext, &eax)) + + if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) goto badframe; tregs = *regs; if (sys32_sigaltstack(&frame->uc.uc_stack, NULL, &tregs) == -EFAULT) goto badframe; - return eax; + return ax; badframe: - signal_fault(regs,frame,"32bit rt 
sigreturn"); + signal_fault(regs, frame, "32bit rt sigreturn"); return 0; -} +} /* * Set up a signal frame. */ -static int -ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, struct _fpstate_ia32 __user *fpstate, - struct pt_regs *regs, unsigned int mask) +static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, + struct _fpstate_ia32 __user *fpstate, + struct pt_regs *regs, unsigned int mask) { int tmp, err = 0; @@ -356,26 +361,26 @@ ia32_setup_sigcontext(struct sigcontext_ __asm__("movl %%es,%0" : "=r"(tmp): "0"(tmp)); err |= __put_user(tmp, (unsigned int __user *)&sc->es); - err |= __put_user((u32)regs->rdi, &sc->edi); - err |= __put_user((u32)regs->rsi, &sc->esi); - err |= __put_user((u32)regs->rbp, &sc->ebp); - err |= __put_user((u32)regs->rsp, &sc->esp); - err |= __put_user((u32)regs->rbx, &sc->ebx); - err |= __put_user((u32)regs->rdx, &sc->edx); - err |= __put_user((u32)regs->rcx, &sc->ecx); - err |= __put_user((u32)regs->rax, &sc->eax); + err |= __put_user((u32)regs->di, &sc->di); + err |= __put_user((u32)regs->si, &sc->si); + err |= __put_user((u32)regs->bp, &sc->bp); + err |= __put_user((u32)regs->sp, &sc->sp); + err |= __put_user((u32)regs->bx, &sc->bx); + err |= __put_user((u32)regs->dx, &sc->dx); + err |= __put_user((u32)regs->cx, &sc->cx); + err |= __put_user((u32)regs->ax, &sc->ax); err |= __put_user((u32)regs->cs, &sc->cs); err |= __put_user((u32)regs->ss, &sc->ss); err |= __put_user(current->thread.trap_no, &sc->trapno); err |= __put_user(current->thread.error_code, &sc->err); - err |= __put_user((u32)regs->rip, &sc->eip); - err |= __put_user((u32)regs->eflags, &sc->eflags); - err |= __put_user((u32)regs->rsp, &sc->esp_at_signal); + err |= __put_user((u32)regs->ip, &sc->ip); + err |= __put_user((u32)regs->flags, &sc->flags); + err |= __put_user((u32)regs->sp, &sc->sp_at_signal); tmp = save_i387_ia32(current, fpstate, regs, 0); if (tmp < 0) err = -EFAULT; - else { + else { clear_used_math(); stts(); err |= __put_user(ptr_to_compat(tmp ? fpstate : NULL), @@ -392,40 +397,53 @@ ia32_setup_sigcontext(struct sigcontext_ /* * Determine which stack to use.. */ -static void __user * -get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) +static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, + size_t frame_size) { - unsigned long rsp; + unsigned long sp; /* Default to using normal stack */ - rsp = regs->rsp; + sp = regs->sp; /* This is the X/Open sanctioned signal stack switching. */ if (ka->sa.sa_flags & SA_ONSTACK) { - if (sas_ss_flags(rsp) == 0) - rsp = current->sas_ss_sp + current->sas_ss_size; + if (sas_ss_flags(sp) == 0) + sp = current->sas_ss_sp + current->sas_ss_size; } /* This is the legacy signal stack switching. */ else if ((regs->ss & 0xffff) != __USER_DS && !(ka->sa.sa_flags & SA_RESTORER) && - ka->sa.sa_restorer) { - rsp = (unsigned long) ka->sa.sa_restorer; - } + ka->sa.sa_restorer) + sp = (unsigned long) ka->sa.sa_restorer; - rsp -= frame_size; + sp -= frame_size; /* Align the stack pointer according to the i386 ABI, * i.e. so that on function entry ((sp + 4) & 15) == 0. 
*/ - rsp = ((rsp + 4) & -16ul) - 4; - return (void __user *) rsp; + sp = ((sp + 4) & -16ul) - 4; + return (void __user *) sp; } int ia32_setup_frame(int sig, struct k_sigaction *ka, - compat_sigset_t *set, struct pt_regs * regs) + compat_sigset_t *set, struct pt_regs *regs) { struct sigframe __user *frame; + void __user *restorer; int err = 0; + /* copy_to_user optimizes that into a single 8 byte store */ + static const struct { + u16 poplmovl; + u32 val; + u16 int80; + u16 pad; + } __attribute__((packed)) code = { + 0xb858, /* popl %eax ; movl $...,%eax */ + __NR_ia32_sigreturn, + 0x80cd, /* int $0x80 */ + 0, + }; + frame = get_sigframe(ka, regs, sizeof(*frame)); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) @@ -443,64 +461,53 @@ int ia32_setup_frame(int sig, struct k_s if (_COMPAT_NSIG_WORDS > 1) { err |= __copy_to_user(frame->extramask, &set->sig[1], sizeof(frame->extramask)); + if (err) + goto give_sigsegv; } - if (err) - goto give_sigsegv; - /* Return stub is in 32bit vsyscall page */ - { - void __user *restorer; + if (ka->sa.sa_flags & SA_RESTORER) { + restorer = ka->sa.sa_restorer; + } else { + /* Return stub is in 32bit vsyscall page */ if (current->binfmt->hasvdso) - restorer = VSYSCALL32_SIGRETURN; + restorer = VDSO32_SYMBOL(current->mm->context.vdso, + sigreturn); else restorer = (void *)&frame->retcode; - if (ka->sa.sa_flags & SA_RESTORER) - restorer = ka->sa.sa_restorer; - err |= __put_user(ptr_to_compat(restorer), &frame->pretcode); - } - /* These are actually not used anymore, but left because some - gdb versions depend on them as a marker. */ - { - /* copy_to_user optimizes that into a single 8 byte store */ - static const struct { - u16 poplmovl; - u32 val; - u16 int80; - u16 pad; - } __attribute__((packed)) code = { - 0xb858, /* popl %eax ; movl $...,%eax */ - __NR_ia32_sigreturn, - 0x80cd, /* int $0x80 */ - 0, - }; - err |= __copy_to_user(frame->retcode, &code, 8); } + err |= __put_user(ptr_to_compat(restorer), &frame->pretcode); + + /* + * These are actually not used anymore, but left because some + * gdb versions depend on them as a marker. 
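/*
 * get_sigframe() above aligns the frame per the i386 ABI: on handler
 * entry, after the return address has been pushed, (sp + 4) must be a
 * multiple of 16. Worked example with sp = 0xffffd123:
 *
 *	(0xffffd123 + 4) & -16ul = 0xffffd120
 *	0xffffd120 - 4           = 0xffffd11c
 *
 * and indeed (0xffffd11c + 4) & 15 == 0 at function entry.
 */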
+ */ + err |= __copy_to_user(frame->retcode, &code, 8); if (err) goto give_sigsegv; /* Set up registers for signal handler */ - regs->rsp = (unsigned long) frame; - regs->rip = (unsigned long) ka->sa.sa_handler; + regs->sp = (unsigned long) frame; + regs->ip = (unsigned long) ka->sa.sa_handler; /* Make -mregparm=3 work */ - regs->rax = sig; - regs->rdx = 0; - regs->rcx = 0; + regs->ax = sig; + regs->dx = 0; + regs->cx = 0; - asm volatile("movl %0,%%ds" :: "r" (__USER32_DS)); - asm volatile("movl %0,%%es" :: "r" (__USER32_DS)); + asm volatile("movl %0,%%ds" :: "r" (__USER32_DS)); + asm volatile("movl %0,%%es" :: "r" (__USER32_DS)); - regs->cs = __USER32_CS; - regs->ss = __USER32_DS; + regs->cs = __USER32_CS; + regs->ss = __USER32_DS; set_fs(USER_DS); - regs->eflags &= ~TF_MASK; + regs->flags &= ~TF_MASK; if (test_thread_flag(TIF_SINGLESTEP)) ptrace_notify(SIGTRAP); #if DEBUG_SIG - printk("SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n", - current->comm, current->pid, frame, regs->rip, frame->pretcode); + printk(KERN_DEBUG "SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n", + current->comm, current->pid, frame, regs->ip, frame->pretcode); #endif return 0; @@ -511,25 +518,34 @@ give_sigsegv: } int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, - compat_sigset_t *set, struct pt_regs * regs) + compat_sigset_t *set, struct pt_regs *regs) { struct rt_sigframe __user *frame; + struct exec_domain *ed = current_thread_info()->exec_domain; + void __user *restorer; int err = 0; + /* __copy_to_user optimizes that into a single 8 byte store */ + static const struct { + u8 movl; + u32 val; + u16 int80; + u16 pad; + u8 pad2; + } __attribute__((packed)) code = { + 0xb8, + __NR_ia32_rt_sigreturn, + 0x80cd, + 0, + }; + frame = get_sigframe(ka, regs, sizeof(*frame)); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; - { - struct exec_domain *ed = current_thread_info()->exec_domain; - err |= __put_user((ed - && ed->signal_invmap - && sig < 32 - ? ed->signal_invmap[sig] - : sig), - &frame->sig); - } + err |= __put_user((ed && ed->signal_invmap && sig < 32 + ? ed->signal_invmap[sig] : sig), &frame->sig); err |= __put_user(ptr_to_compat(&frame->info), &frame->pinfo); err |= __put_user(ptr_to_compat(&frame->uc), &frame->puc); err |= copy_siginfo_to_user32(&frame->info, info); @@ -540,73 +556,58 @@ int ia32_setup_rt_frame(int sig, struct err |= __put_user(0, &frame->uc.uc_flags); err |= __put_user(0, &frame->uc.uc_link); err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); - err |= __put_user(sas_ss_flags(regs->rsp), + err |= __put_user(sas_ss_flags(regs->sp), &frame->uc.uc_stack.ss_flags); err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate, - regs, set->sig[0]); + regs, set->sig[0]); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); if (err) goto give_sigsegv; - - { - void __user *restorer = VSYSCALL32_RTSIGRETURN; - if (ka->sa.sa_flags & SA_RESTORER) - restorer = ka->sa.sa_restorer; - err |= __put_user(ptr_to_compat(restorer), &frame->pretcode); - } - - /* This is movl $,%eax ; int $0x80 */ - /* Not actually used anymore, but left because some gdb versions - need it. 
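/*
 * The packed "code" structs above are the classic little-endian trick
 * for emitting the sigreturn trampoline with a single 8-byte store.
 * For the non-rt frame, poplmovl = 0xb858 stores the bytes 58 b8, so
 * the buffer decodes as:
 *
 *	58                  popl  %eax
 *	b8 77 00 00 00      movl  $119, %eax    (__NR_ia32_sigreturn)
 *	cd 80               int   $0x80
 *
 * The rt variant starts with a single 0xb8 byte (no popl), encoding
 * movl $173,%eax; int $0x80 (__NR_ia32_rt_sigreturn). As the comments
 * note, both stubs are only kept as a marker for old gdb; the actual
 * return path is the vdso sigreturn symbol installed in ->pretcode.
 */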
*/ - { - /* __copy_to_user optimizes that into a single 8 byte store */ - static const struct { - u8 movl; - u32 val; - u16 int80; - u16 pad; - u8 pad2; - } __attribute__((packed)) code = { - 0xb8, - __NR_ia32_rt_sigreturn, - 0x80cd, - 0, - }; - err |= __copy_to_user(frame->retcode, &code, 8); - } + if (ka->sa.sa_flags & SA_RESTORER) + restorer = ka->sa.sa_restorer; + else + restorer = VDSO32_SYMBOL(current->mm->context.vdso, + rt_sigreturn); + err |= __put_user(ptr_to_compat(restorer), &frame->pretcode); + + /* + * Not actually used anymore, but left because some gdb + * versions need it. + */ + err |= __copy_to_user(frame->retcode, &code, 8); if (err) goto give_sigsegv; /* Set up registers for signal handler */ - regs->rsp = (unsigned long) frame; - regs->rip = (unsigned long) ka->sa.sa_handler; + regs->sp = (unsigned long) frame; + regs->ip = (unsigned long) ka->sa.sa_handler; /* Make -mregparm=3 work */ - regs->rax = sig; - regs->rdx = (unsigned long) &frame->info; - regs->rcx = (unsigned long) &frame->uc; + regs->ax = sig; + regs->dx = (unsigned long) &frame->info; + regs->cx = (unsigned long) &frame->uc; /* Make -mregparm=3 work */ - regs->rax = sig; - regs->rdx = (unsigned long) &frame->info; - regs->rcx = (unsigned long) &frame->uc; - - asm volatile("movl %0,%%ds" :: "r" (__USER32_DS)); - asm volatile("movl %0,%%es" :: "r" (__USER32_DS)); - - regs->cs = __USER32_CS; - regs->ss = __USER32_DS; + regs->ax = sig; + regs->dx = (unsigned long) &frame->info; + regs->cx = (unsigned long) &frame->uc; + + asm volatile("movl %0,%%ds" :: "r" (__USER32_DS)); + asm volatile("movl %0,%%es" :: "r" (__USER32_DS)); + + regs->cs = __USER32_CS; + regs->ss = __USER32_DS; set_fs(USER_DS); - regs->eflags &= ~TF_MASK; + regs->flags &= ~TF_MASK; if (test_thread_flag(TIF_SINGLESTEP)) ptrace_notify(SIGTRAP); #if DEBUG_SIG - printk("SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n", - current->comm, current->pid, frame, regs->rip, frame->pretcode); + printk(KERN_DEBUG "SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n", + current->comm, current->pid, frame, regs->ip, frame->pretcode); #endif return 0; diff -puN arch/x86/ia32/ia32entry.S~git-x86 arch/x86/ia32/ia32entry.S --- a/arch/x86/ia32/ia32entry.S~git-x86 +++ a/arch/x86/ia32/ia32entry.S @@ -12,7 +12,6 @@ #include #include #include -#include #include #include @@ -104,7 +103,7 @@ ENTRY(ia32_sysenter_target) pushfq CFI_ADJUST_CFA_OFFSET 8 /*CFI_REL_OFFSET rflags,0*/ - movl $VSYSCALL32_SYSEXIT, %r10d + movl 8*3-THREAD_SIZE+threadinfo_sysenter_return(%rsp), %r10d CFI_REGISTER rip,r10 pushq $__USER32_CS CFI_ADJUST_CFA_OFFSET 8 @@ -142,6 +141,8 @@ sysenter_do_call: andl $~TS_COMPAT,threadinfo_status(%r10) /* clear IF, that popfq doesn't enable interrupts early */ andl $~0x200,EFLAGS-R11(%rsp) + movl RIP-R11(%rsp),%edx /* User %eip */ + CFI_REGISTER rip,rdx RESTORE_ARGS 1,24,1,1,1,1 popfq CFI_ADJUST_CFA_OFFSET -8 @@ -149,8 +150,6 @@ sysenter_do_call: popq %rcx /* User %esp */ CFI_ADJUST_CFA_OFFSET -8 CFI_REGISTER rsp,rcx - movl $VSYSCALL32_SYSEXIT,%edx /* User %eip */ - CFI_REGISTER rip,rdx TRACE_IRQS_ON swapgs sti /* sti only takes effect after the next instruction */ @@ -644,8 +643,8 @@ ia32_sys_call_table: .quad compat_sys_futex /* 240 */ .quad compat_sys_sched_setaffinity .quad compat_sys_sched_getaffinity - .quad sys32_set_thread_area - .quad sys32_get_thread_area + .quad sys_set_thread_area + .quad sys_get_thread_area .quad compat_sys_io_setup /* 245 */ .quad sys_io_destroy .quad compat_sys_io_getevents diff -puN arch/x86/ia32/ipc32.c~git-x86 arch/x86/ia32/ipc32.c --- 
a/arch/x86/ia32/ipc32.c~git-x86 +++ a/arch/x86/ia32/ipc32.c @@ -9,9 +9,8 @@ #include #include -asmlinkage long -sys32_ipc(u32 call, int first, int second, int third, - compat_uptr_t ptr, u32 fifth) +asmlinkage long sys32_ipc(u32 call, int first, int second, int third, + compat_uptr_t ptr, u32 fifth) { int version; @@ -19,36 +18,35 @@ sys32_ipc(u32 call, int first, int secon call &= 0xffff; switch (call) { - case SEMOP: + case SEMOP: /* struct sembuf is the same on 32 and 64bit :)) */ return sys_semtimedop(first, compat_ptr(ptr), second, NULL); - case SEMTIMEDOP: + case SEMTIMEDOP: return compat_sys_semtimedop(first, compat_ptr(ptr), second, compat_ptr(fifth)); - case SEMGET: + case SEMGET: return sys_semget(first, second, third); - case SEMCTL: + case SEMCTL: return compat_sys_semctl(first, second, third, compat_ptr(ptr)); - case MSGSND: + case MSGSND: return compat_sys_msgsnd(first, second, third, compat_ptr(ptr)); - case MSGRCV: + case MSGRCV: return compat_sys_msgrcv(first, second, fifth, third, version, compat_ptr(ptr)); - case MSGGET: + case MSGGET: return sys_msgget((key_t) first, second); - case MSGCTL: + case MSGCTL: return compat_sys_msgctl(first, second, compat_ptr(ptr)); - case SHMAT: + case SHMAT: return compat_sys_shmat(first, second, third, version, compat_ptr(ptr)); - break; - case SHMDT: + case SHMDT: return sys_shmdt(compat_ptr(ptr)); - case SHMGET: + case SHMGET: return sys_shmget(first, (unsigned)second, third); - case SHMCTL: + case SHMCTL: return compat_sys_shmctl(first, second, compat_ptr(ptr)); } return -ENOSYS; diff -puN arch/x86/ia32/mmap32.c~git-x86 /dev/null --- a/arch/x86/ia32/mmap32.c +++ /dev/null @@ -1,79 +0,0 @@ -/* - * linux/arch/x86_64/ia32/mm/mmap.c - * - * flexible mmap layout support - * - * Based on the i386 version which was - * - * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * - * Started by Ingo Molnar - */ - -#include -#include -#include -#include - -/* - * Top of mmap area (just below the process stack). - * - * Leave an at least ~128 MB hole. 
- */ -#define MIN_GAP (128*1024*1024) -#define MAX_GAP (TASK_SIZE/6*5) - -static inline unsigned long mmap_base(struct mm_struct *mm) -{ - unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur; - unsigned long random_factor = 0; - - if (current->flags & PF_RANDOMIZE) - random_factor = get_random_int() % (1024*1024); - - if (gap < MIN_GAP) - gap = MIN_GAP; - else if (gap > MAX_GAP) - gap = MAX_GAP; - - return PAGE_ALIGN(TASK_SIZE - gap - random_factor); -} - -/* - * This function, called very early during the creation of a new - * process VM image, sets up which VM layout function to use: - */ -void ia32_pick_mmap_layout(struct mm_struct *mm) -{ - /* - * Fall back to the standard layout if the personality - * bit is set, or if the expected stack growth is unlimited: - */ - if (sysctl_legacy_va_layout || - (current->personality & ADDR_COMPAT_LAYOUT) || - current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) { - mm->mmap_base = TASK_UNMAPPED_BASE; - mm->get_unmapped_area = arch_get_unmapped_area; - mm->unmap_area = arch_unmap_area; - } else { - mm->mmap_base = mmap_base(mm); - mm->get_unmapped_area = arch_get_unmapped_area_topdown; - mm->unmap_area = arch_unmap_area_topdown; - } -} diff -puN arch/x86/ia32/ptrace32.c~git-x86 /dev/null --- a/arch/x86/ia32/ptrace32.c +++ /dev/null @@ -1,404 +0,0 @@ -/* - * 32bit ptrace for x86-64. - * - * Copyright 2001,2002 Andi Kleen, SuSE Labs. - * Some parts copied from arch/i386/kernel/ptrace.c. See that file for earlier - * copyright. - * - * This allows to access 64bit processes too; but there is no way to see the extended - * register contents. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * Determines which flags the user has access to [1 = access, 0 = no access]. - * Prohibits changing ID(21), VIP(20), VIF(19), VM(17), IOPL(12-13), IF(9). - * Also masks reserved bits (31-22, 15, 5, 3, 1). 
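The 0x54dd5 value of FLAG_MASK below follows directly from that list: setting a bit for each flag a 32-bit debugger may legitimately write (CF, PF, AF, ZF, SF, TF, DF, OF, NT, RF, AC) reproduces the constant. A standalone sketch that recomputes it — plain userspace C for illustration, not kernel code:

	#include <stdio.h>

	int main(void)
	{
		/* bit positions of the user-writable EFLAGS:
		 * CF PF AF ZF SF TF DF OF NT RF AC */
		static const int writable[] = {
			0, 2, 4, 6, 7, 8, 10, 11, 14, 16, 18
		};
		unsigned long mask = 0;
		unsigned int i;

		for (i = 0; i < sizeof(writable) / sizeof(writable[0]); i++)
			mask |= 1UL << writable[i];

		printf("FLAG_MASK = %#lx\n", mask);	/* prints 0x54dd5 */
		return 0;
	}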
- */ -#define FLAG_MASK 0x54dd5UL - -#define R32(l,q) \ - case offsetof(struct user32, regs.l): stack[offsetof(struct pt_regs, q)/8] = val; break - -static int putreg32(struct task_struct *child, unsigned regno, u32 val) -{ - int i; - __u64 *stack = (__u64 *)task_pt_regs(child); - - switch (regno) { - case offsetof(struct user32, regs.fs): - if (val && (val & 3) != 3) return -EIO; - child->thread.fsindex = val & 0xffff; - break; - case offsetof(struct user32, regs.gs): - if (val && (val & 3) != 3) return -EIO; - child->thread.gsindex = val & 0xffff; - break; - case offsetof(struct user32, regs.ds): - if (val && (val & 3) != 3) return -EIO; - child->thread.ds = val & 0xffff; - break; - case offsetof(struct user32, regs.es): - child->thread.es = val & 0xffff; - break; - case offsetof(struct user32, regs.ss): - if ((val & 3) != 3) return -EIO; - stack[offsetof(struct pt_regs, ss)/8] = val & 0xffff; - break; - case offsetof(struct user32, regs.cs): - if ((val & 3) != 3) return -EIO; - stack[offsetof(struct pt_regs, cs)/8] = val & 0xffff; - break; - - R32(ebx, rbx); - R32(ecx, rcx); - R32(edx, rdx); - R32(edi, rdi); - R32(esi, rsi); - R32(ebp, rbp); - R32(eax, rax); - R32(orig_eax, orig_rax); - R32(eip, rip); - R32(esp, rsp); - - case offsetof(struct user32, regs.eflags): { - __u64 *flags = &stack[offsetof(struct pt_regs, eflags)/8]; - val &= FLAG_MASK; - *flags = val | (*flags & ~FLAG_MASK); - break; - } - - case offsetof(struct user32, u_debugreg[4]): - case offsetof(struct user32, u_debugreg[5]): - return -EIO; - - case offsetof(struct user32, u_debugreg[0]): - child->thread.debugreg0 = val; - break; - - case offsetof(struct user32, u_debugreg[1]): - child->thread.debugreg1 = val; - break; - - case offsetof(struct user32, u_debugreg[2]): - child->thread.debugreg2 = val; - break; - - case offsetof(struct user32, u_debugreg[3]): - child->thread.debugreg3 = val; - break; - - case offsetof(struct user32, u_debugreg[6]): - child->thread.debugreg6 = val; - break; - - case offsetof(struct user32, u_debugreg[7]): - val &= ~DR_CONTROL_RESERVED; - /* See arch/i386/kernel/ptrace.c for an explanation of - * this awkward check.*/ - for(i=0; i<4; i++) - if ((0x5454 >> ((val >> (16 + 4*i)) & 0xf)) & 1) - return -EIO; - child->thread.debugreg7 = val; - if (val) - set_tsk_thread_flag(child, TIF_DEBUG); - else - clear_tsk_thread_flag(child, TIF_DEBUG); - break; - - default: - if (regno > sizeof(struct user32) || (regno & 3)) - return -EIO; - - /* Other dummy fields in the virtual user structure are ignored */ - break; - } - return 0; -} - -#undef R32 - -#define R32(l,q) \ - case offsetof(struct user32, regs.l): *val = stack[offsetof(struct pt_regs, q)/8]; break - -static int getreg32(struct task_struct *child, unsigned regno, u32 *val) -{ - __u64 *stack = (__u64 *)task_pt_regs(child); - - switch (regno) { - case offsetof(struct user32, regs.fs): - *val = child->thread.fsindex; - break; - case offsetof(struct user32, regs.gs): - *val = child->thread.gsindex; - break; - case offsetof(struct user32, regs.ds): - *val = child->thread.ds; - break; - case offsetof(struct user32, regs.es): - *val = child->thread.es; - break; - - R32(cs, cs); - R32(ss, ss); - R32(ebx, rbx); - R32(ecx, rcx); - R32(edx, rdx); - R32(edi, rdi); - R32(esi, rsi); - R32(ebp, rbp); - R32(eax, rax); - R32(orig_eax, orig_rax); - R32(eip, rip); - R32(eflags, eflags); - R32(esp, rsp); - - case offsetof(struct user32, u_debugreg[0]): - *val = child->thread.debugreg0; - break; - case offsetof(struct user32, u_debugreg[1]): - *val = 
child->thread.debugreg1; - break; - case offsetof(struct user32, u_debugreg[2]): - *val = child->thread.debugreg2; - break; - case offsetof(struct user32, u_debugreg[3]): - *val = child->thread.debugreg3; - break; - case offsetof(struct user32, u_debugreg[6]): - *val = child->thread.debugreg6; - break; - case offsetof(struct user32, u_debugreg[7]): - *val = child->thread.debugreg7; - break; - - default: - if (regno > sizeof(struct user32) || (regno & 3)) - return -EIO; - - /* Other dummy fields in the virtual user structure are ignored */ - *val = 0; - break; - } - return 0; -} - -#undef R32 - -static long ptrace32_siginfo(unsigned request, u32 pid, u32 addr, u32 data) -{ - int ret; - compat_siginfo_t __user *si32 = compat_ptr(data); - siginfo_t ssi; - siginfo_t __user *si = compat_alloc_user_space(sizeof(siginfo_t)); - if (request == PTRACE_SETSIGINFO) { - memset(&ssi, 0, sizeof(siginfo_t)); - ret = copy_siginfo_from_user32(&ssi, si32); - if (ret) - return ret; - if (copy_to_user(si, &ssi, sizeof(siginfo_t))) - return -EFAULT; - } - ret = sys_ptrace(request, pid, addr, (unsigned long)si); - if (ret) - return ret; - if (request == PTRACE_GETSIGINFO) { - if (copy_from_user(&ssi, si, sizeof(siginfo_t))) - return -EFAULT; - ret = copy_siginfo_to_user32(si32, &ssi); - } - return ret; -} - -asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) -{ - struct task_struct *child; - struct pt_regs *childregs; - void __user *datap = compat_ptr(data); - int ret; - __u32 val; - - switch (request) { - case PTRACE_TRACEME: - case PTRACE_ATTACH: - case PTRACE_KILL: - case PTRACE_CONT: - case PTRACE_SINGLESTEP: - case PTRACE_DETACH: - case PTRACE_SYSCALL: - case PTRACE_OLDSETOPTIONS: - case PTRACE_SETOPTIONS: - case PTRACE_SET_THREAD_AREA: - case PTRACE_GET_THREAD_AREA: - return sys_ptrace(request, pid, addr, data); - - default: - return -EINVAL; - - case PTRACE_PEEKTEXT: - case PTRACE_PEEKDATA: - case PTRACE_POKEDATA: - case PTRACE_POKETEXT: - case PTRACE_POKEUSR: - case PTRACE_PEEKUSR: - case PTRACE_GETREGS: - case PTRACE_SETREGS: - case PTRACE_SETFPREGS: - case PTRACE_GETFPREGS: - case PTRACE_SETFPXREGS: - case PTRACE_GETFPXREGS: - case PTRACE_GETEVENTMSG: - break; - - case PTRACE_SETSIGINFO: - case PTRACE_GETSIGINFO: - return ptrace32_siginfo(request, pid, addr, data); - } - - child = ptrace_get_task_struct(pid); - if (IS_ERR(child)) - return PTR_ERR(child); - - ret = ptrace_check_attach(child, request == PTRACE_KILL); - if (ret < 0) - goto out; - - childregs = task_pt_regs(child); - - switch (request) { - case PTRACE_PEEKDATA: - case PTRACE_PEEKTEXT: - ret = 0; - if (access_process_vm(child, addr, &val, sizeof(u32), 0)!=sizeof(u32)) - ret = -EIO; - else - ret = put_user(val, (unsigned int __user *)datap); - break; - - case PTRACE_POKEDATA: - case PTRACE_POKETEXT: - ret = 0; - if (access_process_vm(child, addr, &data, sizeof(u32), 1)!=sizeof(u32)) - ret = -EIO; - break; - - case PTRACE_PEEKUSR: - ret = getreg32(child, addr, &val); - if (ret == 0) - ret = put_user(val, (__u32 __user *)datap); - break; - - case PTRACE_POKEUSR: - ret = putreg32(child, addr, data); - break; - - case PTRACE_GETREGS: { /* Get all gp regs from the child. */ - int i; - if (!access_ok(VERIFY_WRITE, datap, 16*4)) { - ret = -EIO; - break; - } - ret = 0; - for ( i = 0; i <= 16*4 ; i += sizeof(__u32) ) { - getreg32(child, i, &val); - ret |= __put_user(val,(u32 __user *)datap); - datap += sizeof(u32); - } - break; - } - - case PTRACE_SETREGS: { /* Set all gp regs in the child. 
*/ - unsigned long tmp; - int i; - if (!access_ok(VERIFY_READ, datap, 16*4)) { - ret = -EIO; - break; - } - ret = 0; - for ( i = 0; i <= 16*4; i += sizeof(u32) ) { - ret |= __get_user(tmp, (u32 __user *)datap); - putreg32(child, i, tmp); - datap += sizeof(u32); - } - break; - } - - case PTRACE_GETFPREGS: - ret = -EIO; - if (!access_ok(VERIFY_READ, compat_ptr(data), - sizeof(struct user_i387_struct))) - break; - save_i387_ia32(child, datap, childregs, 1); - ret = 0; - break; - - case PTRACE_SETFPREGS: - ret = -EIO; - if (!access_ok(VERIFY_WRITE, datap, - sizeof(struct user_i387_struct))) - break; - ret = 0; - /* don't check EFAULT to be bug-to-bug compatible to i386 */ - restore_i387_ia32(child, datap, 1); - break; - - case PTRACE_GETFPXREGS: { - struct user32_fxsr_struct __user *u = datap; - init_fpu(child); - ret = -EIO; - if (!access_ok(VERIFY_WRITE, u, sizeof(*u))) - break; - ret = -EFAULT; - if (__copy_to_user(u, &child->thread.i387.fxsave, sizeof(*u))) - break; - ret = __put_user(childregs->cs, &u->fcs); - ret |= __put_user(child->thread.ds, &u->fos); - break; - } - case PTRACE_SETFPXREGS: { - struct user32_fxsr_struct __user *u = datap; - unlazy_fpu(child); - ret = -EIO; - if (!access_ok(VERIFY_READ, u, sizeof(*u))) - break; - /* no checking to be bug-to-bug compatible with i386. */ - /* but silence warning */ - if (__copy_from_user(&child->thread.i387.fxsave, u, sizeof(*u))) - ; - set_stopped_child_used_math(child); - child->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask; - ret = 0; - break; - } - - case PTRACE_GETEVENTMSG: - ret = put_user(child->ptrace_message,(unsigned int __user *)compat_ptr(data)); - break; - - default: - BUG(); - } - - out: - put_task_struct(child); - return ret; -} - diff -puN arch/x86/ia32/sys_ia32.c~git-x86 arch/x86/ia32/sys_ia32.c --- a/arch/x86/ia32/sys_ia32.c~git-x86 +++ a/arch/x86/ia32/sys_ia32.c @@ -1,29 +1,29 @@ /* * sys_ia32.c: Conversion between 32bit and 64bit native syscalls. Based on - * sys_sparc32 + * sys_sparc32 * * Copyright (C) 2000 VA Linux Co * Copyright (C) 2000 Don Dugger - * Copyright (C) 1999 Arun Sharma - * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) - * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) + * Copyright (C) 1999 Arun Sharma + * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) * Copyright (C) 2000 Hewlett-Packard Co. * Copyright (C) 2000 David Mosberger-Tang - * Copyright (C) 2000,2001,2002 Andi Kleen, SuSE Labs (x86-64 port) + * Copyright (C) 2000,2001,2002 Andi Kleen, SuSE Labs (x86-64 port) * * These routines maintain argument size conversion between 32bit and 64bit - * environment. In 2.5 most of this should be moved to a generic directory. + * environment. In 2.5 most of this should be moved to a generic directory. * * This file assumes that there is a hole at the end of user address space. - * - * Some of the functions are LE specific currently. These are hopefully all marked. - * This should be fixed. + * + * Some of the functions are LE specific currently. These are + * hopefully all marked. This should be fixed. 
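One pattern worth noting before the bulk of this file: nearly every 64-bit quantity here arrives from 32-bit userspace split into a low and a high word, and is reassembled with a shift-or, as sys32_truncate64() and friends do below. A minimal sketch of the idiom (join_offset is an illustrative name, not a helper from this file):

	/* rebuild a 64-bit file offset from its 32-bit syscall halves */
	static inline loff_t join_offset(unsigned long hi, unsigned long lo)
	{
		return ((loff_t)hi << 32) | lo;
	}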
*/ #include #include -#include -#include +#include +#include #include #include #include @@ -90,43 +90,44 @@ int cp_compat_stat(struct kstat *kbuf, s if (sizeof(ino) < sizeof(kbuf->ino) && ino != kbuf->ino) return -EOVERFLOW; if (!access_ok(VERIFY_WRITE, ubuf, sizeof(struct compat_stat)) || - __put_user (old_encode_dev(kbuf->dev), &ubuf->st_dev) || - __put_user (ino, &ubuf->st_ino) || - __put_user (kbuf->mode, &ubuf->st_mode) || - __put_user (kbuf->nlink, &ubuf->st_nlink) || - __put_user (uid, &ubuf->st_uid) || - __put_user (gid, &ubuf->st_gid) || - __put_user (old_encode_dev(kbuf->rdev), &ubuf->st_rdev) || - __put_user (kbuf->size, &ubuf->st_size) || - __put_user (kbuf->atime.tv_sec, &ubuf->st_atime) || - __put_user (kbuf->atime.tv_nsec, &ubuf->st_atime_nsec) || - __put_user (kbuf->mtime.tv_sec, &ubuf->st_mtime) || - __put_user (kbuf->mtime.tv_nsec, &ubuf->st_mtime_nsec) || - __put_user (kbuf->ctime.tv_sec, &ubuf->st_ctime) || - __put_user (kbuf->ctime.tv_nsec, &ubuf->st_ctime_nsec) || - __put_user (kbuf->blksize, &ubuf->st_blksize) || - __put_user (kbuf->blocks, &ubuf->st_blocks)) + __put_user(old_encode_dev(kbuf->dev), &ubuf->st_dev) || + __put_user(ino, &ubuf->st_ino) || + __put_user(kbuf->mode, &ubuf->st_mode) || + __put_user(kbuf->nlink, &ubuf->st_nlink) || + __put_user(uid, &ubuf->st_uid) || + __put_user(gid, &ubuf->st_gid) || + __put_user(old_encode_dev(kbuf->rdev), &ubuf->st_rdev) || + __put_user(kbuf->size, &ubuf->st_size) || + __put_user(kbuf->atime.tv_sec, &ubuf->st_atime) || + __put_user(kbuf->atime.tv_nsec, &ubuf->st_atime_nsec) || + __put_user(kbuf->mtime.tv_sec, &ubuf->st_mtime) || + __put_user(kbuf->mtime.tv_nsec, &ubuf->st_mtime_nsec) || + __put_user(kbuf->ctime.tv_sec, &ubuf->st_ctime) || + __put_user(kbuf->ctime.tv_nsec, &ubuf->st_ctime_nsec) || + __put_user(kbuf->blksize, &ubuf->st_blksize) || + __put_user(kbuf->blocks, &ubuf->st_blocks)) return -EFAULT; return 0; } -asmlinkage long -sys32_truncate64(char __user * filename, unsigned long offset_low, unsigned long offset_high) +asmlinkage long sys32_truncate64(char __user *filename, + unsigned long offset_low, + unsigned long offset_high) { return sys_truncate(filename, ((loff_t) offset_high << 32) | offset_low); } -asmlinkage long -sys32_ftruncate64(unsigned int fd, unsigned long offset_low, unsigned long offset_high) +asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long offset_low, + unsigned long offset_high) { return sys_ftruncate(fd, ((loff_t) offset_high << 32) | offset_low); } -/* Another set for IA32/LFS -- x86_64 struct stat is different due to - support for 64bit inode numbers. */ - -static int -cp_stat64(struct stat64 __user *ubuf, struct kstat *stat) +/* + * Another set for IA32/LFS -- x86_64 struct stat is different due to + * support for 64bit inode numbers. 
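The difference matters mostly for the inode field: cp_compat_stat() above has to detect a 64-bit inode number that no longer fits, while cp_stat64() can copy it through unchanged because stat64 carries a full-width st_ino. The truncation test, restated from cp_compat_stat() above:

	u32 ino = kbuf->ino;	/* narrowed to the compat field's width */
	if (sizeof(ino) < sizeof(kbuf->ino) && ino != kbuf->ino)
		return -EOVERFLOW;	/* high bits were lost */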
+ */ +static int cp_stat64(struct stat64 __user *ubuf, struct kstat *stat) { typeof(ubuf->st_uid) uid = 0; typeof(ubuf->st_gid) gid = 0; @@ -134,38 +135,39 @@ cp_stat64(struct stat64 __user *ubuf, st SET_GID(gid, stat->gid); if (!access_ok(VERIFY_WRITE, ubuf, sizeof(struct stat64)) || __put_user(huge_encode_dev(stat->dev), &ubuf->st_dev) || - __put_user (stat->ino, &ubuf->__st_ino) || - __put_user (stat->ino, &ubuf->st_ino) || - __put_user (stat->mode, &ubuf->st_mode) || - __put_user (stat->nlink, &ubuf->st_nlink) || - __put_user (uid, &ubuf->st_uid) || - __put_user (gid, &ubuf->st_gid) || - __put_user (huge_encode_dev(stat->rdev), &ubuf->st_rdev) || - __put_user (stat->size, &ubuf->st_size) || - __put_user (stat->atime.tv_sec, &ubuf->st_atime) || - __put_user (stat->atime.tv_nsec, &ubuf->st_atime_nsec) || - __put_user (stat->mtime.tv_sec, &ubuf->st_mtime) || - __put_user (stat->mtime.tv_nsec, &ubuf->st_mtime_nsec) || - __put_user (stat->ctime.tv_sec, &ubuf->st_ctime) || - __put_user (stat->ctime.tv_nsec, &ubuf->st_ctime_nsec) || - __put_user (stat->blksize, &ubuf->st_blksize) || - __put_user (stat->blocks, &ubuf->st_blocks)) + __put_user(stat->ino, &ubuf->__st_ino) || + __put_user(stat->ino, &ubuf->st_ino) || + __put_user(stat->mode, &ubuf->st_mode) || + __put_user(stat->nlink, &ubuf->st_nlink) || + __put_user(uid, &ubuf->st_uid) || + __put_user(gid, &ubuf->st_gid) || + __put_user(huge_encode_dev(stat->rdev), &ubuf->st_rdev) || + __put_user(stat->size, &ubuf->st_size) || + __put_user(stat->atime.tv_sec, &ubuf->st_atime) || + __put_user(stat->atime.tv_nsec, &ubuf->st_atime_nsec) || + __put_user(stat->mtime.tv_sec, &ubuf->st_mtime) || + __put_user(stat->mtime.tv_nsec, &ubuf->st_mtime_nsec) || + __put_user(stat->ctime.tv_sec, &ubuf->st_ctime) || + __put_user(stat->ctime.tv_nsec, &ubuf->st_ctime_nsec) || + __put_user(stat->blksize, &ubuf->st_blksize) || + __put_user(stat->blocks, &ubuf->st_blocks)) return -EFAULT; return 0; } -asmlinkage long -sys32_stat64(char __user * filename, struct stat64 __user *statbuf) +asmlinkage long sys32_stat64(char __user *filename, + struct stat64 __user *statbuf) { struct kstat stat; int ret = vfs_stat(filename, &stat); + if (!ret) ret = cp_stat64(statbuf, &stat); return ret; } -asmlinkage long -sys32_lstat64(char __user * filename, struct stat64 __user *statbuf) +asmlinkage long sys32_lstat64(char __user *filename, + struct stat64 __user *statbuf) { struct kstat stat; int ret = vfs_lstat(filename, &stat); @@ -174,8 +176,7 @@ sys32_lstat64(char __user * filename, st return ret; } -asmlinkage long -sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf) +asmlinkage long sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf) { struct kstat stat; int ret = vfs_fstat(fd, &stat); @@ -184,9 +185,8 @@ sys32_fstat64(unsigned int fd, struct st return ret; } -asmlinkage long -sys32_fstatat(unsigned int dfd, char __user *filename, - struct stat64 __user* statbuf, int flag) +asmlinkage long sys32_fstatat(unsigned int dfd, char __user *filename, + struct stat64 __user *statbuf, int flag) { struct kstat stat; int error = -EINVAL; @@ -221,8 +221,7 @@ struct mmap_arg_struct { unsigned int offset; }; -asmlinkage long -sys32_mmap(struct mmap_arg_struct __user *arg) +asmlinkage long sys32_mmap(struct mmap_arg_struct __user *arg) { struct mmap_arg_struct a; struct file *file = NULL; @@ -233,33 +232,33 @@ sys32_mmap(struct mmap_arg_struct __user return -EFAULT; if (a.offset & ~PAGE_MASK) - return -EINVAL; + return -EINVAL; if (!(a.flags & MAP_ANONYMOUS)) { file = 
fget(a.fd); if (!file) return -EBADF; } - - mm = current->mm; - down_write(&mm->mmap_sem); - retval = do_mmap_pgoff(file, a.addr, a.len, a.prot, a.flags, a.offset>>PAGE_SHIFT); + + mm = current->mm; + down_write(&mm->mmap_sem); + retval = do_mmap_pgoff(file, a.addr, a.len, a.prot, a.flags, + a.offset>>PAGE_SHIFT); if (file) fput(file); - up_write(&mm->mmap_sem); + up_write(&mm->mmap_sem); return retval; } -asmlinkage long -sys32_mprotect(unsigned long start, size_t len, unsigned long prot) +asmlinkage long sys32_mprotect(unsigned long start, size_t len, + unsigned long prot) { - return sys_mprotect(start,len,prot); + return sys_mprotect(start, len, prot); } -asmlinkage long -sys32_pipe(int __user *fd) +asmlinkage long sys32_pipe(int __user *fd) { int retval; int fds[2]; @@ -269,13 +268,13 @@ sys32_pipe(int __user *fd) goto out; if (copy_to_user(fd, fds, sizeof(fds))) retval = -EFAULT; - out: +out: return retval; } -asmlinkage long -sys32_rt_sigaction(int sig, struct sigaction32 __user *act, - struct sigaction32 __user *oact, unsigned int sigsetsize) +asmlinkage long sys32_rt_sigaction(int sig, struct sigaction32 __user *act, + struct sigaction32 __user *oact, + unsigned int sigsetsize) { struct k_sigaction new_ka, old_ka; int ret; @@ -291,12 +290,17 @@ sys32_rt_sigaction(int sig, struct sigac if (!access_ok(VERIFY_READ, act, sizeof(*act)) || __get_user(handler, &act->sa_handler) || __get_user(new_ka.sa.sa_flags, &act->sa_flags) || - __get_user(restorer, &act->sa_restorer)|| - __copy_from_user(&set32, &act->sa_mask, sizeof(compat_sigset_t))) + __get_user(restorer, &act->sa_restorer) || + __copy_from_user(&set32, &act->sa_mask, + sizeof(compat_sigset_t))) return -EFAULT; new_ka.sa.sa_handler = compat_ptr(handler); new_ka.sa.sa_restorer = compat_ptr(restorer); - /* FIXME: here we rely on _COMPAT_NSIG_WORS to be >= than _NSIG_WORDS << 1 */ + + /* + * FIXME: here we rely on _COMPAT_NSIG_WORS to be >= + * than _NSIG_WORDS << 1 + */ switch (_NSIG_WORDS) { case 4: new_ka.sa.sa_mask.sig[3] = set32.sig[6] | (((long)set32.sig[7]) << 32); @@ -312,7 +316,10 @@ sys32_rt_sigaction(int sig, struct sigac ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? 
&old_ka : NULL); if (!ret && oact) { - /* FIXME: here we rely on _COMPAT_NSIG_WORS to be >= than _NSIG_WORDS << 1 */ + /* + * FIXME: here we rely on _COMPAT_NSIG_WORS to be >= + * than _NSIG_WORDS << 1 + */ switch (_NSIG_WORDS) { case 4: set32.sig[7] = (old_ka.sa.sa_mask.sig[3] >> 32); @@ -328,23 +335,26 @@ sys32_rt_sigaction(int sig, struct sigac set32.sig[0] = old_ka.sa.sa_mask.sig[0]; } if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || - __put_user(ptr_to_compat(old_ka.sa.sa_handler), &oact->sa_handler) || - __put_user(ptr_to_compat(old_ka.sa.sa_restorer), &oact->sa_restorer) || + __put_user(ptr_to_compat(old_ka.sa.sa_handler), + &oact->sa_handler) || + __put_user(ptr_to_compat(old_ka.sa.sa_restorer), + &oact->sa_restorer) || __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || - __copy_to_user(&oact->sa_mask, &set32, sizeof(compat_sigset_t))) + __copy_to_user(&oact->sa_mask, &set32, + sizeof(compat_sigset_t))) return -EFAULT; } return ret; } -asmlinkage long -sys32_sigaction (int sig, struct old_sigaction32 __user *act, struct old_sigaction32 __user *oact) +asmlinkage long sys32_sigaction(int sig, struct old_sigaction32 __user *act, + struct old_sigaction32 __user *oact) { - struct k_sigaction new_ka, old_ka; - int ret; + struct k_sigaction new_ka, old_ka; + int ret; - if (act) { + if (act) { compat_old_sigset_t mask; compat_uptr_t handler, restorer; @@ -359,33 +369,35 @@ sys32_sigaction (int sig, struct old_sig new_ka.sa.sa_restorer = compat_ptr(restorer); siginitset(&new_ka.sa.sa_mask, mask); - } + } - ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); if (!ret && oact) { if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || - __put_user(ptr_to_compat(old_ka.sa.sa_handler), &oact->sa_handler) || - __put_user(ptr_to_compat(old_ka.sa.sa_restorer), &oact->sa_restorer) || + __put_user(ptr_to_compat(old_ka.sa.sa_handler), + &oact->sa_handler) || + __put_user(ptr_to_compat(old_ka.sa.sa_restorer), + &oact->sa_restorer) || __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask)) return -EFAULT; - } + } return ret; } -asmlinkage long -sys32_rt_sigprocmask(int how, compat_sigset_t __user *set, - compat_sigset_t __user *oset, unsigned int sigsetsize) +asmlinkage long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set, + compat_sigset_t __user *oset, + unsigned int sigsetsize) { sigset_t s; compat_sigset_t s32; int ret; mm_segment_t old_fs = get_fs(); - + if (set) { - if (copy_from_user (&s32, set, sizeof(compat_sigset_t))) + if (copy_from_user(&s32, set, sizeof(compat_sigset_t))) return -EFAULT; switch (_NSIG_WORDS) { case 4: s.sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32); @@ -394,13 +406,14 @@ sys32_rt_sigprocmask(int how, compat_sig case 1: s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32); } } - set_fs (KERNEL_DS); + set_fs(KERNEL_DS); ret = sys_rt_sigprocmask(how, set ? (sigset_t __user *)&s : NULL, oset ? 
(sigset_t __user *)&s : NULL, - sigsetsize); - set_fs (old_fs); - if (ret) return ret; + sigsetsize); + set_fs(old_fs); + if (ret) + return ret; if (oset) { switch (_NSIG_WORDS) { case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3]; @@ -408,52 +421,49 @@ sys32_rt_sigprocmask(int how, compat_sig case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1]; case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0]; } - if (copy_to_user (oset, &s32, sizeof(compat_sigset_t))) + if (copy_to_user(oset, &s32, sizeof(compat_sigset_t))) return -EFAULT; } return 0; } -static inline long -get_tv32(struct timeval *o, struct compat_timeval __user *i) +static inline long get_tv32(struct timeval *o, struct compat_timeval __user *i) { - int err = -EFAULT; - if (access_ok(VERIFY_READ, i, sizeof(*i))) { + int err = -EFAULT; + + if (access_ok(VERIFY_READ, i, sizeof(*i))) { err = __get_user(o->tv_sec, &i->tv_sec); err |= __get_user(o->tv_usec, &i->tv_usec); } - return err; + return err; } -static inline long -put_tv32(struct compat_timeval __user *o, struct timeval *i) +static inline long put_tv32(struct compat_timeval __user *o, struct timeval *i) { int err = -EFAULT; - if (access_ok(VERIFY_WRITE, o, sizeof(*o))) { + + if (access_ok(VERIFY_WRITE, o, sizeof(*o))) { err = __put_user(i->tv_sec, &o->tv_sec); err |= __put_user(i->tv_usec, &o->tv_usec); - } - return err; + } + return err; } -extern unsigned int alarm_setitimer(unsigned int seconds); - -asmlinkage long -sys32_alarm(unsigned int seconds) +asmlinkage long sys32_alarm(unsigned int seconds) { return alarm_setitimer(seconds); } -/* Translations due to time_t size differences. Which affects all - sorts of things, like timeval and itimerval. */ - -extern struct timezone sys_tz; - -asmlinkage long -sys32_gettimeofday(struct compat_timeval __user *tv, struct timezone __user *tz) +/* + * Translations due to time_t size differences. Which affects all + * sorts of things, like timeval and itimerval. + */ +asmlinkage long sys32_gettimeofday(struct compat_timeval __user *tv, + struct timezone __user *tz) { if (tv) { struct timeval ktv; + do_gettimeofday(&ktv); if (put_tv32(tv, &ktv)) return -EFAULT; @@ -465,14 +475,14 @@ sys32_gettimeofday(struct compat_timeval return 0; } -asmlinkage long -sys32_settimeofday(struct compat_timeval __user *tv, struct timezone __user *tz) +asmlinkage long sys32_settimeofday(struct compat_timeval __user *tv, + struct timezone __user *tz) { struct timeval ktv; struct timespec kts; struct timezone ktz; - if (tv) { + if (tv) { if (get_tv32(&ktv, tv)) return -EFAULT; kts.tv_sec = ktv.tv_sec; @@ -494,8 +504,7 @@ struct sel_arg_struct { unsigned int tvp; }; -asmlinkage long -sys32_old_select(struct sel_arg_struct __user *arg) +asmlinkage long sys32_old_select(struct sel_arg_struct __user *arg) { struct sel_arg_struct a; @@ -505,50 +514,45 @@ sys32_old_select(struct sel_arg_struct _ compat_ptr(a.exp), compat_ptr(a.tvp)); } -extern asmlinkage long -compat_sys_wait4(compat_pid_t pid, compat_uint_t * stat_addr, int options, - struct compat_rusage *ru); - -asmlinkage long -sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr, int options) +asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr, + int options) { return compat_sys_wait4(pid, stat_addr, options, NULL); } /* 32-bit timeval and related flotsam. 
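get_tv32()/put_tv32() above exist because a 32-bit time_t (and suseconds_t) is half the native width, so struct timeval cannot be copied as raw bytes. The compat layout being converted, roughly as the compat headers define it:

	struct compat_timeval {
		compat_time_t	tv_sec;		/* s32 in the x86-64 compat ABI */
		s32		tv_usec;
	};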
*/ -asmlinkage long -sys32_sysfs(int option, u32 arg1, u32 arg2) +asmlinkage long sys32_sysfs(int option, u32 arg1, u32 arg2) { return sys_sysfs(option, arg1, arg2); } -asmlinkage long -sys32_sched_rr_get_interval(compat_pid_t pid, struct compat_timespec __user *interval) +asmlinkage long sys32_sched_rr_get_interval(compat_pid_t pid, + struct compat_timespec __user *interval) { struct timespec t; int ret; - mm_segment_t old_fs = get_fs (); - - set_fs (KERNEL_DS); + mm_segment_t old_fs = get_fs(); + + set_fs(KERNEL_DS); ret = sys_sched_rr_get_interval(pid, (struct timespec __user *)&t); - set_fs (old_fs); + set_fs(old_fs); if (put_compat_timespec(&t, interval)) return -EFAULT; return ret; } -asmlinkage long -sys32_rt_sigpending(compat_sigset_t __user *set, compat_size_t sigsetsize) +asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *set, + compat_size_t sigsetsize) { sigset_t s; compat_sigset_t s32; int ret; mm_segment_t old_fs = get_fs(); - - set_fs (KERNEL_DS); + + set_fs(KERNEL_DS); ret = sys_rt_sigpending((sigset_t __user *)&s, sigsetsize); - set_fs (old_fs); + set_fs(old_fs); if (!ret) { switch (_NSIG_WORDS) { case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3]; @@ -556,30 +560,29 @@ sys32_rt_sigpending(compat_sigset_t __us case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1]; case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0]; } - if (copy_to_user (set, &s32, sizeof(compat_sigset_t))) + if (copy_to_user(set, &s32, sizeof(compat_sigset_t))) return -EFAULT; } return ret; } -asmlinkage long -sys32_rt_sigqueueinfo(int pid, int sig, compat_siginfo_t __user *uinfo) +asmlinkage long sys32_rt_sigqueueinfo(int pid, int sig, + compat_siginfo_t __user *uinfo) { siginfo_t info; int ret; mm_segment_t old_fs = get_fs(); - + if (copy_siginfo_from_user32(&info, uinfo)) return -EFAULT; - set_fs (KERNEL_DS); + set_fs(KERNEL_DS); ret = sys_rt_sigqueueinfo(pid, sig, (siginfo_t __user *)&info); - set_fs (old_fs); + set_fs(old_fs); return ret; } /* These are here just in case some old ia32 binary calls it. */ -asmlinkage long -sys32_pause(void) +asmlinkage long sys32_pause(void) { current->state = TASK_INTERRUPTIBLE; schedule(); @@ -599,25 +602,25 @@ struct sysctl_ia32 { }; -asmlinkage long -sys32_sysctl(struct sysctl_ia32 __user *args32) +asmlinkage long sys32_sysctl(struct sysctl_ia32 __user *args32) { struct sysctl_ia32 a32; - mm_segment_t old_fs = get_fs (); + mm_segment_t old_fs = get_fs(); void __user *oldvalp, *newvalp; size_t oldlen; int __user *namep; long ret; - if (copy_from_user(&a32, args32, sizeof (a32))) + if (copy_from_user(&a32, args32, sizeof(a32))) return -EFAULT; /* - * We need to pre-validate these because we have to disable address checking - * before calling do_sysctl() because of OLDLEN but we can't run the risk of the - * user specifying bad addresses here. Well, since we're dealing with 32 bit - * addresses, we KNOW that access_ok() will always succeed, so this is an - * expensive NOP, but so what... + * We need to pre-validate these because we have to disable + * address checking before calling do_sysctl() because of + * OLDLEN but we can't run the risk of the user specifying bad + * addresses here. Well, since we're dealing with 32 bit + * addresses, we KNOW that access_ok() will always succeed, so + * this is an expensive NOP, but so what... 
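The "access_ok() will always succeed" claim holds because a compat pointer is only 32 bits wide to begin with: it is zero-extended into the 64-bit address space and can never reach past the user range. compat_ptr() is essentially just that zero-extension:

	static inline void __user *compat_ptr(compat_uptr_t uptr)
	{
		return (void __user *)(unsigned long)uptr;
	}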
*/ namep = compat_ptr(a32.name); oldvalp = compat_ptr(a32.oldval); @@ -636,34 +639,34 @@ sys32_sysctl(struct sysctl_ia32 __user * unlock_kernel(); set_fs(old_fs); - if (oldvalp && put_user (oldlen, (int __user *)compat_ptr(a32.oldlenp))) + if (oldvalp && put_user(oldlen, (int __user *)compat_ptr(a32.oldlenp))) return -EFAULT; return ret; } #endif -/* warning: next two assume little endian */ -asmlinkage long -sys32_pread(unsigned int fd, char __user *ubuf, u32 count, u32 poslo, u32 poshi) +/* warning: next two assume little endian */ +asmlinkage long sys32_pread(unsigned int fd, char __user *ubuf, u32 count, + u32 poslo, u32 poshi) { return sys_pread64(fd, ubuf, count, ((loff_t)AA(poshi) << 32) | AA(poslo)); } -asmlinkage long -sys32_pwrite(unsigned int fd, char __user *ubuf, u32 count, u32 poslo, u32 poshi) +asmlinkage long sys32_pwrite(unsigned int fd, char __user *ubuf, u32 count, + u32 poslo, u32 poshi) { return sys_pwrite64(fd, ubuf, count, ((loff_t)AA(poshi) << 32) | AA(poslo)); } -asmlinkage long -sys32_personality(unsigned long personality) +asmlinkage long sys32_personality(unsigned long personality) { int ret; - if (personality(current->personality) == PER_LINUX32 && + + if (personality(current->personality) == PER_LINUX32 && personality == PER_LINUX) personality = PER_LINUX32; ret = sys_personality(personality); @@ -672,34 +675,33 @@ sys32_personality(unsigned long personal return ret; } -asmlinkage long -sys32_sendfile(int out_fd, int in_fd, compat_off_t __user *offset, s32 count) +asmlinkage long sys32_sendfile(int out_fd, int in_fd, + compat_off_t __user *offset, s32 count) { mm_segment_t old_fs = get_fs(); int ret; off_t of; - + if (offset && get_user(of, offset)) return -EFAULT; - + set_fs(KERNEL_DS); ret = sys_sendfile(out_fd, in_fd, offset ? 
(off_t __user *)&of : NULL, count); set_fs(old_fs); - + if (offset && put_user(of, offset)) return -EFAULT; - return ret; } asmlinkage long sys32_mmap2(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff) { struct mm_struct *mm = current->mm; unsigned long error; - struct file * file = NULL; + struct file *file = NULL; flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); if (!(flags & MAP_ANONYMOUS)) { @@ -717,36 +719,35 @@ asmlinkage long sys32_mmap2(unsigned lon return error; } -asmlinkage long sys32_olduname(struct oldold_utsname __user * name) +asmlinkage long sys32_olduname(struct oldold_utsname __user *name) { + char *arch = "x86_64"; int err; if (!name) return -EFAULT; if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname))) return -EFAULT; - - down_read(&uts_sem); - err = __copy_to_user(&name->sysname,&utsname()->sysname, - __OLD_UTS_LEN); - err |= __put_user(0,name->sysname+__OLD_UTS_LEN); - err |= __copy_to_user(&name->nodename,&utsname()->nodename, - __OLD_UTS_LEN); - err |= __put_user(0,name->nodename+__OLD_UTS_LEN); - err |= __copy_to_user(&name->release,&utsname()->release, - __OLD_UTS_LEN); - err |= __put_user(0,name->release+__OLD_UTS_LEN); - err |= __copy_to_user(&name->version,&utsname()->version, - __OLD_UTS_LEN); - err |= __put_user(0,name->version+__OLD_UTS_LEN); - { - char *arch = "x86_64"; - if (personality(current->personality) == PER_LINUX32) - arch = "i686"; - - err |= __copy_to_user(&name->machine, arch, strlen(arch)+1); - } + down_read(&uts_sem); + + err = __copy_to_user(&name->sysname, &utsname()->sysname, + __OLD_UTS_LEN); + err |= __put_user(0, name->sysname+__OLD_UTS_LEN); + err |= __copy_to_user(&name->nodename, &utsname()->nodename, + __OLD_UTS_LEN); + err |= __put_user(0, name->nodename+__OLD_UTS_LEN); + err |= __copy_to_user(&name->release, &utsname()->release, + __OLD_UTS_LEN); + err |= __put_user(0, name->release+__OLD_UTS_LEN); + err |= __copy_to_user(&name->version, &utsname()->version, + __OLD_UTS_LEN); + err |= __put_user(0, name->version+__OLD_UTS_LEN); + + if (personality(current->personality) == PER_LINUX32) + arch = "i686"; + + err |= __copy_to_user(&name->machine, arch, strlen(arch) + 1); up_read(&uts_sem); @@ -755,17 +756,19 @@ asmlinkage long sys32_olduname(struct ol return err; } -long sys32_uname(struct old_utsname __user * name) +long sys32_uname(struct old_utsname __user *name) { int err; + if (!name) return -EFAULT; down_read(&uts_sem); - err = copy_to_user(name, utsname(), sizeof (*name)); + err = copy_to_user(name, utsname(), sizeof(*name)); up_read(&uts_sem); - if (personality(current->personality) == PER_LINUX32) + if (personality(current->personality) == PER_LINUX32) err |= copy_to_user(&name->machine, "i686", 5); - return err?-EFAULT:0; + + return err ? 
-EFAULT : 0; } long sys32_ustat(unsigned dev, struct ustat32 __user *u32p) @@ -773,27 +776,28 @@ long sys32_ustat(unsigned dev, struct us struct ustat u; mm_segment_t seg; int ret; - - seg = get_fs(); - set_fs(KERNEL_DS); + + seg = get_fs(); + set_fs(KERNEL_DS); ret = sys_ustat(dev, (struct ustat __user *)&u); set_fs(seg); - if (ret >= 0) { - if (!access_ok(VERIFY_WRITE,u32p,sizeof(struct ustat32)) || - __put_user((__u32) u.f_tfree, &u32p->f_tfree) || - __put_user((__u32) u.f_tinode, &u32p->f_tfree) || - __copy_to_user(&u32p->f_fname, u.f_fname, sizeof(u.f_fname)) || - __copy_to_user(&u32p->f_fpack, u.f_fpack, sizeof(u.f_fpack))) - ret = -EFAULT; - } + if (ret < 0) + return ret; + + if (!access_ok(VERIFY_WRITE, u32p, sizeof(struct ustat32)) || + __put_user((__u32) u.f_tfree, &u32p->f_tfree) || + __put_user((__u32) u.f_tinode, &u32p->f_tfree) || + __copy_to_user(&u32p->f_fname, u.f_fname, sizeof(u.f_fname)) || + __copy_to_user(&u32p->f_fpack, u.f_fpack, sizeof(u.f_fpack))) + ret = -EFAULT; return ret; -} +} asmlinkage long sys32_execve(char __user *name, compat_uptr_t __user *argv, compat_uptr_t __user *envp, struct pt_regs *regs) { long error; - char * filename; + char *filename; filename = getname(name); error = PTR_ERR(filename); @@ -812,18 +816,19 @@ asmlinkage long sys32_execve(char __user asmlinkage long sys32_clone(unsigned int clone_flags, unsigned int newsp, struct pt_regs *regs) { - void __user *parent_tid = (void __user *)regs->rdx; - void __user *child_tid = (void __user *)regs->rdi; + void __user *parent_tid = (void __user *)regs->dx; + void __user *child_tid = (void __user *)regs->di; + if (!newsp) - newsp = regs->rsp; - return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid); + newsp = regs->sp; + return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid); } /* - * Some system calls that need sign extended arguments. This could be done by a generic wrapper. - */ - -long sys32_lseek (unsigned int fd, int offset, unsigned int whence) + * Some system calls that need sign extended arguments. This could be + * done by a generic wrapper. 
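The problem these wrappers solve sits at the ABI boundary: a negative 32-bit argument arrives zero-extended in a 64-bit register, and only the declared parameter type ("int offset" in sys32_lseek() below) makes the compiler sign-extend before the value reaches the native syscall. A sketch of the distinction:

	unsigned int raw = 0xfffffffcu;	/* what the 64-bit register holds */
	long wrong = raw;		/* 4294967292 -- zero-extended */
	long right = (int)raw;		/* -4 -- sign-extended via the int type */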
+ */ +long sys32_lseek(unsigned int fd, int offset, unsigned int whence) { return sys_lseek(fd, offset, whence); } @@ -832,49 +837,52 @@ long sys32_kill(int pid, int sig) { return sys_kill(pid, sig); } - -long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high, + +long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high, __u32 len_low, __u32 len_high, int advice) -{ +{ return sys_fadvise64_64(fd, (((u64)offset_high)<<32) | offset_low, (((u64)len_high)<<32) | len_low, - advice); -} + advice); +} long sys32_vm86_warning(void) -{ +{ struct task_struct *me = current; static char lastcomm[sizeof(me->comm)]; + if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) { - compat_printk(KERN_INFO "%s: vm86 mode not supported on 64 bit kernel\n", - me->comm); + compat_printk(KERN_INFO + "%s: vm86 mode not supported on 64 bit kernel\n", + me->comm); strncpy(lastcomm, me->comm, sizeof(lastcomm)); - } + } return -ENOSYS; -} +} long sys32_lookup_dcookie(u32 addr_low, u32 addr_high, - char __user * buf, size_t len) + char __user *buf, size_t len) { return sys_lookup_dcookie(((u64)addr_high << 32) | addr_low, buf, len); } -asmlinkage ssize_t sys32_readahead(int fd, unsigned off_lo, unsigned off_hi, size_t count) +asmlinkage ssize_t sys32_readahead(int fd, unsigned off_lo, unsigned off_hi, + size_t count) { return sys_readahead(fd, ((u64)off_hi << 32) | off_lo, count); } asmlinkage long sys32_sync_file_range(int fd, unsigned off_low, unsigned off_hi, - unsigned n_low, unsigned n_hi, int flags) + unsigned n_low, unsigned n_hi, int flags) { return sys_sync_file_range(fd, ((u64)off_hi << 32) | off_low, ((u64)n_hi << 32) | n_low, flags); } -asmlinkage long sys32_fadvise64(int fd, unsigned offset_lo, unsigned offset_hi, size_t len, - int advice) +asmlinkage long sys32_fadvise64(int fd, unsigned offset_lo, unsigned offset_hi, + size_t len, int advice) { return sys_fadvise64_64(fd, ((u64)offset_hi << 32) | offset_lo, len, advice); diff -puN arch/x86/ia32/syscall32.c~git-x86 /dev/null --- a/arch/x86/ia32/syscall32.c +++ /dev/null @@ -1,83 +0,0 @@ -/* Copyright 2002,2003 Andi Kleen, SuSE Labs */ - -/* vsyscall handling for 32bit processes. Map a stub page into it - on demand because 32bit cannot reach the kernel's fixmaps */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -extern unsigned char syscall32_syscall[], syscall32_syscall_end[]; -extern unsigned char syscall32_sysenter[], syscall32_sysenter_end[]; -extern int sysctl_vsyscall32; - -static struct page *syscall32_pages[1]; -static int use_sysenter = -1; - -struct linux_binprm; - -/* Setup a VMA at program startup for the vsyscall page */ -int syscall32_setup_pages(struct linux_binprm *bprm, int exstack) -{ - struct mm_struct *mm = current->mm; - int ret; - - down_write(&mm->mmap_sem); - /* - * MAYWRITE to allow gdb to COW and set breakpoints - * - * Make sure the vDSO gets into every core dump. - * Dumping its contents makes post-mortem fully interpretable later - * without matching up the same kernel and hardware config to see - * what PC values meant. 
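From the 32-bit process's side, the page installed here is located through the auxiliary vector rather than a hard-coded address. A rough userspace illustration — getauxval() is a later glibc convenience, used here only to show the mechanism; build with -m32:

	#include <stdio.h>
	#include <sys/auxv.h>

	int main(void)
	{
		/* AT_SYSINFO carries the address of __kernel_vsyscall */
		unsigned long entry = getauxval(AT_SYSINFO);

		printf("syscall trampoline at %#lx\n", entry);
		return 0;
	}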
- */ - /* Could randomize here */ - ret = install_special_mapping(mm, VSYSCALL32_BASE, PAGE_SIZE, - VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| - VM_ALWAYSDUMP, - syscall32_pages); - up_write(&mm->mmap_sem); - return ret; -} - -static int __init init_syscall32(void) -{ - char *syscall32_page = (void *)get_zeroed_page(GFP_KERNEL); - if (!syscall32_page) - panic("Cannot allocate syscall32 page"); - syscall32_pages[0] = virt_to_page(syscall32_page); - if (use_sysenter > 0) { - memcpy(syscall32_page, syscall32_sysenter, - syscall32_sysenter_end - syscall32_sysenter); - } else { - memcpy(syscall32_page, syscall32_syscall, - syscall32_syscall_end - syscall32_syscall); - } - return 0; -} - -__initcall(init_syscall32); - -/* May not be __init: called during resume */ -void syscall32_cpu_init(void) -{ - if (use_sysenter < 0) - use_sysenter = (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL); - - /* Load these always in case some future AMD CPU supports - SYSENTER from compat mode too. */ - checking_wrmsrl(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); - checking_wrmsrl(MSR_IA32_SYSENTER_ESP, 0ULL); - checking_wrmsrl(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target); - - wrmsrl(MSR_CSTAR, ia32_cstar_target); -} diff -puN arch/x86/ia32/syscall32_syscall.S~git-x86 /dev/null --- a/arch/x86/ia32/syscall32_syscall.S +++ /dev/null @@ -1,17 +0,0 @@ -/* 32bit VDSOs mapped into user space. */ - - .section ".init.data","aw" - - .globl syscall32_syscall - .globl syscall32_syscall_end - -syscall32_syscall: - .incbin "arch/x86/ia32/vsyscall-syscall.so" -syscall32_syscall_end: - - .globl syscall32_sysenter - .globl syscall32_sysenter_end - -syscall32_sysenter: - .incbin "arch/x86/ia32/vsyscall-sysenter.so" -syscall32_sysenter_end: diff -puN arch/x86/ia32/tls32.c~git-x86 /dev/null --- a/arch/x86/ia32/tls32.c +++ /dev/null @@ -1,163 +0,0 @@ -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -/* - * sys_alloc_thread_area: get a yet unused TLS descriptor index. - */ -static int get_free_idx(void) -{ - struct thread_struct *t = ¤t->thread; - int idx; - - for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++) - if (desc_empty((struct n_desc_struct *)(t->tls_array) + idx)) - return idx + GDT_ENTRY_TLS_MIN; - return -ESRCH; -} - -/* - * Set a given TLS descriptor: - * When you want addresses > 32bit use arch_prctl() - */ -int do_set_thread_area(struct thread_struct *t, struct user_desc __user *u_info) -{ - struct user_desc info; - struct n_desc_struct *desc; - int cpu, idx; - - if (copy_from_user(&info, u_info, sizeof(info))) - return -EFAULT; - - idx = info.entry_number; - - /* - * index -1 means the kernel should try to find and - * allocate an empty descriptor: - */ - if (idx == -1) { - idx = get_free_idx(); - if (idx < 0) - return idx; - if (put_user(idx, &u_info->entry_number)) - return -EFAULT; - } - - if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) - return -EINVAL; - - desc = ((struct n_desc_struct *)t->tls_array) + idx - GDT_ENTRY_TLS_MIN; - - /* - * We must not get preempted while modifying the TLS. 
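The get_cpu()/put_cpu() pair below is what enforces that: it disables preemption and names the current CPU, so the per-CPU GDT's TLS slots cannot be reloaded out from under the update. The shape of the critical section:

	int cpu = get_cpu();	/* disable preemption, note this CPU */

	/* ... rewrite the TLS descriptors in t->tls_array ... */
	if (t == &current->thread)
		load_TLS(t, cpu);	/* push them into this CPU's GDT */

	put_cpu();		/* re-enable preemption */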
- */ - cpu = get_cpu(); - - if (LDT_empty(&info)) { - desc->a = 0; - desc->b = 0; - } else { - desc->a = LDT_entry_a(&info); - desc->b = LDT_entry_b(&info); - } - if (t == ¤t->thread) - load_TLS(t, cpu); - - put_cpu(); - return 0; -} - -asmlinkage long sys32_set_thread_area(struct user_desc __user *u_info) -{ - return do_set_thread_area(¤t->thread, u_info); -} - - -/* - * Get the current Thread-Local Storage area: - */ - -#define GET_BASE(desc) ( \ - (((desc)->a >> 16) & 0x0000ffff) | \ - (((desc)->b << 16) & 0x00ff0000) | \ - ( (desc)->b & 0xff000000) ) - -#define GET_LIMIT(desc) ( \ - ((desc)->a & 0x0ffff) | \ - ((desc)->b & 0xf0000) ) - -#define GET_32BIT(desc) (((desc)->b >> 22) & 1) -#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3) -#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1) -#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1) -#define GET_PRESENT(desc) (((desc)->b >> 15) & 1) -#define GET_USEABLE(desc) (((desc)->b >> 20) & 1) -#define GET_LONGMODE(desc) (((desc)->b >> 21) & 1) - -int do_get_thread_area(struct thread_struct *t, struct user_desc __user *u_info) -{ - struct user_desc info; - struct n_desc_struct *desc; - int idx; - - if (get_user(idx, &u_info->entry_number)) - return -EFAULT; - if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) - return -EINVAL; - - desc = ((struct n_desc_struct *)t->tls_array) + idx - GDT_ENTRY_TLS_MIN; - - memset(&info, 0, sizeof(struct user_desc)); - info.entry_number = idx; - info.base_addr = GET_BASE(desc); - info.limit = GET_LIMIT(desc); - info.seg_32bit = GET_32BIT(desc); - info.contents = GET_CONTENTS(desc); - info.read_exec_only = !GET_WRITABLE(desc); - info.limit_in_pages = GET_LIMIT_PAGES(desc); - info.seg_not_present = !GET_PRESENT(desc); - info.useable = GET_USEABLE(desc); - info.lm = GET_LONGMODE(desc); - - if (copy_to_user(u_info, &info, sizeof(info))) - return -EFAULT; - return 0; -} - -asmlinkage long sys32_get_thread_area(struct user_desc __user *u_info) -{ - return do_get_thread_area(¤t->thread, u_info); -} - - -int ia32_child_tls(struct task_struct *p, struct pt_regs *childregs) -{ - struct n_desc_struct *desc; - struct user_desc info; - struct user_desc __user *cp; - int idx; - - cp = (void __user *)childregs->rsi; - if (copy_from_user(&info, cp, sizeof(info))) - return -EFAULT; - if (LDT_empty(&info)) - return -EINVAL; - - idx = info.entry_number; - if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) - return -EINVAL; - - desc = (struct n_desc_struct *)(p->thread.tls_array) + idx - GDT_ENTRY_TLS_MIN; - desc->a = LDT_entry_a(&info); - desc->b = LDT_entry_b(&info); - - return 0; -} diff -puN arch/x86/ia32/vsyscall-sigreturn.S~git-x86 /dev/null --- a/arch/x86/ia32/vsyscall-sigreturn.S +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Common code for the sigreturn entry points on the vsyscall page. - * This code uses SYSCALL_ENTER_KERNEL (either syscall or int $0x80) - * to enter the kernel. - * This file is #include'd by vsyscall-*.S to define them after the - * vsyscall entry point. The addresses we get for these entry points - * by doing ".balign 32" must match in both versions of the page. 
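The rt_sigreturn entry defined below is tiny: one immediate load plus the trap. Its byte image — which is what the removed ia32 signal-delivery code used to copy into frame->retcode as a packed struct — decodes roughly as follows:

	/* 0xb8 = movl $imm32, %eax; 0xcd 0x80 = int $0x80
	 * (stored as the little-endian u16 0x80cd) */
	static const struct {
		u8  movl;	/* 0xb8 */
		u32 val;	/* __NR_ia32_rt_sigreturn */
		u16 int80;	/* 0x80cd */
	} __attribute__((packed)) stub = {
		0xb8, __NR_ia32_rt_sigreturn, 0x80cd,
	};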
- */ - - .code32 - .section .text.sigreturn,"ax" - .balign 32 - .globl __kernel_sigreturn - .type __kernel_sigreturn,@function -__kernel_sigreturn: -.LSTART_sigreturn: - popl %eax - movl $__NR_ia32_sigreturn, %eax - SYSCALL_ENTER_KERNEL -.LEND_sigreturn: - .size __kernel_sigreturn,.-.LSTART_sigreturn - - .section .text.rtsigreturn,"ax" - .balign 32 - .globl __kernel_rt_sigreturn - .type __kernel_rt_sigreturn,@function -__kernel_rt_sigreturn: -.LSTART_rt_sigreturn: - movl $__NR_ia32_rt_sigreturn, %eax - SYSCALL_ENTER_KERNEL -.LEND_rt_sigreturn: - .size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn - - .section .eh_frame,"a",@progbits -.LSTARTFRAMES: - .long .LENDCIES-.LSTARTCIES -.LSTARTCIES: - .long 0 /* CIE ID */ - .byte 1 /* Version number */ - .string "zRS" /* NUL-terminated augmentation string */ - .uleb128 1 /* Code alignment factor */ - .sleb128 -4 /* Data alignment factor */ - .byte 8 /* Return address register column */ - .uleb128 1 /* Augmentation value length */ - .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */ - .byte 0x0c /* DW_CFA_def_cfa */ - .uleb128 4 - .uleb128 4 - .byte 0x88 /* DW_CFA_offset, column 0x8 */ - .uleb128 1 - .align 4 -.LENDCIES: - - .long .LENDFDE2-.LSTARTFDE2 /* Length FDE */ -.LSTARTFDE2: - .long .LSTARTFDE2-.LSTARTFRAMES /* CIE pointer */ - /* HACK: The dwarf2 unwind routines will subtract 1 from the - return address to get an address in the middle of the - presumed call instruction. Since we didn't get here via - a call, we need to include the nop before the real start - to make up for it. */ - .long .LSTART_sigreturn-1-. /* PC-relative start address */ - .long .LEND_sigreturn-.LSTART_sigreturn+1 - .uleb128 0 /* Augmentation length */ - /* What follows are the instructions for the table generation. - We record the locations of each register saved. This is - complicated by the fact that the "CFA" is always assumed to - be the value of the stack pointer in the caller. This means - that we must define the CFA of this body of code to be the - saved value of the stack pointer in the sigcontext. Which - also means that there is no fixed relation to the other - saved registers, which means that we must use DW_CFA_expression - to compute their addresses. It also means that when we - adjust the stack with the popl, we have to do it all over again. */ - -#define do_cfa_expr(offset) \ - .byte 0x0f; /* DW_CFA_def_cfa_expression */ \ - .uleb128 1f-0f; /* length */ \ -0: .byte 0x74; /* DW_OP_breg4 */ \ - .sleb128 offset; /* offset */ \ - .byte 0x06; /* DW_OP_deref */ \ -1: - -#define do_expr(regno, offset) \ - .byte 0x10; /* DW_CFA_expression */ \ - .uleb128 regno; /* regno */ \ - .uleb128 1f-0f; /* length */ \ -0: .byte 0x74; /* DW_OP_breg4 */ \ - .sleb128 offset; /* offset */ \ -1: - - do_cfa_expr(IA32_SIGCONTEXT_esp+4) - do_expr(0, IA32_SIGCONTEXT_eax+4) - do_expr(1, IA32_SIGCONTEXT_ecx+4) - do_expr(2, IA32_SIGCONTEXT_edx+4) - do_expr(3, IA32_SIGCONTEXT_ebx+4) - do_expr(5, IA32_SIGCONTEXT_ebp+4) - do_expr(6, IA32_SIGCONTEXT_esi+4) - do_expr(7, IA32_SIGCONTEXT_edi+4) - do_expr(8, IA32_SIGCONTEXT_eip+4) - - .byte 0x42 /* DW_CFA_advance_loc 2 -- nop; popl eax. 
*/ - - do_cfa_expr(IA32_SIGCONTEXT_esp) - do_expr(0, IA32_SIGCONTEXT_eax) - do_expr(1, IA32_SIGCONTEXT_ecx) - do_expr(2, IA32_SIGCONTEXT_edx) - do_expr(3, IA32_SIGCONTEXT_ebx) - do_expr(5, IA32_SIGCONTEXT_ebp) - do_expr(6, IA32_SIGCONTEXT_esi) - do_expr(7, IA32_SIGCONTEXT_edi) - do_expr(8, IA32_SIGCONTEXT_eip) - - .align 4 -.LENDFDE2: - - .long .LENDFDE3-.LSTARTFDE3 /* Length FDE */ -.LSTARTFDE3: - .long .LSTARTFDE3-.LSTARTFRAMES /* CIE pointer */ - /* HACK: See above wrt unwind library assumptions. */ - .long .LSTART_rt_sigreturn-1-. /* PC-relative start address */ - .long .LEND_rt_sigreturn-.LSTART_rt_sigreturn+1 - .uleb128 0 /* Augmentation */ - /* What follows are the instructions for the table generation. - We record the locations of each register saved. This is - slightly less complicated than the above, since we don't - modify the stack pointer in the process. */ - - do_cfa_expr(IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_esp) - do_expr(0, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_eax) - do_expr(1, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ecx) - do_expr(2, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_edx) - do_expr(3, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ebx) - do_expr(5, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ebp) - do_expr(6, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_esi) - do_expr(7, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_edi) - do_expr(8, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_eip) - - .align 4 -.LENDFDE3: - -#include "../../x86/kernel/vsyscall-note_32.S" - diff -puN arch/x86/ia32/vsyscall-syscall.S~git-x86 /dev/null --- a/arch/x86/ia32/vsyscall-syscall.S +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Code for the vsyscall page. This version uses the syscall instruction. - */ - -#include -#include -#include - - .code32 - .text - .section .text.vsyscall,"ax" - .globl __kernel_vsyscall - .type __kernel_vsyscall,@function -__kernel_vsyscall: -.LSTART_vsyscall: - push %ebp -.Lpush_ebp: - movl %ecx, %ebp - syscall - movl $__USER32_DS, %ecx - movl %ecx, %ss - movl %ebp, %ecx - popl %ebp -.Lpop_ebp: - ret -.LEND_vsyscall: - .size __kernel_vsyscall,.-.LSTART_vsyscall - - .section .eh_frame,"a",@progbits -.LSTARTFRAME: - .long .LENDCIE-.LSTARTCIE -.LSTARTCIE: - .long 0 /* CIE ID */ - .byte 1 /* Version number */ - .string "zR" /* NUL-terminated augmentation string */ - .uleb128 1 /* Code alignment factor */ - .sleb128 -4 /* Data alignment factor */ - .byte 8 /* Return address register column */ - .uleb128 1 /* Augmentation value length */ - .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */ - .byte 0x0c /* DW_CFA_def_cfa */ - .uleb128 4 - .uleb128 4 - .byte 0x88 /* DW_CFA_offset, column 0x8 */ - .uleb128 1 - .align 4 -.LENDCIE: - - .long .LENDFDE1-.LSTARTFDE1 /* Length FDE */ -.LSTARTFDE1: - .long .LSTARTFDE1-.LSTARTFRAME /* CIE pointer */ - .long .LSTART_vsyscall-. /* PC-relative start address */ - .long .LEND_vsyscall-.LSTART_vsyscall - .uleb128 0 /* Augmentation length */ - /* What follows are the instructions for the table generation. - We have to record all changes of the stack pointer. 
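The "0x40 +" arithmetic in the table below is the compact DW_CFA_advance_loc form: the two high bits of the opcode byte select the operation and the low six bits carry the code delta, so it only works for deltas under 0x40 (the sysenter variant uses DW_CFA_advance_loc4, opcode 0x04, with a four-byte delta instead). A sketch of the encoding:

	/* encode a small code advance in a single CFA opcode byte */
	static inline unsigned char dw_cfa_advance_loc(unsigned int delta)
	{
		/* valid only for delta < 0x40 */
		return 0x40 + delta;
	}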
*/ - .byte 0x40 + .Lpush_ebp-.LSTART_vsyscall /* DW_CFA_advance_loc */ - .byte 0x0e /* DW_CFA_def_cfa_offset */ - .uleb128 8 - .byte 0x85, 0x02 /* DW_CFA_offset %ebp -8 */ - .byte 0x40 + .Lpop_ebp-.Lpush_ebp /* DW_CFA_advance_loc */ - .byte 0xc5 /* DW_CFA_restore %ebp */ - .byte 0x0e /* DW_CFA_def_cfa_offset */ - .uleb128 4 - .align 4 -.LENDFDE1: - -#define SYSCALL_ENTER_KERNEL syscall -#include "vsyscall-sigreturn.S" diff -puN arch/x86/ia32/vsyscall-sysenter.S~git-x86 /dev/null --- a/arch/x86/ia32/vsyscall-sysenter.S +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Code for the vsyscall page. This version uses the sysenter instruction. - */ - -#include -#include - - .code32 - .text - .section .text.vsyscall,"ax" - .globl __kernel_vsyscall - .type __kernel_vsyscall,@function -__kernel_vsyscall: -.LSTART_vsyscall: - push %ecx -.Lpush_ecx: - push %edx -.Lpush_edx: - push %ebp -.Lenter_kernel: - movl %esp,%ebp - sysenter - .space 7,0x90 - jmp .Lenter_kernel - /* 16: System call normal return point is here! */ - pop %ebp -.Lpop_ebp: - pop %edx -.Lpop_edx: - pop %ecx -.Lpop_ecx: - ret -.LEND_vsyscall: - .size __kernel_vsyscall,.-.LSTART_vsyscall - - .section .eh_frame,"a",@progbits -.LSTARTFRAME: - .long .LENDCIE-.LSTARTCIE -.LSTARTCIE: - .long 0 /* CIE ID */ - .byte 1 /* Version number */ - .string "zR" /* NUL-terminated augmentation string */ - .uleb128 1 /* Code alignment factor */ - .sleb128 -4 /* Data alignment factor */ - .byte 8 /* Return address register column */ - .uleb128 1 /* Augmentation value length */ - .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */ - .byte 0x0c /* DW_CFA_def_cfa */ - .uleb128 4 - .uleb128 4 - .byte 0x88 /* DW_CFA_offset, column 0x8 */ - .uleb128 1 - .align 4 -.LENDCIE: - - .long .LENDFDE1-.LSTARTFDE1 /* Length FDE */ -.LSTARTFDE1: - .long .LSTARTFDE1-.LSTARTFRAME /* CIE pointer */ - .long .LSTART_vsyscall-. /* PC-relative start address */ - .long .LEND_vsyscall-.LSTART_vsyscall - .uleb128 0 /* Augmentation length */ - /* What follows are the instructions for the table generation. - We have to record all changes of the stack pointer. */ - .byte 0x04 /* DW_CFA_advance_loc4 */ - .long .Lpush_ecx-.LSTART_vsyscall - .byte 0x0e /* DW_CFA_def_cfa_offset */ - .byte 0x08 /* RA at offset 8 now */ - .byte 0x04 /* DW_CFA_advance_loc4 */ - .long .Lpush_edx-.Lpush_ecx - .byte 0x0e /* DW_CFA_def_cfa_offset */ - .byte 0x0c /* RA at offset 12 now */ - .byte 0x04 /* DW_CFA_advance_loc4 */ - .long .Lenter_kernel-.Lpush_edx - .byte 0x0e /* DW_CFA_def_cfa_offset */ - .byte 0x10 /* RA at offset 16 now */ - .byte 0x85, 0x04 /* DW_CFA_offset %ebp -16 */ - /* Finally the epilogue. */ - .byte 0x04 /* DW_CFA_advance_loc4 */ - .long .Lpop_ebp-.Lenter_kernel - .byte 0x0e /* DW_CFA_def_cfa_offset */ - .byte 0x12 /* RA at offset 12 now */ - .byte 0xc5 /* DW_CFA_restore %ebp */ - .byte 0x04 /* DW_CFA_advance_loc4 */ - .long .Lpop_edx-.Lpop_ebp - .byte 0x0e /* DW_CFA_def_cfa_offset */ - .byte 0x08 /* RA at offset 8 now */ - .byte 0x04 /* DW_CFA_advance_loc4 */ - .long .Lpop_ecx-.Lpop_edx - .byte 0x0e /* DW_CFA_def_cfa_offset */ - .byte 0x04 /* RA at offset 4 now */ - .align 4 -.LENDFDE1: - -#define SYSCALL_ENTER_KERNEL int $0x80 -#include "vsyscall-sigreturn.S" diff -puN arch/x86/ia32/vsyscall.lds~git-x86 /dev/null --- a/arch/x86/ia32/vsyscall.lds +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Linker script for vsyscall DSO. The vsyscall page is an ELF shared - * object prelinked to its virtual address. This script controls its layout. - */ - -/* This must match . 
*/ -VSYSCALL_BASE = 0xffffe000; - -SECTIONS -{ - . = VSYSCALL_BASE + SIZEOF_HEADERS; - - .hash : { *(.hash) } :text - .gnu.hash : { *(.gnu.hash) } - .dynsym : { *(.dynsym) } - .dynstr : { *(.dynstr) } - .gnu.version : { *(.gnu.version) } - .gnu.version_d : { *(.gnu.version_d) } - .gnu.version_r : { *(.gnu.version_r) } - - /* This linker script is used both with -r and with -shared. - For the layouts to match, we need to skip more than enough - space for the dynamic symbol table et al. If this amount - is insufficient, ld -shared will barf. Just increase it here. */ - . = VSYSCALL_BASE + 0x400; - - .text.vsyscall : { *(.text.vsyscall) } :text =0x90909090 - - /* This is an 32bit object and we cannot easily get the offsets - into the 64bit kernel. Just hardcode them here. This assumes - that all the stubs don't need more than 0x100 bytes. */ - . = VSYSCALL_BASE + 0x500; - - .text.sigreturn : { *(.text.sigreturn) } :text =0x90909090 - - . = VSYSCALL_BASE + 0x600; - - .text.rtsigreturn : { *(.text.rtsigreturn) } :text =0x90909090 - - .note : { *(.note.*) } :text :note - .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr - .eh_frame : { KEEP (*(.eh_frame)) } :text - .dynamic : { *(.dynamic) } :text :dynamic - .useless : { - *(.got.plt) *(.got) - *(.data .data.* .gnu.linkonce.d.*) - *(.dynbss) - *(.bss .bss.* .gnu.linkonce.b.*) - } :text -} - -/* - * We must supply the ELF program headers explicitly to get just one - * PT_LOAD segment, and set the flags explicitly to make segments read-only. - */ -PHDRS -{ - text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ - dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ - note PT_NOTE FLAGS(4); /* PF_R */ - eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */ -} - -/* - * This controls what symbols we export from the DSO. - */ -VERSION -{ - LINUX_2.5 { - global: - __kernel_vsyscall; - __kernel_sigreturn; - __kernel_rt_sigreturn; - - local: *; - }; -} - -/* The ELF entry point can be used to set the AT_SYSINFO value. */ -ENTRY(__kernel_vsyscall); diff -puN arch/x86/kernel/Makefile_32~git-x86 arch/x86/kernel/Makefile_32 --- a/arch/x86/kernel/Makefile_32~git-x86 +++ a/arch/x86/kernel/Makefile_32 @@ -6,10 +6,14 @@ extra-y := head_32.o init_task.o vmlinux CPPFLAGS_vmlinux.lds += -Ui386 obj-y := process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \ - ptrace_32.o time_32.o ioport_32.o ldt_32.o setup_32.o i8259_32.o sys_i386_32.o \ + time_32.o ioport_32.o ldt.o setup_32.o i8259_32.o sys_i386_32.o \ pci-dma_32.o i386_ksyms_32.o i387_32.o bootflag.o e820_32.o\ - quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o + quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o rtc.o +obj-y += ptrace.o +obj-y += ds.o +obj-y += tls.o +obj-y += step.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += cpu/ obj-y += acpi/ @@ -33,7 +37,6 @@ obj-$(CONFIG_X86_NUMAQ) += numaq_32.o obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o obj-$(CONFIG_KPROBES) += kprobes_32.o obj-$(CONFIG_MODULES) += module_32.o -obj-y += sysenter_32.o vsyscall_32.o obj-$(CONFIG_ACPI_SRAT) += srat_32.o obj-$(CONFIG_EFI) += efi_32.o efi_stub_32.o obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o @@ -48,41 +51,3 @@ obj-$(CONFIG_PARAVIRT) += paravirt_32.o obj-y += pcspeaker.o obj-$(CONFIG_SCx200) += scx200_32.o - -# vsyscall_32.o contains the vsyscall DSO images as __initdata. -# We must build both images before we can assemble it. 
-# Note: kbuild does not track this dependency due to usage of .incbin -$(obj)/vsyscall_32.o: $(obj)/vsyscall-int80_32.so $(obj)/vsyscall-sysenter_32.so -targets += $(foreach F,int80 sysenter,vsyscall-$F_32.o vsyscall-$F_32.so) -targets += vsyscall-note_32.o vsyscall_32.lds - -# The DSO images are built using a special linker script. -quiet_cmd_syscall = SYSCALL $@ - cmd_syscall = $(CC) -m elf_i386 -nostdlib $(SYSCFLAGS_$(@F)) \ - -Wl,-T,$(filter-out FORCE,$^) -o $@ - -export CPPFLAGS_vsyscall_32.lds += -P -C -Ui386 - -vsyscall-flags = -shared -s -Wl,-soname=linux-gate.so.1 \ - $(call ld-option, -Wl$(comma)--hash-style=sysv) -SYSCFLAGS_vsyscall-sysenter_32.so = $(vsyscall-flags) -SYSCFLAGS_vsyscall-int80_32.so = $(vsyscall-flags) - -$(obj)/vsyscall-int80_32.so $(obj)/vsyscall-sysenter_32.so: \ -$(obj)/vsyscall-%.so: $(src)/vsyscall_32.lds \ - $(obj)/vsyscall-%.o $(obj)/vsyscall-note_32.o FORCE - $(call if_changed,syscall) - -# We also create a special relocatable object that should mirror the symbol -# table and layout of the linked DSO. With ld -R we can then refer to -# these symbols in the kernel code rather than hand-coded addresses. -extra-y += vsyscall-syms.o -$(obj)/built-in.o: $(obj)/vsyscall-syms.o -$(obj)/built-in.o: ld_flags += -R $(obj)/vsyscall-syms.o - -SYSCFLAGS_vsyscall-syms.o = -r -$(obj)/vsyscall-syms.o: $(src)/vsyscall_32.lds \ - $(obj)/vsyscall-sysenter_32.o $(obj)/vsyscall-note_32.o FORCE - $(call if_changed,syscall) - - diff -puN arch/x86/kernel/Makefile_64~git-x86 arch/x86/kernel/Makefile_64 --- a/arch/x86/kernel/Makefile_64~git-x86 +++ a/arch/x86/kernel/Makefile_64 @@ -4,15 +4,19 @@ extra-y := head_64.o head64.o init_task.o vmlinux.lds CPPFLAGS_vmlinux.lds += -Ux86_64 -EXTRA_AFLAGS := -traditional obj-y := process_64.o signal_64.o entry_64.o traps_64.o irq_64.o \ - ptrace_64.o time_64.o ioport_64.o ldt_64.o setup_64.o i8259_64.o sys_x86_64.o \ + time_64.o ioport_64.o ldt.o setup_64.o i8259_64.o sys_x86_64.o \ x8664_ksyms_64.o i387_64.o syscall_64.o vsyscall_64.o \ setup64.o bootflag.o e820_64.o reboot_64.o quirks.o i8237.o \ pci-dma_64.o pci-nommu_64.o alternative.o hpet.o tsc_64.o bugs_64.o \ - i8253.o + i8253.o rtc.o +obj-y += ptrace.o +obj-y += ds.o +obj-y += step.o + +obj-$(CONFIG_IA32_EMULATION) += tls.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += cpu/ obj-y += acpi/ diff -puN arch/x86/kernel/acpi/boot.c~git-x86 arch/x86/kernel/acpi/boot.c --- a/arch/x86/kernel/acpi/boot.c~git-x86 +++ a/arch/x86/kernel/acpi/boot.c @@ -78,7 +78,6 @@ int acpi_ht __initdata = 1; /* enable HT int acpi_lapic; int acpi_ioapic; int acpi_strict; -EXPORT_SYMBOL(acpi_strict); u8 acpi_sci_flags __initdata; int acpi_sci_override_gsi __initdata; @@ -490,8 +489,6 @@ int acpi_register_gsi(u32 gsi, int trigg return irq; } -EXPORT_SYMBOL(acpi_register_gsi); - /* * ACPI based hotplug support for CPU */ diff -puN arch/x86/kernel/acpi/wakeup_64.S~git-x86 arch/x86/kernel/acpi/wakeup_64.S --- a/arch/x86/kernel/acpi/wakeup_64.S~git-x86 +++ a/arch/x86/kernel/acpi/wakeup_64.S @@ -344,13 +344,13 @@ do_suspend_lowlevel: call save_processor_state movq $saved_context, %rax - movq %rsp, pt_regs_rsp(%rax) - movq %rbp, pt_regs_rbp(%rax) - movq %rsi, pt_regs_rsi(%rax) - movq %rdi, pt_regs_rdi(%rax) - movq %rbx, pt_regs_rbx(%rax) - movq %rcx, pt_regs_rcx(%rax) - movq %rdx, pt_regs_rdx(%rax) + movq %rsp, pt_regs_sp(%rax) + movq %rbp, pt_regs_bp(%rax) + movq %rsi, pt_regs_si(%rax) + movq %rdi, pt_regs_di(%rax) + movq %rbx, pt_regs_bx(%rax) + movq %rcx, pt_regs_cx(%rax) + movq %rdx, pt_regs_dx(%rax) movq 
%r8, pt_regs_r8(%rax) movq %r9, pt_regs_r9(%rax) movq %r10, pt_regs_r10(%rax) @@ -360,7 +360,7 @@ do_suspend_lowlevel: movq %r14, pt_regs_r14(%rax) movq %r15, pt_regs_r15(%rax) pushfq - popq pt_regs_eflags(%rax) + popq pt_regs_flags(%rax) movq $.L97, saved_rip(%rip) @@ -391,15 +391,15 @@ do_suspend_lowlevel: movq %rbx, %cr2 movq saved_context_cr0(%rax), %rbx movq %rbx, %cr0 - pushq pt_regs_eflags(%rax) + pushq pt_regs_flags(%rax) popfq - movq pt_regs_rsp(%rax), %rsp - movq pt_regs_rbp(%rax), %rbp - movq pt_regs_rsi(%rax), %rsi - movq pt_regs_rdi(%rax), %rdi - movq pt_regs_rbx(%rax), %rbx - movq pt_regs_rcx(%rax), %rcx - movq pt_regs_rdx(%rax), %rdx + movq pt_regs_sp(%rax), %rsp + movq pt_regs_bp(%rax), %rbp + movq pt_regs_si(%rax), %rsi + movq pt_regs_di(%rax), %rdi + movq pt_regs_bx(%rax), %rbx + movq pt_regs_cx(%rax), %rcx + movq pt_regs_dx(%rax), %rdx movq pt_regs_r8(%rax), %r8 movq pt_regs_r9(%rax), %r9 movq pt_regs_r10(%rax), %r10 diff -puN arch/x86/kernel/alternative.c~git-x86 arch/x86/kernel/alternative.c --- a/arch/x86/kernel/alternative.c~git-x86 +++ a/arch/x86/kernel/alternative.c @@ -356,15 +356,15 @@ void alternatives_smp_switch(int smp) spin_lock_irqsave(&smp_alt, flags); if (smp) { printk(KERN_INFO "SMP alternatives: switching to SMP code\n"); - clear_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); - clear_bit(X86_FEATURE_UP, cpu_data(0).x86_capability); + clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); + clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP); list_for_each_entry(mod, &smp_alt_modules, next) alternatives_smp_lock(mod->locks, mod->locks_end, mod->text, mod->text_end); } else { printk(KERN_INFO "SMP alternatives: switching to UP code\n"); - set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); - set_bit(X86_FEATURE_UP, cpu_data(0).x86_capability); + set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); + set_cpu_cap(&cpu_data(0), X86_FEATURE_UP); list_for_each_entry(mod, &smp_alt_modules, next) alternatives_smp_unlock(mod->locks, mod->locks_end, mod->text, mod->text_end); @@ -431,8 +431,9 @@ void __init alternative_instructions(voi if (smp_alt_once) { if (1 == num_possible_cpus()) { printk(KERN_INFO "SMP alternatives: switching to UP code\n"); - set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); - set_bit(X86_FEATURE_UP, cpu_data(0).x86_capability); + set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); + set_cpu_cap(&cpu_data(0), X86_FEATURE_UP); + alternatives_smp_unlock(__smp_locks, __smp_locks_end, _text, _etext); } diff -puN arch/x86/kernel/aperture_64.c~git-x86 arch/x86/kernel/aperture_64.c --- a/arch/x86/kernel/aperture_64.c~git-x86 +++ a/arch/x86/kernel/aperture_64.c @@ -1,12 +1,12 @@ -/* +/* * Firmware replacement code. - * + * * Work around broken BIOSes that don't set an aperture or only set the - * aperture in the AGP bridge. - * If all fails map the aperture over some low memory. This is cheaper than - * doing bounce buffering. The memory is lost. This is done at early boot - * because only the bootmem allocator can allocate 32+MB. - * + * aperture in the AGP bridge. + * If all fails map the aperture over some low memory. This is cheaper than + * doing bounce buffering. The memory is lost. This is done at early boot + * because only the bootmem allocator can allocate 32+MB. + * * Copyright 2002 Andi Kleen, SuSE Labs. 
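The header comment above describes mapping the aperture over low RAM when the BIOS set none up. The sizing rule used by allocate_aperture() below is size = 32MB << order with natural alignment, i.e. the base must be a multiple of the size. A standalone sketch of that arithmetic, with the 0..7 order range taken from the clamp in the code:

#include <stdio.h>

int main(void)
{
	unsigned int order;

	/* same clamp as allocate_aperture(): orders above 7 fall back to 7 */
	for (order = 0; order <= 7; order++) {
		unsigned long size = (32UL << 20) << order;

		/* natural alignment: base must be a multiple of the size */
		printf("order %u -> %4lu MB aperture, %4lu MB alignment\n",
		       order, size >> 20, size >> 20);
	}
	return 0;
}

This is why the comment notes a 2GB aperture has little chance of fitting in the lower 4GB: it would also need 2GB alignment.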
*/ #include @@ -30,7 +30,7 @@ int gart_iommu_aperture_disabled __initd int gart_iommu_aperture_allowed __initdata = 0; int fallback_aper_order __initdata = 1; /* 64MB */ -int fallback_aper_force __initdata = 0; +int fallback_aper_force __initdata = 0; int fix_aperture __initdata = 1; @@ -49,167 +49,180 @@ static void __init insert_aperture_resou /* This code runs before the PCI subsystem is initialized, so just access the northbridge directly. */ -static u32 __init allocate_aperture(void) +static u32 __init allocate_aperture(void) { u32 aper_size; - void *p; + void *p; - if (fallback_aper_order > 7) - fallback_aper_order = 7; - aper_size = (32 * 1024 * 1024) << fallback_aper_order; - - /* - * Aperture has to be naturally aligned. This means an 2GB aperture won't - * have much chance of finding a place in the lower 4GB of memory. - * Unfortunately we cannot move it up because that would make the - * IOMMU useless. + if (fallback_aper_order > 7) + fallback_aper_order = 7; + aper_size = (32 * 1024 * 1024) << fallback_aper_order; + + /* + * Aperture has to be naturally aligned. This means a 2GB aperture + * won't have much chance of finding a place in the lower 4GB of + * memory. Unfortunately we cannot move it up because that would + * make the IOMMU useless. */ p = __alloc_bootmem_nopanic(aper_size, aper_size, 0); if (!p || __pa(p)+aper_size > 0xffffffff) { - printk("Cannot allocate aperture memory hole (%p,%uK)\n", - p, aper_size>>10); + printk(KERN_ERR + "Cannot allocate aperture memory hole (%p,%uK)\n", + p, aper_size>>10); if (p) free_bootmem(__pa(p), aper_size); return 0; } - printk("Mapping aperture over %d KB of RAM @ %lx\n", - aper_size >> 10, __pa(p)); + printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n", + aper_size >> 10, __pa(p)); insert_aperture_resource((u32)__pa(p), aper_size); - return (u32)__pa(p); + + return (u32)__pa(p); } static int __init aperture_valid(u64 aper_base, u32 aper_size) -{ - if (!aper_base) +{ + if (!aper_base) return 0; - if (aper_size < 64*1024*1024) { - printk("Aperture too small (%d MB)\n", aper_size>>20); + + if (aper_size < 64*1024*1024) { + printk(KERN_ERR "Aperture too small (%d MB)\n", aper_size>>20); return 0; } if (aper_base + aper_size > 0x100000000UL) { - printk("Aperture beyond 4GB. Ignoring.\n"); - return 0; + printk(KERN_ERR "Aperture beyond 4GB. Ignoring.\n"); + return 0; } if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) { - printk("Aperture pointing to e820 RAM. Ignoring.\n"); - return 0; - } + printk(KERN_ERR "Aperture pointing to e820 RAM. 
Ignoring.\n"); + return 0; + } + return 1; -} +} /* Find a PCI capability */ -static __u32 __init find_cap(int num, int slot, int func, int cap) -{ - u8 pos; +static __u32 __init find_cap(int num, int slot, int func, int cap) +{ int bytes; - if (!(read_pci_config_16(num,slot,func,PCI_STATUS) & PCI_STATUS_CAP_LIST)) + u8 pos; + + if (!(read_pci_config_16(num, slot, func, PCI_STATUS) & + PCI_STATUS_CAP_LIST)) return 0; - pos = read_pci_config_byte(num,slot,func,PCI_CAPABILITY_LIST); - for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) { + + pos = read_pci_config_byte(num, slot, func, PCI_CAPABILITY_LIST); + for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) { u8 id; - pos &= ~3; - id = read_pci_config_byte(num,slot,func,pos+PCI_CAP_LIST_ID); + + pos &= ~3; + id = read_pci_config_byte(num, slot, func, pos+PCI_CAP_LIST_ID); if (id == 0xff) break; - if (id == cap) - return pos; - pos = read_pci_config_byte(num,slot,func,pos+PCI_CAP_LIST_NEXT); - } + if (id == cap) + return pos; + pos = read_pci_config_byte(num, slot, func, + pos+PCI_CAP_LIST_NEXT); + } return 0; -} +} /* Read a standard AGPv3 bridge header */ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order) -{ +{ u32 apsize; u32 apsizereg; int nbits; u32 aper_low, aper_hi; u64 aper; - printk("AGP bridge at %02x:%02x:%02x\n", num, slot, func); - apsizereg = read_pci_config_16(num,slot,func, cap + 0x14); + printk(KERN_INFO "AGP bridge at %02x:%02x:%02x\n", num, slot, func); + apsizereg = read_pci_config_16(num, slot, func, cap + 0x14); if (apsizereg == 0xffffffff) { - printk("APSIZE in AGP bridge unreadable\n"); + printk(KERN_ERR "APSIZE in AGP bridge unreadable\n"); return 0; } apsize = apsizereg & 0xfff; /* Some BIOS use weird encodings not in the AGPv3 table. */ - if (apsize & 0xff) - apsize |= 0xf00; + if (apsize & 0xff) + apsize |= 0xf00; nbits = hweight16(apsize); *order = 7 - nbits; if ((int)*order < 0) /* < 32MB */ *order = 0; - - aper_low = read_pci_config(num,slot,func, 0x10); - aper_hi = read_pci_config(num,slot,func,0x14); + + aper_low = read_pci_config(num, slot, func, 0x10); + aper_hi = read_pci_config(num, slot, func, 0x14); aper = (aper_low & ~((1<<22)-1)) | ((u64)aper_hi << 32); - printk("Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n", - aper, 32 << *order, apsizereg); + printk(KERN_INFO "Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n", - aper, 32 << *order, apsizereg); + aper, 32 << *order, apsizereg); if (!aperture_valid(aper, (32*1024*1024) << *order)) - return 0; - return (u32)aper; -} - -/* Look for an AGP bridge. Windows only expects the aperture in the - AGP bridge and some BIOS forget to initialize the Northbridge too. - Work around this here. - - Do an PCI bus scan by hand because we're running before the PCI - subsystem. - - All K8 AGP bridges are AGPv3 compliant, so we can do this scan - generically. It's probably overkill to always scan all slots because - the AGP bridges should be always an own bus on the HT hierarchy, - but do it here for future safety. */ + return 0; + return (u32)aper; +} + +/* + * Look for an AGP bridge. Windows only expects the aperture in the + * AGP bridge and some BIOS forget to initialize the Northbridge too. + * Work around this here. + * + * Do a PCI bus scan by hand because we're running before the PCI + * subsystem. + * + * All K8 AGP bridges are AGPv3 compliant, so we can do this scan + * generically. It's probably overkill to always scan all slots because + * the AGP bridges should be always an own bus on the HT hierarchy, + * but do it here for future safety.
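The APSIZE decode in read_agp() above maps set bits to an aperture order: each additional set bit halves the aperture, and negative orders are clamped to 32MB. A self-contained rerun of that computation on a made-up register value (popcount16() stands in for the kernel's hweight16(); the sample value is not from real hardware):

#include <stdio.h>

static int popcount16(unsigned int v)
{
	int n = 0;

	for (; v; v &= v - 1)
		n++;
	return n;
}

int main(void)
{
	unsigned int apsizereg = 0x0f30;	/* sample value */
	unsigned int apsize = apsizereg & 0xfff;
	int order;

	if (apsize & 0xff)	/* the "weird encoding" workaround */
		apsize |= 0xf00;
	order = 7 - popcount16(apsize);
	if (order < 0)		/* < 32MB */
		order = 0;
	printf("APSIZE %#x -> order %d -> %u MB\n",
	       apsizereg, order, 32u << order);
	return 0;
}

For 0x0f30 this prints order 1, a 64MB aperture.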
+ */ static __u32 __init search_agp_bridge(u32 *order, int *valid_agp) { int num, slot, func; /* Poor man's PCI discovery */ - for (num = 0; num < 256; num++) { - for (slot = 0; slot < 32; slot++) { - for (func = 0; func < 8; func++) { + for (num = 0; num < 256; num++) { + for (slot = 0; slot < 32; slot++) { + for (func = 0; func < 8; func++) { u32 class, cap; u8 type; - class = read_pci_config(num,slot,func, + class = read_pci_config(num, slot, func, PCI_CLASS_REVISION); if (class == 0xffffffff) - break; - - switch (class >> 16) { + break; + + switch (class >> 16) { case PCI_CLASS_BRIDGE_HOST: case PCI_CLASS_BRIDGE_OTHER: /* needed? */ /* AGP bridge? */ - cap = find_cap(num,slot,func,PCI_CAP_ID_AGP); + cap = find_cap(num, slot, func, + PCI_CAP_ID_AGP); if (!cap) break; - *valid_agp = 1; - return read_agp(num,slot,func,cap,order); - } - + *valid_agp = 1; + return read_agp(num, slot, func, cap, + order); + } + /* No multi-function device? */ - type = read_pci_config_byte(num,slot,func, + type = read_pci_config_byte(num, slot, func, PCI_HEADER_TYPE); if (!(type & 0x80)) break; - } - } + } + } } - printk("No AGP bridge found\n"); + printk(KERN_INFO "No AGP bridge found\n"); + return 0; } void __init gart_iommu_hole_init(void) -{ - int fix, num; +{ u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0; u64 aper_base, last_aper_base = 0; - int valid_agp = 0; + int fix, num, valid_agp = 0; if (gart_iommu_aperture_disabled || !fix_aperture || !early_pci_allowed()) @@ -218,24 +231,24 @@ void __init gart_iommu_hole_init(void) printk(KERN_INFO "Checking aperture...\n"); fix = 0; - for (num = 24; num < 32; num++) { + for (num = 24; num < 32; num++) { if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00))) continue; iommu_detected = 1; gart_iommu_aperture = 1; - aper_order = (read_pci_config(0, num, 3, 0x90) >> 1) & 7; - aper_size = (32 * 1024 * 1024) << aper_order; + aper_order = (read_pci_config(0, num, 3, 0x90) >> 1) & 7; + aper_size = (32 * 1024 * 1024) << aper_order; aper_base = read_pci_config(0, num, 3, 0x94) & 0x7fff; - aper_base <<= 25; + aper_base <<= 25; + + printk(KERN_INFO "CPU %d: aperture @ %Lx size %u MB\n", + num-24, aper_base, aper_size>>20); - printk("CPU %d: aperture @ %Lx size %u MB\n", num-24, - aper_base, aper_size>>20); - if (!aperture_valid(aper_base, aper_size)) { - fix = 1; - break; + fix = 1; + break; } if ((last_aper_order && aper_order != last_aper_order) || @@ -245,55 +258,64 @@ void __init gart_iommu_hole_init(void) } last_aper_order = aper_order; last_aper_base = aper_base; - } + } if (!fix && !fallback_aper_force) { if (last_aper_base) { unsigned long n = (32 * 1024 * 1024) << last_aper_order; + insert_aperture_resource((u32)last_aper_base, n); } - return; + return; } if (!fallback_aper_force) - aper_alloc = search_agp_bridge(&aper_order, &valid_agp); - - if (aper_alloc) { + aper_alloc = search_agp_bridge(&aper_order, &valid_agp); + + if (aper_alloc) { /* Got the aperture from the AGP bridge */ } else if (swiotlb && !valid_agp) { /* Do nothing */ } else if ((!no_iommu && end_pfn > MAX_DMA32_PFN) || force_iommu || valid_agp || - fallback_aper_force) { - printk("Your BIOS doesn't leave a aperture memory hole\n"); - printk("Please enable the IOMMU option in the BIOS setup\n"); - printk("This costs you %d MB of RAM\n", - 32 << fallback_aper_order); + fallback_aper_force) { + printk(KERN_ERR + "Your BIOS doesn't leave a aperture memory hole\n"); + printk(KERN_ERR + "Please enable the IOMMU option in the BIOS setup\n"); + printk(KERN_ERR + "This costs you %d MB 
of RAM\n", + 32 << fallback_aper_order); aper_order = fallback_aper_order; aper_alloc = allocate_aperture(); - if (!aper_alloc) { - /* Could disable AGP and IOMMU here, but it's probably - not worth it. But the later users cannot deal with - bad apertures and turning on the aperture over memory - causes very strange problems, so it's better to - panic early. */ + if (!aper_alloc) { + /* + * Could disable AGP and IOMMU here, but it's + * probably not worth it. But the later users + * cannot deal with bad apertures and turning + * on the aperture over memory causes very + * strange problems, so it's better to panic + * early. + */ panic("Not enough memory for aperture"); } - } else { - return; - } + } else { + return; + } /* Fix up the north bridges */ - for (num = 24; num < 32; num++) { + for (num = 24; num < 32; num++) { if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00))) - continue; + continue; - /* Don't enable translation yet. That is done later. - Assume this BIOS didn't initialise the GART so - just overwrite all previous bits */ - write_pci_config(0, num, 3, 0x90, aper_order<<1); - write_pci_config(0, num, 3, 0x94, aper_alloc>>25); - } -} + /* + * Don't enable translation yet. That is done later. + * Assume this BIOS didn't initialise the GART so + * just overwrite all previous bits + */ + write_pci_config(0, num, 3, 0x90, aper_order<<1); + write_pci_config(0, num, 3, 0x94, aper_alloc>>25); + } +} diff -puN arch/x86/kernel/apic_32.c~git-x86 arch/x86/kernel/apic_32.c --- a/arch/x86/kernel/apic_32.c~git-x86 +++ a/arch/x86/kernel/apic_32.c @@ -43,8 +43,6 @@ #include #include -#include "io_ports.h" - /* * Sanity check */ @@ -135,9 +133,9 @@ void apic_wait_icr_idle(void) cpu_relax(); } -unsigned long safe_apic_wait_icr_idle(void) +u32 safe_apic_wait_icr_idle(void) { - unsigned long send_status; + u32 send_status; int timeout; timeout = 0; @@ -563,6 +561,9 @@ static void local_apic_timer_interrupt(v return; } + /* + * the NMI deadlock-detector uses this. 
+ */ per_cpu(irq_stat, cpu).apic_timer_irqs++; evt->event_handler(evt); @@ -617,7 +618,7 @@ int setup_profiling_timer(unsigned int m void clear_local_APIC(void) { int maxlvt = lapic_get_maxlvt(); - unsigned long v; + u32 v; /* * Masking an LVT entry can trigger a local APIC error @@ -1077,7 +1078,7 @@ static int __init detect_init_APIC (void printk(KERN_WARNING "Could not enable APIC!\n"); return -1; } - set_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); + set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; /* The BIOS may have set up the APIC at some other address */ @@ -1167,7 +1168,7 @@ fake_ioapic_page: int __init APIC_init_uniprocessor (void) { if (enable_local_apic < 0) - clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); + clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); if (!smp_found_config && !cpu_has_apic) return -1; @@ -1179,7 +1180,7 @@ int __init APIC_init_uniprocessor (void) APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", boot_cpu_physical_apicid); - clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); + clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); return -1; } @@ -1210,50 +1211,6 @@ int __init APIC_init_uniprocessor (void) } /* - * APIC command line parameters - */ -static int __init parse_lapic(char *arg) -{ - enable_local_apic = 1; - return 0; -} -early_param("lapic", parse_lapic); - -static int __init parse_nolapic(char *arg) -{ - enable_local_apic = -1; - clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); - return 0; -} -early_param("nolapic", parse_nolapic); - -static int __init parse_disable_lapic_timer(char *arg) -{ - local_apic_timer_disabled = 1; - return 0; -} -early_param("nolapic_timer", parse_disable_lapic_timer); - -static int __init parse_lapic_timer_c2_ok(char *arg) -{ - local_apic_timer_c2_ok = 1; - return 0; -} -early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); - -static int __init apic_set_verbosity(char *str) -{ - if (strcmp("debug", str) == 0) - apic_verbosity = APIC_DEBUG; - else if (strcmp("verbose", str) == 0) - apic_verbosity = APIC_VERBOSE; - return 1; -} - -__setup("apic=", apic_set_verbosity); - - -/* * Local APIC interrupts */ @@ -1565,3 +1522,46 @@ device_initcall(init_lapic_sysfs); static void apic_pm_activate(void) { } #endif /* CONFIG_PM */ + +/* + * APIC command line parameters + */ +static int __init parse_lapic(char *arg) +{ + enable_local_apic = 1; + return 0; +} +early_param("lapic", parse_lapic); + +static int __init parse_nolapic(char *arg) +{ + enable_local_apic = -1; + clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); + return 0; +} +early_param("nolapic", parse_nolapic); + +static int __init parse_disable_lapic_timer(char *arg) +{ + local_apic_timer_disabled = 1; + return 0; +} +early_param("nolapic_timer", parse_disable_lapic_timer); + +static int __init parse_lapic_timer_c2_ok(char *arg) +{ + local_apic_timer_c2_ok = 1; + return 0; +} +early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); + +static int __init apic_set_verbosity(char *str) +{ + if (strcmp("debug", str) == 0) + apic_verbosity = APIC_DEBUG; + else if (strcmp("verbose", str) == 0) + apic_verbosity = APIC_VERBOSE; + return 1; +} +__setup("apic=", apic_set_verbosity); + diff -puN arch/x86/kernel/apic_64.c~git-x86 arch/x86/kernel/apic_64.c --- a/arch/x86/kernel/apic_64.c~git-x86 +++ a/arch/x86/kernel/apic_64.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -43,12 +44,12 @@ int 
apic_verbosity; int disable_apic_timer __cpuinitdata; static int apic_calibrate_pmtmr __initdata; +int disable_apic; /* Local APIC timer works in C2? */ int local_apic_timer_c2_ok; EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); -static struct resource *ioapic_resources; static struct resource lapic_resource = { .name = "Local APIC", .flags = IORESOURCE_MEM | IORESOURCE_BUSY, @@ -60,10 +61,8 @@ static int lapic_next_event(unsigned lon struct clock_event_device *evt); static void lapic_timer_setup(enum clock_event_mode mode, struct clock_event_device *evt); - static void lapic_timer_broadcast(cpumask_t mask); - -static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen); +static void apic_pm_activate(void); static struct clock_event_device lapic_clockevent = { .name = "lapic", @@ -78,66 +77,43 @@ }; static DEFINE_PER_CPU(struct clock_event_device, lapic_events); -static int lapic_next_event(unsigned long delta, - struct clock_event_device *evt) +/* + * Get the LAPIC version + */ +static inline int lapic_get_version(void) { - apic_write(APIC_TMICT, delta); - return 0; + return GET_APIC_VERSION(apic_read(APIC_LVR)); } -static void lapic_timer_setup(enum clock_event_mode mode, - struct clock_event_device *evt) +/* + * Check if the APIC is integrated or a separate chip + */ +static inline int lapic_is_integrated(void) { - unsigned long flags; - unsigned int v; - - /* Lapic used as dummy for broadcast ? */ - if (evt->features & CLOCK_EVT_FEAT_DUMMY) - return; - - local_irq_save(flags); - - switch (mode) { - case CLOCK_EVT_MODE_PERIODIC: - case CLOCK_EVT_MODE_ONESHOT: - __setup_APIC_LVTT(calibration_result, - mode != CLOCK_EVT_MODE_PERIODIC, 1); - break; - case CLOCK_EVT_MODE_UNUSED: - case CLOCK_EVT_MODE_SHUTDOWN: - v = apic_read(APIC_LVTT); - v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); - apic_write(APIC_LVTT, v); - break; - case CLOCK_EVT_MODE_RESUME: - /* Nothing to do here */ - break; - } - - local_irq_restore(flags); + return 1; } /* - * Local APIC timer broadcast function + * Check whether this is a modern or a first-generation APIC */ -static void lapic_timer_broadcast(cpumask_t mask) +static int modern_apic(void) { -#ifdef CONFIG_SMP - send_IPI_mask(mask, LOCAL_TIMER_VECTOR); -#endif + /* AMD systems use old APIC versions, so check the CPU */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && + boot_cpu_data.x86 >= 0xf) + return 1; + return lapic_get_version() >= 0x14; } -static void apic_pm_activate(void); - void apic_wait_icr_idle(void) { while (apic_read(APIC_ICR) & APIC_ICR_BUSY) cpu_relax(); } -unsigned int safe_apic_wait_icr_idle(void) +u32 safe_apic_wait_icr_idle(void) { - unsigned int send_status; + u32 send_status; int timeout; timeout = 0; @@ -151,7 +127,10 @@ unsigned int safe_apic_wait_icr_idle(voi return send_status; } -void enable_NMI_through_LVT0 (void * dummy) +/** + * enable_NMI_through_LVT0 - enable NMI through local vector table 0 + */ +void enable_NMI_through_LVT0(void *dummy) { unsigned int v; @@ -160,7 +139,10 @@ void enable_NMI_through_LVT0 (void * dum apic_write(APIC_LVT0, v); } -int get_maxlvt(void) +/** + * lapic_get_maxlvt - get the maximum number of local vector table entries + */ +int lapic_get_maxlvt(void) { unsigned int v, maxlvt; @@ -170,203 +152,493 @@ int get_maxlvt(void) } /* - * 'what should we do if we get a hw irq event on an illegal vector'. - * each architecture has to answer this themselves. + * This function sets up the local APIC timer, with a timeout of + * 'clocks' APIC bus clock.
During calibration we actually call + * this function twice on the boot CPU, once with a bogus timeout + * value, second time for real. The other (noncalibrating) CPUs + * call this function only once, with the real, calibrated value. + * + * We do reads before writes even if unnecessary, to get around the + * P5 APIC double write bug. */ -void ack_bad_irq(unsigned int irq) -{ - printk("unexpected IRQ trap at vector %02x\n", irq); - /* - * Currently unexpected vectors happen only on SMP and APIC. - * We _must_ ack these because every local APIC has only N - * irq slots per priority level, and a 'hanging, unacked' IRQ - * holds up an irq slot - in excessive cases (when multiple - * unexpected vectors occur) that might lock up the APIC - * completely. - * But don't ack when the APIC is disabled. -AK - */ - if (!disable_apic) - ack_APIC_irq(); -} -void clear_local_APIC(void) +static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) { - int maxlvt; - unsigned int v; + unsigned int lvtt_value, tmp_value; - maxlvt = get_maxlvt(); + lvtt_value = LOCAL_TIMER_VECTOR; + if (!oneshot) + lvtt_value |= APIC_LVT_TIMER_PERIODIC; + if (!irqen) + lvtt_value |= APIC_LVT_MASKED; - /* - * Masking an LVT entry can trigger a local APIC error - * if the vector is zero. Mask LVTERR first to prevent this. - */ - if (maxlvt >= 3) { - v = ERROR_APIC_VECTOR; /* any non-zero vector will do */ - apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); - } - /* - * Careful: we have to set masks only first to deassert - * any level-triggered sources. - */ - v = apic_read(APIC_LVTT); - apic_write(APIC_LVTT, v | APIC_LVT_MASKED); - v = apic_read(APIC_LVT0); - apic_write(APIC_LVT0, v | APIC_LVT_MASKED); - v = apic_read(APIC_LVT1); - apic_write(APIC_LVT1, v | APIC_LVT_MASKED); - if (maxlvt >= 4) { - v = apic_read(APIC_LVTPC); - apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); - } + apic_write(APIC_LVTT, lvtt_value); /* - * Clean APIC state for other OSs: + * Divide PICLK by 16 */ - apic_write(APIC_LVTT, APIC_LVT_MASKED); - apic_write(APIC_LVT0, APIC_LVT_MASKED); - apic_write(APIC_LVT1, APIC_LVT_MASKED); - if (maxlvt >= 3) - apic_write(APIC_LVTERR, APIC_LVT_MASKED); - if (maxlvt >= 4) - apic_write(APIC_LVTPC, APIC_LVT_MASKED); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); + tmp_value = apic_read(APIC_TDCR); + apic_write(APIC_TDCR, (tmp_value + & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) + | APIC_TDR_DIV_16); + + if (!oneshot) + apic_write(APIC_TMICT, clocks); } -void disconnect_bsp_APIC(int virt_wire_setup) -{ - /* Go back to Virtual Wire compatibility mode */ - unsigned long value; +/* + * Setup extended LVT, AMD specific (K8, family 10h) + * + * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and + * MCE interrupts are supported. Thus MCE offset must be set to 0. 
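The new setup_APIC_eilvt() helper below packs an extended-LVT entry as (mask << 16) | (msg_type << 8) | vector and writes it at APIC_EILVT0 plus offset * 0x10. A standalone sketch of just that encoding; the vector is an arbitrary example value, and msg_type 0 corresponds to fixed delivery mode:

#include <stdio.h>

int main(void)
{
	unsigned int vector = 0xf8;	/* arbitrary example vector */
	unsigned int msg_type = 0;	/* 0 == fixed delivery mode */
	unsigned int mask = 1;		/* create the entry masked */
	unsigned int lvt_off = 1;	/* 1 == IBS offset, per the defines below */
	unsigned int v = (mask << 16) | (msg_type << 8) | vector;

	printf("EILVT%u: write %#x at APIC_EILVT0 + %#x\n",
	       lvt_off, v, lvt_off << 4);
	return 0;
}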
+ */ - /* For the spurious interrupt use vector F, and enable it */ - value = apic_read(APIC_SPIV); - value &= ~APIC_VECTOR_MASK; - value |= APIC_SPIV_APIC_ENABLED; - value |= 0xf; - apic_write(APIC_SPIV, value); +#define APIC_EILVT_LVTOFF_MCE 0 +#define APIC_EILVT_LVTOFF_IBS 1 - if (!virt_wire_setup) { - /* - * For LVT0 make it edge triggered, active high, - * external and enabled - */ - value = apic_read(APIC_LVT0); - value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | - APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | - APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED ); - value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; - value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); - apic_write(APIC_LVT0, value); - } else { - /* Disable LVT0 */ - apic_write(APIC_LVT0, APIC_LVT_MASKED); - } +static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask) +{ + unsigned long reg = (lvt_off << 4) + APIC_EILVT0; + unsigned int v = (mask << 16) | (msg_type << 8) | vector; - /* For LVT1 make it edge triggered, active high, nmi and enabled */ - value = apic_read(APIC_LVT1); - value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | - APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | - APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); - value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; - value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); - apic_write(APIC_LVT1, value); + apic_write(reg, v); } -void disable_local_APIC(void) +u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask) { - unsigned int value; + setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask); + return APIC_EILVT_LVTOFF_MCE; +} - clear_local_APIC(); +u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) +{ + setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); + return APIC_EILVT_LVTOFF_IBS; +} - /* - * Disable APIC (implies clearing of registers - * for 82489DX!). - */ - value = apic_read(APIC_SPIV); - value &= ~APIC_SPIV_APIC_ENABLED; - apic_write(APIC_SPIV, value); +/* + * Program the next event, relative to now + */ +static int lapic_next_event(unsigned long delta, + struct clock_event_device *evt) +{ + apic_write(APIC_TMICT, delta); + return 0; } -void lapic_shutdown(void) +/* + * Setup the lapic timer in periodic or oneshot mode + */ +static void lapic_timer_setup(enum clock_event_mode mode, + struct clock_event_device *evt) { unsigned long flags; + unsigned int v; - if (!cpu_has_apic) + /* Lapic used as dummy for broadcast ? */ + if (evt->features & CLOCK_EVT_FEAT_DUMMY) return; local_irq_save(flags); - disable_local_APIC(); + switch (mode) { + case CLOCK_EVT_MODE_PERIODIC: + case CLOCK_EVT_MODE_ONESHOT: + __setup_APIC_LVTT(calibration_result, + mode != CLOCK_EVT_MODE_PERIODIC, 1); + break; + case CLOCK_EVT_MODE_UNUSED: + case CLOCK_EVT_MODE_SHUTDOWN: + v = apic_read(APIC_LVTT); + v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); + apic_write(APIC_LVTT, v); + break; + case CLOCK_EVT_MODE_RESUME: + /* Nothing to do here */ + break; + } local_irq_restore(flags); } /* - * This is to verify that we're looking at a real local APIC. - * Check these against your board if the CPUs aren't getting - * started for no apparent reason. + * Local APIC timer broadcast function */ -int __init verify_local_APIC(void) +static void lapic_timer_broadcast(cpumask_t mask) { - unsigned int reg0, reg1; - - /* - * The version register is read-only in a real APIC. 
- */ - reg0 = apic_read(APIC_LVR); - apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0); - apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK); - reg1 = apic_read(APIC_LVR); - apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1); +#ifdef CONFIG_SMP + send_IPI_mask(mask, LOCAL_TIMER_VECTOR); +#endif +} - /* - * The two version reads above should print the same - * numbers. If the second one is different, then we - * poke at a non-APIC. - */ - if (reg1 != reg0) - return 0; +/* + * Setup the local APIC timer for this CPU. Copy the initialized values + * of the boot CPU and register the clock event in the framework. + */ +static void setup_APIC_timer(void) +{ + struct clock_event_device *levt = &__get_cpu_var(lapic_events); - /* - * Check if the version looks reasonably. - */ - reg1 = GET_APIC_VERSION(reg0); - if (reg1 == 0x00 || reg1 == 0xff) - return 0; - reg1 = get_maxlvt(); - if (reg1 < 0x02 || reg1 == 0xff) - return 0; + memcpy(levt, &lapic_clockevent, sizeof(*levt)); + levt->cpumask = cpumask_of_cpu(smp_processor_id()); - /* - * The ID register is read/write in a real APIC. - */ - reg0 = apic_read(APIC_ID); - apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); - apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); - reg1 = apic_read(APIC_ID); - apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); - apic_write(APIC_ID, reg0); - if (reg1 != (reg0 ^ APIC_ID_MASK)) - return 0; + clockevents_register_device(levt); +} - /* +/* + * In this function we calibrate APIC bus clocks to the external + * timer. Unfortunately we cannot use jiffies and the timer irq + * to calibrate, since some later bootup code depends on getting + * the first irq? Ugh. + * + * We want to do the calibration only once since we + * want to have local timer irqs synchronized. CPUs connected + * by the same APIC bus have the very same bus frequency. + * And we want to have irqs off anyways, no accidental + * APIC irq that way. + */ + +#define TICK_COUNT 100000000 + +static void __init calibrate_APIC_clock(void) +{ + unsigned apic, apic_start; + unsigned long tsc, tsc_start; + int result; + + local_irq_disable(); + + /* + * Put whatever arbitrary (but long enough) timeout + * value into the APIC clock, we just want to get the + * counter running for calibration. + * + * No interrupt enable ! + */ + __setup_APIC_LVTT(250000000, 0, 0); + + apic_start = apic_read(APIC_TMCCT); +#ifdef CONFIG_X86_PM_TIMER + if (apic_calibrate_pmtmr && pmtmr_ioport) { + pmtimer_wait(5000); /* 5ms wait */ + apic = apic_read(APIC_TMCCT); + result = (apic_start - apic) * 1000L / 5; + } else +#endif + { + rdtscll(tsc_start); + + do { + apic = apic_read(APIC_TMCCT); + rdtscll(tsc); + } while ((tsc - tsc_start) < TICK_COUNT && + (apic_start - apic) < TICK_COUNT); + + result = (apic_start - apic) * 1000L * tsc_khz / + (tsc - tsc_start); + } + + local_irq_enable(); + + printk(KERN_DEBUG "APIC timer calibration result %d\n", result); + + printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n", + result / 1000 / 1000, result / 1000 % 1000); + + /* Calculate the scaled math multiplication factor */ + lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC, 32); + lapic_clockevent.max_delta_ns = + clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); + lapic_clockevent.min_delta_ns = + clockevent_delta2ns(0xF, &lapic_clockevent); + + calibration_result = result / HZ; +} + +void __init setup_boot_APIC_clock(void) +{ + /* + * The local apic timer can be disabled via the kernel commandline.
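To make the calibration arithmetic in calibrate_APIC_clock() above concrete, a worked example with invented numbers (it assumes 64-bit long, as on x86-64, since the intermediate product overflows 32 bits): 12.5 million APIC ticks counted over 100 ms worth of TSC cycles on a 2 GHz TSC give a 125 MHz APIC timer, and calibration_result becomes ticks per jiffy:

#include <stdio.h>

#define HZ 250	/* example config value */

int main(void)
{
	unsigned long tsc_khz = 2000000UL;	/* pretend 2 GHz TSC */
	unsigned long tsc_delta = 200000000UL;	/* 100 ms of TSC cycles */
	unsigned long apic_delta = 12500000UL;	/* APIC ticks in the same window */
	long result = apic_delta * 1000L * tsc_khz / tsc_delta;

	printf("APIC timer: %ld ticks/sec (%ld.%03ld MHz)\n",
	       result, result / 1000 / 1000, result / 1000 % 1000);
	printf("calibration_result = %ld ticks/jiffy at HZ=%d\n",
	       result / HZ, HZ);
	return 0;
}

The PM-timer branch does the same thing over a fixed 5 ms window, which is why it divides by 5 and multiplies by 1000.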
+ * Register the lapic timer as a dummy clock event source on SMP + * systems, so the broadcast mechanism is used. On UP systems simply + * ignore it. + */ + if (disable_apic_timer) { + printk(KERN_INFO "Disabling APIC timer\n"); + /* No broadcast on UP ! */ + if (num_possible_cpus() > 1) + setup_APIC_timer(); + return; + } + + printk(KERN_INFO "Using local APIC timer interrupts.\n"); + calibrate_APIC_clock(); + + /* + * If nmi_watchdog is set to IO_APIC, we need the + * PIT/HPET going. Otherwise register lapic as a dummy + * device. + */ + if (nmi_watchdog != NMI_IO_APIC) + lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; + else + printk(KERN_WARNING "APIC timer registered as dummy," + " due to nmi_watchdog=1!\n"); + + setup_APIC_timer(); +} + +/* + * AMD C1E-enabled CPUs have a real nasty problem: Some BIOSes set the + * C1E flag only in the secondary CPU, so when we detect the wreckage + * we already have enabled the boot CPU local apic timer. Check if + * disable_apic_timer is set and the DUMMY flag is cleared. If yes, + * set the DUMMY flag again and force the broadcast mode in the + * clockevents layer. + */ +void __cpuinit check_boot_apic_timer_broadcast(void) +{ + if (!disable_apic_timer || + (lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY)) + return; + + printk(KERN_INFO "AMD C1E detected late. Force timer broadcast.\n"); + lapic_clockevent.features |= CLOCK_EVT_FEAT_DUMMY; + + local_irq_enable(); + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, &boot_cpu_id); + local_irq_disable(); +} + +void __cpuinit setup_secondary_APIC_clock(void) +{ + check_boot_apic_timer_broadcast(); + setup_APIC_timer(); +} + +/* + * The guts of the apic timer interrupt + */ +static void local_apic_timer_interrupt(void) +{ + int cpu = smp_processor_id(); + struct clock_event_device *evt = &per_cpu(lapic_events, cpu); + + /* + * Normally we should not be here till LAPIC has been initialized but + * in some cases like kdump, it's possible that there is a pending LAPIC + * timer interrupt from previous kernel's context and is delivered in + * the new kernel the moment interrupts are enabled. + * + * Interrupts are enabled early and LAPIC is set up much later, hence + * it's possible that when we get here evt->event_handler is NULL. + * Check for event_handler being NULL and discard the interrupt as + * spurious. + */ + if (!evt->event_handler) { + printk(KERN_WARNING + "Spurious LAPIC timer interrupt on cpu %d\n", cpu); + /* Switch it off */ + lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt); + return; + } + + /* + * the NMI deadlock-detector uses this. + */ + add_pda(apic_timer_irqs, 1); + + evt->event_handler(evt); +} + +/* + * Local APIC timer interrupt. This is the most natural way for doing + * local interrupts, but local timer interrupts can be emulated by + * broadcast interrupts too. [in case the hw doesn't support APIC timers] + * + * [ if a single-CPU system runs an SMP kernel then we call the local + * interrupt as well. Thus we cannot inline the local irq ... ] + */ +void smp_apic_timer_interrupt(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + /* + * NOTE! We'd better ACK the irq immediately, + * because timer handling can be slow. + */ + ack_APIC_irq(); + /* + * update_process_times() expects us to have done irq_enter(). + * Besides, if we don't, timer interrupts ignore the global + * interrupt lock, which is the WrongThing (tm) to do.
+ */ + exit_idle(); + irq_enter(); + local_apic_timer_interrupt(); + irq_exit(); + set_irq_regs(old_regs); +} + +int setup_profiling_timer(unsigned int multiplier) +{ + return -EINVAL; +} + + +/* + * Local APIC start and shutdown + */ + +/** + * clear_local_APIC - shutdown the local APIC + * + * This is called, when a CPU is disabled and before rebooting, so the state of + * the local APIC has no dangling leftovers. Also used to cleanout any BIOS + * leftovers during boot. + */ +void clear_local_APIC(void) +{ + int maxlvt = lapic_get_maxlvt(); + u32 v; + + /* + * Masking an LVT entry can trigger a local APIC error + * if the vector is zero. Mask LVTERR first to prevent this. + */ + if (maxlvt >= 3) { + v = ERROR_APIC_VECTOR; /* any non-zero vector will do */ + apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); + } + /* + * Careful: we have to set masks only first to deassert + * any level-triggered sources. + */ + v = apic_read(APIC_LVTT); + apic_write(APIC_LVTT, v | APIC_LVT_MASKED); + v = apic_read(APIC_LVT0); + apic_write(APIC_LVT0, v | APIC_LVT_MASKED); + v = apic_read(APIC_LVT1); + apic_write(APIC_LVT1, v | APIC_LVT_MASKED); + if (maxlvt >= 4) { + v = apic_read(APIC_LVTPC); + apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); + } + + /* + * Clean APIC state for other OSs: + */ + apic_write(APIC_LVTT, APIC_LVT_MASKED); + apic_write(APIC_LVT0, APIC_LVT_MASKED); + apic_write(APIC_LVT1, APIC_LVT_MASKED); + if (maxlvt >= 3) + apic_write(APIC_LVTERR, APIC_LVT_MASKED); + if (maxlvt >= 4) + apic_write(APIC_LVTPC, APIC_LVT_MASKED); + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); +} + +/** + * disable_local_APIC - clear and disable the local APIC + */ +void disable_local_APIC(void) +{ + unsigned int value; + + clear_local_APIC(); + + /* + * Disable APIC (implies clearing of registers + * for 82489DX!). + */ + value = apic_read(APIC_SPIV); + value &= ~APIC_SPIV_APIC_ENABLED; + apic_write(APIC_SPIV, value); +} + +void lapic_shutdown(void) +{ + unsigned long flags; + + if (!cpu_has_apic) + return; + + local_irq_save(flags); + + disable_local_APIC(); + + local_irq_restore(flags); +} + +/* + * This is to verify that we're looking at a real local APIC. + * Check these against your board if the CPUs aren't getting + * started for no apparent reason. + */ +int __init verify_local_APIC(void) +{ + unsigned int reg0, reg1; + + /* + * The version register is read-only in a real APIC. + */ + reg0 = apic_read(APIC_LVR); + apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0); + apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK); + reg1 = apic_read(APIC_LVR); + apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1); + + /* + * The two version reads above should print the same + * numbers. If the second one is different, then we + * poke at a non-APIC. + */ + if (reg1 != reg0) + return 0; + + /* + * Check if the version looks reasonably. + */ + reg1 = GET_APIC_VERSION(reg0); + if (reg1 == 0x00 || reg1 == 0xff) + return 0; + reg1 = lapic_get_maxlvt(); + if (reg1 < 0x02 || reg1 == 0xff) + return 0; + + /* + * The ID register is read/write in a real APIC. + */ + reg0 = apic_read(APIC_ID); + apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); + apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); + reg1 = apic_read(APIC_ID); + apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); + apic_write(APIC_ID, reg0); + if (reg1 != (reg0 ^ APIC_ID_MASK)) + return 0; + + /* * The next two are just to see if we have sane values. * They're only really relevant if we're in Virtual Wire * compatibility mode, but most boxes are anymore. 
*/ reg0 = apic_read(APIC_LVT0); - apic_printk(APIC_DEBUG,"Getting LVT0: %x\n", reg0); + apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0); reg1 = apic_read(APIC_LVT1); apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1); return 1; } +/** + * sync_Arb_IDs - synchronize APIC bus arbitration IDs + */ void __init sync_Arb_IDs(void) { /* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 */ - unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); - if (ver >= 0x14) /* P4 or higher */ + if (modern_apic()) return; /* @@ -418,9 +690,12 @@ void __init init_bsp_APIC(void) apic_write(APIC_LVT1, value); } -void __cpuinit setup_local_APIC (void) +/** + * setup_local_APIC - setup the local APIC + */ +void __cpuinit setup_local_APIC(void) { - unsigned int value, maxlvt; + unsigned int value; int i, j; value = apic_read(APIC_LVR); @@ -516,183 +791,27 @@ void __cpuinit setup_local_APIC (void) else value = APIC_DM_NMI | APIC_LVT_MASKED; apic_write(APIC_LVT1, value); - - { - unsigned oldvalue; - maxlvt = get_maxlvt(); - oldvalue = apic_read(APIC_ESR); - value = ERROR_APIC_VECTOR; // enables sending errors - apic_write(APIC_LVTERR, value); - /* - * spec says clear errors after enabling vector. - */ - if (maxlvt > 3) - apic_write(APIC_ESR, 0); - value = apic_read(APIC_ESR); - if (value != oldvalue) - apic_printk(APIC_VERBOSE, - "ESR value after enabling vector: %08x, after %08x\n", - oldvalue, value); - } - - nmi_watchdog_default(); - setup_apic_nmi_watchdog(NULL); - apic_pm_activate(); } -#ifdef CONFIG_PM - -static struct { - /* 'active' is true if the local APIC was enabled by us and - not the BIOS; this signifies that we are also responsible - for disabling it before entering apm/acpi suspend */ - int active; - /* r/w apic fields */ - unsigned int apic_id; - unsigned int apic_taskpri; - unsigned int apic_ldr; - unsigned int apic_dfr; - unsigned int apic_spiv; - unsigned int apic_lvtt; - unsigned int apic_lvtpc; - unsigned int apic_lvt0; - unsigned int apic_lvt1; - unsigned int apic_lvterr; - unsigned int apic_tmict; - unsigned int apic_tdcr; - unsigned int apic_thmr; -} apic_pm_state; - -static int lapic_suspend(struct sys_device *dev, pm_message_t state) +void __cpuinit lapic_setup_esr(void) { - unsigned long flags; - int maxlvt; - - if (!apic_pm_state.active) - return 0; - - maxlvt = get_maxlvt(); + unsigned maxlvt = lapic_get_maxlvt(); - apic_pm_state.apic_id = apic_read(APIC_ID); - apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); - apic_pm_state.apic_ldr = apic_read(APIC_LDR); - apic_pm_state.apic_dfr = apic_read(APIC_DFR); - apic_pm_state.apic_spiv = apic_read(APIC_SPIV); - apic_pm_state.apic_lvtt = apic_read(APIC_LVTT); - if (maxlvt >= 4) - apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); - apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); - apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); - apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); - apic_pm_state.apic_tmict = apic_read(APIC_TMICT); - apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); -#ifdef CONFIG_X86_MCE_INTEL - if (maxlvt >= 5) - apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); -#endif - local_irq_save(flags); - disable_local_APIC(); - local_irq_restore(flags); - return 0; -} - -static int lapic_resume(struct sys_device *dev) -{ - unsigned int l, h; - unsigned long flags; - int maxlvt; - - if (!apic_pm_state.active) - return 0; - - maxlvt = get_maxlvt(); - - local_irq_save(flags); - rdmsr(MSR_IA32_APICBASE, l, h); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; - wrmsr(MSR_IA32_APICBASE, 
l, h); - apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); - apic_write(APIC_ID, apic_pm_state.apic_id); - apic_write(APIC_DFR, apic_pm_state.apic_dfr); - apic_write(APIC_LDR, apic_pm_state.apic_ldr); - apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri); - apic_write(APIC_SPIV, apic_pm_state.apic_spiv); - apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); - apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); -#ifdef CONFIG_X86_MCE_INTEL - if (maxlvt >= 5) - apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); -#endif - if (maxlvt >= 4) - apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); - apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); - apic_write(APIC_TDCR, apic_pm_state.apic_tdcr); - apic_write(APIC_TMICT, apic_pm_state.apic_tmict); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - local_irq_restore(flags); - return 0; -} - -static struct sysdev_class lapic_sysclass = { - set_kset_name("lapic"), - .resume = lapic_resume, - .suspend = lapic_suspend, -}; - -static struct sys_device device_lapic = { - .id = 0, - .cls = &lapic_sysclass, -}; - -static void __cpuinit apic_pm_activate(void) -{ - apic_pm_state.active = 1; + apic_write(APIC_LVTERR, ERROR_APIC_VECTOR); + /* + * spec says clear errors after enabling vector. + */ + if (maxlvt > 3) + apic_write(APIC_ESR, 0); } -static int __init init_lapic_sysfs(void) +void __cpuinit end_local_APIC_setup(void) { - int error; - if (!cpu_has_apic) - return 0; - /* XXX: remove suspend/resume procs if !apic_pm_state.active? */ - error = sysdev_class_register(&lapic_sysclass); - if (!error) - error = sysdev_register(&device_lapic); - return error; -} -device_initcall(init_lapic_sysfs); - -#else /* CONFIG_PM */ - -static void apic_pm_activate(void) { } - -#endif /* CONFIG_PM */ - -static int __init apic_set_verbosity(char *str) -{ - if (str == NULL) { - skip_ioapic_setup = 0; - ioapic_force = 1; - return 0; - } - if (strcmp("debug", str) == 0) - apic_verbosity = APIC_DEBUG; - else if (strcmp("verbose", str) == 0) - apic_verbosity = APIC_VERBOSE; - else { - printk(KERN_WARNING "APIC Verbosity level %s not recognised" - " use apic=verbose or apic=debug\n", str); - return -EINVAL; - } - - return 0; + lapic_setup_esr(); + nmi_watchdog_default(); + setup_apic_nmi_watchdog(NULL); + apic_pm_activate(); } -early_param("apic", apic_set_verbosity); /* * Detect and enable local APICs on non-SMP boards. @@ -700,77 +819,21 @@ early_param("apic", apic_set_verbosity); * On AMD64 we trust the BIOS - if it says no APIC it is likely * not correctly set up (usually the APIC timer won't work etc.) 
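lapic_resume() above rebuilds the APICBASE MSR before touching any APIC register. Assuming the usual layout of that MSR (global-enable at bit 11, physical base held in the bits from 12 up), a standalone sketch of the same fix-up on sample values:

#include <stdio.h>

#define APICBASE_ENABLE	(1UL << 11)		/* global enable bit */
#define APICBASE_BASE	(0xfffffUL << 12)	/* physical base field */

int main(void)
{
	unsigned long l = 0xfee00800;		/* sample MSR value on resume */
	unsigned long mp_lapic_addr = 0xfee00000;	/* the known-good base */

	l &= ~APICBASE_BASE;			/* drop whatever base was left */
	l |= APICBASE_ENABLE | mp_lapic_addr;	/* force enable + known base */
	printf("MSR_IA32_APICBASE <- %#lx\n", l);
	return 0;
}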
*/ - -static int __init detect_init_APIC (void) +static int __init detect_init_APIC(void) { - if (!cpu_has_apic) { - printk(KERN_INFO "No local APIC present\n"); - return -1; - } - - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; - boot_cpu_id = 0; - return 0; -} - -#ifdef CONFIG_X86_IO_APIC -static struct resource * __init ioapic_setup_resources(void) -{ -#define IOAPIC_RESOURCE_NAME_SIZE 11 - unsigned long n; - struct resource *res; - char *mem; - int i; - - if (nr_ioapics <= 0) - return NULL; - - n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); - n *= nr_ioapics; - - mem = alloc_bootmem(n); - res = (void *)mem; - - if (mem != NULL) { - memset(mem, 0, n); - mem += sizeof(struct resource) * nr_ioapics; - - for (i = 0; i < nr_ioapics; i++) { - res[i].name = mem; - res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; - sprintf(mem, "IOAPIC %u", i); - mem += IOAPIC_RESOURCE_NAME_SIZE; - } - } - - ioapic_resources = res; - - return res; -} - -static int __init ioapic_insert_resources(void) -{ - int i; - struct resource *r = ioapic_resources; - - if (!r) { - printk("IO APIC resources could be not be allocated.\n"); + if (!cpu_has_apic) { + printk(KERN_INFO "No local APIC present\n"); return -1; } - for (i = 0; i < nr_ioapics; i++) { - insert_resource(&iomem_resource, r); - r++; - } - + mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; + boot_cpu_id = 0; return 0; } -/* Insert the IO APIC resources after PCI initialization has occured to handle - * IO APICS that are mapped in on a BAR in PCI space. */ -late_initcall(ioapic_insert_resources); -#endif - +/** + * init_apic_mappings - initialize APIC mappings + */ void __init init_apic_mappings(void) { unsigned long apic_phys; @@ -800,295 +863,279 @@ void __init init_apic_mappings(void) * default configuration (or the MP table is broken). */ boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID)); - - { - unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; - int i; - struct resource *ioapic_res; - - ioapic_res = ioapic_setup_resources(); - for (i = 0; i < nr_ioapics; i++) { - if (smp_found_config) { - ioapic_phys = mp_ioapics[i].mpc_apicaddr; - } else { - ioapic_phys = (unsigned long) - alloc_bootmem_pages(PAGE_SIZE); - ioapic_phys = __pa(ioapic_phys); - } - set_fixmap_nocache(idx, ioapic_phys); - apic_printk(APIC_VERBOSE, - "mapped IOAPIC to %016lx (%016lx)\n", - __fix_to_virt(idx), ioapic_phys); - idx++; - - if (ioapic_res != NULL) { - ioapic_res->start = ioapic_phys; - ioapic_res->end = ioapic_phys + (4 * 1024) - 1; - ioapic_res++; - } - } - } } /* - * This function sets up the local APIC timer, with a timeout of - * 'clocks' APIC bus clock. During calibration we actually call - * this function twice on the boot CPU, once with a bogus timeout - * value, second time for real. The other (noncalibrating) CPUs - * call this function only once, with the real, calibrated value. - * - * We do reads before writes even if unnecessary, to get around the - * P5 APIC double write bug. + * This initializes the IO-APIC and APIC hardware if this is + * a UP kernel. 
*/ - -static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) +int __init APIC_init_uniprocessor(void) { - unsigned int lvtt_value, tmp_value; + if (disable_apic) { + printk(KERN_INFO "Apic disabled\n"); + return -1; + } + if (!cpu_has_apic) { + disable_apic = 1; + printk(KERN_INFO "Apic disabled by BIOS\n"); + return -1; + } - lvtt_value = LOCAL_TIMER_VECTOR; - if (!oneshot) - lvtt_value |= APIC_LVT_TIMER_PERIODIC; - if (!irqen) - lvtt_value |= APIC_LVT_MASKED; + verify_local_APIC(); - apic_write(APIC_LVTT, lvtt_value); + phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id); + apic_write(APIC_ID, SET_APIC_ID(boot_cpu_id)); + + setup_local_APIC(); /* - * Divide PICLK by 16 + * Now enable IO-APICs, actually call clear_IO_APIC + * We need clear_IO_APIC before enabling vector on BP */ - tmp_value = apic_read(APIC_TDCR); - apic_write(APIC_TDCR, (tmp_value - & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) - | APIC_TDR_DIV_16); - - if (!oneshot) - apic_write(APIC_TMICT, clocks); -} - -static void setup_APIC_timer(void) -{ - struct clock_event_device *levt = &__get_cpu_var(lapic_events); + if (!skip_ioapic_setup && nr_ioapics) + enable_IO_APIC(); - memcpy(levt, &lapic_clockevent, sizeof(*levt)); - levt->cpumask = cpumask_of_cpu(smp_processor_id()); + end_local_APIC_setup(); - clockevents_register_device(levt); + if (smp_found_config && !skip_ioapic_setup && nr_ioapics) + setup_IO_APIC(); + else + nr_ioapics = 0; + setup_boot_APIC_clock(); + check_nmi_watchdog(); + return 0; } /* - * In this function we calibrate APIC bus clocks to the external - * timer. Unfortunately we cannot use jiffies and the timer irq - * to calibrate, since some later bootup code depends on getting - * the first irq? Ugh. - * - * We want to do the calibration only once since we - * want to have local timer irqs syncron. CPUs connected - * by the same APIC bus have the very same bus frequency. - * And we want to have irqs off anyways, no accidental - * APIC irq that way. + * Local APIC interrupts */ -#define TICK_COUNT 100000000 - -static void __init calibrate_APIC_clock(void) +/* + * This interrupt should _never_ happen with our APIC/SMP architecture + */ +asmlinkage void smp_spurious_interrupt(void) { - unsigned apic, apic_start; - unsigned long tsc, tsc_start; - int result; - - local_irq_disable(); - + unsigned int v; + exit_idle(); + irq_enter(); /* - * Put whatever arbitrary (but long enough) timeout - * value into the APIC clock, we just want to get the - * counter running for calibration. - * - * No interrupt enable ! + * Check if this really is a spurious interrupt and ACK it + * if it is a vectored one. Just in case... + * Spurious interrupts should not be ACKed. 
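The v = apic_read(APIC_ISR + ...) test just below indexes one of eight 32-bit ISR banks spaced 0x10 apart: (vector & ~0x1f) >> 1 is the bank's byte offset and vector & 0x1f is the bit within it. A quick standalone check of that arithmetic for the usual 0xff spurious vector:

#include <stdio.h>

int main(void)
{
	unsigned int vector = 0xff;	/* typical spurious vector */
	unsigned int bank_off = (vector & ~0x1f) >> 1;
	unsigned int bit = vector & 0x1f;

	printf("vector %#x -> APIC_ISR + %#x, bit %u\n",
	       vector, bank_off, bit);
	return 0;
}

For 0xff this lands on APIC_ISR + 0x70, bit 31, the last bit of the last bank.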
*/ - __setup_APIC_LVTT(250000000, 0, 0); - - apic_start = apic_read(APIC_TMCCT); -#ifdef CONFIG_X86_PM_TIMER - if (apic_calibrate_pmtmr && pmtmr_ioport) { - pmtimer_wait(5000); /* 5ms wait */ - apic = apic_read(APIC_TMCCT); - result = (apic_start - apic) * 1000L / 5; - } else -#endif - { - rdtscll(tsc_start); - - do { - apic = apic_read(APIC_TMCCT); - rdtscll(tsc); - } while ((tsc - tsc_start) < TICK_COUNT && - (apic_start - apic) < TICK_COUNT); - - result = (apic_start - apic) * 1000L * tsc_khz / - (tsc - tsc_start); - } - - local_irq_enable(); + v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1)); + if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) + ack_APIC_irq(); - printk(KERN_DEBUG "APIC timer calibration result %d\n", result); + add_pda(irq_spurious_count, 1); + irq_exit(); +} - printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n", - result / 1000 / 1000, result / 1000 % 1000); +/* + * This interrupt should never happen with our APIC/SMP architecture + */ +asmlinkage void smp_error_interrupt(void) +{ + unsigned int v, v1; - /* Calculate the scaled math multiplication factor */ - lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC, 32); - lapic_clockevent.max_delta_ns = - clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); - lapic_clockevent.min_delta_ns = - clockevent_delta2ns(0xF, &lapic_clockevent); + exit_idle(); + irq_enter(); + /* First tickle the hardware, only then report what went on. -- REW */ + v = apic_read(APIC_ESR); + apic_write(APIC_ESR, 0); + v1 = apic_read(APIC_ESR); + ack_APIC_irq(); + atomic_inc(&irq_err_count); - calibration_result = result / HZ; + /* Here is what the APIC error bits mean: + 0: Send CS error + 1: Receive CS error + 2: Send accept error + 3: Receive accept error + 4: Reserved + 5: Send illegal vector + 6: Received illegal vector + 7: Illegal register address + */ + printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n", + smp_processor_id(), v , v1); + irq_exit(); } -void __init setup_boot_APIC_clock (void) +void disconnect_bsp_APIC(int virt_wire_setup) { - /* - * The local apic timer can be disabled via the kernel commandline. - * Register the lapic timer as a dummy clock event source on SMP - * systems, so the broadcast mechanism is used. On UP systems simply - * ignore it. - */ - if (disable_apic_timer) { - printk(KERN_INFO "Disabling APIC timer\n"); - /* No broadcast on UP ! */ - if (num_possible_cpus() > 1) - setup_APIC_timer(); - return; - } + /* Go back to Virtual Wire compatibility mode */ + unsigned long value; - printk(KERN_INFO "Using local APIC timer interrupts.\n"); - calibrate_APIC_clock(); + /* For the spurious interrupt use vector F, and enable it */ + value = apic_read(APIC_SPIV); + value &= ~APIC_VECTOR_MASK; + value |= APIC_SPIV_APIC_ENABLED; + value |= 0xf; + apic_write(APIC_SPIV, value); - /* - * If nmi_watchdog is set to IO_APIC, we need the - * PIT/HPET going. Otherwise register lapic as a dummy - * device. 
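smp_error_interrupt() above documents the ESR bits only in a comment; a small decoder table built from that same list can be handy when reading the "APIC error on CPUn: xx(yy)" output. Illustrative only, with an arbitrary sample value:

#include <stdio.h>

static const char *esr_bits[8] = {
	"Send CS error",
	"Receive CS error",
	"Send accept error",
	"Receive accept error",
	"Reserved",
	"Send illegal vector",
	"Received illegal vector",
	"Illegal register address",
};

int main(void)
{
	unsigned int v = 0x40;	/* example: received illegal vector */
	int i;

	for (i = 0; i < 8; i++)
		if (v & (1 << i))
			printf("ESR bit %d: %s\n", i, esr_bits[i]);
	return 0;
}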
- */ - if (nmi_watchdog != NMI_IO_APIC) - lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; - else - printk(KERN_WARNING "APIC timer registered as dummy," - " due to nmi_watchdog=1!\n"); + if (!virt_wire_setup) { + /* + * For LVT0 make it edge triggered, active high, + * external and enabled + */ + value = apic_read(APIC_LVT0); + value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | + APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | + APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); + value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; + value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); + apic_write(APIC_LVT0, value); + } else { + /* Disable LVT0 */ + apic_write(APIC_LVT0, APIC_LVT_MASKED); + } - setup_APIC_timer(); + /* For LVT1 make it edge triggered, active high, nmi and enabled */ + value = apic_read(APIC_LVT1); + value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | + APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | + APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); + value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; + value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); + apic_write(APIC_LVT1, value); } /* - * AMD C1E enabled CPUs have a real nasty problem: Some BIOSes set the - * C1E flag only in the secondary CPU, so when we detect the wreckage - * we already have enabled the boot CPU local apic timer. Check, if - * disable_apic_timer is set and the DUMMY flag is cleared. If yes, - * set the DUMMY flag again and force the broadcast mode in the - * clockevents layer. + * Power management */ -void __cpuinit check_boot_apic_timer_broadcast(void) +#ifdef CONFIG_PM + +static struct { + /* 'active' is true if the local APIC was enabled by us and + not the BIOS; this signifies that we are also responsible + for disabling it before entering apm/acpi suspend */ + int active; + /* r/w apic fields */ + unsigned int apic_id; + unsigned int apic_taskpri; + unsigned int apic_ldr; + unsigned int apic_dfr; + unsigned int apic_spiv; + unsigned int apic_lvtt; + unsigned int apic_lvtpc; + unsigned int apic_lvt0; + unsigned int apic_lvt1; + unsigned int apic_lvterr; + unsigned int apic_tmict; + unsigned int apic_tdcr; + unsigned int apic_thmr; +} apic_pm_state; + +static int lapic_suspend(struct sys_device *dev, pm_message_t state) { - if (!disable_apic_timer || - (lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY)) - return; + unsigned long flags; + int maxlvt; - printk(KERN_INFO "AMD C1E detected late. 
Force timer broadcast.\n"); - lapic_clockevent.features |= CLOCK_EVT_FEAT_DUMMY; + if (!apic_pm_state.active) + return 0; - local_irq_enable(); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, &boot_cpu_id); - local_irq_disable(); -} + maxlvt = lapic_get_maxlvt(); -void __cpuinit setup_secondary_APIC_clock(void) -{ - check_boot_apic_timer_broadcast(); - setup_APIC_timer(); + apic_pm_state.apic_id = apic_read(APIC_ID); + apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); + apic_pm_state.apic_ldr = apic_read(APIC_LDR); + apic_pm_state.apic_dfr = apic_read(APIC_DFR); + apic_pm_state.apic_spiv = apic_read(APIC_SPIV); + apic_pm_state.apic_lvtt = apic_read(APIC_LVTT); + if (maxlvt >= 4) + apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); + apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); + apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); + apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); + apic_pm_state.apic_tmict = apic_read(APIC_TMICT); + apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); +#ifdef CONFIG_X86_MCE_INTEL + if (maxlvt >= 5) + apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); +#endif + local_irq_save(flags); + disable_local_APIC(); + local_irq_restore(flags); + return 0; } -int setup_profiling_timer(unsigned int multiplier) +static int lapic_resume(struct sys_device *dev) { - return -EINVAL; -} + unsigned int l, h; + unsigned long flags; + int maxlvt; -void setup_APIC_extended_lvt(unsigned char lvt_off, unsigned char vector, - unsigned char msg_type, unsigned char mask) -{ - unsigned long reg = (lvt_off << 4) + K8_APIC_EXT_LVT_BASE; - unsigned int v = (mask << 16) | (msg_type << 8) | vector; - apic_write(reg, v); -} + if (!apic_pm_state.active) + return 0; -/* - * Local timer interrupt handler. It does both profiling and - * process statistics/rescheduling. - * - * We do profiling in every local tick, statistics/rescheduling - * happen only every 'profiling multiplier' ticks. The default - * multiplier is 1 and it can be changed by writing the new multiplier - * value into /proc/profile. 
- */ + maxlvt = lapic_get_maxlvt(); -void smp_local_timer_interrupt(void) -{ - int cpu = smp_processor_id(); - struct clock_event_device *evt = &per_cpu(lapic_events, cpu); + local_irq_save(flags); + rdmsr(MSR_IA32_APICBASE, l, h); + l &= ~MSR_IA32_APICBASE_BASE; + l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; + wrmsr(MSR_IA32_APICBASE, l, h); + apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); + apic_write(APIC_ID, apic_pm_state.apic_id); + apic_write(APIC_DFR, apic_pm_state.apic_dfr); + apic_write(APIC_LDR, apic_pm_state.apic_ldr); + apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri); + apic_write(APIC_SPIV, apic_pm_state.apic_spiv); + apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); + apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); +#ifdef CONFIG_X86_MCE_INTEL + if (maxlvt >= 5) + apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); +#endif + if (maxlvt >= 4) + apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); + apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); + apic_write(APIC_TDCR, apic_pm_state.apic_tdcr); + apic_write(APIC_TMICT, apic_pm_state.apic_tmict); + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); + apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); + local_irq_restore(flags); + return 0; +} - /* - * Normally we should not be here till LAPIC has been initialized but - * in some cases like kdump, its possible that there is a pending LAPIC - * timer interrupt from previous kernel's context and is delivered in - * new kernel the moment interrupts are enabled. - * - * Interrupts are enabled early and LAPIC is setup much later, hence - * its possible that when we get here evt->event_handler is NULL. - * Check for event_handler being NULL and discard the interrupt as - * spurious. - */ - if (!evt->event_handler) { - printk(KERN_WARNING - "Spurious LAPIC timer interrupt on cpu %d\n", cpu); - /* Switch it off */ - lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt); - return; - } +static struct sysdev_class lapic_sysclass = { + set_kset_name("lapic"), + .resume = lapic_resume, + .suspend = lapic_suspend, +}; - /* - * the NMI deadlock-detector uses this. - */ - add_pda(apic_timer_irqs, 1); +static struct sys_device device_lapic = { + .id = 0, + .cls = &lapic_sysclass, +}; - evt->event_handler(evt); +static void __cpuinit apic_pm_activate(void) +{ + apic_pm_state.active = 1; } -/* - * Local APIC timer interrupt. This is the most natural way for doing - * local interrupts, but local timer interrupts can be emulated by - * broadcast interrupts too. [in case the hw doesn't support APIC timers] - * - * [ if a single-CPU system runs an SMP kernel then we call the local - * interrupt as well. Thus we cannot inline the local irq ... ] - */ -void smp_apic_timer_interrupt(struct pt_regs *regs) +static int __init init_lapic_sysfs(void) { - struct pt_regs *old_regs = set_irq_regs(regs); - - /* - * NOTE! We'd better ACK the irq immediately, - * because timer handling can be slow. - */ - ack_APIC_irq(); - /* - * update_process_times() expects us to have done irq_enter(). - * Besides, if we don't timer interrupts ignore the global - * interrupt lock, which is the WrongThing (tm) to do. - */ - exit_idle(); - irq_enter(); - smp_local_timer_interrupt(); - irq_exit(); - set_irq_regs(old_regs); + int error; + if (!cpu_has_apic) + return 0; + /* XXX: remove suspend/resume procs if !apic_pm_state.active? 
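A note on lapic_resume() above: nothing can be restored until the APIC is re-enabled at its MP-table address through MSR_IA32_APICBASE, and LVTERR is deliberately written masked first so that no error interrupt fires while the remaining registers are still being reloaded. The MSR surgery, restated as a stand-alone sketch (constants redefined here for illustration; the kernel takes them from its MSR headers):

#include <stdint.h>

#define APICBASE_BASE   0xfffff000u     /* physical base address, bits 12-31 */
#define APICBASE_ENABLE (1u << 11)      /* APIC global enable */

/* rebuild the low word of MSR_IA32_APICBASE: keep the other flags,
 * swap in the MP-table base, force the enable bit */
static uint32_t apicbase_low(uint32_t lo, uint32_t mp_lapic_addr)
{
        lo &= ~APICBASE_BASE;
        return lo | APICBASE_ENABLE | mp_lapic_addr;
}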
*/ + error = sysdev_class_register(&lapic_sysclass); + if (!error) + error = sysdev_register(&device_lapic); + return error; } +device_initcall(init_lapic_sysfs); + +#else /* CONFIG_PM */ + +static void apic_pm_activate(void) { } + +#endif /* CONFIG_PM */ /* * apic_is_clustered_box() -- Check if we can expect good TSC @@ -1138,96 +1185,33 @@ __cpuinit int apic_is_clustered_box(void } /* - * This interrupt should _never_ happen with our APIC/SMP architecture - */ -asmlinkage void smp_spurious_interrupt(void) -{ - unsigned int v; - exit_idle(); - irq_enter(); - /* - * Check if this really is a spurious interrupt and ACK it - * if it is a vectored one. Just in case... - * Spurious interrupts should not be ACKed. - */ - v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1)); - if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) - ack_APIC_irq(); - - add_pda(irq_spurious_count, 1); - irq_exit(); -} - -/* - * This interrupt should never happen with our APIC/SMP architecture - */ - -asmlinkage void smp_error_interrupt(void) -{ - unsigned int v, v1; - - exit_idle(); - irq_enter(); - /* First tickle the hardware, only then report what went on. -- REW */ - v = apic_read(APIC_ESR); - apic_write(APIC_ESR, 0); - v1 = apic_read(APIC_ESR); - ack_APIC_irq(); - atomic_inc(&irq_err_count); - - /* Here is what the APIC error bits mean: - 0: Send CS error - 1: Receive CS error - 2: Send accept error - 3: Receive accept error - 4: Reserved - 5: Send illegal vector - 6: Received illegal vector - 7: Illegal register address - */ - printk (KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n", - smp_processor_id(), v , v1); - irq_exit(); -} - -int disable_apic; - -/* - * This initializes the IO-APIC and APIC hardware if this is - * a UP kernel. + * APIC command line parameters */ -int __init APIC_init_uniprocessor (void) +static int __init apic_set_verbosity(char *str) { - if (disable_apic) { - printk(KERN_INFO "Apic disabled\n"); - return -1; + if (str == NULL) { + skip_ioapic_setup = 0; + ioapic_force = 1; + return 0; } - if (!cpu_has_apic) { - disable_apic = 1; - printk(KERN_INFO "Apic disabled by BIOS\n"); - return -1; + if (strcmp("debug", str) == 0) + apic_verbosity = APIC_DEBUG; + else if (strcmp("verbose", str) == 0) + apic_verbosity = APIC_VERBOSE; + else { + printk(KERN_WARNING "APIC Verbosity level %s not recognised" + " use apic=verbose or apic=debug\n", str); + return -EINVAL; } - verify_local_APIC(); - - phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id); - apic_write(APIC_ID, SET_APIC_ID(boot_cpu_id)); - - setup_local_APIC(); - - if (smp_found_config && !skip_ioapic_setup && nr_ioapics) - setup_IO_APIC(); - else - nr_ioapics = 0; - setup_boot_APIC_clock(); - check_nmi_watchdog(); return 0; } +early_param("apic", apic_set_verbosity); static __init int setup_disableapic(char *str) { disable_apic = 1; - clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); + clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); return 0; } early_param("disableapic", setup_disableapic); diff -puN arch/x86/kernel/apm_32.c~git-x86 arch/x86/kernel/apm_32.c --- a/arch/x86/kernel/apm_32.c~git-x86 +++ a/arch/x86/kernel/apm_32.c @@ -235,8 +235,6 @@ #include #include -#include "io_ports.h" - #if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) extern int (*console_blank_hook)(int); #endif diff -puN arch/x86/kernel/asm-offsets_32.c~git-x86 arch/x86/kernel/asm-offsets_32.c --- a/arch/x86/kernel/asm-offsets_32.c~git-x86 +++ a/arch/x86/kernel/asm-offsets_32.c @@ -38,15 +38,15 @@ void foo(void); void foo(void) { - 
OFFSET(SIGCONTEXT_eax, sigcontext, eax); - OFFSET(SIGCONTEXT_ebx, sigcontext, ebx); - OFFSET(SIGCONTEXT_ecx, sigcontext, ecx); - OFFSET(SIGCONTEXT_edx, sigcontext, edx); - OFFSET(SIGCONTEXT_esi, sigcontext, esi); - OFFSET(SIGCONTEXT_edi, sigcontext, edi); - OFFSET(SIGCONTEXT_ebp, sigcontext, ebp); - OFFSET(SIGCONTEXT_esp, sigcontext, esp); - OFFSET(SIGCONTEXT_eip, sigcontext, eip); + OFFSET(IA32_SIGCONTEXT_ax, sigcontext, ax); + OFFSET(IA32_SIGCONTEXT_bx, sigcontext, bx); + OFFSET(IA32_SIGCONTEXT_cx, sigcontext, cx); + OFFSET(IA32_SIGCONTEXT_dx, sigcontext, dx); + OFFSET(IA32_SIGCONTEXT_si, sigcontext, si); + OFFSET(IA32_SIGCONTEXT_di, sigcontext, di); + OFFSET(IA32_SIGCONTEXT_bp, sigcontext, bp); + OFFSET(IA32_SIGCONTEXT_sp, sigcontext, sp); + OFFSET(IA32_SIGCONTEXT_ip, sigcontext, ip); BLANK(); OFFSET(CPUINFO_x86, cpuinfo_x86, x86); @@ -75,34 +75,34 @@ void foo(void) OFFSET(GDS_pad, Xgt_desc_struct, pad); BLANK(); - OFFSET(PT_EBX, pt_regs, ebx); - OFFSET(PT_ECX, pt_regs, ecx); - OFFSET(PT_EDX, pt_regs, edx); - OFFSET(PT_ESI, pt_regs, esi); - OFFSET(PT_EDI, pt_regs, edi); - OFFSET(PT_EBP, pt_regs, ebp); - OFFSET(PT_EAX, pt_regs, eax); - OFFSET(PT_DS, pt_regs, xds); - OFFSET(PT_ES, pt_regs, xes); - OFFSET(PT_FS, pt_regs, xfs); - OFFSET(PT_ORIG_EAX, pt_regs, orig_eax); - OFFSET(PT_EIP, pt_regs, eip); - OFFSET(PT_CS, pt_regs, xcs); - OFFSET(PT_EFLAGS, pt_regs, eflags); - OFFSET(PT_OLDESP, pt_regs, esp); - OFFSET(PT_OLDSS, pt_regs, xss); + OFFSET(PT_EBX, pt_regs, bx); + OFFSET(PT_ECX, pt_regs, cx); + OFFSET(PT_EDX, pt_regs, dx); + OFFSET(PT_ESI, pt_regs, si); + OFFSET(PT_EDI, pt_regs, di); + OFFSET(PT_EBP, pt_regs, bp); + OFFSET(PT_EAX, pt_regs, ax); + OFFSET(PT_DS, pt_regs, ds); + OFFSET(PT_ES, pt_regs, es); + OFFSET(PT_FS, pt_regs, fs); + OFFSET(PT_ORIG_EAX, pt_regs, orig_ax); + OFFSET(PT_EIP, pt_regs, ip); + OFFSET(PT_CS, pt_regs, cs); + OFFSET(PT_EFLAGS, pt_regs, flags); + OFFSET(PT_OLDESP, pt_regs, sp); + OFFSET(PT_OLDSS, pt_regs, ss); BLANK(); OFFSET(EXEC_DOMAIN_handler, exec_domain, handler); - OFFSET(RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext); + OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext); BLANK(); OFFSET(pbe_address, pbe, address); OFFSET(pbe_orig_address, pbe, orig_address); OFFSET(pbe_next, pbe, next); - /* Offset from the sysenter stack to tss.esp0 */ - DEFINE(TSS_sysenter_esp0, offsetof(struct tss_struct, x86_tss.esp0) - + /* Offset from the sysenter stack to tss.sp0 */ + DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) - sizeof(struct tss_struct)); DEFINE(PAGE_SIZE_asm, PAGE_SIZE); @@ -111,8 +111,6 @@ void foo(void) DEFINE(PTRS_PER_PMD, PTRS_PER_PMD); DEFINE(PTRS_PER_PGD, PTRS_PER_PGD); - DEFINE(VDSO_PRELINK_asm, VDSO_PRELINK); - OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); #ifdef CONFIG_PARAVIRT @@ -123,7 +121,7 @@ void foo(void) OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable); OFFSET(PV_CPU_iret, pv_cpu_ops, iret); - OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit); + OFFSET(PV_CPU_irq_enable_syscall_ret, pv_cpu_ops, irq_enable_syscall_ret); OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0); #endif diff -puN arch/x86/kernel/asm-offsets_64.c~git-x86 arch/x86/kernel/asm-offsets_64.c --- a/arch/x86/kernel/asm-offsets_64.c~git-x86 +++ a/arch/x86/kernel/asm-offsets_64.c @@ -38,7 +38,6 @@ int main(void) #define ENTRY(entry) DEFINE(tsk_ ## entry, offsetof(struct task_struct, entry)) ENTRY(state); ENTRY(flags); - ENTRY(thread); ENTRY(pid); BLANK(); #undef ENTRY 
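For readers who have not met asm-offsets before: nothing in these files runs. They are compiled to assembly only, and the build extracts marker lines from the .s output to generate the numeric constants that entry_32.S/entry_64.S use. Roughly how OFFSET()/DEFINE() emit those markers, sketched with a hypothetical struct (the real macros are equivalent in spirit):

#include <stddef.h>

/* force the compiler to print the constant into the .s output as
 * "->SYM <value>"; a sed script later turns that into a #define */
#define DEFINE(sym, val) \
        asm volatile("\n->" #sym " %0 " #val : : "i" (val))
#define OFFSET(sym, str, mem) \
        DEFINE(sym, offsetof(struct str, mem))

struct pt_regs_sketch { long bx, cx, dx; };     /* hypothetical layout */

void foo(void)
{
        OFFSET(PT_CX_sketch, pt_regs_sketch, cx); /* -> PT_CX_sketch 8 on LP64 */
}

That indirection is why the register renames in this series (eax to ax, and so on) only have to touch the C structs: the assembly keeps using PT_ECX and friends, whose values are simply regenerated.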
@@ -47,6 +46,9 @@ int main(void) ENTRY(addr_limit); ENTRY(preempt_count); ENTRY(status); +#ifdef CONFIG_IA32_EMULATION + ENTRY(sysenter_return); +#endif BLANK(); #undef ENTRY #define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry)) @@ -61,15 +63,15 @@ int main(void) #undef ENTRY #ifdef CONFIG_IA32_EMULATION #define ENTRY(entry) DEFINE(IA32_SIGCONTEXT_ ## entry, offsetof(struct sigcontext_ia32, entry)) - ENTRY(eax); - ENTRY(ebx); - ENTRY(ecx); - ENTRY(edx); - ENTRY(esi); - ENTRY(edi); - ENTRY(ebp); - ENTRY(esp); - ENTRY(eip); + ENTRY(ax); + ENTRY(bx); + ENTRY(cx); + ENTRY(dx); + ENTRY(si); + ENTRY(di); + ENTRY(bp); + ENTRY(sp); + ENTRY(ip); BLANK(); #undef ENTRY DEFINE(IA32_RT_SIGFRAME_sigcontext, @@ -81,14 +83,14 @@ int main(void) DEFINE(pbe_next, offsetof(struct pbe, next)); BLANK(); #define ENTRY(entry) DEFINE(pt_regs_ ## entry, offsetof(struct pt_regs, entry)) - ENTRY(rbx); - ENTRY(rbx); - ENTRY(rcx); - ENTRY(rdx); - ENTRY(rsp); - ENTRY(rbp); - ENTRY(rsi); - ENTRY(rdi); + ENTRY(bx); + ENTRY(bx); + ENTRY(cx); + ENTRY(dx); + ENTRY(sp); + ENTRY(bp); + ENTRY(si); + ENTRY(di); ENTRY(r8); ENTRY(r9); ENTRY(r10); @@ -97,7 +99,7 @@ int main(void) ENTRY(r13); ENTRY(r14); ENTRY(r15); - ENTRY(eflags); + ENTRY(flags); BLANK(); #undef ENTRY #define ENTRY(entry) DEFINE(saved_context_ ## entry, offsetof(struct saved_context, entry)) @@ -105,7 +107,6 @@ int main(void) ENTRY(cr2); ENTRY(cr3); ENTRY(cr4); - ENTRY(cr8); BLANK(); #undef ENTRY DEFINE(TSS_ist, offsetof(struct tss_struct, ist)); diff -puN arch/x86/kernel/cpu/addon_cpuid_features.c~git-x86 arch/x86/kernel/cpu/addon_cpuid_features.c --- a/arch/x86/kernel/cpu/addon_cpuid_features.c~git-x86 +++ a/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -45,6 +45,6 @@ void __cpuinit init_scattered_cpuid_feat ®s[CR_ECX], ®s[CR_EDX]); if (regs[cb->reg] & (1 << cb->bit)) - set_bit(cb->feature, c->x86_capability); + set_cpu_cap(c, cb->feature); } } diff -puN arch/x86/kernel/cpu/bugs.c~git-x86 arch/x86/kernel/cpu/bugs.c --- a/arch/x86/kernel/cpu/bugs.c~git-x86 +++ a/arch/x86/kernel/cpu/bugs.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -35,7 +36,7 @@ __setup("mca-pentium", mca_pentium); static int __init no_387(char *s) { boot_cpu_data.hard_math = 0; - write_cr0(0xE | read_cr0()); + write_cr0(X86_CR0_TS | X86_CR0_EM | X86_CR0_MP | read_cr0()); return 1; } diff -puN arch/x86/kernel/cpu/common.c~git-x86 arch/x86/kernel/cpu/common.c --- a/arch/x86/kernel/cpu/common.c~git-x86 +++ a/arch/x86/kernel/cpu/common.c @@ -634,7 +634,7 @@ void __init early_cpu_init(void) struct pt_regs * __devinit idle_regs(struct pt_regs *regs) { memset(regs, 0, sizeof(struct pt_regs)); - regs->xfs = __KERNEL_PERCPU; + regs->fs = __KERNEL_PERCPU; return regs; } @@ -691,7 +691,7 @@ void __cpuinit cpu_init(void) BUG(); enter_lazy_tlb(&init_mm, curr); - load_esp0(t, thread); + load_sp0(t, thread); set_tss_desc(cpu,t); load_TR_desc(); load_LDT(&init_mm.context); diff -puN arch/x86/kernel/cpu/cyrix.c~git-x86 arch/x86/kernel/cpu/cyrix.c --- a/arch/x86/kernel/cpu/cyrix.c~git-x86 +++ a/arch/x86/kernel/cpu/cyrix.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -126,15 +127,12 @@ static void __cpuinit set_cx86_reorder(v static void __cpuinit set_cx86_memwb(void) { - u32 cr0; - printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n"); /* CCR2 bit 2: unlock NW bit */ setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04); /* set 'Not Write-through' */ - cr0 = 0x20000000; - write_cr0(read_cr0() | 
cr0); + write_cr0(read_cr0() | X86_CR0_NW); /* CCR2 bit 2: lock NW bit and set WT1 */ setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 ); } diff -puN arch/x86/kernel/cpu/intel.c~git-x86 arch/x86/kernel/cpu/intel.c --- a/arch/x86/kernel/cpu/intel.c~git-x86 +++ a/arch/x86/kernel/cpu/intel.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include "cpu.h" @@ -219,6 +221,9 @@ static void __cpuinit init_intel(struct if (!(l1 & (1<<12))) set_bit(X86_FEATURE_PEBS, c->x86_capability); } + + if (cpu_has_bts) + ds_init_intel(c); } static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 * c, unsigned int size) @@ -342,5 +347,22 @@ unsigned long cmpxchg_386_u32(volatile v EXPORT_SYMBOL(cmpxchg_386_u32); #endif +#ifndef CONFIG_X86_CMPXCHG64 +unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new) +{ + u64 prev; + unsigned long flags; + + /* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u64 *)ptr; + if (prev == old) + *(u64 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_486_u64); +#endif + // arch_initcall(intel_cpu_init); diff -puN arch/x86/kernel/cpu/mcheck/mce_64.c~git-x86 arch/x86/kernel/cpu/mcheck/mce_64.c --- a/arch/x86/kernel/cpu/mcheck/mce_64.c~git-x86 +++ a/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -63,7 +63,7 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait) * separate MCEs from kernel messages to avoid bogus bug reports. */ -struct mce_log mcelog = { +static struct mce_log mcelog = { MCE_LOG_SIGNATURE, MCE_LOG_LEN, }; @@ -80,7 +80,7 @@ void mce_log(struct mce *mce) /* When the buffer fills up discard new entries. Assume that the earlier errors are the more interesting. */ if (entry >= MCE_LOG_LEN) { - set_bit(MCE_OVERFLOW, &mcelog.flags); + set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags); return; } /* Old left over entry. Skip. */ @@ -110,12 +110,12 @@ static void print_mce(struct mce *m) KERN_EMERG "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", m->cpu, m->mcgstatus, m->bank, m->status); - if (m->rip) { + if (m->ip) { printk(KERN_EMERG "RIP%s %02x:<%016Lx> ", !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", - m->cs, m->rip); + m->cs, m->ip); if (m->cs == __KERNEL_CS) - print_symbol("{%s}", m->rip); + print_symbol("{%s}", m->ip); printk("\n"); } printk(KERN_EMERG "TSC %Lx ", m->tsc); @@ -156,16 +156,16 @@ static int mce_available(struct cpuinfo_ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) { if (regs && (m->mcgstatus & MCG_STATUS_RIPV)) { - m->rip = regs->rip; + m->ip = regs->ip; m->cs = regs->cs; } else { - m->rip = 0; + m->ip = 0; m->cs = 0; } if (rip_msr) { /* Assume the RIP in the MSR is exact. Is this true? */ m->mcgstatus |= MCG_STATUS_EIPV; - rdmsrl(rip_msr, m->rip); + rdmsrl(rip_msr, m->ip); m->cs = 0; } } @@ -288,7 +288,7 @@ void do_machine_check(struct pt_regs * r * instruction which caused the MCE. 
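On the cmpxchg_486_u64() fallback added to intel.c above: it emulates cmpxchg8b by masking local interrupts, so it is atomic against interrupt handlers on the same CPU but, as its comment says, unsuitable for SMP. A hypothetical caller, assuming the usual cmpxchg64()-style wrapper is what routes to this fallback when CONFIG_X86_CMPXCHG64 is unset (the wrapper name is an assumption, not something this hunk adds):

/* classic compare-exchange retry loop around a 64-bit counter */
static void counter_add64(volatile unsigned long long *ctr,
                          unsigned long long delta)
{
        unsigned long long old;

        do {
                old = *ctr;                     /* snapshot */
        } while (cmpxchg64(ctr, old, old + delta) != old);
}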
*/ if (m.mcgstatus & MCG_STATUS_EIPV) - user_space = panicm.rip && (panicm.cs & 3); + user_space = panicm.ip && (panicm.cs & 3); /* * If we know that the error was in user space, send a diff -puN arch/x86/kernel/cpu/mcheck/mce_amd_64.c~git-x86 arch/x86/kernel/cpu/mcheck/mce_amd_64.c --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c~git-x86 +++ a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -118,6 +118,7 @@ void __cpuinit mce_amd_feature_init(stru { unsigned int bank, block; unsigned int cpu = smp_processor_id(); + u8 lvt_off; u32 low = 0, high = 0, address = 0; for (bank = 0; bank < NR_BANKS; ++bank) { @@ -153,14 +154,13 @@ void __cpuinit mce_amd_feature_init(stru if (shared_bank[bank] && c->cpu_core_id) break; #endif + lvt_off = setup_APIC_eilvt_mce(THRESHOLD_APIC_VECTOR, + APIC_EILVT_MSG_FIX, 0); + high &= ~MASK_LVTOFF_HI; - high |= K8_APIC_EXT_LVT_ENTRY_THRESHOLD << 20; + high |= lvt_off << 20; wrmsr(address, low, high); - setup_APIC_extended_lvt(K8_APIC_EXT_LVT_ENTRY_THRESHOLD, - THRESHOLD_APIC_VECTOR, - K8_APIC_EXT_INT_MSG_FIX, 0); - threshold_defaults.address = address; threshold_restart_bank(&threshold_defaults, 0, 0); } diff -puN arch/x86/kernel/cpu/mtrr/amd.c~git-x86 arch/x86/kernel/cpu/mtrr/amd.c --- a/arch/x86/kernel/cpu/mtrr/amd.c~git-x86 +++ a/arch/x86/kernel/cpu/mtrr/amd.c @@ -53,8 +53,6 @@ static void amd_set_mtrr(unsigned int re The base address of the region. The size of the region. If this is 0 the region is disabled. The type of the region. - If TRUE, do the change safely. If FALSE, safety measures should - be done externally. [RETURNS] Nothing. */ { diff -puN arch/x86/kernel/cpu/mtrr/cyrix.c~git-x86 arch/x86/kernel/cpu/mtrr/cyrix.c --- a/arch/x86/kernel/cpu/mtrr/cyrix.c~git-x86 +++ a/arch/x86/kernel/cpu/mtrr/cyrix.c @@ -4,6 +4,7 @@ #include #include #include +#include #include "mtrr.h" int arr3_protected; @@ -142,7 +143,7 @@ static void prepare_set(void) /* Disable and flush caches. Note that wbinvd flushes the TLBs as a side-effect */ - cr0 = read_cr0() | 0x40000000; + cr0 = read_cr0() | X86_CR0_CD; wbinvd(); write_cr0(cr0); wbinvd(); diff -puN arch/x86/kernel/cpu/mtrr/generic.c~git-x86 arch/x86/kernel/cpu/mtrr/generic.c --- a/arch/x86/kernel/cpu/mtrr/generic.c~git-x86 +++ a/arch/x86/kernel/cpu/mtrr/generic.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include "mtrr.h" @@ -188,7 +189,7 @@ static inline void k8_enable_fixed_iorrs * \param changed pointer which indicates whether the MTRR needed to be changed * \param msrwords pointer to the MSR values which the MSR should have */ -static void set_fixed_range(int msr, int * changed, unsigned int * msrwords) +static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords) { unsigned lo, hi; @@ -200,7 +201,7 @@ static void set_fixed_range(int msr, int ((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK)) k8_enable_fixed_iorrs(); mtrr_wrmsr(msr, msrwords[0], msrwords[1]); - *changed = TRUE; + *changed = true; } } @@ -260,7 +261,7 @@ static void generic_get_mtrr(unsigned in static int set_fixed_ranges(mtrr_type * frs) { unsigned long long *saved = (unsigned long long *) frs; - int changed = FALSE; + bool changed = false; int block=-1, range; while (fixed_range_blocks[++block].ranges) @@ -273,17 +274,17 @@ static int set_fixed_ranges(mtrr_type * /* Set the MSR pair relating to a var range. 
Returns TRUE if changes are made */ -static int set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr) +static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr) { unsigned int lo, hi; - int changed = FALSE; + bool changed = false; rdmsr(MTRRphysBase_MSR(index), lo, hi); if ((vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL) || (vr->base_hi & (size_and_mask >> (32 - PAGE_SHIFT))) != (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) { mtrr_wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi); - changed = TRUE; + changed = true; } rdmsr(MTRRphysMask_MSR(index), lo, hi); @@ -292,7 +293,7 @@ static int set_mtrr_var_ranges(unsigned || (vr->mask_hi & (size_and_mask >> (32 - PAGE_SHIFT))) != (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) { mtrr_wrmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); - changed = TRUE; + changed = true; } return changed; } @@ -350,7 +351,7 @@ static void prepare_set(void) __acquires spin_lock(&set_atomicity_lock); /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */ - cr0 = read_cr0() | 0x40000000; /* set CD flag */ + cr0 = read_cr0() | X86_CR0_CD; write_cr0(cr0); wbinvd(); @@ -417,8 +418,6 @@ static void generic_set_mtrr(unsigned in The base address of the region. The size of the region. If this is 0 the region is disabled. The type of the region. - If TRUE, do the change safely. If FALSE, safety measures should - be done externally. [RETURNS] Nothing. */ { diff -puN arch/x86/kernel/cpu/mtrr/if.c~git-x86 arch/x86/kernel/cpu/mtrr/if.c --- a/arch/x86/kernel/cpu/mtrr/if.c~git-x86 +++ a/arch/x86/kernel/cpu/mtrr/if.c @@ -37,7 +37,7 @@ const char *mtrr_attrib_to_str(int x) static int mtrr_file_add(unsigned long base, unsigned long size, - unsigned int type, char increment, struct file *file, int page) + unsigned int type, bool increment, struct file *file, int page) { int reg, max; unsigned int *fcount = FILE_FCOUNT(file); @@ -55,7 +55,7 @@ mtrr_file_add(unsigned long base, unsign base >>= PAGE_SHIFT; size >>= PAGE_SHIFT; } - reg = mtrr_add_page(base, size, type, 1); + reg = mtrr_add_page(base, size, type, true); if (reg >= 0) ++fcount[reg]; return reg; @@ -141,7 +141,7 @@ mtrr_write(struct file *file, const char size >>= PAGE_SHIFT; err = mtrr_add_page((unsigned long) base, (unsigned long) size, i, - 1); + true); if (err < 0) return err; return len; @@ -217,7 +217,7 @@ mtrr_ioctl(struct file *file, unsigned i if (!capable(CAP_SYS_ADMIN)) return -EPERM; err = - mtrr_file_add(sentry.base, sentry.size, sentry.type, 1, + mtrr_file_add(sentry.base, sentry.size, sentry.type, true, file, 0); break; case MTRRIOC_SET_ENTRY: @@ -226,7 +226,7 @@ mtrr_ioctl(struct file *file, unsigned i #endif if (!capable(CAP_SYS_ADMIN)) return -EPERM; - err = mtrr_add(sentry.base, sentry.size, sentry.type, 0); + err = mtrr_add(sentry.base, sentry.size, sentry.type, false); break; case MTRRIOC_DEL_ENTRY: #ifdef CONFIG_COMPAT @@ -270,7 +270,7 @@ mtrr_ioctl(struct file *file, unsigned i if (!capable(CAP_SYS_ADMIN)) return -EPERM; err = - mtrr_file_add(sentry.base, sentry.size, sentry.type, 1, + mtrr_file_add(sentry.base, sentry.size, sentry.type, true, file, 1); break; case MTRRIOC_SET_PAGE_ENTRY: @@ -279,7 +279,8 @@ mtrr_ioctl(struct file *file, unsigned i #endif if (!capable(CAP_SYS_ADMIN)) return -EPERM; - err = mtrr_add_page(sentry.base, sentry.size, sentry.type, 0); + err = + mtrr_add_page(sentry.base, sentry.size, sentry.type, false); break; case MTRRIOC_DEL_PAGE_ENTRY: #ifdef CONFIG_COMPAT diff -puN arch/x86/kernel/cpu/mtrr/main.c~git-x86 
arch/x86/kernel/cpu/mtrr/main.c --- a/arch/x86/kernel/cpu/mtrr/main.c~git-x86 +++ a/arch/x86/kernel/cpu/mtrr/main.c @@ -311,7 +311,7 @@ static void set_mtrr(unsigned int reg, u */ int mtrr_add_page(unsigned long base, unsigned long size, - unsigned int type, char increment) + unsigned int type, bool increment) { int i, replace, error; mtrr_type ltype; @@ -394,7 +394,9 @@ int mtrr_add_page(unsigned long base, un if (likely(replace < 0)) usage_table[i] = 1; else { - usage_table[i] = usage_table[replace] + !!increment; + usage_table[i] = usage_table[replace]; + if (increment) + usage_table[i]++; if (unlikely(replace != i)) { set_mtrr(replace, 0, 0, 0); usage_table[replace] = 0; @@ -460,7 +462,7 @@ static int mtrr_check(unsigned long base int mtrr_add(unsigned long base, unsigned long size, unsigned int type, - char increment) + bool increment) { if (mtrr_check(base, size)) return -EINVAL; diff -puN arch/x86/kernel/cpu/mtrr/mtrr.h~git-x86 arch/x86/kernel/cpu/mtrr/mtrr.h --- a/arch/x86/kernel/cpu/mtrr/mtrr.h~git-x86 +++ a/arch/x86/kernel/cpu/mtrr/mtrr.h @@ -2,10 +2,8 @@ * local mtrr defines. */ -#ifndef TRUE -#define TRUE 1 -#define FALSE 0 -#endif +#include +#include #define MTRRcap_MSR 0x0fe #define MTRRdefType_MSR 0x2ff diff -puN arch/x86/kernel/cpu/mtrr/state.c~git-x86 arch/x86/kernel/cpu/mtrr/state.c --- a/arch/x86/kernel/cpu/mtrr/state.c~git-x86 +++ a/arch/x86/kernel/cpu/mtrr/state.c @@ -4,6 +4,7 @@ #include #include #include +#include #include "mtrr.h" @@ -25,7 +26,7 @@ void set_mtrr_prepare_save(struct set_mt /* Disable and flush caches. Note that wbinvd flushes the TLBs as a side-effect */ - cr0 = read_cr0() | 0x40000000; + cr0 = read_cr0() | X86_CR0_CD; wbinvd(); write_cr0(cr0); wbinvd(); diff -puN arch/x86/kernel/cpu/perfctr-watchdog.c~git-x86 arch/x86/kernel/cpu/perfctr-watchdog.c --- a/arch/x86/kernel/cpu/perfctr-watchdog.c~git-x86 +++ a/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -167,7 +167,6 @@ void release_evntsel_nmi(unsigned int ms clear_bit(counter, evntsel_nmi_owner); } -EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); EXPORT_SYMBOL(reserve_perfctr_nmi); EXPORT_SYMBOL(release_perfctr_nmi); diff -puN arch/x86/kernel/doublefault_32.c~git-x86 arch/x86/kernel/doublefault_32.c --- a/arch/x86/kernel/doublefault_32.c~git-x86 +++ a/arch/x86/kernel/doublefault_32.c @@ -35,12 +35,13 @@ static void doublefault_fn(void) if (ptr_ok(tss)) { struct i386_hw_tss *t = (struct i386_hw_tss *)tss; - printk(KERN_EMERG "eip = %08lx, esp = %08lx\n", t->eip, t->esp); + printk(KERN_EMERG "eip = %08lx, esp = %08lx\n", + t->ip, t->sp); printk(KERN_EMERG "eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n", - t->eax, t->ebx, t->ecx, t->edx); + t->ax, t->bx, t->cx, t->dx); printk(KERN_EMERG "esi = %08lx, edi = %08lx\n", - t->esi, t->edi); + t->si, t->di); } } @@ -50,15 +51,15 @@ static void doublefault_fn(void) struct tss_struct doublefault_tss __cacheline_aligned = { .x86_tss = { - .esp0 = STACK_START, + .sp0 = STACK_START, .ss0 = __KERNEL_DS, .ldt = 0, .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, - .eip = (unsigned long) doublefault_fn, + .ip = (unsigned long) doublefault_fn, /* 0x2 bit is always set */ - .eflags = X86_EFLAGS_SF | 0x2, - .esp = STACK_START, + .flags = X86_EFLAGS_SF | 0x2, + .sp = STACK_START, .es = __USER_DS, .cs = __KERNEL_CS, .ss = __KERNEL_DS, diff -puN /dev/null arch/x86/kernel/ds.c --- /dev/null +++ a/arch/x86/kernel/ds.c @@ -0,0 +1,429 @@ +/* + * Debug Store support + * + * This provides a low-level interface to the hardware's Debug Store 
+ * feature that is used for last branch recording (LBR) and + * precise-event based sampling (PEBS). + * + * Different architectures use a different DS layout/pointer size. + * The below functions therefore work on a void*. + * + * + * Since there is no user for PEBS yet, only LBR (or branch + * trace store, BTS) is supported. + * + * + * Copyright (C) 2007 Intel Corporation. + * Markus Metzger , Dec 2007 + */ + +#include + +#include +#include +#include + + +/* + * Debug Store (DS) save area configuration (see Intel64 and IA32 + * Architectures Software Developer's Manual, section 18.5) + * + * The DS configuration consists of the following fields; different + * architectures vary in the size of those fields. + * - double-word aligned base linear address of the BTS buffer + * - write pointer into the BTS buffer + * - end linear address of the BTS buffer (one byte beyond the end of + * the buffer) + * - interrupt pointer into BTS buffer + * (interrupt occurs when write pointer passes interrupt pointer) + * - double-word aligned base linear address of the PEBS buffer + * - write pointer into the PEBS buffer + * - end linear address of the PEBS buffer (one byte beyond the end of + * the buffer) + * - interrupt pointer into PEBS buffer + * (interrupt occurs when write pointer passes interrupt pointer) + * - value to which counter is reset following counter overflow + * + * On later architectures, the last branch recording hardware uses + * 64bit pointers even in 32bit mode. + * + * + * Branch Trace Store (BTS) records store information about control + * flow changes. They at least provide the following information: + * - source linear address + * - destination linear address + * + * Netburst supported a predicated bit that has been dropped in later + * architectures. We do not support it. + * + * + * In order to abstract from the actual DS and BTS layout, we describe + * the access to the relevant fields. + * Thanks to Andi Kleen for proposing this design. + * + * The implementation, however, is not as general as it might seem. In + * order to stay somewhat simple and efficient, we assume an + * underlying unsigned type (mostly a pointer type) and we expect the + * field to be at least as big as that type. + */ + +/* + * A special from_ip address to indicate that the BTS record is an + * info record that needs to be interpreted or skipped. + */ +#define BTS_ESCAPE_ADDRESS (-1) + +/* + * A field access descriptor + */ +struct access_desc { + unsigned char offset; + unsigned char size; +}; + +/* + * The configuration for a particular DS/BTS hardware implementation. + */ +struct ds_configuration { + /* the DS configuration */ + unsigned char sizeof_ds; + struct access_desc bts_buffer_base; + struct access_desc bts_index; + struct access_desc bts_absolute_maximum; + struct access_desc bts_interrupt_threshold; + /* the BTS configuration */ + unsigned char sizeof_bts; + struct access_desc from_ip; + struct access_desc to_ip; + /* BTS variants used to store additional information like + timestamps */ + struct access_desc info_type; + struct access_desc info_data; + unsigned long debugctl_mask; +}; + +/* + * The global configuration used by the below accessor functions + */ +static struct ds_configuration ds_cfg; + +/* + * Accessor functions for some DS and BTS fields using the above + * global ds_cfg.
+ */ +static inline void *get_bts_buffer_base(char *base) +{ + return *(void **)(base + ds_cfg.bts_buffer_base.offset); +} +static inline void set_bts_buffer_base(char *base, void *value) +{ + (*(void **)(base + ds_cfg.bts_buffer_base.offset)) = value; +} +static inline void *get_bts_index(char *base) +{ + return *(void **)(base + ds_cfg.bts_index.offset); +} +static inline void set_bts_index(char *base, void *value) +{ + (*(void **)(base + ds_cfg.bts_index.offset)) = value; +} +static inline void *get_bts_absolute_maximum(char *base) +{ + return *(void **)(base + ds_cfg.bts_absolute_maximum.offset); +} +static inline void set_bts_absolute_maximum(char *base, void *value) +{ + (*(void **)(base + ds_cfg.bts_absolute_maximum.offset)) = value; +} +static inline void *get_bts_interrupt_threshold(char *base) +{ + return *(void **)(base + ds_cfg.bts_interrupt_threshold.offset); +} +static inline void set_bts_interrupt_threshold(char *base, void *value) +{ + (*(void **)(base + ds_cfg.bts_interrupt_threshold.offset)) = value; +} +static inline long get_from_ip(char *base) +{ + return *(long *)(base + ds_cfg.from_ip.offset); +} +static inline void set_from_ip(char *base, long value) +{ + (*(long *)(base + ds_cfg.from_ip.offset)) = value; +} +static inline long get_to_ip(char *base) +{ + return *(long *)(base + ds_cfg.to_ip.offset); +} +static inline void set_to_ip(char *base, long value) +{ + (*(long *)(base + ds_cfg.to_ip.offset)) = value; +} +static inline unsigned char get_info_type(char *base) +{ + return *(unsigned char *)(base + ds_cfg.info_type.offset); +} +static inline void set_info_type(char *base, unsigned char value) +{ + (*(unsigned char *)(base + ds_cfg.info_type.offset)) = value; +} +/* + * The info data might overlap with the info type on some architectures. + * We therefore read and write the exact number of bytes. 
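Editor's note: of the accessors above, none consults the descriptor's size field; only get/set_info_data below do, because the 7-byte payload matches no C type. The others read a pointer-sized value with one cast, which is the "not as general as it might seem" caveat from the header comment. A fully general (and slower) accessor would look like this sketch (hypothetical name, editor's example):

static inline unsigned long long field_read(const char *base,
                                            struct access_desc desc)
{
        unsigned long long value = 0;

        /* copy exactly desc.size bytes; correct for any field width
         * up to 8 bytes, assuming the little-endian layout of x86 */
        memcpy(&value, base + desc.offset, desc.size);
        return value;
}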
+ */ +static inline unsigned long long get_info_data(char *base) +{ + unsigned long long value = 0; + memcpy(&value, + base + ds_cfg.info_data.offset, + ds_cfg.info_data.size); + return value; +} +static inline void set_info_data(char *base, unsigned long long value) +{ + memcpy(base + ds_cfg.info_data.offset, + &value, + ds_cfg.info_data.size); +} + + +int ds_allocate(void **dsp, size_t bts_size_in_records) +{ + size_t bts_size_in_bytes = 0; + void *bts = 0; + void *ds = 0; + + if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) + return -EOPNOTSUPP; + + if (bts_size_in_records < 0) + return -EINVAL; + + bts_size_in_bytes = + bts_size_in_records * ds_cfg.sizeof_bts; + + if (bts_size_in_bytes <= 0) + return -EINVAL; + + bts = kzalloc(bts_size_in_bytes, GFP_KERNEL); + + if (!bts) + return -ENOMEM; + + ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); + + if (!ds) { + kfree(bts); + return -ENOMEM; + } + + set_bts_buffer_base(ds, bts); + set_bts_index(ds, bts); + set_bts_absolute_maximum(ds, bts + bts_size_in_bytes); + set_bts_interrupt_threshold(ds, bts + bts_size_in_bytes + 1); + + *dsp = ds; + return 0; +} + +int ds_free(void **dsp) +{ + if (*dsp) + kfree(get_bts_buffer_base(*dsp)); + kfree(*dsp); + *dsp = 0; + + return 0; +} + +int ds_get_bts_size(void *ds) +{ + size_t size_in_bytes; + + if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) + return -EOPNOTSUPP; + + size_in_bytes = + get_bts_absolute_maximum(ds) - + get_bts_buffer_base(ds); + + return size_in_bytes / ds_cfg.sizeof_bts; +} + +int ds_get_bts_index(void *ds) +{ + size_t index_offset_in_bytes; + + if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) + return -EOPNOTSUPP; + + index_offset_in_bytes = + get_bts_index(ds) - + get_bts_buffer_base(ds); + + return index_offset_in_bytes / ds_cfg.sizeof_bts; +} + +int ds_read_bts(void *ds, size_t index, struct bts_struct *out) +{ + void *bts; + + if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) + return -EOPNOTSUPP; + + if (index < 0) + return -EINVAL; + + if (index >= ds_get_bts_size(ds)) + return -EINVAL; + + bts = get_bts_buffer_base(ds); + bts = (char *)bts + (index * ds_cfg.sizeof_bts); + + memset(out, 0, sizeof(*out)); + if (get_from_ip(bts) == BTS_ESCAPE_ADDRESS) { + out->qualifier = get_info_type(bts); + out->variant.timestamp = get_info_data(bts); + } else { + out->qualifier = BTS_BRANCH; + out->variant.lbr.from_ip = get_from_ip(bts); + out->variant.lbr.to_ip = get_to_ip(bts); + } + + return 0; +} + +int ds_write_bts(void *ds, const struct bts_struct *in) +{ + void *bts; + + if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) + return -EOPNOTSUPP; + + if (ds_get_bts_size(ds) <= 0) + return -ENXIO; + + bts = get_bts_index(ds); + + memset(bts, 0, ds_cfg.sizeof_bts); + switch (in->qualifier) { + case BTS_INVALID: + break; + + case BTS_BRANCH: + set_from_ip(bts, in->variant.lbr.from_ip); + set_to_ip(bts, in->variant.lbr.to_ip); + break; + + case BTS_TASK_ARRIVES: + case BTS_TASK_DEPARTS: + set_from_ip(bts, BTS_ESCAPE_ADDRESS); + set_info_type(bts, in->qualifier); + set_info_data(bts, in->variant.timestamp); + break; + + default: + return -EINVAL; + } + + bts = (char *)bts + ds_cfg.sizeof_bts; + if (bts >= get_bts_absolute_maximum(ds)) + bts = get_bts_buffer_base(ds); + set_bts_index(ds, bts); + + return 0; +} + +unsigned long ds_debugctl_mask(void) +{ + return ds_cfg.debugctl_mask; +} + +#ifdef __i386__ +static const struct ds_configuration ds_cfg_netburst = { + .sizeof_ds = 9 * 4, + .bts_buffer_base = { 0, 4 }, + .bts_index = { 4, 4 }, + .bts_absolute_maximum = { 8, 4 }, + .bts_interrupt_threshold = { 12, 4 }, + 
.sizeof_bts = 3 * 4, + .from_ip = { 0, 4 }, + .to_ip = { 4, 4 }, + .info_type = { 4, 1 }, + .info_data = { 5, 7 }, + .debugctl_mask = (1<<2)|(1<<3) +}; + +static const struct ds_configuration ds_cfg_pentium_m = { + .sizeof_ds = 9 * 4, + .bts_buffer_base = { 0, 4 }, + .bts_index = { 4, 4 }, + .bts_absolute_maximum = { 8, 4 }, + .bts_interrupt_threshold = { 12, 4 }, + .sizeof_bts = 3 * 4, + .from_ip = { 0, 4 }, + .to_ip = { 4, 4 }, + .info_type = { 4, 1 }, + .info_data = { 5, 7 }, + .debugctl_mask = (1<<6)|(1<<7) +}; +#endif /* _i386_ */ + +static const struct ds_configuration ds_cfg_core2 = { + .sizeof_ds = 9 * 8, + .bts_buffer_base = { 0, 8 }, + .bts_index = { 8, 8 }, + .bts_absolute_maximum = { 16, 8 }, + .bts_interrupt_threshold = { 24, 8 }, + .sizeof_bts = 3 * 8, + .from_ip = { 0, 8 }, + .to_ip = { 8, 8 }, + .info_type = { 8, 1 }, + .info_data = { 9, 7 }, + .debugctl_mask = (1<<6)|(1<<7)|(1<<9) +}; + +static inline void +ds_configure(const struct ds_configuration *cfg) +{ + ds_cfg = *cfg; +} + +void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) +{ + switch (c->x86) { + case 0x6: + switch (c->x86_model) { +#ifdef __i386__ + case 0xD: + case 0xE: /* Pentium M */ + ds_configure(&ds_cfg_pentium_m); + break; +#endif /* _i386_ */ + case 0xF: /* Core2 */ + ds_configure(&ds_cfg_core2); + break; + default: + /* sorry, don't know about them */ + break; + } + break; + case 0xF: + switch (c->x86_model) { +#ifdef __i386__ + case 0x0: + case 0x1: + case 0x2: /* Netburst */ + ds_configure(&ds_cfg_netburst); + break; +#endif /* _i386_ */ + default: + /* sorry, don't know about them */ + break; + } + break; + default: + /* sorry, don't know about them */ + break; + } +} diff -puN arch/x86/kernel/e820_32.c~git-x86 arch/x86/kernel/e820_32.c --- a/arch/x86/kernel/e820_32.c~git-x86 +++ a/arch/x86/kernel/e820_32.c @@ -37,26 +37,6 @@ unsigned long pci_mem_start = 0x10000000 EXPORT_SYMBOL(pci_mem_start); #endif extern int user_defined_memmap; -struct resource data_resource = { - .name = "Kernel data", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; - -struct resource code_resource = { - .name = "Kernel code", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; - -struct resource bss_resource = { - .name = "Kernel bss", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; static struct resource system_rom_resource = { .name = "System ROM", @@ -111,60 +91,6 @@ static struct resource video_rom_resourc .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM }; -static struct resource video_ram_resource = { - .name = "Video RAM area", - .start = 0xa0000, - .end = 0xbffff, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; - -static struct resource standard_io_resources[] = { { - .name = "dma1", - .start = 0x0000, - .end = 0x001f, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "pic1", - .start = 0x0020, - .end = 0x0021, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "timer0", - .start = 0x0040, - .end = 0x0043, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "timer1", - .start = 0x0050, - .end = 0x0053, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "keyboard", - .start = 0x0060, - .end = 0x006f, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "dma page reg", - .start = 0x0080, - .end = 0x008f, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "pic2", - .start = 0x00a0, - .end = 0x00a1, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "dma2", - .start = 0x00c0, - 
.end = 0x00df, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "fpu", - .start = 0x00f0, - .end = 0x00ff, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -} }; - #define ROMSIGNATURE 0xaa55 static int __init romsignature(const unsigned char *rom) @@ -260,10 +186,9 @@ static void __init probe_roms(void) * Request address space for all standard RAM and ROM resources * and also for regions reported as reserved by the e820. */ -static void __init -legacy_init_iomem_resources(struct resource *code_resource, - struct resource *data_resource, - struct resource *bss_resource) +void __init legacy_init_iomem_resources(struct resource *code_resource, + struct resource *data_resource, + struct resource *bss_resource) { int i; @@ -305,35 +230,6 @@ legacy_init_iomem_resources(struct resou } } -/* - * Request address space for all standard resources - * - * This is called just before pcibios_init(), which is also a - * subsys_initcall, but is linked in later (in arch/i386/pci/common.c). - */ -static int __init request_standard_resources(void) -{ - int i; - - printk("Setting up standard PCI resources\n"); - if (efi_enabled) - efi_initialize_iomem_resources(&code_resource, - &data_resource, &bss_resource); - else - legacy_init_iomem_resources(&code_resource, - &data_resource, &bss_resource); - - /* EFI systems may still have VGA */ - request_resource(&iomem_resource, &video_ram_resource); - - /* request I/O space for devices used on all i[345]86 PCs */ - for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) - request_resource(&ioport_resource, &standard_io_resources[i]); - return 0; -} - -subsys_initcall(request_standard_resources); - #if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION) /** * e820_mark_nosave_regions - Find the ranges of physical addresses that do not diff -puN arch/x86/kernel/e820_64.c~git-x86 arch/x86/kernel/e820_64.c --- a/arch/x86/kernel/e820_64.c~git-x86 +++ a/arch/x86/kernel/e820_64.c @@ -1,4 +1,4 @@ -/* +/* * Handle the memory map. * The functions here do the job until bootmem takes over. * @@ -26,47 +26,45 @@ #include #include #include +#include struct e820map e820; -/* +/* * PFN of last memory page. */ -unsigned long end_pfn; -EXPORT_SYMBOL(end_pfn); +unsigned long end_pfn; -/* +/* * end_pfn only includes RAM, while end_pfn_map includes all e820 entries. * The direct mapping extends to end_pfn_map, so that we can directly access * apertures, ACPI and other tables without having to play with fixmaps. - */ -unsigned long end_pfn_map; + */ +unsigned long end_pfn_map; -/* +/* * Last pfn which the user wants to use. */ static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT; -extern struct resource code_resource, data_resource, bss_resource; - -/* Check for some hardcoded bad areas that early boot is not allowed to touch */ +/* Check for some hardcoded bad areas that early boot is not allowed to touch */ static inline int bad_addr(unsigned long *addrp, unsigned long size) -{ - unsigned long addr = *addrp, last = addr + size; +{ + unsigned long addr = *addrp, last = addr + size; /* various gunk below that needed for SMP startup */ - if (addr < 0x8000) { + if (addr < 0x8000) { *addrp = PAGE_ALIGN(0x8000); - return 1; + return 1; } /* direct mapping tables of the kernel */ - if (last >= table_start<= table_start<= __pa_symbol(&_text) && addr < __pa_symbol(&_end)) { @@ -97,9 +95,9 @@ static inline int bad_addr(unsigned long return 1; } #endif - /* XXX ramdisk image here? */ + /* XXX ramdisk image here? 
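bad_addr() above is the early allocator's collision test: rather than merely failing, it bumps the candidate address past whichever early-boot object it hit and returns 1, and find_e820_area() below simply re-probes until the range comes back clean. The pattern, reduced to a stand-alone sketch with a hypothetical carve-out table:

struct carveout { unsigned long start, end; };  /* [start, end) */

static int bump_past(unsigned long *addr, unsigned long size,
                     const struct carveout *res, int nr)
{
        int i;

        for (i = 0; i < nr; i++) {
                if (*addr < res[i].end && *addr + size > res[i].start) {
                        *addr = res[i].end;     /* skip the obstacle */
                        return 1;               /* caller must re-check */
                }
        }
        return 0;                               /* range is clear */
}

A single bump can land inside the next reservation, which is why find_e820_area() spins in a bare while-loop on the check instead of calling it once.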
*/ return 0; -} +} /* * This function checks if any part of the range is mapped @@ -107,16 +105,18 @@ static inline int bad_addr(unsigned long */ int e820_any_mapped(unsigned long start, unsigned long end, unsigned type) -{ +{ int i; - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - if (type && ei->type != type) + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + + if (type && ei->type != type) continue; if (ei->addr >= end || ei->addr + ei->size <= start) - continue; - return 1; - } + continue; + return 1; + } return 0; } EXPORT_SYMBOL_GPL(e820_any_mapped); @@ -127,11 +127,14 @@ EXPORT_SYMBOL_GPL(e820_any_mapped); * Note: this function only works correct if the e820 table is sorted and * not-overlapping, which is the case */ -int __init e820_all_mapped(unsigned long start, unsigned long end, unsigned type) +int __init e820_all_mapped(unsigned long start, unsigned long end, + unsigned type) { int i; + for (i = 0; i < e820.nr_map; i++) { struct e820entry *ei = &e820.map[i]; + if (type && ei->type != type) continue; /* is the region (part) in overlap with the current region ?*/ @@ -143,65 +146,73 @@ int __init e820_all_mapped(unsigned long */ if (ei->addr <= start) start = ei->addr + ei->size; - /* if start is now at or beyond end, we're done, full coverage */ + /* + * if start is now at or beyond end, we're done, full + * coverage + */ if (start >= end) - return 1; /* we're done */ + return 1; } return 0; } -/* - * Find a free area in a specific range. - */ -unsigned long __init find_e820_area(unsigned long start, unsigned long end, unsigned size) -{ - int i; - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - unsigned long addr = ei->addr, last; - if (ei->type != E820_RAM) - continue; - if (addr < start) +/* + * Find a free area in a specific range. + */ +unsigned long __init find_e820_area(unsigned long start, unsigned long end, + unsigned size) +{ + int i; + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + unsigned long addr = ei->addr, last; + + if (ei->type != E820_RAM) + continue; + if (addr < start) addr = start; - if (addr > ei->addr + ei->size) - continue; + if (addr > ei->addr + ei->size) + continue; while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size) ; last = PAGE_ALIGN(addr) + size; if (last > ei->addr + ei->size) continue; - if (last > end) + if (last > end) continue; - return addr; - } - return -1UL; -} + return addr; + } + return -1UL; +} /* * Find the highest page frame number we have available */ unsigned long __init e820_end_of_ram(void) { - unsigned long end_pfn = 0; + unsigned long end_pfn; + end_pfn = find_max_pfn_with_active_regions(); - - if (end_pfn > end_pfn_map) + + if (end_pfn > end_pfn_map) end_pfn_map = end_pfn; if (end_pfn_map > MAXMEM>>PAGE_SHIFT) end_pfn_map = MAXMEM>>PAGE_SHIFT; if (end_pfn > end_user_pfn) end_pfn = end_user_pfn; - if (end_pfn > end_pfn_map) - end_pfn = end_pfn_map; + if (end_pfn > end_pfn_map) + end_pfn = end_pfn_map; - printk("end_pfn_map = %lu\n", end_pfn_map); - return end_pfn; + printk(KERN_INFO "end_pfn_map = %lu\n", end_pfn_map); + return end_pfn; } /* * Mark e820 reserved areas as busy for the resource manager. 
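The two predicates cleaned up above are easy to conflate: e820_any_mapped() succeeds if the range merely intersects a matching entry, while e820_all_mapped() succeeds only if the (sorted, non-overlapping) entries cover every byte of it. Reduced to a single region for illustration (editor's sketch):

/* does [start, end) touch the region [r_start, r_end) at all? */
static int any_overlap(unsigned long start, unsigned long end,
                       unsigned long r_start, unsigned long r_end)
{
        return start < r_end && r_start < end;
}

/* is [start, end) entirely inside [r_start, r_end)? */
static int all_inside(unsigned long start, unsigned long end,
                      unsigned long r_start, unsigned long r_end)
{
        return r_start <= start && end <= r_end;
}

With one E820_RAM entry at [0x1000, 0x9000), the range [0x8000, 0xa000) is "any" mapped but not "all" mapped.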
*/ -void __init e820_reserve_resources(void) +void __init e820_reserve_resources(struct resource *code_resource, + struct resource *data_resource, struct resource *bss_resource) { int i; for (i = 0; i < e820.nr_map; i++) { @@ -219,13 +230,13 @@ void __init e820_reserve_resources(void) request_resource(&iomem_resource, res); if (e820.map[i].type == E820_RAM) { /* - * We don't know which RAM region contains kernel data, - * so we try it repeatedly and let the resource manager - * test it. + * We don't know which RAM region contains kernel data, + * so we try it repeatedly and let the resource manager + * test it. */ - request_resource(res, &code_resource); - request_resource(res, &data_resource); - request_resource(res, &bss_resource); + request_resource(res, code_resource); + request_resource(res, data_resource); + request_resource(res, bss_resource); #ifdef CONFIG_KEXEC if (crashk_res.start != crashk_res.end) request_resource(res, &crashk_res); @@ -322,9 +333,9 @@ e820_register_active_regions(int nid, un add_active_range(nid, ei_startpfn, ei_endpfn); } -/* +/* * Add a memory region to the kernel e820 map. - */ + */ void __init add_memory_region(unsigned long start, unsigned long size, int type) { int x = e820.nr_map; @@ -349,9 +360,7 @@ unsigned long __init e820_hole_size(unsi { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long end_pfn = end >> PAGE_SHIFT; - unsigned long ei_startpfn; - unsigned long ei_endpfn; - unsigned long ram = 0; + unsigned long ei_startpfn, ei_endpfn, ram = 0; int i; for (i = 0; i < e820.nr_map; i++) { @@ -363,28 +372,31 @@ unsigned long __init e820_hole_size(unsi return end - start - (ram << PAGE_SHIFT); } -void __init e820_print_map(char *who) +static void __init e820_print_map(char *who) { int i; for (i = 0; i < e820.nr_map; i++) { printk(KERN_INFO " %s: %016Lx - %016Lx ", who, - (unsigned long long) e820.map[i].addr, - (unsigned long long) (e820.map[i].addr + e820.map[i].size)); + (unsigned long long) e820.map[i].addr, + (unsigned long long) + (e820.map[i].addr + e820.map[i].size)); switch (e820.map[i].type) { - case E820_RAM: printk("(usable)\n"); - break; + case E820_RAM: + printk(KERN_CONT "(usable)\n"); + break; case E820_RESERVED: - printk("(reserved)\n"); - break; + printk(KERN_CONT "(reserved)\n"); + break; case E820_ACPI: - printk("(ACPI data)\n"); - break; + printk(KERN_CONT "(ACPI data)\n"); + break; case E820_NVS: - printk("(ACPI NVS)\n"); - break; - default: printk("type %u\n", e820.map[i].type); - break; + printk(KERN_CONT "(ACPI NVS)\n"); + break; + default: + printk(KERN_CONT "type %u\n", e820.map[i].type); + break; } } } @@ -392,11 +404,11 @@ void __init e820_print_map(char *who) /* * Sanitize the BIOS e820 map. * - * Some e820 responses include overlapping entries. The following + * Some e820 responses include overlapping entries. The following * replaces the original e820 map with a new one, removing overlaps. * */ -static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) +static int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map) { struct change_member { struct e820entry *pbios; /* pointer to original bios entry */ @@ -416,7 +428,8 @@ static int __init sanitize_e820_map(stru int i; /* - Visually we're performing the following (1,2,3,4 = memory types)... + Visually we're performing the following + (1,2,3,4 = memory types)... 
Sample memory map (w/overlaps): ____22__________________ @@ -458,22 +471,23 @@ static int __init sanitize_e820_map(stru old_nr = *pnr_map; /* bail out if we find any unreasonable addresses in bios map */ - for (i=0; iaddr = biosmap[i].addr; change_point[chgidx++]->pbios = &biosmap[i]; - change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size; + change_point[chgidx]->addr = biosmap[i].addr + + biosmap[i].size; change_point[chgidx++]->pbios = &biosmap[i]; } } @@ -483,75 +497,106 @@ static int __init sanitize_e820_map(stru still_changing = 1; while (still_changing) { still_changing = 0; - for (i=1; i < chg_nr; i++) { - /* if > , swap */ - /* or, if current= & last=, swap */ - if ((change_point[i]->addr < change_point[i-1]->addr) || - ((change_point[i]->addr == change_point[i-1]->addr) && - (change_point[i]->addr == change_point[i]->pbios->addr) && - (change_point[i-1]->addr != change_point[i-1]->pbios->addr)) - ) - { + for (i = 1; i < chg_nr; i++) { + unsigned long long curaddr, lastaddr; + unsigned long long curpbaddr, lastpbaddr; + + curaddr = change_point[i]->addr; + lastaddr = change_point[i - 1]->addr; + curpbaddr = change_point[i]->pbios->addr; + lastpbaddr = change_point[i - 1]->pbios->addr; + + /* + * swap entries, when: + * + * curaddr > lastaddr or + * curaddr == lastaddr and curaddr == curpbaddr and + * lastaddr != lastpbaddr + */ + if (curaddr < lastaddr || + (curaddr == lastaddr && curaddr == curpbaddr && + lastaddr != lastpbaddr)) { change_tmp = change_point[i]; change_point[i] = change_point[i-1]; change_point[i-1] = change_tmp; - still_changing=1; + still_changing = 1; } } } /* create a new bios memory map, removing overlaps */ - overlap_entries=0; /* number of entries in the overlap table */ - new_bios_entry=0; /* index for creating new bios map entries */ + overlap_entries = 0; /* number of entries in the overlap table */ + new_bios_entry = 0; /* index for creating new bios map entries */ last_type = 0; /* start with undefined memory type */ last_addr = 0; /* start with 0 as last starting address */ + /* loop through change-points, determining affect on the new bios map */ - for (chgidx=0; chgidx < chg_nr; chgidx++) - { + for (chgidx = 0; chgidx < chg_nr; chgidx++) { /* keep track of all overlapping bios entries */ - if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr) - { - /* add map entry to overlap list (> 1 entry implies an overlap) */ - overlap_list[overlap_entries++]=change_point[chgidx]->pbios; - } - else - { - /* remove entry from list (order independent, so swap with last) */ - for (i=0; ipbios) - overlap_list[i] = overlap_list[overlap_entries-1]; + if (change_point[chgidx]->addr == + change_point[chgidx]->pbios->addr) { + /* + * add map entry to overlap list (> 1 entry + * implies an overlap) + */ + overlap_list[overlap_entries++] = + change_point[chgidx]->pbios; + } else { + /* + * remove entry from list (order independent, + * so swap with last) + */ + for (i = 0; i < overlap_entries; i++) { + if (overlap_list[i] == + change_point[chgidx]->pbios) + overlap_list[i] = + overlap_list[overlap_entries-1]; } overlap_entries--; } - /* if there are overlapping entries, decide which "type" to use */ - /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */ + /* + * if there are overlapping entries, decide which + * "type" to use (larger value takes precedence -- + * 1=usable, 2,3,4,4+=unusable) + */ current_type = 0; - for (i=0; itype > current_type) current_type = overlap_list[i]->type; - /* continue building up new bios map based on 
- /* continue building up new bios map based on this information */ + /* + * continue building up new bios map based on this + * information + */ if (current_type != last_type) { if (last_type != 0) { new_bios[new_bios_entry].size = change_point[chgidx]->addr - last_addr; - /* move forward only if the new size was non-zero */ + /* + * move forward only if the new size + * was non-zero + */ if (new_bios[new_bios_entry].size != 0) + /* + * no more space left for new + * bios entries ? + */ if (++new_bios_entry >= E820MAX) - break; /* no more space left for new bios entries */ + break; } if (current_type != 0) { - new_bios[new_bios_entry].addr = change_point[chgidx]->addr; + new_bios[new_bios_entry].addr = + change_point[chgidx]->addr; new_bios[new_bios_entry].type = current_type; - last_addr=change_point[chgidx]->addr; + last_addr = change_point[chgidx]->addr; } last_type = current_type; } } - new_nr = new_bios_entry; /* retain count for new bios entries */ + /* retain count for new bios entries */ + new_nr = new_bios_entry; /* copy new bios mapping into original location */ - memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry)); + memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry)); *pnr_map = new_nr; return 0; @@ -566,7 +611,7 @@ static int __init sanitize_e820_map(stru * will have given us a memory map that we can use to properly * set up memory. If we aren't, we'll fake a memory map. */ -static int __init copy_e820_map(struct e820entry * biosmap, int nr_map) +static int __init copy_e820_map(struct e820entry *biosmap, int nr_map) { /* Only one memory region (or negative)? Ignore it */ if (nr_map < 2) @@ -583,11 +628,11 @@ static int __init copy_e820_map(struct e return -1; add_memory_region(start, size, type); - } while (biosmap++,--nr_map); + } while (biosmap++, --nr_map); return 0; } -void early_panic(char *msg) +static void early_panic(char *msg) { early_printk(msg); panic(msg); @@ -613,9 +658,9 @@ static int __init parse_memopt(char *p) if (!p) return -EINVAL; end_user_pfn = memparse(p, &p); - end_user_pfn >>= PAGE_SHIFT; + end_user_pfn >>= PAGE_SHIFT; return 0; -} +} early_param("mem", parse_memopt); static int userdef __initdata; @@ -627,9 +672,9 @@ static int __init parse_memmap_opt(char if (!strcmp(p, "exactmap")) { #ifdef CONFIG_CRASH_DUMP - /* If we are doing a crash dump, we - * still need to know the real mem - * size before original memory map is + /* + * If we are doing a crash dump, we still need to know + * the real mem size before original memory map is * reset. 
*/ e820_register_active_regions(0, 0, -1UL); @@ -646,6 +691,8 @@ static int __init parse_memmap_opt(char mem_size = memparse(p, &p); if (p == oldp) return -EINVAL; + + userdef = 1; if (*p == '@') { start_at = memparse(p+1, &p); add_memory_region(start_at, mem_size, E820_RAM); @@ -665,6 +712,12 @@ early_param("memmap", parse_memmap_opt); void __init finish_e820_parsing(void) { if (userdef) { + char nr = e820.nr_map; + + if (sanitize_e820_map(e820.map, &nr) < 0) + early_panic("Invalid user supplied memory map"); + e820.nr_map = nr; + printk(KERN_INFO "user-defined physical RAM map:\n"); e820_print_map("user"); } @@ -713,8 +766,10 @@ __init void e820_setup_gap(void) if (!found) { gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024; - printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit address range\n" - KERN_ERR "PCI: Unassigned devices with 32bit resource registers may break!\n"); + printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit " + "address range\n" + KERN_ERR "PCI: Unassigned devices with 32bit resource " + "registers may break!\n"); } /* @@ -727,8 +782,9 @@ __init void e820_setup_gap(void) /* Fun with two's complement */ pci_mem_start = (gapstart + round) & -round; - printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", - pci_mem_start, gapstart, gapsize); + printk(KERN_INFO + "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", + pci_mem_start, gapstart, gapsize); } int __init arch_get_ram_range(int slot, u64 *addr, u64 *size) diff -puN arch/x86/kernel/entry_32.S~git-x86 arch/x86/kernel/entry_32.S --- a/arch/x86/kernel/entry_32.S~git-x86 +++ a/arch/x86/kernel/entry_32.S @@ -58,7 +58,7 @@ * for paravirtualization. The following will never clobber any registers: * INTERRUPT_RETURN (aka. "iret") * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax") - * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit"). + * ENABLE_INTERRUPTS_SYSCALL_RET (aka "sti; sysexit"). * * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY). @@ -283,12 +283,12 @@ END(resume_kernel) the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ # sysenter call handler stub -ENTRY(sysenter_entry) +ENTRY(ia32_sysenter_target) CFI_STARTPROC simple CFI_SIGNAL_FRAME CFI_DEF_CFA esp, 0 CFI_REGISTER esp, ebp - movl TSS_sysenter_esp0(%esp),%esp + movl TSS_sysenter_sp0(%esp),%esp sysenter_past_esp: /* * No need to follow this irqs on/off section: the syscall @@ -351,7 +351,7 @@ sysenter_past_esp: xorl %ebp,%ebp TRACE_IRQS_ON 1: mov PT_FS(%esp), %fs - ENABLE_INTERRUPTS_SYSEXIT + ENABLE_INTERRUPTS_SYSCALL_RET CFI_ENDPROC .pushsection .fixup,"ax" 2: movl $0,PT_FS(%esp) @@ -360,7 +360,7 @@ sysenter_past_esp: .align 4 .long 1b,2b .popsection -ENDPROC(sysenter_entry) +ENDPROC(ia32_sysenter_target) # system call handler stub ENTRY(system_call) @@ -743,7 +743,7 @@ END(device_not_available) * that sets up the real kernel stack. Check here, since we can't * allow the wrong stack to be used. * - * "TSS_sysenter_esp0+12" is because the NMI/debug handler will have + * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have * already pushed 3 words if it hits on the sysenter instruction: * eflags, cs and eip. 
* @@ -755,7 +755,7 @@ END(device_not_available) cmpw $__KERNEL_CS,4(%esp); \ jne ok; \ label: \ - movl TSS_sysenter_esp0+offset(%esp),%esp; \ + movl TSS_sysenter_sp0+offset(%esp),%esp; \ CFI_DEF_CFA esp, 0; \ CFI_UNDEFINED eip; \ pushfl; \ @@ -768,7 +768,7 @@ label: \ KPROBE_ENTRY(debug) RING0_INT_FRAME - cmpl $sysenter_entry,(%esp) + cmpl $ia32_sysenter_target,(%esp) jne debug_stack_correct FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) debug_stack_correct: @@ -799,7 +799,7 @@ KPROBE_ENTRY(nmi) popl %eax CFI_ADJUST_CFA_OFFSET -4 je nmi_espfix_stack - cmpl $sysenter_entry,(%esp) + cmpl $ia32_sysenter_target,(%esp) je nmi_stack_fixup pushl %eax CFI_ADJUST_CFA_OFFSET 4 @@ -812,7 +812,7 @@ KPROBE_ENTRY(nmi) popl %eax CFI_ADJUST_CFA_OFFSET -4 jae nmi_stack_correct - cmpl $sysenter_entry,12(%esp) + cmpl $ia32_sysenter_target,12(%esp) je nmi_debug_stack_check nmi_stack_correct: /* We have a RING0_INT_FRAME here */ @@ -882,10 +882,10 @@ ENTRY(native_iret) .previous END(native_iret) -ENTRY(native_irq_enable_sysexit) +ENTRY(native_irq_enable_syscall_ret) sti sysexit -END(native_irq_enable_sysexit) +END(native_irq_enable_syscall_ret) #endif KPROBE_ENTRY(int3) diff -puN arch/x86/kernel/geode_32.c~git-x86 arch/x86/kernel/geode_32.c --- a/arch/x86/kernel/geode_32.c~git-x86 +++ a/arch/x86/kernel/geode_32.c @@ -1,6 +1,7 @@ /* * AMD Geode southbridge support code * Copyright (C) 2006, Advanced Micro Devices, Inc. + * Copyright (C) 2007, Andres Salomon * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public License @@ -51,45 +52,62 @@ EXPORT_SYMBOL_GPL(geode_get_dev_base); /* === GPIO API === */ -void geode_gpio_set(unsigned int gpio, unsigned int reg) +void geode_gpio_set(u32 gpio, unsigned int reg) { u32 base = geode_get_dev_base(GEODE_DEV_GPIO); if (!base) return; - if (gpio < 16) - outl(1 << gpio, base + reg); - else - outl(1 << (gpio - 16), base + 0x80 + reg); + /* low bank register */ + if (gpio & 0xFFFF) + outl(gpio & 0xFFFF, base + reg); + /* high bank register */ + gpio >>= 16; + if (gpio) + outl(gpio, base + 0x80 + reg); } EXPORT_SYMBOL_GPL(geode_gpio_set); -void geode_gpio_clear(unsigned int gpio, unsigned int reg) +void geode_gpio_clear(u32 gpio, unsigned int reg) { u32 base = geode_get_dev_base(GEODE_DEV_GPIO); if (!base) return; - if (gpio < 16) - outl(1 << (gpio + 16), base + reg); - else - outl(1 << gpio, base + 0x80 + reg); + /* low bank register */ + if (gpio & 0xFFFF) + outl((gpio & 0xFFFF) << 16, base + reg); + /* high bank register */ + gpio &= (0xFFFF << 16); + if (gpio) + outl(gpio, base + 0x80 + reg); } EXPORT_SYMBOL_GPL(geode_gpio_clear); -int geode_gpio_isset(unsigned int gpio, unsigned int reg) +int geode_gpio_isset(u32 gpio, unsigned int reg) { u32 base = geode_get_dev_base(GEODE_DEV_GPIO); + u32 val; if (!base) return 0; - if (gpio < 16) - return (inl(base + reg) & (1 << gpio)) ? 1 : 0; - else - return (inl(base + 0x80 + reg) & (1 << (gpio - 16))) ? 
1 : 0; + /* low bank register */ + if (gpio & 0xFFFF) { + val = inl(base + reg) & (gpio & 0xFFFF); + if ((gpio & 0xFFFF) == val) + return 1; + } + /* high bank register */ + gpio >>= 16; + if (gpio) { + val = inl(base + 0x80 + reg) & gpio; + if (gpio == val) + return 1; + } + return 0; } EXPORT_SYMBOL_GPL(geode_gpio_isset); diff -puN arch/x86/kernel/head64.c~git-x86 arch/x86/kernel/head64.c --- a/arch/x86/kernel/head64.c~git-x86 +++ a/arch/x86/kernel/head64.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -19,6 +20,7 @@ #include #include #include +#include static void __init zap_identity_mappings(void) { @@ -67,8 +69,6 @@ void __init x86_64_start_kernel(char * r pda_init(0); copy_bootdata(__va(real_mode_data)); -#ifdef CONFIG_SMP - cpu_set(0, cpu_online_map); -#endif + start_kernel(); } diff -puN arch/x86/kernel/hpet.c~git-x86 arch/x86/kernel/hpet.c --- a/arch/x86/kernel/hpet.c~git-x86 +++ a/arch/x86/kernel/hpet.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include @@ -16,7 +15,8 @@ #define HPET_MASK CLOCKSOURCE_MASK(32) #define HPET_SHIFT 22 -/* FSEC = 10^-15 NSEC = 10^-9 */ +/* FSEC = 10^-15 + NSEC = 10^-9 */ #define FSEC_PER_NSEC 1000000 /* diff -puN arch/x86/kernel/i386_ksyms_32.c~git-x86 arch/x86/kernel/i386_ksyms_32.c --- a/arch/x86/kernel/i386_ksyms_32.c~git-x86 +++ a/arch/x86/kernel/i386_ksyms_32.c @@ -22,12 +22,5 @@ EXPORT_SYMBOL(__put_user_8); EXPORT_SYMBOL(strstr); -#ifdef CONFIG_SMP -extern void FASTCALL( __write_lock_failed(rwlock_t *rw)); -extern void FASTCALL( __read_lock_failed(rwlock_t *rw)); -EXPORT_SYMBOL(__write_lock_failed); -EXPORT_SYMBOL(__read_lock_failed); -#endif - EXPORT_SYMBOL(csum_partial); EXPORT_SYMBOL(empty_zero_page); diff -puN arch/x86/kernel/i8253.c~git-x86 arch/x86/kernel/i8253.c --- a/arch/x86/kernel/i8253.c~git-x86 +++ a/arch/x86/kernel/i8253.c @@ -31,9 +31,7 @@ struct clock_event_device *global_clock_ static void init_pit_timer(enum clock_event_mode mode, struct clock_event_device *evt) { - unsigned long flags; - - spin_lock_irqsave(&i8253_lock, flags); + spin_lock(&i8253_lock); switch(mode) { case CLOCK_EVT_MODE_PERIODIC: @@ -62,7 +60,7 @@ static void init_pit_timer(enum clock_ev /* Nothing to do here */ break; } - spin_unlock_irqrestore(&i8253_lock, flags); + spin_unlock(&i8253_lock); } /* @@ -72,12 +70,10 @@ static void init_pit_timer(enum clock_ev */ static int pit_next_event(unsigned long delta, struct clock_event_device *evt) { - unsigned long flags; - - spin_lock_irqsave(&i8253_lock, flags); + spin_lock(&i8253_lock); outb_p(delta & 0xff , PIT_CH0); /* LSB */ outb(delta >> 8 , PIT_CH0); /* MSB */ - spin_unlock_irqrestore(&i8253_lock, flags); + spin_unlock(&i8253_lock); return 0; } diff -puN arch/x86/kernel/i8259_32.c~git-x86 arch/x86/kernel/i8259_32.c --- a/arch/x86/kernel/i8259_32.c~git-x86 +++ a/arch/x86/kernel/i8259_32.c @@ -21,8 +21,6 @@ #include #include -#include - /* * This is the 'legacy' 8259A Programmable Interrupt Controller, * present in the majority of PC/AT boxes. 
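A note on the geode_gpio_*() rework above: the argument is now a u32 bitmask covering both 16-pin GPIO banks rather than a single pin number, so one call can drive several pins at once, and geode_gpio_isset() now reports success only when every requested pin reads back as set. A sketch of the resulting calling convention; the GPIO_PIN() helper and the GPIO_REG_OUT register name are illustrative, not taken from the patch:

	/* pin number -> bank bitmask: pins 0-15 select the low bank,
	 * pins 16-31 the high bank at base + 0x80 */
	#define GPIO_PIN(nr)	(1U << (nr))

	/* drive pin 3 (low bank) and pin 17 (high bank) in one call */
	geode_gpio_set(GPIO_PIN(3) | GPIO_PIN(17), GPIO_REG_OUT);

	/* true only if *both* pins read back as set */
	if (geode_gpio_isset(GPIO_PIN(3) | GPIO_PIN(17), GPIO_REG_OUT))
		/* ... */;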
@@ -341,7 +339,7 @@ static irqreturn_t math_error_irq(int cp outb(0,0xF0); if (ignore_fpu_irq || !boot_cpu_data.hard_math) return IRQ_NONE; - math_error((void __user *)get_irq_regs()->eip); + math_error((void __user *)get_irq_regs()->ip); return IRQ_HANDLED; } diff -puN arch/x86/kernel/i8259_64.c~git-x86 arch/x86/kernel/i8259_64.c --- a/arch/x86/kernel/i8259_64.c~git-x86 +++ a/arch/x86/kernel/i8259_64.c @@ -21,6 +21,7 @@ #include #include #include +#include /* * Common place to define all x86 IRQ vectors @@ -48,7 +49,7 @@ */ /* - * The IO-APIC gives us many more interrupt sources. Most of these + * The IO-APIC gives us many more interrupt sources. Most of these * are unused but an SMP system is supposed to have enough memory ... * sometimes (mostly wrt. hw bugs) we get corrupted vectors all * across the spectrum, so we really want to be prepared to get all @@ -114,11 +115,7 @@ static struct irq_chip i8259A_chip = { /* * This contains the irq mask for both 8259A irq controllers, */ -static unsigned int cached_irq_mask = 0xffff; - -#define __byte(x,y) (((unsigned char *)&(y))[x]) -#define cached_21 (__byte(0,cached_irq_mask)) -#define cached_A1 (__byte(1,cached_irq_mask)) +unsigned int cached_irq_mask = 0xffff; /* * Not all IRQs can be routed through the IO-APIC, eg. on certain (older) @@ -139,9 +136,9 @@ void disable_8259A_irq(unsigned int irq) spin_lock_irqsave(&i8259A_lock, flags); cached_irq_mask |= mask; if (irq & 8) - outb(cached_A1,0xA1); + outb(cached_slave_mask, PIC_SLAVE_IMR); else - outb(cached_21,0x21); + outb(cached_master_mask, PIC_MASTER_IMR); spin_unlock_irqrestore(&i8259A_lock, flags); } @@ -153,9 +150,9 @@ void enable_8259A_irq(unsigned int irq) spin_lock_irqsave(&i8259A_lock, flags); cached_irq_mask &= mask; if (irq & 8) - outb(cached_A1,0xA1); + outb(cached_slave_mask, PIC_SLAVE_IMR); else - outb(cached_21,0x21); + outb(cached_master_mask, PIC_MASTER_IMR); spin_unlock_irqrestore(&i8259A_lock, flags); } @@ -167,9 +164,9 @@ int i8259A_irq_pending(unsigned int irq) spin_lock_irqsave(&i8259A_lock, flags); if (irq < 8) - ret = inb(0x20) & mask; + ret = inb(PIC_MASTER_CMD) & mask; else - ret = inb(0xA0) & (mask >> 8); + ret = inb(PIC_SLAVE_CMD) & (mask >> 8); spin_unlock_irqrestore(&i8259A_lock, flags); return ret; @@ -196,14 +193,14 @@ static inline int i8259A_irq_real(unsign int irqmask = 1<<irq; if (irq < 8) { - outb(0x0B,0x20); /* ISR register */ - value = inb(0x20) & irqmask; - outb(0x0A,0x20); /* back to the IRR register */ + outb(0x0B,PIC_MASTER_CMD); /* ISR register */ + value = inb(PIC_MASTER_CMD) & irqmask; + outb(0x0A,PIC_MASTER_CMD); /* back to the IRR register */ return value; } - outb(0x0B,0xA0); /* ISR register */ - value = inb(0xA0) & (irqmask >> 8); - outb(0x0A,0xA0); /* back to the IRR register */ + outb(0x0B,PIC_SLAVE_CMD); /* ISR register */ + value = inb(PIC_SLAVE_CMD) & (irqmask >> 8); + outb(0x0A,PIC_SLAVE_CMD); /* back to the IRR register */ return value; } @@ -240,14 +237,17 @@ static void mask_and_ack_8259A(unsigned handle_real_irq: if (irq & 8) { - inb(0xA1); /* DUMMY - (do we need this?) */ - outb(cached_A1,0xA1); - outb(0x60+(irq&7),0xA0);/* 'Specific EOI' to slave */ - outb(0x62,0x20); /* 'Specific EOI' to master-IRQ2 */ + inb(PIC_SLAVE_IMR); /* DUMMY - (do we need this?) */ + outb(cached_slave_mask, PIC_SLAVE_IMR); + /* 'Specific EOI' to slave */ + outb(0x60+(irq&7),PIC_SLAVE_CMD); + /* 'Specific EOI' to master-IRQ2 */ + outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); } else { - inb(0x21); /* DUMMY - (do we need this?) */ - outb(cached_21,0x21); - outb(0x60+irq,0x20); /* 'Specific EOI' to master */ + inb(PIC_MASTER_IMR); /* DUMMY - (do we need this?) */ + outb(cached_master_mask, PIC_MASTER_IMR); + /* 'Specific EOI' to master */ + outb(0x60+irq,PIC_MASTER_CMD); } spin_unlock_irqrestore(&i8259A_lock, flags); return; @@ -270,7 +270,8 @@ spurious_8259A_irq: * lets ACK and report it. 
[once per IRQ] */ if (!(spurious_irq_mask & irqmask)) { - printk(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq); + printk(KERN_DEBUG + "spurious 8259A interrupt: IRQ%d.\n", irq); spurious_irq_mask |= irqmask; } atomic_inc(&irq_err_count); @@ -283,51 +284,6 @@ spurious_8259A_irq: } } -void init_8259A(int auto_eoi) -{ - unsigned long flags; - - i8259A_auto_eoi = auto_eoi; - - spin_lock_irqsave(&i8259A_lock, flags); - - outb(0xff, 0x21); /* mask all of 8259A-1 */ - outb(0xff, 0xA1); /* mask all of 8259A-2 */ - - /* - * outb_p - this has to work on a wide range of PC hardware. - */ - outb_p(0x11, 0x20); /* ICW1: select 8259A-1 init */ - outb_p(IRQ0_VECTOR, 0x21); /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */ - outb_p(0x04, 0x21); /* 8259A-1 (the master) has a slave on IR2 */ - if (auto_eoi) - outb_p(0x03, 0x21); /* master does Auto EOI */ - else - outb_p(0x01, 0x21); /* master expects normal EOI */ - - outb_p(0x11, 0xA0); /* ICW1: select 8259A-2 init */ - outb_p(IRQ8_VECTOR, 0xA1); /* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */ - outb_p(0x02, 0xA1); /* 8259A-2 is a slave on master's IR2 */ - outb_p(0x01, 0xA1); /* (slave's support for AEOI in flat mode - is to be investigated) */ - - if (auto_eoi) - /* - * in AEOI mode we just have to mask the interrupt - * when acking. - */ - i8259A_chip.mask_ack = disable_8259A_irq; - else - i8259A_chip.mask_ack = mask_and_ack_8259A; - - udelay(100); /* wait for 8259A to initialize */ - - outb(cached_21, 0x21); /* restore master IRQ mask */ - outb(cached_A1, 0xA1); /* restore slave IRQ mask */ - - spin_unlock_irqrestore(&i8259A_lock, flags); -} - static char irq_trigger[2]; /** * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ @@ -364,8 +320,8 @@ static int i8259A_shutdown(struct sys_de * the kernel initialization code can get it * out of. */ - outb(0xff, 0x21); /* mask all of 8259A-1 */ - outb(0xff, 0xA1); /* mask all of 8259A-1 */ + outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ + outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */ return 0; } @@ -391,6 +347,58 @@ static int __init i8259A_init_sysfs(void device_initcall(i8259A_init_sysfs); +void init_8259A(int auto_eoi) +{ + unsigned long flags; + + i8259A_auto_eoi = auto_eoi; + + spin_lock_irqsave(&i8259A_lock, flags); + + outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ + outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ + + /* + * outb_p - this has to work on a wide range of PC hardware. + */ + outb_p(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ + /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */ + outb_p(IRQ0_VECTOR, PIC_MASTER_IMR); + /* 8259A-1 (the master) has a slave on IR2 */ + outb_p(0x04, PIC_MASTER_IMR); + if (auto_eoi) /* master does Auto EOI */ + outb_p(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR); + else /* master expects normal EOI */ + outb_p(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR); + + outb_p(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ + /* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */ + outb_p(IRQ8_VECTOR, PIC_SLAVE_IMR); + /* 8259A-2 is a slave on master's IR2 */ + outb_p(PIC_CASCADE_IR, PIC_SLAVE_IMR); + /* (slave's support for AEOI in flat mode is to be investigated) */ + outb_p(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); + + if (auto_eoi) + /* + * In AEOI mode we just have to mask the interrupt + * when acking. 
+ */ + i8259A_chip.mask_ack = disable_8259A_irq; + else + i8259A_chip.mask_ack = mask_and_ack_8259A; + + udelay(100); /* wait for 8259A to initialize */ + + outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ + outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ + + spin_unlock_irqrestore(&i8259A_lock, flags); +} + + + + /* * IRQ2 is cascade interrupt to second interrupt controller */ diff -puN arch/x86/kernel/init_task.c~git-x86 arch/x86/kernel/init_task.c --- a/arch/x86/kernel/init_task.c~git-x86 +++ a/arch/x86/kernel/init_task.c @@ -15,7 +15,6 @@ static struct files_struct init_files = static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); -EXPORT_SYMBOL(init_mm); /* * Initial thread structure. diff -puN arch/x86/kernel/io_apic_32.c~git-x86 arch/x86/kernel/io_apic_32.c --- a/arch/x86/kernel/io_apic_32.c~git-x86 +++ a/arch/x86/kernel/io_apic_32.c @@ -48,8 +48,6 @@ #include #include -#include "io_ports.h" - int (*ioapic_renumber_irq)(int ioapic, int irq); atomic_t irq_mis_count; diff -puN arch/x86/kernel/io_apic_64.c~git-x86 arch/x86/kernel/io_apic_64.c --- a/arch/x86/kernel/io_apic_64.c~git-x86 +++ a/arch/x86/kernel/io_apic_64.c @@ -35,6 +35,7 @@ #ifdef CONFIG_ACPI #include #endif +#include #include #include @@ -1069,7 +1070,7 @@ void __apicdebuginit print_local_APIC(vo v = apic_read(APIC_LVR); printk(KERN_INFO "... APIC VERSION: %08x\n", v); ver = GET_APIC_VERSION(v); - maxlvt = get_maxlvt(); + maxlvt = lapic_get_maxlvt(); v = apic_read(APIC_TASKPRI); printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); @@ -1171,7 +1172,7 @@ void __apicdebuginit print_PIC(void) #endif /* 0 */ -static void __init enable_IO_APIC(void) +void __init enable_IO_APIC(void) { union IO_APIC_reg_01 reg_01; int i8259_apic, i8259_pin; @@ -1408,7 +1409,7 @@ static void irq_complete_move(unsigned i if (likely(!cfg->move_in_progress)) return; - vector = ~get_irq_regs()->orig_rax; + vector = ~get_irq_regs()->orig_ax; me = smp_processor_id(); if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { cpumask_t cleanup_mask; @@ -1435,7 +1436,7 @@ static void ack_apic_level(unsigned int int do_unmask_irq = 0; irq_complete_move(irq); -#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE) +#ifdef CONFIG_GENERIC_PENDING_IRQ /* If we are moving the irq we need to mask it */ if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) { do_unmask_irq = 1; @@ -1780,7 +1781,10 @@ __setup("no_timer_check", notimercheck); void __init setup_IO_APIC(void) { - enable_IO_APIC(); + + /* + * calling enable_IO_APIC() is moved to setup_local_APIC for BP + */ if (acpi_ioapic) io_apic_irqs = ~0; /* all IRQs go through IOAPIC */ @@ -2280,3 +2284,92 @@ void __init setup_ioapic_dest(void) } #endif +#define IOAPIC_RESOURCE_NAME_SIZE 11 + +static struct resource *ioapic_resources; + +static struct resource * __init ioapic_setup_resources(void) +{ + unsigned long n; + struct resource *res; + char *mem; + int i; + + if (nr_ioapics <= 0) + return NULL; + + n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); + n *= nr_ioapics; + + mem = alloc_bootmem(n); + res = (void *)mem; + + if (mem != NULL) { + memset(mem, 0, n); + mem += sizeof(struct resource) * nr_ioapics; + + for (i = 0; i < nr_ioapics; i++) { + res[i].name = mem; + res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; + sprintf(mem, "IOAPIC %u", i); + mem += IOAPIC_RESOURCE_NAME_SIZE; + 
} + } + + ioapic_resources = res; + + return res; +} + +void __init ioapic_init_mappings(void) +{ + unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; + struct resource *ioapic_res; + int i; + + ioapic_res = ioapic_setup_resources(); + for (i = 0; i < nr_ioapics; i++) { + if (smp_found_config) { + ioapic_phys = mp_ioapics[i].mpc_apicaddr; + } else { + ioapic_phys = (unsigned long) + alloc_bootmem_pages(PAGE_SIZE); + ioapic_phys = __pa(ioapic_phys); + } + set_fixmap_nocache(idx, ioapic_phys); + apic_printk(APIC_VERBOSE, + "mapped IOAPIC to %016lx (%016lx)\n", + __fix_to_virt(idx), ioapic_phys); + idx++; + + if (ioapic_res != NULL) { + ioapic_res->start = ioapic_phys; + ioapic_res->end = ioapic_phys + (4 * 1024) - 1; + ioapic_res++; + } + } +} + +static int __init ioapic_insert_resources(void) +{ + int i; + struct resource *r = ioapic_resources; + + if (!r) { + printk(KERN_ERR + "IO APIC resources could be not be allocated.\n"); + return -1; + } + + for (i = 0; i < nr_ioapics; i++) { + insert_resource(&iomem_resource, r); + r++; + } + + return 0; +} + +/* Insert the IO APIC resources after PCI initialization has occured to handle + * IO APICS that are mapped in on a BAR in PCI space. */ +late_initcall(ioapic_insert_resources); + diff -puN arch/x86/kernel/ioport_32.c~git-x86 arch/x86/kernel/ioport_32.c --- a/arch/x86/kernel/ioport_32.c~git-x86 +++ a/arch/x86/kernel/ioport_32.c @@ -16,49 +16,27 @@ #include /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ -static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value) +static void set_bitmap(unsigned long *bitmap, unsigned int base, + unsigned int extent, int new_value) { - unsigned long mask; - unsigned long *bitmap_base = bitmap + (base / BITS_PER_LONG); - unsigned int low_index = base & (BITS_PER_LONG-1); - int length = low_index + extent; - - if (low_index != 0) { - mask = (~0UL << low_index); - if (length < BITS_PER_LONG) - mask &= ~(~0UL << length); - if (new_value) - *bitmap_base++ |= mask; - else - *bitmap_base++ &= ~mask; - length -= BITS_PER_LONG; - } - - mask = (new_value ? ~0UL : 0UL); - while (length >= BITS_PER_LONG) { - *bitmap_base++ = mask; - length -= BITS_PER_LONG; - } + unsigned int i; - if (length > 0) { - mask = ~(~0UL << length); + for (i = base; i < base + extent; i++) { if (new_value) - *bitmap_base++ |= mask; + __set_bit(i, bitmap); else - *bitmap_base++ &= ~mask; + __clear_bit(i, bitmap); } } - /* * this changes the io permissions bitmap in the current task. 
*/ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) { - unsigned long i, max_long, bytes, bytes_updated; struct thread_struct * t = &current->thread; struct tss_struct * tss; - unsigned long *bitmap; + unsigned long i, max_long; if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) return -EINVAL; @@ -71,7 +49,8 @@ asmlinkage long sys_ioperm(unsigned long * this is why we delay this operation until now: */ if (!t->io_bitmap_ptr) { - bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); + unsigned long *bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); + if (!bitmap) return -ENOMEM; @@ -100,10 +79,7 @@ asmlinkage long sys_ioperm(unsigned long if (t->io_bitmap_ptr[i] != ~0UL) max_long = i; - bytes = (max_long + 1) * sizeof(long); - bytes_updated = max(bytes, t->io_bitmap_max); - - t->io_bitmap_max = bytes; + t->io_bitmap_max = (max_long + 1) * sizeof(unsigned long); /* * Sets the lazy trigger so that the next I/O operation will @@ -124,17 +100,17 @@ asmlinkage long sys_ioperm(unsigned long * beyond the 0x3ff range: to get the full 65536 ports bitmapped * you'd need 8kB of bitmaps/process, which is a bit excessive. * - * Here we just change the eflags value on the stack: we allow + * Here we just change the flags value on the stack: we allow * only the super-user to do it. This depends on the stack-layout * on system-call entry - see also fork() and the signal handling * code. */ -asmlinkage long sys_iopl(unsigned long unused) +asmlinkage long sys_iopl(unsigned long regsp) { - volatile struct pt_regs * regs = (struct pt_regs *) &unused; - unsigned int level = regs->ebx; - unsigned int old = (regs->eflags >> 12) & 3; + volatile struct pt_regs *regs = (struct pt_regs *)&regsp; + unsigned int level = regs->bx; + unsigned int old = (regs->flags >> 12) & 3; struct thread_struct *t = &current->thread; if (level > 3) @@ -144,8 +120,10 @@ asmlinkage long sys_iopl(unsigned long u if (!capable(CAP_SYS_RAWIO)) return -EPERM; } + t->iopl = level << 12; - regs->eflags = (regs->eflags & ~X86_EFLAGS_IOPL) | t->iopl; + regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | t->iopl; set_iopl_mask(t->iopl); + return 0; } diff -puN arch/x86/kernel/ioport_64.c~git-x86 arch/x86/kernel/ioport_64.c --- a/arch/x86/kernel/ioport_64.c~git-x86 +++ a/arch/x86/kernel/ioport_64.c @@ -95,7 +95,7 @@ asmlinkage long sys_ioperm(unsigned long * beyond the 0x3ff range: to get the full 65536 ports bitmapped * you'd need 8kB of bitmaps/process, which is a bit excessive. * - * Here we just change the eflags value on the stack: we allow + * Here we just change the flags value on the stack: we allow * only the super-user to do it. This depends on the stack-layout * on system-call entry - see also fork() and the signal handling * code. 
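The sys_iopl() arithmetic here is worth spelling out: IOPL is the two-bit field at bits 12-13 of the flags register, so the old level is extracted with (flags >> 12) & 3 and a new one installed by clearing X86_EFLAGS_IOPL and or-ing in level << 12. A minimal stand-alone illustration (plain C, example values made up):

	#include <assert.h>

	#define X86_EFLAGS_IOPL 0x3000UL	/* IOPL field, bits 12-13 */

	static unsigned long set_iopl(unsigned long flags, unsigned int level)
	{
		/* clear the old two-bit field, then install the new level */
		return (flags & ~X86_EFLAGS_IOPL) | ((unsigned long)level << 12);
	}

	int main(void)
	{
		unsigned long flags = 0x246UL;	/* a typical flags value, IOPL 0 */

		assert(((flags >> 12) & 3) == 0);
		flags = set_iopl(flags, 3);
		assert(flags == 0x3246UL);
		assert(((flags >> 12) & 3) == 3);
		return 0;
	}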
@@ -103,7 +103,7 @@ asmlinkage long sys_ioperm(unsigned long asmlinkage long sys_iopl(unsigned int level, struct pt_regs *regs) { - unsigned int old = (regs->eflags >> 12) & 3; + unsigned int old = (regs->flags >> 12) & 3; if (level > 3) return -EINVAL; @@ -112,6 +112,6 @@ asmlinkage long sys_iopl(unsigned int le if (!capable(CAP_SYS_RAWIO)) return -EPERM; } - regs->eflags = (regs->eflags &~ X86_EFLAGS_IOPL) | (level << 12); + regs->flags = (regs->flags &~ X86_EFLAGS_IOPL) | (level << 12); return 0; } diff -puN arch/x86/kernel/irq_32.c~git-x86 arch/x86/kernel/irq_32.c --- a/arch/x86/kernel/irq_32.c~git-x86 +++ a/arch/x86/kernel/irq_32.c @@ -70,7 +70,7 @@ fastcall unsigned int do_IRQ(struct pt_r { struct pt_regs *old_regs; /* high bit used in ret_from_ code */ - int irq = ~regs->orig_eax; + int irq = ~regs->orig_ax; struct irq_desc *desc = irq_desc + irq; #ifdef CONFIG_4KSTACKS union irq_ctx *curctx, *irqctx; @@ -88,13 +88,13 @@ fastcall unsigned int do_IRQ(struct pt_r #ifdef CONFIG_DEBUG_STACKOVERFLOW /* Debugging check for stack overflow: is there less than 1KB free? */ { - long esp; + long sp; __asm__ __volatile__("andl %%esp,%0" : - "=r" (esp) : "0" (THREAD_SIZE - 1)); - if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) { + "=r" (sp) : "0" (THREAD_SIZE - 1)); + if (unlikely(sp < (sizeof(struct thread_info) + STACK_WARN))) { printk("do_IRQ: stack overflow: %ld\n", - esp - sizeof(struct thread_info)); + sp - sizeof(struct thread_info)); dump_stack(); } } @@ -112,7 +112,7 @@ fastcall unsigned int do_IRQ(struct pt_r * current stack (which is the irq stack already after all) */ if (curctx != irqctx) { - int arg1, arg2, ebx; + int arg1, arg2, bx; /* build the stack frame on the IRQ stack */ isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); @@ -128,10 +128,10 @@ fastcall unsigned int do_IRQ(struct pt_r (curctx->tinfo.preempt_count & SOFTIRQ_MASK); asm volatile( - " xchgl %%ebx,%%esp \n" - " call *%%edi \n" - " movl %%ebx,%%esp \n" - : "=a" (arg1), "=d" (arg2), "=b" (ebx) + " xchgl %%ebx,%%esp \n" + " call *%%edi \n" + " movl %%ebx,%%esp \n" + : "=a" (arg1), "=d" (arg2), "=b" (bx) : "0" (irq), "1" (desc), "2" (isp), "D" (desc->handle_irq) : "memory", "cc" diff -puN arch/x86/kernel/irq_64.c~git-x86 arch/x86/kernel/irq_64.c --- a/arch/x86/kernel/irq_64.c~git-x86 +++ a/arch/x86/kernel/irq_64.c @@ -20,6 +20,26 @@ atomic_t irq_err_count; +/* + * 'what should we do if we get a hw irq event on an illegal vector'. + * each architecture has to answer this themselves. + */ +void ack_bad_irq(unsigned int irq) +{ + printk(KERN_WARNING "unexpected IRQ trap at vector %02x\n", irq); + /* + * Currently unexpected vectors happen only on SMP and APIC. + * We _must_ ack these because every local APIC has only N + * irq slots per priority level, and a 'hanging, unacked' IRQ + * holds up an irq slot - in excessive cases (when multiple + * unexpected vectors occur) that might lock up the APIC + * completely. + * But don't ack when the APIC is disabled. 
-AK + */ + if (!disable_apic) + ack_APIC_irq(); +} + #ifdef CONFIG_DEBUG_STACKOVERFLOW /* * Probabilistic stack overflow check: @@ -33,11 +53,11 @@ static inline void stack_overflow_check( u64 curbase = (u64)task_stack_page(current); static unsigned long warned = -60*HZ; - if (regs->rsp >= curbase && regs->rsp <= curbase + THREAD_SIZE && - regs->rsp < curbase + sizeof(struct thread_info) + 128 && + if (regs->sp >= curbase && regs->sp <= curbase + THREAD_SIZE && + regs->sp < curbase + sizeof(struct thread_info) + 128 && time_after(jiffies, warned + 60*HZ)) { - printk("do_IRQ: %s near stack overflow (cur:%Lx,rsp:%lx)\n", - current->comm, curbase, regs->rsp); + printk("do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n", + current->comm, curbase, regs->sp); show_stack(NULL,NULL); warned = jiffies; } @@ -142,7 +162,7 @@ asmlinkage unsigned int do_IRQ(struct pt struct pt_regs *old_regs = set_irq_regs(regs); /* high bit used in ret_from_ code */ - unsigned vector = ~regs->orig_rax; + unsigned vector = ~regs->orig_ax; unsigned irq; exit_idle(); diff -puN arch/x86/kernel/kprobes_32.c~git-x86 arch/x86/kernel/kprobes_32.c --- a/arch/x86/kernel/kprobes_32.c~git-x86 +++ a/arch/x86/kernel/kprobes_32.c @@ -212,27 +212,40 @@ static void __kprobes set_current_kprobe { __get_cpu_var(current_kprobe) = p; kcb->kprobe_saved_eflags = kcb->kprobe_old_eflags - = (regs->eflags & (TF_MASK | IF_MASK)); + = (regs->flags & (TF_MASK | IF_MASK)); if (is_IF_modifier(p->opcode)) kcb->kprobe_saved_eflags &= ~IF_MASK; } +static __always_inline void clear_btf(void) +{ + if (test_thread_flag(TIF_DEBUGCTLMSR)) + wrmsr(MSR_IA32_DEBUGCTLMSR, 0, 0); +} + +static __always_inline void restore_btf(void) +{ + if (test_thread_flag(TIF_DEBUGCTLMSR)) + wrmsr(MSR_IA32_DEBUGCTLMSR, current->thread.debugctlmsr, 0); +} + static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) { - regs->eflags |= TF_MASK; - regs->eflags &= ~IF_MASK; + clear_btf(); + regs->flags |= TF_MASK; + regs->flags &= ~IF_MASK; /*single step inline if the instruction is an int3*/ if (p->opcode == BREAKPOINT_INSTRUCTION) - regs->eip = (unsigned long)p->addr; + regs->ip = (unsigned long)p->addr; else - regs->eip = (unsigned long)p->ainsn.insn; + regs->ip = (unsigned long)p->ainsn.insn; } /* Called with kretprobe_lock held */ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { - unsigned long *sara = (unsigned long *)&regs->esp; + unsigned long *sara = (unsigned long *)&regs->sp; ri->ret_addr = (kprobe_opcode_t *) *sara; @@ -251,7 +264,7 @@ static int __kprobes kprobe_handler(stru kprobe_opcode_t *addr; struct kprobe_ctlblk *kcb; - addr = (kprobe_opcode_t *)(regs->eip - sizeof(kprobe_opcode_t)); + addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t)); /* * We don't want to be preempted for the entire @@ -266,8 +279,8 @@ static int __kprobes kprobe_handler(stru if (p) { if (kcb->kprobe_status == KPROBE_HIT_SS && *p->ainsn.insn == BREAKPOINT_INSTRUCTION) { - regs->eflags &= ~TF_MASK; - regs->eflags |= kcb->kprobe_saved_eflags; + regs->flags &= ~TF_MASK; + regs->flags |= kcb->kprobe_saved_eflags; goto no_kprobe; } /* We have reentered the kprobe_handler(), since @@ -288,7 +301,7 @@ static int __kprobes kprobe_handler(stru * another cpu right after we hit, no further * handling of this interrupt is appropriate */ - regs->eip -= sizeof(kprobe_opcode_t); + regs->ip -= sizeof(kprobe_opcode_t); ret = 1; goto no_kprobe; } @@ -312,7 +325,7 @@ static int __kprobes kprobe_handler(stru * Back up over the (now missing) 
int3 and run * the original instruction. */ - regs->eip -= sizeof(kprobe_opcode_t); + regs->ip -= sizeof(kprobe_opcode_t); ret = 1; } /* Not one of ours: let kernel handle it */ @@ -331,7 +344,7 @@ ss_probe: if (p->ainsn.boostable == 1 && !p->post_handler){ /* Boost up -- we can execute copied instructions directly */ reset_current_kprobe(); - regs->eip = (unsigned long)p->ainsn.insn; + regs->ip = (unsigned long)p->ainsn.insn; preempt_enable_no_resched(); return 1; } @@ -355,7 +368,7 @@ no_kprobe: asm volatile ( ".global kretprobe_trampoline\n" "kretprobe_trampoline: \n" " pushf\n" - /* skip cs, eip, orig_eax */ + /* skip cs, ip, orig_ax */ " subl $12, %esp\n" " pushl %fs\n" " pushl %ds\n" @@ -369,10 +382,10 @@ no_kprobe: " pushl %ebx\n" " movl %esp, %eax\n" " call trampoline_handler\n" - /* move eflags to cs */ + /* move flags to cs */ " movl 52(%esp), %edx\n" " movl %edx, 48(%esp)\n" - /* save true return address on eflags */ + /* save true return address on flags */ " movl %eax, 52(%esp)\n" " popl %ebx\n" " popl %ecx\n" @@ -381,7 +394,7 @@ no_kprobe: " popl %edi\n" " popl %ebp\n" " popl %eax\n" - /* skip eip, orig_eax, es, ds, fs */ + /* skip ip, orig_ax, es, ds, fs */ " addl $20, %esp\n" " popf\n" " ret\n"); @@ -402,9 +415,9 @@ fastcall void *__kprobes trampoline_hand spin_lock_irqsave(&kretprobe_lock, flags); head = kretprobe_inst_table_head(current); /* fixup registers */ - regs->xcs = __KERNEL_CS | get_kernel_rpl(); - regs->eip = trampoline_address; - regs->orig_eax = 0xffffffff; + regs->cs = __KERNEL_CS | get_kernel_rpl(); + regs->ip = trampoline_address; + regs->orig_ax = 0xffffffff; /* * It is possible to have multiple instances associated with a given @@ -465,11 +478,11 @@ fastcall void *__kprobes trampoline_hand * interrupt. We have to fix up the stack as follows: * * 0) Except in the case of absolute or indirect jump or call instructions, - * the new eip is relative to the copied instruction. We need to make + * the new ip is relative to the copied instruction. We need to make * it relative to the original instruction. * * 1) If the single-stepped instruction was pushfl, then the TF and IF - * flags are set in the just-pushed eflags, and may need to be cleared. + * flags are set in the just-pushed flags, and may need to be cleared. * * 2) If the single-stepped instruction was a call, the return address * that is atop the stack is the address following the copied instruction. @@ -480,11 +493,11 @@ fastcall void *__kprobes trampoline_hand static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { - unsigned long *tos = (unsigned long *)®s->esp; + unsigned long *tos = (unsigned long *)®s->sp; unsigned long copy_eip = (unsigned long)p->ainsn.insn; unsigned long orig_eip = (unsigned long)p->addr; - regs->eflags &= ~TF_MASK; + regs->flags &= ~TF_MASK; switch (p->ainsn.insn[0]) { case 0x9c: /* pushfl */ *tos &= ~(TF_MASK | IF_MASK); @@ -495,8 +508,8 @@ static void __kprobes resume_execution(s case 0xca: case 0xcb: case 0xcf: - case 0xea: /* jmp absolute -- eip is correct */ - /* eip is already adjusted, no more changes required */ + case 0xea: /* jmp absolute -- ip is correct */ + /* ip is already adjusted, no more changes required */ p->ainsn.boostable = 1; goto no_change; case 0xe8: /* call relative - Fix return addr */ @@ -509,14 +522,14 @@ static void __kprobes resume_execution(s if ((p->ainsn.insn[1] & 0x30) == 0x10) { /* * call absolute, indirect - * Fix return addr; eip is correct. + * Fix return addr; ip is correct. 
* But this is not boostable */ *tos = orig_eip + (*tos - copy_eip); goto no_change; } else if (((p->ainsn.insn[1] & 0x31) == 0x20) || /* jmp near, absolute indirect */ ((p->ainsn.insn[1] & 0x31) == 0x21)) { /* jmp far, absolute indirect */ - /* eip is correct. And this is boostable */ + /* ip is correct. And this is boostable */ p->ainsn.boostable = 1; goto no_change; } @@ -525,23 +538,25 @@ static void __kprobes resume_execution(s } if (p->ainsn.boostable == 0) { - if ((regs->eip > copy_eip) && - (regs->eip - copy_eip) + 5 < MAX_INSN_SIZE) { + if ((regs->ip > copy_eip) && + (regs->ip - copy_eip) + 5 < MAX_INSN_SIZE) { /* * These instructions can be executed directly if it * jumps back to correct address. */ - set_jmp_op((void *)regs->eip, - (void *)orig_eip + (regs->eip - copy_eip)); + set_jmp_op((void *)regs->ip, + (void *)orig_eip + (regs->ip - copy_eip)); p->ainsn.boostable = 1; } else { p->ainsn.boostable = -1; } } - regs->eip = orig_eip + (regs->eip - copy_eip); + regs->ip = orig_eip + (regs->ip - copy_eip); no_change: + restore_btf(); + return; } @@ -563,8 +578,8 @@ static int __kprobes post_kprobe_handler } resume_execution(cur, regs, kcb); - regs->eflags |= kcb->kprobe_saved_eflags; - trace_hardirqs_fixup_flags(regs->eflags); + regs->flags |= kcb->kprobe_saved_eflags; + trace_hardirqs_fixup_flags(regs->flags); /*Restore back the original saved kprobes variables and continue. */ if (kcb->kprobe_status == KPROBE_REENTER) { @@ -576,11 +591,11 @@ out: preempt_enable_no_resched(); /* - * if somebody else is singlestepping across a probe point, eflags + * if somebody else is singlestepping across a probe point, flags * will have TF set, in which case, continue the remaining processing * of do_debug, as if this is not a probe hit. */ - if (regs->eflags & TF_MASK) + if (regs->flags & TF_MASK) return 0; return 1; @@ -597,12 +612,12 @@ int __kprobes kprobe_fault_handler(struc /* * We are here because the instruction being single * stepped caused a page fault. We reset the current - * kprobe and the eip points back to the probe address + * kprobe and the ip points back to the probe address * and allow the page fault handler to continue as a * normal page fault. 
*/ - regs->eip = (unsigned long)cur->addr; - regs->eflags |= kcb->kprobe_old_eflags; + regs->ip = (unsigned long)cur->addr; + regs->flags |= kcb->kprobe_old_eflags; if (kcb->kprobe_status == KPROBE_REENTER) restore_previous_kprobe(kcb); else @@ -688,7 +703,7 @@ int __kprobes setjmp_pre_handler(struct struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); kcb->jprobe_saved_regs = *regs; - kcb->jprobe_saved_esp = &regs->esp; + kcb->jprobe_saved_esp = &regs->sp; addr = (unsigned long)(kcb->jprobe_saved_esp); /* @@ -700,9 +715,9 @@ int __kprobes setjmp_pre_handler(struct */ memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, MIN_STACK_SIZE(addr)); - regs->eflags &= ~IF_MASK; + regs->flags &= ~IF_MASK; trace_hardirqs_off(); - regs->eip = (unsigned long)(jp->entry); + regs->ip = (unsigned long)(jp->entry); return 1; } @@ -721,17 +736,17 @@ void __kprobes jprobe_return(void) int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - u8 *addr = (u8 *) (regs->eip - 1); + u8 *addr = (u8 *) (regs->ip - 1); unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_esp); struct jprobe *jp = container_of(p, struct jprobe, kp); if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) { - if (&regs->esp != kcb->jprobe_saved_esp) { + if (&regs->sp != kcb->jprobe_saved_esp) { struct pt_regs *saved_regs = container_of(kcb->jprobe_saved_esp, - struct pt_regs, esp); - printk("current esp %p does not match saved esp %p\n", - &regs->esp, kcb->jprobe_saved_esp); + struct pt_regs, sp); + printk("current sp %p does not match saved sp %p\n", + &regs->sp, kcb->jprobe_saved_esp); printk("Saved registers for jprobe %p\n", jp); show_registers(saved_regs); printk("Current registers\n"); diff -puN arch/x86/kernel/kprobes_64.c~git-x86 arch/x86/kernel/kprobes_64.c --- a/arch/x86/kernel/kprobes_64.c~git-x86 +++ a/arch/x86/kernel/kprobes_64.c @@ -251,27 +251,40 @@ static void __kprobes set_current_kprobe { __get_cpu_var(current_kprobe) = p; kcb->kprobe_saved_rflags = kcb->kprobe_old_rflags - = (regs->eflags & (TF_MASK | IF_MASK)); + = (regs->flags & (TF_MASK | IF_MASK)); if (is_IF_modifier(p->ainsn.insn)) kcb->kprobe_saved_rflags &= ~IF_MASK; } +static __always_inline void clear_btf(void) +{ + if (test_thread_flag(TIF_DEBUGCTLMSR)) + wrmsrl(MSR_IA32_DEBUGCTLMSR, 0); +} + +static __always_inline void restore_btf(void) +{ + if (test_thread_flag(TIF_DEBUGCTLMSR)) + wrmsrl(MSR_IA32_DEBUGCTLMSR, current->thread.debugctlmsr); +} + static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) { - regs->eflags |= TF_MASK; - regs->eflags &= ~IF_MASK; + clear_btf(); + regs->flags |= TF_MASK; + regs->flags &= ~IF_MASK; /*single step inline if the instruction is an int3*/ if (p->opcode == BREAKPOINT_INSTRUCTION) - regs->rip = (unsigned long)p->addr; + regs->ip = (unsigned long)p->addr; else - regs->rip = (unsigned long)p->ainsn.insn; + regs->ip = (unsigned long)p->ainsn.insn; } /* Called with kretprobe_lock held */ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { - unsigned long *sara = (unsigned long *)regs->rsp; + unsigned long *sara = (unsigned long *)regs->sp; ri->ret_addr = (kprobe_opcode_t *) *sara; /* Replace the return addr with trampoline addr */ @@ -282,7 +295,7 @@ int __kprobes kprobe_handler(struct pt_r { struct kprobe *p; int ret = 0; - kprobe_opcode_t *addr = (kprobe_opcode_t *)(regs->rip - sizeof(kprobe_opcode_t)); 
struct kprobe_ctlblk *kcb; /* @@ -298,8 +311,8 @@ int __kprobes kprobe_handler(struct pt_r if (p) { if (kcb->kprobe_status == KPROBE_HIT_SS && *p->ainsn.insn == BREAKPOINT_INSTRUCTION) { - regs->eflags &= ~TF_MASK; - regs->eflags |= kcb->kprobe_saved_rflags; + regs->flags &= ~TF_MASK; + regs->flags |= kcb->kprobe_saved_rflags; goto no_kprobe; } else if (kcb->kprobe_status == KPROBE_HIT_SSDONE) { /* TODO: Provide re-entrancy from @@ -308,7 +321,7 @@ int __kprobes kprobe_handler(struct pt_r * the instruction of the new probe. */ arch_disarm_kprobe(p); - regs->rip = (unsigned long)p->addr; + regs->ip = (unsigned long)p->addr; reset_current_kprobe(); ret = 1; } else { @@ -332,7 +345,7 @@ int __kprobes kprobe_handler(struct pt_r * another cpu right after we hit, no further * handling of this interrupt is appropriate */ - regs->rip = (unsigned long)addr; + regs->ip = (unsigned long)addr; ret = 1; goto no_kprobe; } @@ -356,7 +369,7 @@ int __kprobes kprobe_handler(struct pt_r * Back up over the (now missing) int3 and run * the original instruction. */ - regs->rip = (unsigned long)addr; + regs->ip = (unsigned long)addr; ret = 1; } /* Not one of ours: let kernel handle it */ @@ -441,7 +454,7 @@ int __kprobes trampoline_probe_handler(s } kretprobe_assert(ri, orig_ret_address, trampoline_address); - regs->rip = orig_ret_address; + regs->ip = orig_ret_address; reset_current_kprobe(); spin_unlock_irqrestore(&kretprobe_lock, flags); @@ -471,11 +484,11 @@ int __kprobes trampoline_probe_handler(s * interrupt. We have to fix up the stack as follows: * * 0) Except in the case of absolute or indirect jump or call instructions, - * the new rip is relative to the copied instruction. We need to make + * the new ip is relative to the copied instruction. We need to make * it relative to the original instruction. * * 1) If the single-stepped instruction was pushfl, then the TF and IF - * flags are set in the just-pushed eflags, and may need to be cleared. + * flags are set in the just-pushed flags, and may need to be cleared. * * 2) If the single-stepped instruction was a call, the return address * that is atop the stack is the address following the copied instruction. @@ -484,7 +497,7 @@ int __kprobes trampoline_probe_handler(s static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { - unsigned long *tos = (unsigned long *)regs->rsp; + unsigned long *tos = (unsigned long *)regs->sp; unsigned long next_rip = 0; unsigned long copy_rip = (unsigned long)p->ainsn.insn; unsigned long orig_rip = (unsigned long)p->addr; @@ -503,8 +516,8 @@ static void __kprobes resume_execution(s case 0xcb: case 0xc2: case 0xca: - regs->eflags &= ~TF_MASK; - /* rip is already adjusted, no more changes required*/ + regs->flags &= ~TF_MASK; + /* ip is already adjusted, no more changes required*/ return; case 0xe8: /* call relative - Fix return addr */ *tos = orig_rip + (*tos - copy_rip); @@ -512,28 +525,30 @@ static void __kprobes resume_execution(s case 0xff: if ((insn[1] & 0x30) == 0x10) { /* call absolute, indirect */ - /* Fix return addr; rip is correct. */ - next_rip = regs->rip; + /* Fix return addr; ip is correct. */ + next_rip = regs->ip; *tos = orig_rip + (*tos - copy_rip); } else if (((insn[1] & 0x31) == 0x20) || /* jmp near, absolute indirect */ ((insn[1] & 0x31) == 0x21)) { /* jmp far, absolute indirect */ - /* rip is correct. */ - next_rip = regs->rip; + /* ip is correct. 
*/ + next_rip = regs->ip; } break; - case 0xea: /* jmp absolute -- rip is correct */ - next_rip = regs->rip; + case 0xea: /* jmp absolute -- ip is correct */ + next_rip = regs->ip; break; default: break; } - regs->eflags &= ~TF_MASK; + regs->flags &= ~TF_MASK; if (next_rip) { - regs->rip = next_rip; + regs->ip = next_rip; } else { - regs->rip = orig_rip + (regs->rip - copy_rip); + regs->ip = orig_rip + (regs->ip - copy_rip); } + + restore_btf(); } int __kprobes post_kprobe_handler(struct pt_regs *regs) @@ -550,8 +565,8 @@ int __kprobes post_kprobe_handler(struct } resume_execution(cur, regs, kcb); - regs->eflags |= kcb->kprobe_saved_rflags; - trace_hardirqs_fixup_flags(regs->eflags); + regs->flags |= kcb->kprobe_saved_rflags; + trace_hardirqs_fixup_flags(regs->flags); /* Restore the original saved kprobes variables and continue. */ if (kcb->kprobe_status == KPROBE_REENTER) { @@ -563,11 +578,11 @@ out: preempt_enable_no_resched(); /* - * if somebody else is singlestepping across a probe point, eflags + * if somebody else is singlestepping across a probe point, flags * will have TF set, in which case, continue the remaining processing * of do_debug, as if this is not a probe hit. */ - if (regs->eflags & TF_MASK) + if (regs->flags & TF_MASK) return 0; return 1; @@ -585,12 +600,12 @@ int __kprobes kprobe_fault_handler(struc /* * We are here because the instruction being single * stepped caused a page fault. We reset the current - * kprobe and the rip points back to the probe address + * kprobe and the ip points back to the probe address * and allow the page fault handler to continue as a * normal page fault. */ - regs->rip = (unsigned long)cur->addr; - regs->eflags |= kcb->kprobe_old_rflags; + regs->ip = (unsigned long)cur->addr; + regs->flags |= kcb->kprobe_old_rflags; if (kcb->kprobe_status == KPROBE_REENTER) restore_previous_kprobe(kcb); else @@ -620,9 +635,9 @@ int __kprobes kprobe_fault_handler(struc * In case the user-specified fault handler returned * zero, try to fix up. 
*/ - fixup = search_exception_tables(regs->rip); + fixup = search_exception_tables(regs->ip); if (fixup) { - regs->rip = fixup->fixup; + regs->ip = fixup->fixup; return 1; } @@ -679,7 +694,7 @@ int __kprobes setjmp_pre_handler(struct struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); kcb->jprobe_saved_regs = *regs; - kcb->jprobe_saved_rsp = (long *) regs->rsp; + kcb->jprobe_saved_rsp = (long *) regs->sp; addr = (unsigned long)(kcb->jprobe_saved_rsp); /* * As Linus pointed out, gcc assumes that the callee @@ -690,9 +705,9 @@ int __kprobes setjmp_pre_handler(struct */ memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, MIN_STACK_SIZE(addr)); - regs->eflags &= ~IF_MASK; + regs->flags &= ~IF_MASK; trace_hardirqs_off(); - regs->rip = (unsigned long)(jp->entry); + regs->ip = (unsigned long)(jp->entry); return 1; } @@ -711,17 +726,17 @@ void __kprobes jprobe_return(void) int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - u8 *addr = (u8 *) (regs->rip - 1); + u8 *addr = (u8 *) (regs->ip - 1); unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_rsp); struct jprobe *jp = container_of(p, struct jprobe, kp); if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) { - if ((long *)regs->rsp != kcb->jprobe_saved_rsp) { + if ((long *)regs->sp != kcb->jprobe_saved_rsp) { struct pt_regs *saved_regs = container_of(kcb->jprobe_saved_rsp, - struct pt_regs, rsp); - printk("current rsp %p does not match saved rsp %p\n", - (long *)regs->rsp, kcb->jprobe_saved_rsp); + struct pt_regs, sp); + printk("current sp %p does not match saved sp %p\n", + (long *)regs->sp, kcb->jprobe_saved_rsp); printk("Saved registers for jprobe %p\n", jp); show_registers(saved_regs); printk("Current registers\n"); diff -puN /dev/null arch/x86/kernel/ldt.c --- /dev/null +++ a/arch/x86/kernel/ldt.c @@ -0,0 +1,264 @@ +/* + * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds + * Copyright (C) 1999 Ingo Molnar + * Copyright (C) 2002 Andi Kleen + * + * This handles calls from both 32bit and 64bit mode. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#ifdef CONFIG_SMP +static void flush_ldt(void *null) +{ + if (current->active_mm) + load_LDT(&current->active_mm->context); +} +#endif + +static int alloc_ldt(mm_context_t *pc, int mincount, int reload) +{ + void *oldldt, *newldt; + int oldsize; + + if (mincount <= pc->size) + return 0; + oldsize = pc->size; + mincount = (mincount + 511) & (~511); + if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE) + newldt = vmalloc(mincount * LDT_ENTRY_SIZE); + else + newldt = kmalloc(mincount * LDT_ENTRY_SIZE, GFP_KERNEL); + + if (!newldt) + return -ENOMEM; + + if (oldsize) + memcpy(newldt, pc->ldt, oldsize * LDT_ENTRY_SIZE); + oldldt = pc->ldt; + memset(newldt + oldsize * LDT_ENTRY_SIZE, 0, + (mincount - oldsize) * LDT_ENTRY_SIZE); + +#ifdef CONFIG_X86_64 + /* CHECKME: Do we really need this ? 
*/ + wmb(); +#endif + pc->ldt = newldt; + wmb(); + pc->size = mincount; + wmb(); + + if (reload) { +#ifdef CONFIG_SMP + cpumask_t mask; + + preempt_disable(); + load_LDT(pc); + mask = cpumask_of_cpu(smp_processor_id()); + if (!cpus_equal(current->mm->cpu_vm_mask, mask)) + smp_call_function(flush_ldt, NULL, 1, 1); + preempt_enable(); +#else + load_LDT(pc); +#endif + } + if (oldsize) { + if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE) + vfree(oldldt); + else + kfree(oldldt); + } + return 0; +} + +static inline int copy_ldt(mm_context_t *new, mm_context_t *old) +{ + int err = alloc_ldt(new, old->size, 0); + + if (err < 0) + return err; + memcpy(new->ldt, old->ldt, old->size * LDT_ENTRY_SIZE); + return 0; +} + +/* + * we do not have to muck with descriptors here, that is + * done in switch_mm() as needed. + */ +int init_new_context(struct task_struct *tsk, struct mm_struct *mm) +{ + struct mm_struct *old_mm; + int retval = 0; + + mutex_init(&mm->context.lock); + mm->context.size = 0; + old_mm = current->mm; + if (old_mm && old_mm->context.size > 0) { + mutex_lock(&old_mm->context.lock); + retval = copy_ldt(&mm->context, &old_mm->context); + mutex_unlock(&old_mm->context.lock); + } + return retval; +} + +/* + * No need to lock the MM as we are the last user + * + * 64bit: Don't touch the LDT register - we're already in the next thread. + */ +void destroy_context(struct mm_struct *mm) +{ + if (mm->context.size) { +#ifdef CONFIG_X86_32 + /* CHECKME: Can this ever happen ? */ + if (mm == current->active_mm) + clear_LDT(); +#endif + if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE) + vfree(mm->context.ldt); + else + kfree(mm->context.ldt); + mm->context.size = 0; + } +} + +static int read_ldt(void __user *ptr, unsigned long bytecount) +{ + int err; + unsigned long size; + struct mm_struct *mm = current->mm; + + if (!mm->context.size) + return 0; + if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES) + bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES; + + mutex_lock(&mm->context.lock); + size = mm->context.size * LDT_ENTRY_SIZE; + if (size > bytecount) + size = bytecount; + + err = 0; + if (copy_to_user(ptr, mm->context.ldt, size)) + err = -EFAULT; + mutex_unlock(&mm->context.lock); + if (err < 0) + goto error_return; + if (size != bytecount) { + /* zero-fill the rest */ + if (clear_user(ptr + size, bytecount - size) != 0) { + err = -EFAULT; + goto error_return; + } + } + return bytecount; +error_return: + return err; +} + +static int read_default_ldt(void __user *ptr, unsigned long bytecount) +{ + /* CHECKME: Can we use _one_ random number ? 
*/ +#ifdef CONFIG_X86_32 + unsigned long size = 5 * sizeof(struct desc_struct); +#else + unsigned long size = 128; +#endif + if (bytecount > size) + bytecount = size; + if (clear_user(ptr, bytecount)) + return -EFAULT; + return bytecount; +} + +static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) +{ + struct mm_struct *mm = current->mm; + __u32 entry_1, entry_2; + int error; + struct user_desc ldt_info; + + error = -EINVAL; + if (bytecount != sizeof(ldt_info)) + goto out; + error = -EFAULT; + if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info))) + goto out; + + error = -EINVAL; + if (ldt_info.entry_number >= LDT_ENTRIES) + goto out; + if (ldt_info.contents == 3) { + if (oldmode) + goto out; + if (ldt_info.seg_not_present == 0) + goto out; + } + + mutex_lock(&mm->context.lock); + if (ldt_info.entry_number >= mm->context.size) { + error = alloc_ldt(&current->mm->context, + ldt_info.entry_number + 1, 1); + if (error < 0) + goto out_unlock; + } + + /* Allow LDTs to be cleared by the user. */ + if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { + if (oldmode || LDT_empty(&ldt_info)) { + entry_1 = 0; + entry_2 = 0; + goto install; + } + } + + entry_1 = LDT_entry_a(&ldt_info); + entry_2 = LDT_entry_b(&ldt_info); + if (oldmode) + entry_2 &= ~(1 << 20); + + /* Install the new entry ... */ +install: + write_ldt_entry(mm->context.ldt, ldt_info.entry_number, entry_1, + entry_2); + error = 0; + +out_unlock: + mutex_unlock(&mm->context.lock); +out: + return error; +} + +asmlinkage int sys_modify_ldt(int func, void __user *ptr, + unsigned long bytecount) +{ + int ret = -ENOSYS; + + switch (func) { + case 0: + ret = read_ldt(ptr, bytecount); + break; + case 1: + ret = write_ldt(ptr, bytecount, 1); + break; + case 2: + ret = read_default_ldt(ptr, bytecount); + break; + case 0x11: + ret = write_ldt(ptr, bytecount, 0); + break; + } + return ret; +} diff -puN arch/x86/kernel/ldt_32.c~git-x86 /dev/null --- a/arch/x86/kernel/ldt_32.c +++ /dev/null @@ -1,248 +0,0 @@ -/* - * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds - * Copyright (C) 1999 Ingo Molnar - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#ifdef CONFIG_SMP /* avoids "defined but not used" warnig */ -static void flush_ldt(void *null) -{ - if (current->active_mm) - load_LDT(&current->active_mm->context); -} -#endif - -static int alloc_ldt(mm_context_t *pc, int mincount, int reload) -{ - void *oldldt; - void *newldt; - int oldsize; - - if (mincount <= pc->size) - return 0; - oldsize = pc->size; - mincount = (mincount+511)&(~511); - if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE) - newldt = vmalloc(mincount*LDT_ENTRY_SIZE); - else - newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL); - - if (!newldt) - return -ENOMEM; - - if (oldsize) - memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE); - oldldt = pc->ldt; - memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE); - pc->ldt = newldt; - wmb(); - pc->size = mincount; - wmb(); - - if (reload) { -#ifdef CONFIG_SMP - cpumask_t mask; - preempt_disable(); - load_LDT(pc); - mask = cpumask_of_cpu(smp_processor_id()); - if (!cpus_equal(current->mm->cpu_vm_mask, mask)) - smp_call_function(flush_ldt, NULL, 1, 1); - preempt_enable(); -#else - load_LDT(pc); -#endif - } - if (oldsize) { - if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(oldldt); - else - kfree(oldldt); - } - return 0; -} - -static inline int copy_ldt(mm_context_t *new, mm_context_t *old) -{ - int err = alloc_ldt(new, 
old->size, 0); - if (err < 0) - return err; - memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE); - return 0; -} - -/* - * we do not have to muck with descriptors here, that is - * done in switch_mm() as needed. - */ -int init_new_context(struct task_struct *tsk, struct mm_struct *mm) -{ - struct mm_struct * old_mm; - int retval = 0; - - mutex_init(&mm->context.lock); - mm->context.size = 0; - old_mm = current->mm; - if (old_mm && old_mm->context.size > 0) { - mutex_lock(&old_mm->context.lock); - retval = copy_ldt(&mm->context, &old_mm->context); - mutex_unlock(&old_mm->context.lock); - } - return retval; -} - -/* - * No need to lock the MM as we are the last user - */ -void destroy_context(struct mm_struct *mm) -{ - if (mm->context.size) { - if (mm == current->active_mm) - clear_LDT(); - if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(mm->context.ldt); - else - kfree(mm->context.ldt); - mm->context.size = 0; - } -} - -static int read_ldt(void __user * ptr, unsigned long bytecount) -{ - int err; - unsigned long size; - struct mm_struct * mm = current->mm; - - if (!mm->context.size) - return 0; - if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES) - bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES; - - mutex_lock(&mm->context.lock); - size = mm->context.size*LDT_ENTRY_SIZE; - if (size > bytecount) - size = bytecount; - - err = 0; - if (copy_to_user(ptr, mm->context.ldt, size)) - err = -EFAULT; - mutex_unlock(&mm->context.lock); - if (err < 0) - goto error_return; - if (size != bytecount) { - /* zero-fill the rest */ - if (clear_user(ptr+size, bytecount-size) != 0) { - err = -EFAULT; - goto error_return; - } - } - return bytecount; -error_return: - return err; -} - -static int read_default_ldt(void __user * ptr, unsigned long bytecount) -{ - int err; - unsigned long size; - - err = 0; - size = 5*sizeof(struct desc_struct); - if (size > bytecount) - size = bytecount; - - err = size; - if (clear_user(ptr, size)) - err = -EFAULT; - - return err; -} - -static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) -{ - struct mm_struct * mm = current->mm; - __u32 entry_1, entry_2; - int error; - struct user_desc ldt_info; - - error = -EINVAL; - if (bytecount != sizeof(ldt_info)) - goto out; - error = -EFAULT; - if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info))) - goto out; - - error = -EINVAL; - if (ldt_info.entry_number >= LDT_ENTRIES) - goto out; - if (ldt_info.contents == 3) { - if (oldmode) - goto out; - if (ldt_info.seg_not_present == 0) - goto out; - } - - mutex_lock(&mm->context.lock); - if (ldt_info.entry_number >= mm->context.size) { - error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1); - if (error < 0) - goto out_unlock; - } - - /* Allow LDTs to be cleared by the user. */ - if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { - if (oldmode || LDT_empty(&ldt_info)) { - entry_1 = 0; - entry_2 = 0; - goto install; - } - } - - entry_1 = LDT_entry_a(&ldt_info); - entry_2 = LDT_entry_b(&ldt_info); - if (oldmode) - entry_2 &= ~(1 << 20); - - /* Install the new entry ... 
*/ -install: - write_ldt_entry(mm->context.ldt, ldt_info.entry_number, entry_1, entry_2); - error = 0; - -out_unlock: - mutex_unlock(&mm->context.lock); -out: - return error; -} - -asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount) -{ - int ret = -ENOSYS; - - switch (func) { - case 0: - ret = read_ldt(ptr, bytecount); - break; - case 1: - ret = write_ldt(ptr, bytecount, 1); - break; - case 2: - ret = read_default_ldt(ptr, bytecount); - break; - case 0x11: - ret = write_ldt(ptr, bytecount, 0); - break; - } - return ret; -} diff -puN arch/x86/kernel/ldt_64.c~git-x86 /dev/null --- a/arch/x86/kernel/ldt_64.c +++ /dev/null @@ -1,250 +0,0 @@ -/* - * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds - * Copyright (C) 1999 Ingo Molnar - * Copyright (C) 2002 Andi Kleen - * - * This handles calls from both 32bit and 64bit mode. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#ifdef CONFIG_SMP /* avoids "defined but not used" warnig */ -static void flush_ldt(void *null) -{ - if (current->active_mm) - load_LDT(&current->active_mm->context); -} -#endif - -static int alloc_ldt(mm_context_t *pc, unsigned mincount, int reload) -{ - void *oldldt; - void *newldt; - unsigned oldsize; - - if (mincount <= (unsigned)pc->size) - return 0; - oldsize = pc->size; - mincount = (mincount+511)&(~511); - if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE) - newldt = vmalloc(mincount*LDT_ENTRY_SIZE); - else - newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL); - - if (!newldt) - return -ENOMEM; - - if (oldsize) - memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE); - oldldt = pc->ldt; - memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE); - wmb(); - pc->ldt = newldt; - wmb(); - pc->size = mincount; - wmb(); - if (reload) { -#ifdef CONFIG_SMP - cpumask_t mask; - - preempt_disable(); - mask = cpumask_of_cpu(smp_processor_id()); - load_LDT(pc); - if (!cpus_equal(current->mm->cpu_vm_mask, mask)) - smp_call_function(flush_ldt, NULL, 1, 1); - preempt_enable(); -#else - load_LDT(pc); -#endif - } - if (oldsize) { - if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(oldldt); - else - kfree(oldldt); - } - return 0; -} - -static inline int copy_ldt(mm_context_t *new, mm_context_t *old) -{ - int err = alloc_ldt(new, old->size, 0); - if (err < 0) - return err; - memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE); - return 0; -} - -/* - * we do not have to muck with descriptors here, that is - * done in switch_mm() as needed. - */ -int init_new_context(struct task_struct *tsk, struct mm_struct *mm) -{ - struct mm_struct * old_mm; - int retval = 0; - - mutex_init(&mm->context.lock); - mm->context.size = 0; - old_mm = current->mm; - if (old_mm && old_mm->context.size > 0) { - mutex_lock(&old_mm->context.lock); - retval = copy_ldt(&mm->context, &old_mm->context); - mutex_unlock(&old_mm->context.lock); - } - return retval; -} - -/* - * - * Don't touch the LDT register - we're already in the next thread. 
- */ -void destroy_context(struct mm_struct *mm) -{ - if (mm->context.size) { - if ((unsigned)mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(mm->context.ldt); - else - kfree(mm->context.ldt); - mm->context.size = 0; - } -} - -static int read_ldt(void __user * ptr, unsigned long bytecount) -{ - int err; - unsigned long size; - struct mm_struct * mm = current->mm; - - if (!mm->context.size) - return 0; - if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES) - bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES; - - mutex_lock(&mm->context.lock); - size = mm->context.size*LDT_ENTRY_SIZE; - if (size > bytecount) - size = bytecount; - - err = 0; - if (copy_to_user(ptr, mm->context.ldt, size)) - err = -EFAULT; - mutex_unlock(&mm->context.lock); - if (err < 0) - goto error_return; - if (size != bytecount) { - /* zero-fill the rest */ - if (clear_user(ptr+size, bytecount-size) != 0) { - err = -EFAULT; - goto error_return; - } - } - return bytecount; -error_return: - return err; -} - -static int read_default_ldt(void __user * ptr, unsigned long bytecount) -{ - /* Arbitrary number */ - /* x86-64 default LDT is all zeros */ - if (bytecount > 128) - bytecount = 128; - if (clear_user(ptr, bytecount)) - return -EFAULT; - return bytecount; -} - -static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) -{ - struct task_struct *me = current; - struct mm_struct * mm = me->mm; - __u32 entry_1, entry_2, *lp; - int error; - struct user_desc ldt_info; - - error = -EINVAL; - - if (bytecount != sizeof(ldt_info)) - goto out; - error = -EFAULT; - if (copy_from_user(&ldt_info, ptr, bytecount)) - goto out; - - error = -EINVAL; - if (ldt_info.entry_number >= LDT_ENTRIES) - goto out; - if (ldt_info.contents == 3) { - if (oldmode) - goto out; - if (ldt_info.seg_not_present == 0) - goto out; - } - - mutex_lock(&mm->context.lock); - if (ldt_info.entry_number >= (unsigned)mm->context.size) { - error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1); - if (error < 0) - goto out_unlock; - } - - lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt); - - /* Allow LDTs to be cleared by the user. */ - if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { - if (oldmode || LDT_empty(&ldt_info)) { - entry_1 = 0; - entry_2 = 0; - goto install; - } - } - - entry_1 = LDT_entry_a(&ldt_info); - entry_2 = LDT_entry_b(&ldt_info); - if (oldmode) - entry_2 &= ~(1 << 20); - - /* Install the new entry ... 
*/ -install: - *lp = entry_1; - *(lp+1) = entry_2; - error = 0; - -out_unlock: - mutex_unlock(&mm->context.lock); -out: - return error; -} - -asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount) -{ - int ret = -ENOSYS; - - switch (func) { - case 0: - ret = read_ldt(ptr, bytecount); - break; - case 1: - ret = write_ldt(ptr, bytecount, 1); - break; - case 2: - ret = read_default_ldt(ptr, bytecount); - break; - case 0x11: - ret = write_ldt(ptr, bytecount, 0); - break; - } - return ret; -} diff -puN arch/x86/kernel/machine_kexec_64.c~git-x86 arch/x86/kernel/machine_kexec_64.c --- a/arch/x86/kernel/machine_kexec_64.c~git-x86 +++ a/arch/x86/kernel/machine_kexec_64.c @@ -234,10 +234,5 @@ NORET_TYPE void machine_kexec(struct kim void arch_crash_save_vmcoreinfo(void) { VMCOREINFO_SYMBOL(init_level4_pgt); - -#ifdef CONFIG_ARCH_DISCONTIGMEM_ENABLE - VMCOREINFO_SYMBOL(node_data); - VMCOREINFO_LENGTH(node_data, MAX_NUMNODES); -#endif } diff -puN arch/x86/kernel/mpparse_32.c~git-x86 arch/x86/kernel/mpparse_32.c --- a/arch/x86/kernel/mpparse_32.c~git-x86 +++ a/arch/x86/kernel/mpparse_32.c @@ -258,7 +258,7 @@ static void __init MP_ioapic_info (struc if (!(m->mpc_flags & MPC_APIC_USABLE)) return; - printk(KERN_INFO "I/O APIC #%d Version %d at 0x%lX.\n", + printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n", m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr); if (nr_ioapics >= MAX_IO_APICS) { printk(KERN_CRIT "Max # of I/O APICs (%d) exceeded (found %d).\n", @@ -405,9 +405,9 @@ static int __init smp_read_mpc(struct mp mps_oem_check(mpc, oem, str); - printk("APIC at: 0x%lX\n",mpc->mpc_lapic); + printk("APIC at: 0x%X\n", mpc->mpc_lapic); - /* + /* * Save the local APIC address (it might be non-default) -- but only * if we're not using ACPI. */ @@ -918,14 +918,14 @@ void __init mp_register_ioapic(u8 id, u3 */ mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid; mp_ioapic_routing[idx].gsi_base = gsi_base; - mp_ioapic_routing[idx].gsi_end = gsi_base + + mp_ioapic_routing[idx].gsi_end = gsi_base + io_apic_get_redir_entries(idx); - printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, " - "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, - mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, - mp_ioapic_routing[idx].gsi_base, - mp_ioapic_routing[idx].gsi_end); + printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " + "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, + mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, + mp_ioapic_routing[idx].gsi_base, + mp_ioapic_routing[idx].gsi_end); } void __init @@ -1041,13 +1041,14 @@ void __init mp_config_acpi_legacy_irqs ( } #define MAX_GSI_NUM 4096 +#define IRQ_COMPRESSION_START 64 int mp_register_gsi(u32 gsi, int triggering, int polarity) { int ioapic = -1; int ioapic_pin = 0; int idx, bit = 0; - static int pci_irq = 16; + static int pci_irq = IRQ_COMPRESSION_START; /* * Mapping between Global System Interrups, which * represent all possible interrupts, and IRQs * assigned to actual devices. */ @@ -1086,12 +1087,16 @@ int mp_register_gsi(u32 gsi, int trigger if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) { Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", mp_ioapic_routing[ioapic].apic_id, ioapic_pin); - return gsi_to_irq[gsi]; + return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]); } mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit); - if (triggering == ACPI_LEVEL_SENSITIVE) { + /* + * For GSI >= 64, use IRQ compression + */ + if ((gsi >= IRQ_COMPRESSION_START) + && (triggering == ACPI_LEVEL_SENSITIVE)) { /* * For PCI devices assign IRQs in order, avoiding gaps * due to unused I/O APIC pins. 
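The unified ldt.c above keeps the modify_ldt(2) ABI that both deleted files implemented: func 0 is read_ldt(), func 2 is read_default_ldt(), and func 1/0x11 are write_ldt() in old and new mode. A minimal user-space sketch of that interface, for illustration only: it assumes a Linux/x86 host, calls syscall(2) directly since glibc provides no modify_ldt wrapper, and takes struct user_desc and LDT_ENTRY_SIZE from <asm/ldt.h>.

#include <asm/ldt.h>	/* struct user_desc, LDT_ENTRY_SIZE */
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct user_desc ud;
	unsigned char buf[16 * LDT_ENTRY_SIZE];
	long n;

	/*
	 * base_addr == 0, limit == 0, read_exec_only == 1 and
	 * seg_not_present == 1 is exactly LDT_empty(), so write_ldt()
	 * installs a null entry -- allocating this mm's LDT on the way
	 * via alloc_ldt(entry_number + 1).
	 */
	memset(&ud, 0, sizeof(ud));
	ud.entry_number    = 0;
	ud.read_exec_only  = 1;
	ud.seg_not_present = 1;

	/* func 0x11: write_ldt() with oldmode == 0 */
	if (syscall(SYS_modify_ldt, 0x11, &ud, sizeof(ud)))
		perror("modify_ldt(0x11)");

	/* func 0: read_ldt(); bytes past context.size come back zero-filled */
	n = syscall(SYS_modify_ldt, 0, buf, sizeof(buf));
	printf("read_ldt returned %ld bytes\n", n);
	return 0;
}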
diff -puN arch/x86/kernel/nmi_32.c~git-x86 arch/x86/kernel/nmi_32.c --- a/arch/x86/kernel/nmi_32.c~git-x86 +++ a/arch/x86/kernel/nmi_32.c @@ -52,13 +52,13 @@ static int unknown_nmi_panic_callback(st static int endflag __initdata = 0; +#ifdef CONFIG_SMP /* The performance counters used by NMI_LOCAL_APIC don't trigger when * the CPU is idle. To make sure the NMI watchdog really ticks on all * CPUs during the test make them busy. */ static __init void nmi_cpu_busy(void *data) { -#ifdef CONFIG_SMP local_irq_enable_in_hardirq(); /* Intentionally don't use cpu_relax here. This is to make sure that the performance counter really ticks, @@ -68,8 +68,8 @@ static __init void nmi_cpu_busy(void *da care if they get somewhat less cycles. */ while (endflag == 0) mb(); -#endif } +#endif static int __init check_nmi_watchdog(void) { @@ -88,11 +88,13 @@ static int __init check_nmi_watchdog(voi printk(KERN_INFO "Testing NMI watchdog ... "); +#ifdef CONFIG_SMP if (nmi_watchdog == NMI_LOCAL_APIC) smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0); +#endif for_each_possible_cpu(cpu) - prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count; + prev_nmi_count[cpu] = nmi_count(cpu); local_irq_enable(); mdelay((20*1000)/nmi_hz); // wait 20 ticks @@ -179,7 +181,6 @@ static int lapic_nmi_resume(struct sys_d return 0; } - static struct sysdev_class nmi_sysclass = { set_kset_name("lapic_nmi"), .resume = lapic_nmi_resume, @@ -242,10 +243,10 @@ void acpi_nmi_disable(void) on_each_cpu(__acpi_nmi_disable, NULL, 0, 1); } -void setup_apic_nmi_watchdog (void *unused) +void setup_apic_nmi_watchdog(void *unused) { if (__get_cpu_var(wd_enabled)) - return; + return; /* cheap hack to support suspend/resume */ /* if cpu0 is not active neither should the other cpus */ @@ -334,7 +335,7 @@ __kprobes int nmi_watchdog_tick(struct p unsigned int sum; int touched = 0; int cpu = smp_processor_id(); - int rc=0; + int rc = 0; /* check for other users first */ if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) diff -puN arch/x86/kernel/nmi_64.c~git-x86 arch/x86/kernel/nmi_64.c --- a/arch/x86/kernel/nmi_64.c~git-x86 +++ a/arch/x86/kernel/nmi_64.c @@ -39,7 +39,7 @@ static cpumask_t backtrace_mask = CPU_MA * 0: the lapic NMI watchdog is disabled, but can be enabled */ atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ -int panic_on_timeout; +static int panic_on_timeout; unsigned int nmi_watchdog = NMI_DEFAULT; static unsigned int nmi_hz = HZ; @@ -78,22 +78,22 @@ static __init void nmi_cpu_busy(void *da } #endif -int __init check_nmi_watchdog (void) +int __init check_nmi_watchdog(void) { - int *counts; + int *prev_nmi_count; int cpu; - if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DISABLED)) + if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DISABLED)) return 0; if (!atomic_read(&nmi_active)) return 0; - counts = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); - if (!counts) + prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); + if (!prev_nmi_count) return -1; - printk(KERN_INFO "testing NMI watchdog ... "); + printk(KERN_INFO "Testing NMI watchdog ... 
"); #ifdef CONFIG_SMP if (nmi_watchdog == NMI_LOCAL_APIC) @@ -101,30 +101,29 @@ int __init check_nmi_watchdog (void) #endif for (cpu = 0; cpu < NR_CPUS; cpu++) - counts[cpu] = cpu_pda(cpu)->__nmi_count; + prev_nmi_count[cpu] = cpu_pda(cpu)->__nmi_count; local_irq_enable(); mdelay((20*1000)/nmi_hz); // wait 20 ticks for_each_online_cpu(cpu) { if (!per_cpu(wd_enabled, cpu)) continue; - if (cpu_pda(cpu)->__nmi_count - counts[cpu] <= 5) { + if (cpu_pda(cpu)->__nmi_count - prev_nmi_count[cpu] <= 5) { printk(KERN_WARNING "WARNING: CPU#%d: NMI " "appears to be stuck (%d->%d)!\n", - cpu, - counts[cpu], - cpu_pda(cpu)->__nmi_count); + cpu, + prev_nmi_count[cpu], + cpu_pda(cpu)->__nmi_count); per_cpu(wd_enabled, cpu) = 0; atomic_dec(&nmi_active); } } + endflag = 1; if (!atomic_read(&nmi_active)) { - kfree(counts); + kfree(prev_nmi_count); atomic_set(&nmi_active, -1); - endflag = 1; return -1; } - endflag = 1; printk("OK.\n"); /* now that we know it works we can reduce NMI frequency to @@ -132,11 +131,11 @@ int __init check_nmi_watchdog (void) if (nmi_watchdog == NMI_LOCAL_APIC) nmi_hz = lapic_adjust_nmi_hz(1); - kfree(counts); + kfree(prev_nmi_count); return 0; } -int __init setup_nmi_watchdog(char *str) +static int __init setup_nmi_watchdog(char *str) { int nmi; @@ -159,34 +158,6 @@ int __init setup_nmi_watchdog(char *str) __setup("nmi_watchdog=", setup_nmi_watchdog); - -static void __acpi_nmi_disable(void *__unused) -{ - apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); -} - -/* - * Disable timer based NMIs on all CPUs: - */ -void acpi_nmi_disable(void) -{ - if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) - on_each_cpu(__acpi_nmi_disable, NULL, 0, 1); -} - -static void __acpi_nmi_enable(void *__unused) -{ - apic_write(APIC_LVT0, APIC_DM_NMI); -} - -/* - * Enable timer based NMIs on all CPUs: - */ -void acpi_nmi_enable(void) -{ - if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) - on_each_cpu(__acpi_nmi_enable, NULL, 0, 1); -} #ifdef CONFIG_PM static int nmi_pm_active; /* nmi_active before suspend */ @@ -217,7 +188,7 @@ static struct sysdev_class nmi_sysclass }; static struct sys_device device_lapic_nmi = { - .id = 0, + .id = 0, .cls = &nmi_sysclass, }; @@ -231,7 +202,7 @@ static int __init init_lapic_nmi_sysfs(v if (nmi_watchdog != NMI_LOCAL_APIC) return 0; - if ( atomic_read(&nmi_active) < 0 ) + if (atomic_read(&nmi_active) < 0) return 0; error = sysdev_class_register(&nmi_sysclass); @@ -244,9 +215,37 @@ late_initcall(init_lapic_nmi_sysfs); #endif /* CONFIG_PM */ +static void __acpi_nmi_enable(void *__unused) +{ + apic_write(APIC_LVT0, APIC_DM_NMI); +} + +/* + * Enable timer based NMIs on all CPUs: + */ +void acpi_nmi_enable(void) +{ + if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) + on_each_cpu(__acpi_nmi_enable, NULL, 0, 1); +} + +static void __acpi_nmi_disable(void *__unused) +{ + apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); +} + +/* + * Disable timer based NMIs on all CPUs: + */ +void acpi_nmi_disable(void) +{ + if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) + on_each_cpu(__acpi_nmi_disable, NULL, 0, 1); +} + void setup_apic_nmi_watchdog(void *unused) { - if (__get_cpu_var(wd_enabled) == 1) + if (__get_cpu_var(wd_enabled)) return; /* cheap hack to support suspend/resume */ @@ -311,8 +310,9 @@ void touch_nmi_watchdog(void) } } - touch_softlockup_watchdog(); + touch_softlockup_watchdog(); } +EXPORT_SYMBOL(touch_nmi_watchdog); int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) { @@ -479,4 +479,3 @@ void 
__trigger_all_cpu_backtrace(void) EXPORT_SYMBOL(nmi_active); EXPORT_SYMBOL(nmi_watchdog); -EXPORT_SYMBOL(touch_nmi_watchdog); diff -puN arch/x86/kernel/paravirt_32.c~git-x86 arch/x86/kernel/paravirt_32.c --- a/arch/x86/kernel/paravirt_32.c~git-x86 +++ a/arch/x86/kernel/paravirt_32.c @@ -60,7 +60,7 @@ DEF_NATIVE(pv_irq_ops, irq_enable, "sti" DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf"); DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax"); DEF_NATIVE(pv_cpu_ops, iret, "iret"); -DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit"); +DEF_NATIVE(pv_cpu_ops, irq_enable_syscall_ret, "sti; sysexit"); DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax"); DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3"); DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax"); @@ -88,7 +88,7 @@ static unsigned native_patch(u8 type, u1 SITE(pv_irq_ops, restore_fl); SITE(pv_irq_ops, save_fl); SITE(pv_cpu_ops, iret); - SITE(pv_cpu_ops, irq_enable_sysexit); + SITE(pv_cpu_ops, irq_enable_syscall_ret); SITE(pv_mmu_ops, read_cr2); SITE(pv_mmu_ops, read_cr3); SITE(pv_mmu_ops, write_cr3); @@ -186,7 +186,7 @@ unsigned paravirt_patch_default(u8 type, /* If the operation is a nop, then nop the callsite */ ret = paravirt_patch_nop(); else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || - type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit)) + type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_syscall_ret)) /* If operation requires a jmp, then jmp */ ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len); else @@ -237,7 +237,7 @@ static void native_flush_tlb_single(unsi /* These are in entry.S */ extern void native_iret(void); -extern void native_irq_enable_sysexit(void); +extern void native_irq_enable_syscall_ret(void); static int __init print_banner(void) { @@ -382,9 +382,9 @@ struct pv_cpu_ops pv_cpu_ops = { .write_ldt_entry = write_dt_entry, .write_gdt_entry = write_dt_entry, .write_idt_entry = write_dt_entry, - .load_esp0 = native_load_esp0, + .load_sp0 = native_load_sp0, - .irq_enable_sysexit = native_irq_enable_sysexit, + .irq_enable_syscall_ret = native_irq_enable_syscall_ret, .iret = native_iret, .set_iopl_mask = native_set_iopl_mask, diff -puN arch/x86/kernel/pci-calgary_64.c~git-x86 arch/x86/kernel/pci-calgary_64.c --- a/arch/x86/kernel/pci-calgary_64.c~git-x86 +++ a/arch/x86/kernel/pci-calgary_64.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include @@ -183,7 +182,7 @@ static struct calgary_bus_info bus_info[ /* enable this to stress test the chip's TCE cache */ #ifdef CONFIG_IOMMU_DEBUG -int debugging __read_mostly = 1; +static int debugging = 1; static inline unsigned long verify_bit_range(unsigned long* bitmap, int expected, unsigned long start, unsigned long end) @@ -202,7 +201,7 @@ static inline unsigned long verify_bit_r return ~0UL; } #else /* debugging is disabled */ -int debugging __read_mostly = 0; +static int debugging; static inline unsigned long verify_bit_range(unsigned long* bitmap, int expected, unsigned long start, unsigned long end) diff -puN arch/x86/kernel/pci-dma_64.c~git-x86 arch/x86/kernel/pci-dma_64.c --- a/arch/x86/kernel/pci-dma_64.c~git-x86 +++ a/arch/x86/kernel/pci-dma_64.c @@ -13,7 +13,6 @@ #include int iommu_merge __read_mostly = 0; -EXPORT_SYMBOL(iommu_merge); dma_addr_t bad_dma_address __read_mostly; EXPORT_SYMBOL(bad_dma_address); @@ -230,7 +229,7 @@ EXPORT_SYMBOL(dma_set_mask); * See for the iommu kernel parameter * documentation. 
*/ -__init int iommu_setup(char *p) +static __init int iommu_setup(char *p) { iommu_merge = 1; diff -puN arch/x86/kernel/pci-gart_64.c~git-x86 arch/x86/kernel/pci-gart_64.c --- a/arch/x86/kernel/pci-gart_64.c~git-x86 +++ a/arch/x86/kernel/pci-gart_64.c @@ -1,12 +1,12 @@ /* * Dynamic DMA mapping support for AMD Hammer. - * + * * Use the integrated AGP GART in the Hammer northbridge as an IOMMU for PCI. * This allows to use PCI devices that only support 32bit addresses on systems - * with more than 4GB. + * with more than 4GB. * * See Documentation/DMA-mapping.txt for the interface specification. - * + * * Copyright 2002 Andi Kleen, SuSE Labs. * Subject to the GNU General Public License v2 only. */ @@ -37,23 +37,26 @@ #include static unsigned long iommu_bus_base; /* GART remapping area (physical) */ -static unsigned long iommu_size; /* size of remapping area bytes */ +static unsigned long iommu_size; /* size of remapping area bytes */ static unsigned long iommu_pages; /* .. and in pages */ -static u32 *iommu_gatt_base; /* Remapping table */ +static u32 *iommu_gatt_base; /* Remapping table */ -/* If this is disabled the IOMMU will use an optimized flushing strategy - of only flushing when an mapping is reused. With it true the GART is flushed - for every mapping. Problem is that doing the lazy flush seems to trigger - bugs with some popular PCI cards, in particular 3ware (but has been also - also seen with Qlogic at least). */ +/* + * If this is disabled the IOMMU will use an optimized flushing strategy + * of only flushing when an mapping is reused. With it true the GART is + * flushed for every mapping. Problem is that doing the lazy flush seems + * to trigger bugs with some popular PCI cards, in particular 3ware (but + * has been also also seen with Qlogic at least). + */ int iommu_fullflush = 1; -/* Allocation bitmap for the remapping area */ +/* Allocation bitmap for the remapping area: */ static DEFINE_SPINLOCK(iommu_bitmap_lock); -static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */ +/* Guarded by iommu_bitmap_lock: */ +static unsigned long *iommu_gart_bitmap; -static u32 gart_unmapped_entry; +static u32 gart_unmapped_entry; #define GPTE_VALID 1 #define GPTE_COHERENT 2 @@ -61,10 +64,10 @@ static u32 gart_unmapped_entry; (((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT) #define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28)) -#define to_pages(addr,size) \ +#define to_pages(addr, size) \ (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT) -#define EMERGENCY_PAGES 32 /* = 128KB */ +#define EMERGENCY_PAGES 32 /* = 128KB */ #ifdef CONFIG_AGP #define AGPEXTERN extern @@ -77,130 +80,152 @@ AGPEXTERN int agp_memory_reserved; AGPEXTERN __u32 *agp_gatt_table; static unsigned long next_bit; /* protected by iommu_bitmap_lock */ -static int need_flush; /* global flush state. set for each gart wrap */ +static int need_flush; /* global flush state. 
set for each gart wrap */ -static unsigned long alloc_iommu(int size) -{ +static unsigned long alloc_iommu(int size) +{ unsigned long offset, flags; - spin_lock_irqsave(&iommu_bitmap_lock, flags); - offset = find_next_zero_string(iommu_gart_bitmap,next_bit,iommu_pages,size); + spin_lock_irqsave(&iommu_bitmap_lock, flags); + offset = find_next_zero_string(iommu_gart_bitmap, next_bit, + iommu_pages, size); if (offset == -1) { need_flush = 1; - offset = find_next_zero_string(iommu_gart_bitmap,0,iommu_pages,size); + offset = find_next_zero_string(iommu_gart_bitmap, 0, + iommu_pages, size); } - if (offset != -1) { - set_bit_string(iommu_gart_bitmap, offset, size); - next_bit = offset+size; - if (next_bit >= iommu_pages) { + if (offset != -1) { + set_bit_string(iommu_gart_bitmap, offset, size); + next_bit = offset+size; + if (next_bit >= iommu_pages) { next_bit = 0; need_flush = 1; - } - } + } + } if (iommu_fullflush) need_flush = 1; - spin_unlock_irqrestore(&iommu_bitmap_lock, flags); + spin_unlock_irqrestore(&iommu_bitmap_lock, flags); + return offset; -} +} static void free_iommu(unsigned long offset, int size) -{ +{ unsigned long flags; + spin_lock_irqsave(&iommu_bitmap_lock, flags); __clear_bit_string(iommu_gart_bitmap, offset, size); spin_unlock_irqrestore(&iommu_bitmap_lock, flags); -} +} -/* +/* * Use global flush state to avoid races with multiple flushers. */ static void flush_gart(void) -{ +{ unsigned long flags; + spin_lock_irqsave(&iommu_bitmap_lock, flags); if (need_flush) { k8_flush_garts(); need_flush = 0; - } + } spin_unlock_irqrestore(&iommu_bitmap_lock, flags); -} +} #ifdef CONFIG_IOMMU_LEAK -#define SET_LEAK(x) if (iommu_leak_tab) \ - iommu_leak_tab[x] = __builtin_return_address(0); -#define CLEAR_LEAK(x) if (iommu_leak_tab) \ - iommu_leak_tab[x] = NULL; +#define SET_LEAK(x) \ + do { \ + if (iommu_leak_tab) \ + iommu_leak_tab[x] = __builtin_return_address(0);\ + } while (0) + +#define CLEAR_LEAK(x) \ + do { \ + if (iommu_leak_tab) \ + iommu_leak_tab[x] = NULL; \ + } while (0) /* Debugging aid for drivers that don't free their IOMMU tables */ -static void **iommu_leak_tab; +static void **iommu_leak_tab; static int leak_trace; static int iommu_leak_pages = 20; + static void dump_leak(void) { int i; - static int dump; - if (dump || !iommu_leak_tab) return; + static int dump; + + if (dump || !iommu_leak_tab) + return; dump = 1; - show_stack(NULL,NULL); - /* Very crude. dump some from the end of the table too */ - printk("Dumping %d pages from end of IOMMU:\n", iommu_leak_pages); - for (i = 0; i < iommu_leak_pages; i+=2) { - printk("%lu: ", iommu_pages-i); + show_stack(NULL, NULL); + + /* Very crude. dump some from the end of the table too */ + printk(KERN_DEBUG "Dumping %d pages from end of IOMMU:\n", + iommu_leak_pages); + for (i = 0; i < iommu_leak_pages; i += 2) { + printk(KERN_DEBUG "%lu: ", iommu_pages-i); printk_address((unsigned long) iommu_leak_tab[iommu_pages-i]); - printk("%c", (i+1)%2 == 0 ? '\n' : ' '); - } - printk("\n"); + printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' '); + } + printk(KERN_DEBUG "\n"); } #else -#define SET_LEAK(x) -#define CLEAR_LEAK(x) +# define SET_LEAK(x) +# define CLEAR_LEAK(x) #endif static void iommu_full(struct device *dev, size_t size, int dir) { - /* + /* * Ran out of IOMMU space for this operation. This is very bad. * Unfortunately the drivers cannot handle this operation properly. 
- * Return some non mapped prereserved space in the aperture and + * Return some non mapped prereserved space in the aperture and * let the Northbridge deal with it. This will result in garbage * in the IO operation. When the size exceeds the prereserved space - * memory corruption will occur or random memory will be DMAed + * memory corruption will occur or random memory will be DMAed * out. Hopefully no network devices use single mappings that big. - */ - - printk(KERN_ERR - "PCI-DMA: Out of IOMMU space for %lu bytes at device %s\n", - size, dev->bus_id); + */ + + printk(KERN_ERR + "PCI-DMA: Out of IOMMU space for %lu bytes at device %s\n", + size, dev->bus_id); if (size > PAGE_SIZE*EMERGENCY_PAGES) { if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL) panic("PCI-DMA: Memory would be corrupted\n"); - if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL) - panic(KERN_ERR "PCI-DMA: Random memory would be DMAed\n"); - } - + if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL) + panic(KERN_ERR + "PCI-DMA: Random memory would be DMAed\n"); + } #ifdef CONFIG_IOMMU_LEAK - dump_leak(); + dump_leak(); #endif -} +} -static inline int need_iommu(struct device *dev, unsigned long addr, size_t size) -{ +static inline int +need_iommu(struct device *dev, unsigned long addr, size_t size) +{ u64 mask = *dev->dma_mask; int high = addr + size > mask; int mmu = high; - if (force_iommu) - mmu = 1; - return mmu; + + if (force_iommu) + mmu = 1; + + return mmu; } -static inline int nonforced_iommu(struct device *dev, unsigned long addr, size_t size) -{ +static inline int +nonforced_iommu(struct device *dev, unsigned long addr, size_t size) +{ u64 mask = *dev->dma_mask; int high = addr + size > mask; int mmu = high; - return mmu; + + return mmu; } /* Map a single continuous physical area into the IOMMU. @@ -208,13 +233,14 @@ static inline int nonforced_iommu(struct */ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, size_t size, int dir) -{ +{ unsigned long npages = to_pages(phys_mem, size); unsigned long iommu_page = alloc_iommu(npages); int i; + if (iommu_page == -1) { if (!nonforced_iommu(dev, phys_mem, size)) - return phys_mem; + return phys_mem; if (panic_on_overflow) panic("dma_map_area overflow %lu bytes\n", size); iommu_full(dev, size, dir); @@ -229,35 +255,39 @@ static dma_addr_t dma_map_area(struct de return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK); } -static dma_addr_t gart_map_simple(struct device *dev, char *buf, - size_t size, int dir) +static dma_addr_t +gart_map_simple(struct device *dev, char *buf, size_t size, int dir) { dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir); + flush_gart(); + return map; } /* Map a single area into the IOMMU */ -static dma_addr_t gart_map_single(struct device *dev, void *addr, size_t size, int dir) +static dma_addr_t +gart_map_single(struct device *dev, void *addr, size_t size, int dir) { unsigned long phys_mem, bus; if (!dev) dev = &fallback_dev; - phys_mem = virt_to_phys(addr); + phys_mem = virt_to_phys(addr); if (!need_iommu(dev, phys_mem, size)) - return phys_mem; + return phys_mem; bus = gart_map_simple(dev, addr, size, dir); - return bus; + + return bus; } /* * Free a DMA mapping. 
*/ static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr, - size_t size, int direction) + size_t size, int direction) { unsigned long iommu_page; int npages; @@ -266,6 +296,7 @@ static void gart_unmap_single(struct dev if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE || dma_addr >= iommu_bus_base + iommu_size) return; + iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT; npages = to_pages(dma_addr, size); for (i = 0; i < npages; i++) { @@ -278,7 +309,8 @@ static void gart_unmap_single(struct dev /* * Wrapper for pci_unmap_single working with scatterlists. */ -static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) +static void +gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) { struct scatterlist *s; int i; @@ -303,12 +335,13 @@ static int dma_map_sg_nonforce(struct de for_each_sg(sg, s, nents, i) { unsigned long addr = sg_phys(s); - if (nonforced_iommu(dev, addr, s->length)) { + + if (nonforced_iommu(dev, addr, s->length)) { addr = dma_map_area(dev, addr, s->length, dir); - if (addr == bad_dma_address) { - if (i > 0) + if (addr == bad_dma_address) { + if (i > 0) gart_unmap_sg(dev, sg, i, dir); - nents = 0; + nents = 0; sg[0].dma_length = 0; break; } @@ -317,15 +350,16 @@ static int dma_map_sg_nonforce(struct de s->dma_length = s->length; } flush_gart(); + return nents; } /* Map multiple scatterlist entries continuous into the first. */ static int __dma_map_cont(struct scatterlist *start, int nelems, - struct scatterlist *sout, unsigned long pages) + struct scatterlist *sout, unsigned long pages) { unsigned long iommu_start = alloc_iommu(pages); - unsigned long iommu_page = iommu_start; + unsigned long iommu_page = iommu_start; struct scatterlist *s; int i; @@ -335,32 +369,33 @@ static int __dma_map_cont(struct scatter for_each_sg(start, s, nelems, i) { unsigned long pages, addr; unsigned long phys_addr = s->dma_address; - + BUG_ON(s != start && s->offset); if (s == start) { sout->dma_address = iommu_bus_base; sout->dma_address += iommu_page*PAGE_SIZE + s->offset; sout->dma_length = s->length; - } else { - sout->dma_length += s->length; + } else { + sout->dma_length += s->length; } addr = phys_addr; - pages = to_pages(s->offset, s->length); - while (pages--) { - iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); + pages = to_pages(s->offset, s->length); + while (pages--) { + iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); SET_LEAK(iommu_page); addr += PAGE_SIZE; iommu_page++; } - } - BUG_ON(iommu_page - iommu_start != pages); + } + BUG_ON(iommu_page - iommu_start != pages); + return 0; } -static inline int dma_map_cont(struct scatterlist *start, int nelems, - struct scatterlist *sout, - unsigned long pages, int need) +static inline int +dma_map_cont(struct scatterlist *start, int nelems, struct scatterlist *sout, + unsigned long pages, int need) { if (!need) { BUG_ON(nelems != 1); @@ -370,22 +405,19 @@ static inline int dma_map_cont(struct sc } return __dma_map_cont(start, nelems, sout, pages); } - + /* * DMA map all entries in a scatterlist. - * Merge chunks that have page aligned sizes into a continuous mapping. + * Merge chunks that have page aligned sizes into a continuous mapping. 
*/ -static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, - int dir) +static int +gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) { - int i; - int out; - int start; - unsigned long pages = 0; - int need = 0, nextneed; struct scatterlist *s, *ps, *start_sg, *sgmap; + int need = 0, nextneed, i, out, start; + unsigned long pages = 0; - if (nents == 0) + if (nents == 0) return 0; if (!dev) @@ -397,15 +429,19 @@ static int gart_map_sg(struct device *de ps = NULL; /* shut up gcc */ for_each_sg(sg, s, nents, i) { dma_addr_t addr = sg_phys(s); + s->dma_address = addr; - BUG_ON(s->length == 0); + BUG_ON(s->length == 0); - nextneed = need_iommu(dev, addr, s->length); + nextneed = need_iommu(dev, addr, s->length); /* Handle the previous not yet processed entries */ if (i > start) { - /* Can only merge when the last chunk ends on a page - boundary and the new one doesn't have an offset. */ + /* + * Can only merge when the last chunk ends on a + * page boundary and the new one doesn't have an + * offset. + */ if (!iommu_merge || !nextneed || !need || s->offset || (ps->offset + ps->length) % PAGE_SIZE) { if (dma_map_cont(start_sg, i - start, sgmap, @@ -436,6 +472,7 @@ static int gart_map_sg(struct device *de error: flush_gart(); gart_unmap_sg(dev, sg, out, dir); + /* When it was forced or merged try again in a dumb way */ if (force_iommu || iommu_merge) { out = dma_map_sg_nonforce(dev, sg, nents, dir); @@ -444,64 +481,68 @@ error: } if (panic_on_overflow) panic("dma_map_sg: overflow on %lu pages\n", pages); + iommu_full(dev, pages << PAGE_SHIFT, dir); for_each_sg(sg, s, nents, i) s->dma_address = bad_dma_address; return 0; -} +} static int no_agp; static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) -{ - unsigned long a; - if (!iommu_size) { - iommu_size = aper_size; - if (!no_agp) - iommu_size /= 2; - } +{ + unsigned long a; - a = aper + iommu_size; + if (!iommu_size) { + iommu_size = aper_size; + if (!no_agp) + iommu_size /= 2; + } + + a = aper + iommu_size; iommu_size -= round_up(a, LARGE_PAGE_SIZE) - a; - if (iommu_size < 64*1024*1024) + if (iommu_size < 64*1024*1024) { printk(KERN_WARNING - "PCI-DMA: Warning: Small IOMMU %luMB. Consider increasing the AGP aperture in BIOS\n",iommu_size>>20); - + "PCI-DMA: Warning: Small IOMMU %luMB." + " Consider increasing the AGP aperture in BIOS\n", + iommu_size >> 20); + } + return iommu_size; -} +} -static __init unsigned read_aperture(struct pci_dev *dev, u32 *size) -{ - unsigned aper_size = 0, aper_base_32; +static __init unsigned read_aperture(struct pci_dev *dev, u32 *size) +{ + unsigned aper_size = 0, aper_base_32, aper_order; u64 aper_base; - unsigned aper_order; - pci_read_config_dword(dev, 0x94, &aper_base_32); + pci_read_config_dword(dev, 0x94, &aper_base_32); pci_read_config_dword(dev, 0x90, &aper_order); - aper_order = (aper_order >> 1) & 7; + aper_order = (aper_order >> 1) & 7; - aper_base = aper_base_32 & 0x7fff; + aper_base = aper_base_32 & 0x7fff; aper_base <<= 25; - aper_size = (32 * 1024 * 1024) << aper_order; - if (aper_base + aper_size > 0x100000000UL || !aper_size) + aper_size = (32 * 1024 * 1024) << aper_order; + if (aper_base + aper_size > 0x100000000UL || !aper_size) aper_base = 0; *size = aper_size; return aper_base; -} +} -/* +/* * Private Northbridge GATT initialization in case we cannot use the - * AGP driver for some reason. + * AGP driver for some reason. 
*/ static __init int init_k8_gatt(struct agp_kern_info *info) -{ +{ + unsigned aper_size, gatt_size, new_aper_size; + unsigned aper_base, new_aper_base; struct pci_dev *dev; void *gatt; - unsigned aper_base, new_aper_base; - unsigned aper_size, gatt_size, new_aper_size; int i; printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); @@ -509,75 +550,77 @@ static __init int init_k8_gatt(struct ag dev = NULL; for (i = 0; i < num_k8_northbridges; i++) { dev = k8_northbridges[i]; - new_aper_base = read_aperture(dev, &new_aper_size); - if (!new_aper_base) - goto nommu; - - if (!aper_base) { + new_aper_base = read_aperture(dev, &new_aper_size); + if (!new_aper_base) + goto nommu; + + if (!aper_base) { aper_size = new_aper_size; aper_base = new_aper_base; - } - if (aper_size != new_aper_size || aper_base != new_aper_base) + } + if (aper_size != new_aper_size || aper_base != new_aper_base) goto nommu; } if (!aper_base) - goto nommu; + goto nommu; info->aper_base = aper_base; - info->aper_size = aper_size>>20; + info->aper_size = aper_size >> 20; - gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32); - gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size)); - if (!gatt) + gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32); + gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size)); + if (!gatt) panic("Cannot allocate GATT table"); - if (change_page_attr_addr((unsigned long)gatt, gatt_size >> PAGE_SHIFT, PAGE_KERNEL_NOCACHE)) + if (change_page_attr_addr((unsigned long)gatt, gatt_size >> PAGE_SHIFT, + PAGE_KERNEL_NOCACHE)) panic("Could not set GART PTEs to uncacheable pages"); global_flush_tlb(); - memset(gatt, 0, gatt_size); + memset(gatt, 0, gatt_size); agp_gatt_table = gatt; for (i = 0; i < num_k8_northbridges; i++) { - u32 ctl; - u32 gatt_reg; + u32 gatt_reg; + u32 ctl; dev = k8_northbridges[i]; - gatt_reg = __pa(gatt) >> 12; - gatt_reg <<= 4; + gatt_reg = __pa(gatt) >> 12; + gatt_reg <<= 4; pci_write_config_dword(dev, 0x98, gatt_reg); - pci_read_config_dword(dev, 0x90, &ctl); + pci_read_config_dword(dev, 0x90, &ctl); ctl |= 1; ctl &= ~((1<<4) | (1<<5)); - pci_write_config_dword(dev, 0x90, ctl); + pci_write_config_dword(dev, 0x90, ctl); } flush_gart(); - - printk("PCI-DMA: aperture base @ %x size %u KB\n",aper_base, aper_size>>10); + + printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", + aper_base, aper_size>>10); return 0; nommu: - /* Should not happen anymore */ + /* Should not happen anymore */ printk(KERN_ERR "PCI-DMA: More than 4GB of RAM and no IOMMU\n" KERN_ERR "PCI-DMA: 32bit PCI IO may malfunction.\n"); - return -1; -} + return -1; +} extern int agp_amd64_init(void); static const struct dma_mapping_ops gart_dma_ops = { - .mapping_error = NULL, - .map_single = gart_map_single, - .map_simple = gart_map_simple, - .unmap_single = gart_unmap_single, - .sync_single_for_cpu = NULL, - .sync_single_for_device = NULL, - .sync_single_range_for_cpu = NULL, - .sync_single_range_for_device = NULL, - .sync_sg_for_cpu = NULL, - .sync_sg_for_device = NULL, - .map_sg = gart_map_sg, - .unmap_sg = gart_unmap_sg, + .mapping_error = NULL, + .map_single = gart_map_single, + .map_simple = gart_map_simple, + .unmap_single = gart_unmap_single, + .sync_single_for_cpu = NULL, + .sync_single_for_device = NULL, + .sync_single_range_for_cpu = NULL, + .sync_single_range_for_device = NULL, + .sync_sg_for_cpu = NULL, + .sync_sg_for_device = NULL, + .map_sg = gart_map_sg, + .unmap_sg = gart_unmap_sg, }; void gart_iommu_shutdown(void) @@ -588,23 +631,23 @@ void gart_iommu_shutdown(void) if (no_agp && 
(dma_ops != &gart_dma_ops)) return; - for (i = 0; i < num_k8_northbridges; i++) { - u32 ctl; + for (i = 0; i < num_k8_northbridges; i++) { + u32 ctl; - dev = k8_northbridges[i]; - pci_read_config_dword(dev, 0x90, &ctl); + dev = k8_northbridges[i]; + pci_read_config_dword(dev, 0x90, &ctl); - ctl &= ~1; + ctl &= ~1; - pci_write_config_dword(dev, 0x90, ctl); - } + pci_write_config_dword(dev, 0x90, ctl); + } } void __init gart_iommu_init(void) -{ +{ struct agp_kern_info info; - unsigned long aper_size; unsigned long iommu_start; + unsigned long aper_size; unsigned long scratch; long i; @@ -614,14 +657,14 @@ void __init gart_iommu_init(void) } #ifndef CONFIG_AGP_AMD64 - no_agp = 1; + no_agp = 1; #else /* Makefile puts PCI initialization via subsys_initcall first. */ /* Add other K8 AGP bridge drivers here */ - no_agp = no_agp || - (agp_amd64_init() < 0) || + no_agp = no_agp || + (agp_amd64_init() < 0) || (agp_copy_info(agp_bridge, &info) < 0); -#endif +#endif if (swiotlb) return; @@ -643,77 +686,78 @@ void __init gart_iommu_init(void) } printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); - aper_size = info.aper_size * 1024 * 1024; - iommu_size = check_iommu_size(info.aper_base, aper_size); - iommu_pages = iommu_size >> PAGE_SHIFT; - - iommu_gart_bitmap = (void*)__get_free_pages(GFP_KERNEL, - get_order(iommu_pages/8)); - if (!iommu_gart_bitmap) - panic("Cannot allocate iommu bitmap\n"); + aper_size = info.aper_size * 1024 * 1024; + iommu_size = check_iommu_size(info.aper_base, aper_size); + iommu_pages = iommu_size >> PAGE_SHIFT; + + iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL, + get_order(iommu_pages/8)); + if (!iommu_gart_bitmap) + panic("Cannot allocate iommu bitmap\n"); memset(iommu_gart_bitmap, 0, iommu_pages/8); #ifdef CONFIG_IOMMU_LEAK - if (leak_trace) { - iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL, + if (leak_trace) { + iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL, get_order(iommu_pages*sizeof(void *))); - if (iommu_leak_tab) - memset(iommu_leak_tab, 0, iommu_pages * 8); + if (iommu_leak_tab) + memset(iommu_leak_tab, 0, iommu_pages * 8); else - printk("PCI-DMA: Cannot allocate leak trace area\n"); - } + printk(KERN_DEBUG + "PCI-DMA: Cannot allocate leak trace area\n"); + } #endif - /* + /* * Out of IOMMU space handling. - * Reserve some invalid pages at the beginning of the GART. - */ - set_bit_string(iommu_gart_bitmap, 0, EMERGENCY_PAGES); + * Reserve some invalid pages at the beginning of the GART. + */ + set_bit_string(iommu_gart_bitmap, 0, EMERGENCY_PAGES); - agp_memory_reserved = iommu_size; + agp_memory_reserved = iommu_size; printk(KERN_INFO "PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n", - iommu_size>>20); + iommu_size >> 20); - iommu_start = aper_size - iommu_size; - iommu_bus_base = info.aper_base + iommu_start; + iommu_start = aper_size - iommu_size; + iommu_bus_base = info.aper_base + iommu_start; bad_dma_address = iommu_bus_base; iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT); - /* + /* * Unmap the IOMMU part of the GART. The alias of the page is * always mapped with cache enabled and there is no full cache * coherency across the GART remapping. The unmapping avoids * automatic prefetches from the CPU allocating cache lines in * there. All CPU accesses are done via the direct mapping to * the backing memory. The GART address is only used by PCI - * devices. + * devices. 
*/ clear_kernel_mapping((unsigned long)__va(iommu_bus_base), iommu_size); - /* - * Try to workaround a bug (thanks to BenH) - * Set unmapped entries to a scratch page instead of 0. + /* + * Try to workaround a bug (thanks to BenH) + * Set unmapped entries to a scratch page instead of 0. * Any prefetches that hit unmapped entries won't get an bus abort * then. */ - scratch = get_zeroed_page(GFP_KERNEL); - if (!scratch) + scratch = get_zeroed_page(GFP_KERNEL); + if (!scratch) panic("Cannot allocate iommu scratch page"); gart_unmapped_entry = GPTE_ENCODE(__pa(scratch)); - for (i = EMERGENCY_PAGES; i < iommu_pages; i++) + for (i = EMERGENCY_PAGES; i < iommu_pages; i++) iommu_gatt_base[i] = gart_unmapped_entry; flush_gart(); dma_ops = &gart_dma_ops; -} +} void __init gart_parse_options(char *p) { int arg; #ifdef CONFIG_IOMMU_LEAK - if (!strncmp(p,"leak",4)) { + if (!strncmp(p, "leak", 4)) { leak_trace = 1; p += 4; if (*p == '=') ++p; @@ -723,18 +767,18 @@ void __init gart_parse_options(char *p) #endif if (isdigit(*p) && get_option(&p, &arg)) iommu_size = arg; - if (!strncmp(p, "fullflush",8)) + if (!strncmp(p, "fullflush", 8)) iommu_fullflush = 1; - if (!strncmp(p, "nofullflush",11)) + if (!strncmp(p, "nofullflush", 11)) iommu_fullflush = 0; - if (!strncmp(p,"noagp",5)) + if (!strncmp(p, "noagp", 5)) no_agp = 1; - if (!strncmp(p, "noaperture",10)) + if (!strncmp(p, "noaperture", 10)) fix_aperture = 0; /* duplicated from pci-dma.c */ - if (!strncmp(p,"force",5)) + if (!strncmp(p, "force", 5)) gart_iommu_aperture_allowed = 1; - if (!strncmp(p,"allowed",7)) + if (!strncmp(p, "allowed", 7)) gart_iommu_aperture_allowed = 1; if (!strncmp(p, "memaper", 7)) { fallback_aper_force = 1; diff -puN arch/x86/kernel/pci-swiotlb_64.c~git-x86 arch/x86/kernel/pci-swiotlb_64.c --- a/arch/x86/kernel/pci-swiotlb_64.c~git-x86 +++ a/arch/x86/kernel/pci-swiotlb_64.c @@ -10,7 +10,6 @@ #include int swiotlb __read_mostly; -EXPORT_SYMBOL(swiotlb); const struct dma_mapping_ops swiotlb_dma_ops = { .mapping_error = swiotlb_dma_mapping_error, diff -puN arch/x86/kernel/pmtimer_64.c~git-x86 arch/x86/kernel/pmtimer_64.c --- a/arch/x86/kernel/pmtimer_64.c~git-x86 +++ a/arch/x86/kernel/pmtimer_64.c @@ -19,13 +19,13 @@ #include #include #include +#include + #include #include #include #include -#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */ - static inline u32 cyc2us(u32 cycles) { /* The Power Management Timer ticks at 3.579545 ticks per microsecond. 
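The restyled alloc_iommu()/free_iommu() above form a next-fit bitmap allocator with deferred flushing: the search resumes at next_bit, the GART is flushed only once the cursor wraps back to 0 (or on every allocation when iommu_fullflush is set), and freeing never flushes, so a freed entry is never handed out again without an intervening flush of the stale translations. A self-contained model of that policy follows; the names (find_zero_run, MODEL_PAGES) and sizes are illustrative rather than the kernel's helpers, and the spinlock is omitted.

#include <stdbool.h>
#include <stdio.h>

#define MODEL_PAGES 64		/* stands in for iommu_pages */

static bool used[MODEL_PAGES];	/* stands in for iommu_gart_bitmap */
static int next_bit;		/* next-fit cursor */
static bool need_flush;		/* set on wrap, consumed by the flush step */

/* find 'size' consecutive free pages at or after 'from'; -1 if none */
static int find_zero_run(int from, int size)
{
	for (int i = from; i + size <= MODEL_PAGES; i++) {
		int j;
		for (j = 0; j < size && !used[i + j]; j++)
			;
		if (j == size)
			return i;
		i += j;		/* restart just past the busy page we hit */
	}
	return -1;
}

static int alloc_iommu_model(int size)
{
	int offset = find_zero_run(next_bit, size);

	if (offset == -1) {	/* nothing above the cursor: wrap and flush */
		need_flush = true;
		offset = find_zero_run(0, size);
	}
	if (offset != -1) {
		for (int i = 0; i < size; i++)
			used[offset + i] = true;
		next_bit = offset + size;
		if (next_bit >= MODEL_PAGES) {
			next_bit = 0;
			need_flush = true;
		}
	}
	return offset;
}

static void free_iommu_model(int offset, int size)
{
	/* freeing never flushes; the flush is deferred to the next wrap */
	for (int i = 0; i < size; i++)
		used[offset + i] = false;
}

int main(void)
{
	int a = alloc_iommu_model(48);	/* fills pages 0..47, cursor at 48 */
	free_iommu_model(a, 48);	/* space is free, cursor unchanged */
	int b = alloc_iommu_model(32);	/* nothing fits above 48: wrap + flush */
	printf("a=%d b=%d need_flush=%d\n", a, b, need_flush);
	return 0;
}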
diff -puN arch/x86/kernel/process_32.c~git-x86 arch/x86/kernel/process_32.c --- a/arch/x86/kernel/process_32.c~git-x86 +++ a/arch/x86/kernel/process_32.c @@ -55,6 +55,7 @@ #include #include +#include asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); @@ -74,7 +75,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_number); */ unsigned long thread_saved_pc(struct task_struct *tsk) { - return ((unsigned long *)tsk->thread.esp)[3]; + return ((unsigned long *)tsk->thread.sp)[3]; } /* @@ -113,10 +114,19 @@ void default_idle(void) smp_mb(); local_irq_disable(); - if (!need_resched()) + if (!need_resched()) { + ktime_t t0, t1; + u64 t0n, t1n; + + t0 = ktime_get(); + t0n = ktime_to_ns(t0); safe_halt(); /* enables interrupts racelessly */ - else - local_irq_enable(); + local_irq_disable(); + t1 = ktime_get(); + t1n = ktime_to_ns(t1); + sched_clock_idle_wakeup_event(t1n - t0n); + } + local_irq_enable(); current_thread_info()->status |= TS_POLLING; } else { /* loop is done by the caller */ @@ -132,7 +142,7 @@ EXPORT_SYMBOL(default_idle); * to poll the ->work.need_resched flag instead of waiting for the * cross-CPU IPI to arrive. Use this option with caution. */ -static void poll_idle (void) +static void poll_idle(void) { cpu_relax(); } @@ -244,13 +254,13 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); * New with Core Duo processors, MWAIT can take some hints based on CPU * capability. */ -void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) +void mwait_idle_with_hints(unsigned long ax, unsigned long cx) { if (!need_resched()) { __monitor((void *)&current_thread_info()->flags, 0, 0); smp_mb(); if (!need_resched()) - __mwait(eax, ecx); + __mwait(ax, cx); } } @@ -299,15 +309,15 @@ void __show_registers(struct pt_regs *re { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; unsigned long d0, d1, d2, d3, d6, d7; - unsigned long esp; + unsigned long sp; unsigned short ss, gs; if (user_mode_vm(regs)) { - esp = regs->esp; - ss = regs->xss & 0xffff; + sp = regs->sp; + ss = regs->ss & 0xffff; savesegment(gs, gs); } else { - esp = (unsigned long) (&regs->esp); + sp = (unsigned long) (&regs->sp); savesegment(ss, ss); savesegment(gs, gs); } @@ -320,17 +330,17 @@ void __show_registers(struct pt_regs *re init_utsname()->version); printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", - 0xffff & regs->xcs, regs->eip, regs->eflags, + 0xffff & regs->cs, regs->ip, regs->flags, smp_processor_id()); - print_symbol("EIP is at %s\n", regs->eip); + print_symbol("EIP is at %s\n", regs->ip); printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", - regs->eax, regs->ebx, regs->ecx, regs->edx); + regs->ax, regs->bx, regs->cx, regs->dx); printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n", - regs->esi, regs->edi, regs->ebp, esp); + regs->si, regs->di, regs->bp, sp); printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n", - regs->xds & 0xffff, regs->xes & 0xffff, - regs->xfs & 0xffff, gs, ss); + regs->ds & 0xffff, regs->es & 0xffff, + regs->fs & 0xffff, gs, ss); if (!all) return; @@ -358,12 +368,12 @@ void __show_registers(struct pt_regs *re void show_regs(struct pt_regs *regs) { __show_registers(regs, 1); - show_trace(NULL, regs, &regs->esp); + show_trace(NULL, regs, &regs->sp); } /* - * This gets run with %ebx containing the - * function to call, and %edx containing + * This gets run with %bx containing the + * function to call, and %dx containing * the "args". 
*/ extern void kernel_thread_helper(void); @@ -377,16 +387,16 @@ int kernel_thread(int (*fn)(void *), voi memset(&regs, 0, sizeof(regs)); - regs.ebx = (unsigned long) fn; - regs.edx = (unsigned long) arg; + regs.bx = (unsigned long) fn; + regs.dx = (unsigned long) arg; - regs.xds = __USER_DS; - regs.xes = __USER_DS; - regs.xfs = __KERNEL_PERCPU; - regs.orig_eax = -1; - regs.eip = (unsigned long) kernel_thread_helper; - regs.xcs = __KERNEL_CS | get_kernel_rpl(); - regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2; + regs.ds = __USER_DS; + regs.es = __USER_DS; + regs.fs = __KERNEL_PERCPU; + regs.orig_ax = -1; + regs.ip = (unsigned long) kernel_thread_helper; + regs.cs = __KERNEL_CS | get_kernel_rpl(); + regs.flags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2; /* Ok, create the new process.. */ return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL); @@ -424,7 +434,12 @@ void flush_thread(void) { struct task_struct *tsk = current; - memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); + tsk->thread.debugreg0 = 0; + tsk->thread.debugreg1 = 0; + tsk->thread.debugreg2 = 0; + tsk->thread.debugreg3 = 0; + tsk->thread.debugreg6 = 0; + tsk->thread.debugreg7 = 0; memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); clear_tsk_thread_flag(tsk, TIF_DEBUG); /* @@ -449,7 +464,7 @@ void prepare_to_copy(struct task_struct unlazy_fpu(tsk); } -int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, +int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, unsigned long unused, struct task_struct * p, struct pt_regs * regs) { @@ -459,15 +474,15 @@ int copy_thread(int nr, unsigned long cl childregs = task_pt_regs(p); *childregs = *regs; - childregs->eax = 0; - childregs->esp = esp; + childregs->ax = 0; + childregs->sp = sp; - p->thread.esp = (unsigned long) childregs; - p->thread.esp0 = (unsigned long) (childregs+1); + p->thread.sp = (unsigned long) childregs; + p->thread.sp0 = (unsigned long) (childregs+1); - p->thread.eip = (unsigned long) ret_from_fork; + p->thread.ip = (unsigned long) ret_from_fork; - savesegment(gs,p->thread.gs); + savesegment(gs, p->thread.gs); tsk = current; if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { @@ -480,32 +495,15 @@ int copy_thread(int nr, unsigned long cl set_tsk_thread_flag(p, TIF_IO_BITMAP); } + err = 0; + /* * Set a new TLS for the child thread? */ - if (clone_flags & CLONE_SETTLS) { - struct desc_struct *desc; - struct user_desc info; - int idx; - - err = -EFAULT; - if (copy_from_user(&info, (void __user *)childregs->esi, sizeof(info))) - goto out; - err = -EINVAL; - if (LDT_empty(&info)) - goto out; - - idx = info.entry_number; - if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) - goto out; - - desc = p->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; - desc->a = LDT_entry_a(&info); - desc->b = LDT_entry_b(&info); - } + if (clone_flags & CLONE_SETTLS) + err = do_set_thread_area(p, -1, + (struct user_desc __user *)childregs->si, 0); - err = 0; - out: if (err && p->thread.io_bitmap_ptr) { kfree(p->thread.io_bitmap_ptr); p->thread.io_bitmap_max = 0; @@ -518,54 +516,60 @@ int copy_thread(int nr, unsigned long cl */ void dump_thread(struct pt_regs * regs, struct user * dump) { - int i; + u16 gs; /* changed the size calculations - should hopefully work better. 
lbt */ dump->magic = CMAGIC; dump->start_code = 0; - dump->start_stack = regs->esp & ~(PAGE_SIZE - 1); + dump->start_stack = regs->sp & ~(PAGE_SIZE - 1); dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT; dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT; dump->u_dsize -= dump->u_tsize; dump->u_ssize = 0; - for (i = 0; i < 8; i++) - dump->u_debugreg[i] = current->thread.debugreg[i]; + dump->u_debugreg[0] = current->thread.debugreg0; + dump->u_debugreg[1] = current->thread.debugreg1; + dump->u_debugreg[2] = current->thread.debugreg2; + dump->u_debugreg[3] = current->thread.debugreg3; + dump->u_debugreg[4] = 0; + dump->u_debugreg[5] = 0; + dump->u_debugreg[6] = current->thread.debugreg6; + dump->u_debugreg[7] = current->thread.debugreg7; if (dump->start_stack < TASK_SIZE) dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT; - dump->regs.ebx = regs->ebx; - dump->regs.ecx = regs->ecx; - dump->regs.edx = regs->edx; - dump->regs.esi = regs->esi; - dump->regs.edi = regs->edi; - dump->regs.ebp = regs->ebp; - dump->regs.eax = regs->eax; - dump->regs.ds = regs->xds; - dump->regs.es = regs->xes; - dump->regs.fs = regs->xfs; - savesegment(gs,dump->regs.gs); - dump->regs.orig_eax = regs->orig_eax; - dump->regs.eip = regs->eip; - dump->regs.cs = regs->xcs; - dump->regs.eflags = regs->eflags; - dump->regs.esp = regs->esp; - dump->regs.ss = regs->xss; + dump->regs.bx = regs->bx; + dump->regs.cx = regs->cx; + dump->regs.dx = regs->dx; + dump->regs.si = regs->si; + dump->regs.di = regs->di; + dump->regs.bp = regs->bp; + dump->regs.ax = regs->ax; + dump->regs.ds = (u16)regs->ds; + dump->regs.es = (u16)regs->es; + dump->regs.fs = (u16)regs->fs; + savesegment(gs,gs); + dump->regs.orig_ax = regs->orig_ax; + dump->regs.ip = regs->ip; + dump->regs.cs = (u16)regs->cs; + dump->regs.flags = regs->flags; + dump->regs.sp = regs->sp; + dump->regs.ss = (u16)regs->ss; dump->u_fpvalid = dump_fpu (regs, &dump->i387); } EXPORT_SYMBOL(dump_thread); -/* +/* * Capture the user space registers if the task is not running (in user space) */ int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs) { struct pt_regs ptregs = *task_pt_regs(tsk); - ptregs.xcs &= 0xffff; - ptregs.xds &= 0xffff; - ptregs.xes &= 0xffff; - ptregs.xss &= 0xffff; + ptregs.cs &= 0xffff; + ptregs.ds &= 0xffff; + ptregs.es &= 0xffff; + ptregs.ss &= 0xffff; elf_core_copy_regs(regs, &ptregs); @@ -598,18 +602,32 @@ static noinline void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, struct tss_struct *tss) { - struct thread_struct *next; + struct thread_struct *prev, *next; + unsigned long debugctl; + prev = &prev_p->thread; next = &next_p->thread; + debugctl = prev->debugctlmsr; + if (next->ds_area_msr != prev->ds_area_msr) { + /* we clear debugctl to make sure DS + * is not in use when we change it */ + debugctl = 0; + wrmsrl(MSR_IA32_DEBUGCTLMSR, 0); + wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0); + } + + if (next->debugctlmsr != debugctl) + wrmsr(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr, 0); + if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { - set_debugreg(next->debugreg[0], 0); - set_debugreg(next->debugreg[1], 1); - set_debugreg(next->debugreg[2], 2); - set_debugreg(next->debugreg[3], 3); + set_debugreg(next->debugreg0, 0); + set_debugreg(next->debugreg1, 1); + set_debugreg(next->debugreg2, 2); + set_debugreg(next->debugreg3, 3); /* no 4 and 5 */ - set_debugreg(next->debugreg[6], 6); - set_debugreg(next->debugreg[7], 7); + 
set_debugreg(next->debugreg6, 6); + set_debugreg(next->debugreg7, 7); } #ifdef CONFIG_SECCOMP @@ -623,6 +641,13 @@ __switch_to_xtra(struct task_struct *pre } #endif + if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) + ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); + + if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) + ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); + + if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { /* * Disable the bitmap via an invalid offset. We still cache @@ -676,7 +701,7 @@ __switch_to_xtra(struct task_struct *pre * More important, however, is the fact that this allows us much * more flexibility. * - * The return value (in %eax) will be the "prev" task after + * The return value (in %ax) will be the "prev" task after * the task-switch, and shows up in ret_from_fork in entry.S, * for example. */ @@ -699,7 +724,7 @@ struct task_struct fastcall * __switch_t /* * Reload esp0. */ - load_esp0(tss, next); + load_sp0(tss, next); /* * Save away %gs. No need to save %fs, as it was saved on the @@ -763,7 +788,7 @@ struct task_struct fastcall * __switch_t asmlinkage int sys_fork(struct pt_regs regs) { - return do_fork(SIGCHLD, regs.esp, ®s, 0, NULL, NULL); + return do_fork(SIGCHLD, regs.sp, ®s, 0, NULL, NULL); } asmlinkage int sys_clone(struct pt_regs regs) @@ -772,12 +797,12 @@ asmlinkage int sys_clone(struct pt_regs unsigned long newsp; int __user *parent_tidptr, *child_tidptr; - clone_flags = regs.ebx; - newsp = regs.ecx; - parent_tidptr = (int __user *)regs.edx; - child_tidptr = (int __user *)regs.edi; + clone_flags = regs.bx; + newsp = regs.cx; + parent_tidptr = (int __user *)regs.dx; + child_tidptr = (int __user *)regs.di; if (!newsp) - newsp = regs.esp; + newsp = regs.sp; return do_fork(clone_flags, newsp, ®s, 0, parent_tidptr, child_tidptr); } @@ -793,7 +818,7 @@ asmlinkage int sys_clone(struct pt_regs */ asmlinkage int sys_vfork(struct pt_regs regs) { - return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, ®s, 0, NULL, NULL); + return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.sp, ®s, 0, NULL, NULL); } /* @@ -804,18 +829,15 @@ asmlinkage int sys_execve(struct pt_regs int error; char * filename; - filename = getname((char __user *) regs.ebx); + filename = getname((char __user *) regs.bx); error = PTR_ERR(filename); if (IS_ERR(filename)) goto out; error = do_execve(filename, - (char __user * __user *) regs.ecx, - (char __user * __user *) regs.edx, + (char __user * __user *) regs.cx, + (char __user * __user *) regs.dx, ®s); if (error == 0) { - task_lock(current); - current->ptrace &= ~PT_DTRACE; - task_unlock(current); /* Make sure we don't return using sysenter.. */ set_thread_flag(TIF_IRET); } @@ -829,145 +851,37 @@ out: unsigned long get_wchan(struct task_struct *p) { - unsigned long ebp, esp, eip; + unsigned long bp, sp, ip; unsigned long stack_page; int count = 0; if (!p || p == current || p->state == TASK_RUNNING) return 0; stack_page = (unsigned long)task_stack_page(p); - esp = p->thread.esp; - if (!stack_page || esp < stack_page || esp > top_esp+stack_page) + sp = p->thread.sp; + if (!stack_page || sp < stack_page || sp > top_esp+stack_page) return 0; - /* include/asm-i386/system.h:switch_to() pushes ebp last. */ - ebp = *(unsigned long *) esp; + /* include/asm-i386/system.h:switch_to() pushes bp last. 
*/ + bp = *(unsigned long *) sp; do { - if (ebp < stack_page || ebp > top_ebp+stack_page) + if (bp < stack_page || bp > top_ebp+stack_page) return 0; - eip = *(unsigned long *) (ebp+4); - if (!in_sched_functions(eip)) - return eip; - ebp = *(unsigned long *) ebp; + ip = *(unsigned long *) (bp+4); + if (!in_sched_functions(ip)) + return ip; + bp = *(unsigned long *) bp; } while (count++ < 16); return 0; } -/* - * sys_alloc_thread_area: get a yet unused TLS descriptor index. - */ -static int get_free_idx(void) -{ - struct thread_struct *t = ¤t->thread; - int idx; - - for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++) - if (desc_empty(t->tls_array + idx)) - return idx + GDT_ENTRY_TLS_MIN; - return -ESRCH; -} - -/* - * Set a given TLS descriptor: - */ -asmlinkage int sys_set_thread_area(struct user_desc __user *u_info) -{ - struct thread_struct *t = ¤t->thread; - struct user_desc info; - struct desc_struct *desc; - int cpu, idx; - - if (copy_from_user(&info, u_info, sizeof(info))) - return -EFAULT; - idx = info.entry_number; - - /* - * index -1 means the kernel should try to find and - * allocate an empty descriptor: - */ - if (idx == -1) { - idx = get_free_idx(); - if (idx < 0) - return idx; - if (put_user(idx, &u_info->entry_number)) - return -EFAULT; - } - - if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) - return -EINVAL; - - desc = t->tls_array + idx - GDT_ENTRY_TLS_MIN; - - /* - * We must not get preempted while modifying the TLS. - */ - cpu = get_cpu(); - - if (LDT_empty(&info)) { - desc->a = 0; - desc->b = 0; - } else { - desc->a = LDT_entry_a(&info); - desc->b = LDT_entry_b(&info); - } - load_TLS(t, cpu); - - put_cpu(); - - return 0; -} - -/* - * Get the current Thread-Local Storage area: - */ - -#define GET_BASE(desc) ( \ - (((desc)->a >> 16) & 0x0000ffff) | \ - (((desc)->b << 16) & 0x00ff0000) | \ - ( (desc)->b & 0xff000000) ) - -#define GET_LIMIT(desc) ( \ - ((desc)->a & 0x0ffff) | \ - ((desc)->b & 0xf0000) ) - -#define GET_32BIT(desc) (((desc)->b >> 22) & 1) -#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3) -#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1) -#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1) -#define GET_PRESENT(desc) (((desc)->b >> 15) & 1) -#define GET_USEABLE(desc) (((desc)->b >> 20) & 1) - -asmlinkage int sys_get_thread_area(struct user_desc __user *u_info) -{ - struct user_desc info; - struct desc_struct *desc; - int idx; - - if (get_user(idx, &u_info->entry_number)) - return -EFAULT; - if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) - return -EINVAL; - - memset(&info, 0, sizeof(info)); - - desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; - - info.entry_number = idx; - info.base_addr = GET_BASE(desc); - info.limit = GET_LIMIT(desc); - info.seg_32bit = GET_32BIT(desc); - info.contents = GET_CONTENTS(desc); - info.read_exec_only = !GET_WRITABLE(desc); - info.limit_in_pages = GET_LIMIT_PAGES(desc); - info.seg_not_present = !GET_PRESENT(desc); - info.useable = GET_USEABLE(desc); - - if (copy_to_user(u_info, &info, sizeof(info))) - return -EFAULT; - return 0; -} - unsigned long arch_align_stack(unsigned long sp) { if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) sp -= get_random_int() % 8192; return sp & ~0xf; } + +unsigned long arch_randomize_brk(struct mm_struct *mm) +{ + unsigned long range_end = mm->brk + 0x02000000; + return randomize_range(mm->brk, range_end, 0) ? 
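[Illustration] get_wchan() above walks the sleeping task's saved frame pointers: each frame stores the caller's frame pointer at *bp and the return address at bp+4 (bp+8 in the 64-bit variant further down). The same walk can be done on one's own stack in userspace on a frame-pointer build; a minimal sketch, assuming -O0 -fno-omit-frame-pointer and x86 frame layout:

#include <stdio.h>

/* Walk our own frame-pointer chain, the way get_wchan() walks a
 * sleeping task's: fp[0] is the caller's frame, fp[1] the return address. */
static void walk_frames(void)
{
	void **fp = __builtin_frame_address(0);
	int depth = 0;

	while (fp && depth++ < 16) {
		printf("frame %d: return address %p\n", depth, fp[1]);
		/* caller frames live at higher addresses; stop on nonsense */
		if ((void **)fp[0] <= fp)
			break;
		fp = (void **)fp[0];
	}
}

static void leaf(void) { walk_frames(); }
static void mid(void)  { leaf(); }

int main(void)
{
	mid();
	return 0;
}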
: mm->brk; +} diff -puN arch/x86/kernel/process_64.c~git-x86 arch/x86/kernel/process_64.c --- a/arch/x86/kernel/process_64.c~git-x86 +++ a/arch/x86/kernel/process_64.c @@ -3,7 +3,7 @@ * * Pentium III FXSR, SSE support * Gareth Hughes , May 2000 - * + * * X86-64 port * Andi Kleen. * @@ -19,19 +19,19 @@ #include #include #include +#include #include #include -#include #include #include #include #include -#include #include #include +#include #include +#include #include -#include #include #include #include @@ -72,13 +72,6 @@ void idle_notifier_register(struct notif { atomic_notifier_chain_register(&idle_notifier, n); } -EXPORT_SYMBOL_GPL(idle_notifier_register); - -void idle_notifier_unregister(struct notifier_block *n) -{ - atomic_notifier_chain_unregister(&idle_notifier, n); -} -EXPORT_SYMBOL(idle_notifier_unregister); void enter_idle(void) { @@ -106,7 +99,7 @@ void exit_idle(void) * We use this if we don't have any better * idle routine.. */ -static void default_idle(void) +void default_idle(void) { current_thread_info()->status &= ~TS_POLLING; /* @@ -116,9 +109,16 @@ static void default_idle(void) smp_mb(); local_irq_disable(); if (!need_resched()) { - /* Enables interrupts one instruction before HLT. - x86 special cases this so there is no race. */ - safe_halt(); + ktime_t t0, t1; + u64 t0n, t1n; + + t0 = ktime_get(); + t0n = ktime_to_ns(t0); + safe_halt(); /* enables interrupts racelessly */ + local_irq_disable(); + t1 = ktime_get(); + t1n = ktime_to_ns(t1); + sched_clock_idle_wakeup_event(t1n - t0n); } else local_irq_enable(); current_thread_info()->status |= TS_POLLING; @@ -129,43 +129,12 @@ static void default_idle(void) * to poll the ->need_resched flag instead of waiting for the * cross-CPU IPI to arrive. Use this option with caution. */ -static void poll_idle (void) +static void poll_idle(void) { local_irq_enable(); cpu_relax(); } -void cpu_idle_wait(void) -{ - unsigned int cpu, this_cpu = get_cpu(); - cpumask_t map, tmp = current->cpus_allowed; - - set_cpus_allowed(current, cpumask_of_cpu(this_cpu)); - put_cpu(); - - cpus_clear(map); - for_each_online_cpu(cpu) { - per_cpu(cpu_idle_state, cpu) = 1; - cpu_set(cpu, map); - } - - __get_cpu_var(cpu_idle_state) = 0; - - wmb(); - do { - ssleep(1); - for_each_online_cpu(cpu) { - if (cpu_isset(cpu, map) && - !per_cpu(cpu_idle_state, cpu)) - cpu_clear(cpu, map); - } - cpus_and(map, map, cpu_online_map); - } while (!cpus_empty(map)); - - set_cpus_allowed(current, tmp); -} -EXPORT_SYMBOL_GPL(cpu_idle_wait); - #ifdef CONFIG_HOTPLUG_CPU DECLARE_PER_CPU(int, cpu_state); @@ -196,7 +165,7 @@ static inline void play_dead(void) * low exit latency (ie sit in a loop waiting for * somebody to say that they'd like to reschedule) */ -void cpu_idle (void) +void cpu_idle(void) { current_thread_info()->status |= TS_POLLING; /* endless idle loop with no priority at all */ @@ -236,6 +205,36 @@ void cpu_idle (void) } } +void cpu_idle_wait(void) +{ + unsigned int cpu, this_cpu = get_cpu(); + cpumask_t map, tmp = current->cpus_allowed; + + set_cpus_allowed(current, cpumask_of_cpu(this_cpu)); + put_cpu(); + + cpus_clear(map); + for_each_online_cpu(cpu) { + per_cpu(cpu_idle_state, cpu) = 1; + cpu_set(cpu, map); + } + + __get_cpu_var(cpu_idle_state) = 0; + + wmb(); + do { + ssleep(1); + for_each_online_cpu(cpu) { + if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu)) + cpu_clear(cpu, map); + } + cpus_and(map, map, cpu_online_map); + } while (!cpus_empty(map)); + + set_cpus_allowed(current, tmp); +} +EXPORT_SYMBOL_GPL(cpu_idle_wait); + /* * This uses new 
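[Illustration] The default_idle() change above brackets the halt with ktime_get() and feeds the slept interval to sched_clock_idle_wakeup_event(), so the scheduler clock can account for time spent with the CPU halted. The same bracketing pattern, shown in userspace with CLOCK_MONOTONIC and a sleep standing in for safe_halt() (illustrative only):

#include <stdio.h>
#include <time.h>
#include <unistd.h>

static unsigned long long ts_ns(const struct timespec *ts)
{
	return (unsigned long long)ts->tv_sec * 1000000000ull + ts->tv_nsec;
}

int main(void)
{
	struct timespec t0, t1;

	clock_gettime(CLOCK_MONOTONIC, &t0);
	usleep(10000);			/* stands in for safe_halt() */
	clock_gettime(CLOCK_MONOTONIC, &t1);

	/* the kernel reports this delta via sched_clock_idle_wakeup_event() */
	printf("idle period: %llu ns\n", ts_ns(&t1) - ts_ns(&t0));
	return 0;
}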
MONITOR/MWAIT instructions on P4 processors with PNI, * which can obviate IPI to trigger checking of need_resched. @@ -246,13 +245,13 @@ void cpu_idle (void) * New with Core Duo processors, MWAIT can take some hints based on CPU * capability. */ -void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) +void mwait_idle_with_hints(unsigned long ax, unsigned long cx) { if (!need_resched()) { __monitor((void *)¤t_thread_info()->flags, 0, 0); smp_mb(); if (!need_resched()) - __mwait(eax, ecx); + __mwait(ax, cx); } } @@ -289,7 +288,7 @@ void __cpuinit select_idle_routine(const } } -static int __init idle_setup (char *str) +static int __init idle_setup(char *str) { if (!strcmp(str, "poll")) { printk("using polling idle threads.\n"); @@ -304,13 +303,13 @@ static int __init idle_setup (char *str) } early_param("idle", idle_setup); -/* Prints also some state that isn't saved in the pt_regs */ +/* Prints also some state that isn't saved in the pt_regs */ void __show_regs(struct pt_regs * regs) { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs; unsigned long d0, d1, d2, d3, d6, d7; - unsigned int fsindex,gsindex; - unsigned int ds,cs,es; + unsigned int fsindex, gsindex; + unsigned int ds, cs, es; printk("\n"); print_modules(); @@ -319,16 +318,16 @@ void __show_regs(struct pt_regs * regs) init_utsname()->release, (int)strcspn(init_utsname()->version, " "), init_utsname()->version); - printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip); - printk_address(regs->rip); - printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, - regs->eflags); + printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); + printk_address(regs->ip); + printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp, + regs->flags); printk("RAX: %016lx RBX: %016lx RCX: %016lx\n", - regs->rax, regs->rbx, regs->rcx); + regs->ax, regs->bx, regs->cx); printk("RDX: %016lx RSI: %016lx RDI: %016lx\n", - regs->rdx, regs->rsi, regs->rdi); + regs->dx, regs->si, regs->di); printk("RBP: %016lx R08: %016lx R09: %016lx\n", - regs->rbp, regs->r8, regs->r9); + regs->bp, regs->r8, regs->r9); printk("R10: %016lx R11: %016lx R12: %016lx\n", regs->r10, regs->r11, regs->r12); printk("R13: %016lx R14: %016lx R15: %016lx\n", @@ -379,7 +378,7 @@ void exit_thread(void) struct task_struct *me = current; struct thread_struct *t = &me->thread; - if (me->thread.io_bitmap_ptr) { + if (me->thread.io_bitmap_ptr) { struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); kfree(t->io_bitmap_ptr); @@ -415,7 +414,7 @@ void flush_thread(void) tsk->thread.debugreg3 = 0; tsk->thread.debugreg6 = 0; tsk->thread.debugreg7 = 0; - memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); + memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); /* * Forget coprocessor state.. 
*/ @@ -438,7 +437,7 @@ void release_thread(struct task_struct * static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr) { - struct user_desc ud = { + struct user_desc ud = { .base_addr = addr, .limit = 0xfffff, .seg_32bit = 1, @@ -447,17 +446,13 @@ static inline void set_32bit_tls(struct }; struct n_desc_struct *desc = (void *)t->thread.tls_array; desc += tls; - desc->a = LDT_entry_a(&ud); - desc->b = LDT_entry_b(&ud); + desc->a = LDT_entry_a(&ud); + desc->b = LDT_entry_b(&ud); } static inline u32 read_32bit_tls(struct task_struct *t, int tls) { - struct desc_struct *desc = (void *)t->thread.tls_array; - desc += tls; - return desc->base0 | - (((u32)desc->base1) << 16) | - (((u32)desc->base2) << 24); + return get_desc_base(&t->thread.tls_array[tls]); } /* @@ -469,7 +464,7 @@ void prepare_to_copy(struct task_struct unlazy_fpu(tsk); } -int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp, +int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, unsigned long unused, struct task_struct * p, struct pt_regs * regs) { @@ -481,14 +476,14 @@ int copy_thread(int nr, unsigned long cl (THREAD_SIZE + task_stack_page(p))) - 1; *childregs = *regs; - childregs->rax = 0; - childregs->rsp = rsp; - if (rsp == ~0UL) - childregs->rsp = (unsigned long)childregs; - - p->thread.rsp = (unsigned long) childregs; - p->thread.rsp0 = (unsigned long) (childregs+1); - p->thread.userrsp = me->thread.userrsp; + childregs->ax = 0; + childregs->sp = sp; + if (sp == ~0UL) + childregs->sp = (unsigned long)childregs; + + p->thread.sp = (unsigned long) childregs; + p->thread.sp0 = (unsigned long) (childregs+1); + p->thread.usersp = me->thread.usersp; set_tsk_thread_flag(p, TIF_FORK); @@ -509,7 +504,7 @@ int copy_thread(int nr, unsigned long cl memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, IO_BITMAP_BYTES); set_tsk_thread_flag(p, TIF_IO_BITMAP); - } + } /* * Set a new TLS for the child thread? 
@@ -517,7 +512,8 @@ int copy_thread(int nr, unsigned long cl if (clone_flags & CLONE_SETTLS) { #ifdef CONFIG_IA32_EMULATION if (test_thread_flag(TIF_IA32)) - err = ia32_child_tls(p, childregs); + err = do_set_thread_area(p, -1, + (struct user_desc __user *)childregs->si, 0); else #endif err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); @@ -536,17 +532,48 @@ out: /* * This special macro can be used to load a debugging register */ -#define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r) +#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r) + +/* + * Capture the user space registers if the task is not running (in user space) + */ +int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs) +{ + struct pt_regs *pp, ptregs; + + pp = task_pt_regs(tsk); + + ptregs = *pp; + ptregs.cs &= 0xffff; + ptregs.ss &= 0xffff; + + elf_core_copy_regs(regs, &ptregs); + + return 1; +} static inline void __switch_to_xtra(struct task_struct *prev_p, - struct task_struct *next_p, - struct tss_struct *tss) + struct task_struct *next_p, + struct tss_struct *tss) { struct thread_struct *prev, *next; + unsigned long debugctl; prev = &prev_p->thread, next = &next_p->thread; + debugctl = prev->debugctlmsr; + if (next->ds_area_msr != prev->ds_area_msr) { + /* we clear debugctl to make sure DS + * is not in use when we change it */ + debugctl = 0; + wrmsrl(MSR_IA32_DEBUGCTLMSR, 0); + wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr); + } + + if (next->debugctlmsr != debugctl) + wrmsrl(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr); + if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { loaddebug(next, 0); loaddebug(next, 1); @@ -570,12 +597,22 @@ static inline void __switch_to_xtra(stru */ memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); } + + /* + * Last branch recording recofiguration of trace hardware and + * disentangling of trace data per task. + */ + if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) + ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); + + if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) + ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); } /* * switch_to(x,y) should switch tasks from x to y. * - * This could still be optimized: + * This could still be optimized: * - fold all the options into a flag word and test it with a single test. * - could test fs/gs bitsliced * @@ -586,7 +623,7 @@ __switch_to(struct task_struct *prev_p, { struct thread_struct *prev = &prev_p->thread, *next = &next_p->thread; - int cpu = smp_processor_id(); + int cpu = smp_processor_id(); struct tss_struct *tss = &per_cpu(init_tss, cpu); /* we're going to use this soon, after a few expensive things */ @@ -596,7 +633,7 @@ __switch_to(struct task_struct *prev_p, /* * Reload esp0, LDT and the page table pointer: */ - tss->rsp0 = next->rsp0; + tss->sp0 = next->sp0; /* * Switch DS and ES. @@ -655,8 +692,8 @@ __switch_to(struct task_struct *prev_p, /* * Switch the PDA and FPU contexts. 
@@ -517,7 +512,8 @@ int copy_thread(int nr, unsigned long cl if (clone_flags & CLONE_SETTLS) { #ifdef CONFIG_IA32_EMULATION if (test_thread_flag(TIF_IA32)) - err = ia32_child_tls(p, childregs); + err = do_set_thread_area(p, -1, + (struct user_desc __user *)childregs->si, 0); else #endif err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); @@ -536,17 +532,48 @@ out: /* * This special macro can be used to load a debugging register */ -#define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r) +#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r) + +/* + * Capture the user space registers if the task is not running (in user space) + */ +int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs) +{ + struct pt_regs *pp, ptregs; + + pp = task_pt_regs(tsk); + + ptregs = *pp; + ptregs.cs &= 0xffff; + ptregs.ss &= 0xffff; + + elf_core_copy_regs(regs, &ptregs); + + return 1; +} static inline void __switch_to_xtra(struct task_struct *prev_p, - struct task_struct *next_p, - struct tss_struct *tss) + struct task_struct *next_p, + struct tss_struct *tss) { struct thread_struct *prev, *next; + unsigned long debugctl; prev = &prev_p->thread, next = &next_p->thread; + debugctl = prev->debugctlmsr; + if (next->ds_area_msr != prev->ds_area_msr) { + /* we clear debugctl to make sure DS + * is not in use when we change it */ + debugctl = 0; + wrmsrl(MSR_IA32_DEBUGCTLMSR, 0); + wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr); + } + + if (next->debugctlmsr != debugctl) + wrmsrl(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr); + if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { loaddebug(next, 0); loaddebug(next, 1); @@ -570,12 +597,22 @@ static inline void __switch_to_xtra(stru */ memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); } + + /* + * Last branch recording reconfiguration of trace hardware and + * disentangling of trace data per task. + */ + if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) + ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); + + if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) + ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); } /* * switch_to(x,y) should switch tasks from x to y. * - * This could still be optimized: + * This could still be optimized: * - fold all the options into a flag word and test it with a single test. * - could test fs/gs bitsliced * @@ -586,7 +623,7 @@ __switch_to(struct task_struct *prev_p, { struct thread_struct *prev = &prev_p->thread, *next = &next_p->thread; - int cpu = smp_processor_id(); + int cpu = smp_processor_id(); struct tss_struct *tss = &per_cpu(init_tss, cpu); /* we're going to use this soon, after a few expensive things */ @@ -596,7 +633,7 @@ __switch_to(struct task_struct *prev_p, /* * Reload esp0, LDT and the page table pointer: */ - tss->rsp0 = next->rsp0; + tss->sp0 = next->sp0; /* * Switch DS and ES. 
*/ - prev->userrsp = read_pda(oldrsp); - write_pda(oldrsp, next->userrsp); + prev->usersp = read_pda(oldrsp); + write_pda(oldrsp, next->usersp); write_pda(pcurrent, next_p); write_pda(kernelstack, @@ -673,8 +710,8 @@ __switch_to(struct task_struct *prev_p, /* * Now maybe reload the debug registers and handle I/O bitmaps */ - if (unlikely((task_thread_info(next_p)->flags & _TIF_WORK_CTXSW)) - || test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) + if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT || + task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV)) __switch_to_xtra(prev_p, next_p, tss); /* If the task has used fpu the last 5 timeslices, just do a full @@ -689,7 +726,7 @@ __switch_to(struct task_struct *prev_p, /* * sys_execve() executes a new program. */ -asmlinkage +asmlinkage long sys_execve(char __user *name, char __user * __user *argv, char __user * __user *envp, struct pt_regs regs) { @@ -701,11 +738,6 @@ long sys_execve(char __user *name, char if (IS_ERR(filename)) return error; error = do_execve(filename, argv, envp, ®s); - if (error == 0) { - task_lock(current); - current->ptrace &= ~PT_DTRACE; - task_unlock(current); - } putname(filename); return error; } @@ -715,18 +747,18 @@ void set_personality_64bit(void) /* inherit personality from parent */ /* Make sure to be in 64bit mode */ - clear_thread_flag(TIF_IA32); + clear_thread_flag(TIF_IA32); /* TBD: overwrites user setup. Should have two bits. But 64bit processes have always behaved this way, so it's not too bad. The main problem is just that - 32bit childs are affected again. */ + 32bit childs are affected again. */ current->personality &= ~READ_IMPLIES_EXEC; } asmlinkage long sys_fork(struct pt_regs *regs) { - return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL); + return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL); } asmlinkage long @@ -734,7 +766,7 @@ sys_clone(unsigned long clone_flags, uns void __user *parent_tid, void __user *child_tid, struct pt_regs *regs) { if (!newsp) - newsp = regs->rsp; + newsp = regs->sp; return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid); } @@ -750,29 +782,29 @@ sys_clone(unsigned long clone_flags, uns */ asmlinkage long sys_vfork(struct pt_regs *regs) { - return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->rsp, regs, 0, + return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0, NULL, NULL); } unsigned long get_wchan(struct task_struct *p) { unsigned long stack; - u64 fp,rip; + u64 fp,ip; int count = 0; if (!p || p == current || p->state==TASK_RUNNING) return 0; stack = (unsigned long)task_stack_page(p); - if (p->thread.rsp < stack || p->thread.rsp > stack+THREAD_SIZE) + if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE) return 0; - fp = *(u64 *)(p->thread.rsp); + fp = *(u64 *)(p->thread.sp); do { if (fp < (unsigned long)stack || fp > (unsigned long)stack+THREAD_SIZE) return 0; - rip = *(u64 *)(fp+8); - if (!in_sched_functions(rip)) - return rip; + ip = *(u64 *)(fp+8); + if (!in_sched_functions(ip)) + return ip; fp = *(u64 *)fp; } while (count++ < 16); return 0; @@ -813,19 +845,19 @@ long do_arch_prctl(struct task_struct *t /* Not strictly needed for fs, but do it for symmetry with gs */ if (addr >= TASK_SIZE_OF(task)) - return -EPERM; + return -EPERM; cpu = get_cpu(); - /* handle small bases via the GDT because that's faster to + /* handle small bases via the GDT because that's faster to switch. 
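[Illustration] do_arch_prctl() above prefers a GDT slot for bases that fit in 32 bits, because reloading a selector on context switch is cheaper than an MSR write, and falls back to the FS/GS base MSRs for large bases. The same entry points are reachable from userspace via the arch_prctl syscall; a minimal 64-bit-only example reading the current FS base:

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/prctl.h>

int main(void)
{
	unsigned long fsbase = 0;

	/* ARCH_GET_FS writes the base back through the addr argument */
	if (syscall(SYS_arch_prctl, ARCH_GET_FS, &fsbase) == 0)
		printf("fs base = %#lx\n", fsbase);
	return 0;
}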
*/ - if (addr <= 0xffffffff) { + if (addr <= 0xffffffff) { set_32bit_tls(task, FS_TLS, addr); - if (doit) { - load_TLS(&task->thread, cpu); + if (doit) { + load_TLS(&task->thread, cpu); asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL)); } task->thread.fsindex = FS_TLS_SEL; task->thread.fs = 0; - } else { + } else { task->thread.fsindex = 0; task->thread.fs = addr; if (doit) { @@ -837,24 +869,24 @@ long do_arch_prctl(struct task_struct *t } put_cpu(); break; - case ARCH_GET_FS: { - unsigned long base; + case ARCH_GET_FS: { + unsigned long base; if (task->thread.fsindex == FS_TLS_SEL) base = read_32bit_tls(task, FS_TLS); else if (doit) rdmsrl(MSR_FS_BASE, base); else base = task->thread.fs; - ret = put_user(base, (unsigned long __user *)addr); - break; + ret = put_user(base, (unsigned long __user *)addr); + break; } - case ARCH_GET_GS: { + case ARCH_GET_GS: { unsigned long base; unsigned gsindex; if (task->thread.gsindex == GS_TLS_SEL) base = read_32bit_tls(task, GS_TLS); else if (doit) { - asm("movl %%gs,%0" : "=r" (gsindex)); + asm("movl %%gs,%0" : "=r" (gsindex)); if (gsindex) rdmsrl(MSR_KERNEL_GS_BASE, base); else @@ -862,39 +894,21 @@ long do_arch_prctl(struct task_struct *t } else base = task->thread.gs; - ret = put_user(base, (unsigned long __user *)addr); + ret = put_user(base, (unsigned long __user *)addr); break; } default: ret = -EINVAL; break; - } + } - return ret; -} + return ret; +} long sys_arch_prctl(int code, unsigned long addr) { return do_arch_prctl(current, code, addr); -} - -/* - * Capture the user space registers if the task is not running (in user space) - */ -int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs) -{ - struct pt_regs *pp, ptregs; - - pp = task_pt_regs(tsk); - - ptregs = *pp; - ptregs.cs &= 0xffff; - ptregs.ss &= 0xffff; - - elf_core_copy_regs(regs, &ptregs); - - return 1; } unsigned long arch_align_stack(unsigned long sp) @@ -903,3 +917,10 @@ unsigned long arch_align_stack(unsigned sp -= get_random_int() % 8192; return sp & ~0xf; } + +unsigned long arch_randomize_brk(struct mm_struct *mm) +{ + unsigned long range_end = mm->brk + 0x02000000; + return randomize_range(mm->brk, range_end, 0) ? : mm->brk; +} + diff -puN /dev/null arch/x86/kernel/ptrace.c --- /dev/null +++ a/arch/x86/kernel/ptrace.c @@ -0,0 +1,1349 @@ +/* By Ross Biro 1/23/92 */ +/* + * Pentium III FXSR, SSE support + * Gareth Hughes , May 2000 + * + * BTS tracing + * Markus Metzger , Dec 2007 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* + * The maximal size of a BTS buffer per traced task in number of BTS + * records. + */ +#define PTRACE_BTS_BUFFER_MAX 4000 + +/* + * does not yet catch signals sent when the child dies. + * in exit.c or in signal.c. + */ + +/* + * Determines which flags the user has access to [1 = access, 0 = no access]. + */ +#define FLAG_MASK_32 ((unsigned long) \ + (X86_EFLAGS_CF | X86_EFLAGS_PF | \ + X86_EFLAGS_AF | X86_EFLAGS_ZF | \ + X86_EFLAGS_SF | X86_EFLAGS_TF | \ + X86_EFLAGS_DF | X86_EFLAGS_OF | \ + X86_EFLAGS_RF | X86_EFLAGS_AC)) + +/* + * Determines whether a value may be installed in a segment register. 
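[Illustration] arch_randomize_brk(), added for both 32- and 64-bit above, picks the initial heap break from a 32 MB window (0x02000000 bytes) above the static brk; the GNU `?:` extension falls back to the unrandomized brk when randomize_range() reports failure by returning 0. The effect is observable from userspace; run this a few times on a kernel with VA-space randomization enabled and the address varies:

#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* initial program break; randomized per-exec by arch_randomize_brk() */
	printf("brk = %p\n", sbrk(0));
	return 0;
}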
+ */ +static inline bool invalid_selector(u16 value) +{ + return unlikely(value != 0 && (value & SEGMENT_RPL_MASK) != USER_RPL); +} + +#ifdef CONFIG_X86_32 + +#define FLAG_MASK FLAG_MASK_32 + +static long *pt_regs_access(struct pt_regs *regs, unsigned long regno) +{ + BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); + regno >>= 2; + if (regno > FS) + --regno; + return ®s->bx + regno; +} + +static u16 get_segment_reg(struct task_struct *task, unsigned long offset) +{ + /* + * Returning the value truncates it to 16 bits. + */ + unsigned int retval; + if (offset != offsetof(struct user_regs_struct, gs)) + retval = *pt_regs_access(task_pt_regs(task), offset); + else { + retval = task->thread.gs; + if (task == current) + savesegment(gs, retval); + } + return retval; +} + +static int set_segment_reg(struct task_struct *task, + unsigned long offset, u16 value) +{ + /* + * The value argument was already truncated to 16 bits. + */ + if (invalid_selector(value)) + return -EIO; + + if (offset != offsetof(struct user_regs_struct, gs)) + *pt_regs_access(task_pt_regs(task), offset) = value; + else { + task->thread.gs = value; + if (task == current) + /* + * The user-mode %gs is not affected by + * kernel entry, so we must update the CPU. + */ + loadsegment(gs, value); + } + + return 0; +} + +static unsigned long debugreg_addr_limit(struct task_struct *task) +{ + return TASK_SIZE - 3; +} + +#else /* CONFIG_X86_64 */ + +#define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT) + +static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long offset) +{ + BUILD_BUG_ON(offsetof(struct pt_regs, r15) != 0); + return ®s->r15 + (offset / sizeof(regs->r15)); +} + +static u16 get_segment_reg(struct task_struct *task, unsigned long offset) +{ + /* + * Returning the value truncates it to 16 bits. + */ + unsigned int seg; + + switch (offset) { + case offsetof(struct user_regs_struct, fs): + if (task == current) { + /* Older gas can't assemble movq %?s,%r?? */ + asm("movl %%fs,%0" : "=r" (seg)); + return seg; + } + return task->thread.fsindex; + case offsetof(struct user_regs_struct, gs): + if (task == current) { + asm("movl %%gs,%0" : "=r" (seg)); + return seg; + } + return task->thread.gsindex; + case offsetof(struct user_regs_struct, ds): + if (task == current) { + asm("movl %%ds,%0" : "=r" (seg)); + return seg; + } + return task->thread.ds; + case offsetof(struct user_regs_struct, es): + if (task == current) { + asm("movl %%es,%0" : "=r" (seg)); + return seg; + } + return task->thread.es; + + case offsetof(struct user_regs_struct, cs): + case offsetof(struct user_regs_struct, ss): + break; + } + return *pt_regs_access(task_pt_regs(task), offset); +} + +static int set_segment_reg(struct task_struct *task, + unsigned long offset, u16 value) +{ + /* + * The value argument was already truncated to 16 bits. + */ + if (invalid_selector(value)) + return -EIO; + + switch (offset) { + case offsetof(struct user_regs_struct,fs): + /* + * If this is setting fs as for normal 64-bit use but + * setting fs_base has implicitly changed it, leave it. + */ + if ((value == FS_TLS_SEL && task->thread.fsindex == 0 && + task->thread.fs != 0) || + (value == 0 && task->thread.fsindex == FS_TLS_SEL && + task->thread.fs == 0)) + break; + task->thread.fsindex = value; + if (task == current) + loadsegment(fs, task->thread.fsindex); + break; + case offsetof(struct user_regs_struct,gs): + /* + * If this is setting gs as for normal 64-bit use but + * setting gs_base has implicitly changed it, leave it. 
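[Illustration] Both pt_regs_access() variants above turn a byte offset into struct user_regs_struct directly into a pt_regs slot, relying on the two layouts matching field for field; the BUILD_BUG_ON pins the first field at offset 0. A standalone model of offset-indexed register access, with C11 _Static_assert standing in for BUILD_BUG_ON:

#include <stdio.h>
#include <stddef.h>

struct regs { unsigned long bx, cx, dx, si, di, bp, ax; };

/* The trick: an offset into the user-visible layout indexes the kernel
 * layout directly, provided the first field sits at offset 0. */
_Static_assert(offsetof(struct regs, bx) == 0, "bx must be first");

static unsigned long *reg_at(struct regs *r, size_t offset)
{
	return &r->bx + offset / sizeof(unsigned long);
}

int main(void)
{
	struct regs r = { .ax = 42 };
	printf("ax = %lu\n", *reg_at(&r, offsetof(struct regs, ax)));
	return 0;
}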
+ */ + if ((value == GS_TLS_SEL && task->thread.gsindex == 0 && + task->thread.gs != 0) || + (value == 0 && task->thread.gsindex == GS_TLS_SEL && + task->thread.gs == 0)) + break; + task->thread.gsindex = value; + if (task == current) + load_gs_index(task->thread.gsindex); + break; + case offsetof(struct user_regs_struct,ds): + task->thread.ds = value; + if (task == current) + loadsegment(ds, task->thread.ds); + break; + case offsetof(struct user_regs_struct,es): + task->thread.es = value; + if (task == current) + loadsegment(es, task->thread.es); + break; + + /* + * Can't actually change these in 64-bit mode. + */ + case offsetof(struct user_regs_struct,cs): +#ifdef CONFIG_IA32_EMULATION + if (test_tsk_thread_flag(task, TIF_IA32)) + task_pt_regs(task)->cs = value; +#endif + break; + case offsetof(struct user_regs_struct,ss): +#ifdef CONFIG_IA32_EMULATION + if (test_tsk_thread_flag(task, TIF_IA32)) + task_pt_regs(task)->ss = value; +#endif + break; + } + + return 0; +} + +static unsigned long debugreg_addr_limit(struct task_struct *task) +{ +#ifdef CONFIG_IA32_EMULATION + if (test_tsk_thread_flag(task, TIF_IA32)) + return IA32_PAGE_OFFSET - 3; +#endif + return TASK_SIZE64 - 7; +} + +#endif /* CONFIG_X86_32 */ + +static unsigned long get_flags(struct task_struct *task) +{ + unsigned long retval = task_pt_regs(task)->flags; + + /* + * If the debugger set TF, hide it from the readout. + */ + if (test_tsk_thread_flag(task, TIF_FORCED_TF)) + retval &= ~X86_EFLAGS_TF; + + return retval; +} + +static int set_flags(struct task_struct *task, unsigned long value) +{ + struct pt_regs *regs = task_pt_regs(task); + + /* + * If the user value contains TF, mark that + * it was not "us" (the debugger) that set it. + * If not, make sure it stays set if we had. + */ + if (value & X86_EFLAGS_TF) + clear_tsk_thread_flag(task, TIF_FORCED_TF); + else if (test_tsk_thread_flag(task, TIF_FORCED_TF)) + value |= X86_EFLAGS_TF; + + regs->flags = (regs->flags & ~FLAG_MASK) | (value & FLAG_MASK); + + return 0; +} + +static int putreg(struct task_struct *child, + unsigned long offset, unsigned long value) +{ + switch (offset) { + case offsetof(struct user_regs_struct, cs): + case offsetof(struct user_regs_struct, ds): + case offsetof(struct user_regs_struct, es): + case offsetof(struct user_regs_struct, fs): + case offsetof(struct user_regs_struct, gs): + case offsetof(struct user_regs_struct, ss): + return set_segment_reg(child, offset, value); + + case offsetof(struct user_regs_struct, flags): + return set_flags(child, value); + +#ifdef CONFIG_X86_64 + case offsetof(struct user_regs_struct,fs_base): + if (value >= TASK_SIZE_OF(child)) + return -EIO; + /* + * When changing the segment base, use do_arch_prctl + * to set either thread.fs or thread.fsindex and the + * corresponding GDT slot. + */ + if (child->thread.fs != value) + return do_arch_prctl(child, ARCH_SET_FS, value); + return 0; + case offsetof(struct user_regs_struct,gs_base): + /* + * Exactly the same here as the %fs handling above. 
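[Illustration] get_flags()/set_flags() above cooperate to keep a kernel-owned trap flag invisible to the debugger: if TIF_FORCED_TF is set, reads hide TF, and writes that omit TF keep it set. A minimal model of that bookkeeping, with a boolean standing in for the thread flag:

#include <stdio.h>
#include <stdbool.h>

#define X86_EFLAGS_TF 0x100ul

struct tstate { unsigned long flags; bool forced_tf; };

static unsigned long model_get_flags(const struct tstate *t)
{
	/* hide TF from the readout if it was our doing */
	return t->forced_tf ? t->flags & ~X86_EFLAGS_TF : t->flags;
}

static void model_set_flags(struct tstate *t, unsigned long value)
{
	if (value & X86_EFLAGS_TF)
		t->forced_tf = false;	/* the user owns TF now */
	else if (t->forced_tf)
		value |= X86_EFLAGS_TF;	/* keep the forced TF set */
	t->flags = value;
}

int main(void)
{
	struct tstate t = { .flags = X86_EFLAGS_TF, .forced_tf = true };
	printf("visible flags: %#lx\n", model_get_flags(&t));	/* TF hidden */
	model_set_flags(&t, 0);
	printf("stored flags:  %#lx\n", t.flags);		/* TF preserved */
	return 0;
}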
+ */ + if (value >= TASK_SIZE_OF(child)) + return -EIO; + if (child->thread.gs != value) + return do_arch_prctl(child, ARCH_SET_GS, value); + return 0; +#endif + } + + *pt_regs_access(task_pt_regs(child), offset) = value; + return 0; +} + +static unsigned long getreg(struct task_struct *task, unsigned long offset) +{ + switch (offset) { + case offsetof(struct user_regs_struct, cs): + case offsetof(struct user_regs_struct, ds): + case offsetof(struct user_regs_struct, es): + case offsetof(struct user_regs_struct, fs): + case offsetof(struct user_regs_struct, gs): + case offsetof(struct user_regs_struct, ss): + return get_segment_reg(task, offset); + + case offsetof(struct user_regs_struct, flags): + return get_flags(task); + +#ifdef CONFIG_X86_64 + case offsetof(struct user_regs_struct, fs_base): { + /* + * do_arch_prctl may have used a GDT slot instead of + * the MSR. To userland, it appears the same either + * way, except the %fs segment selector might not be 0. + */ + unsigned int seg = task->thread.fsindex; + if (task->thread.fs != 0) + return task->thread.fs; + if (task == current) + asm("movl %%fs,%0" : "=r" (seg)); + if (seg != FS_TLS_SEL) + return 0; + return get_desc_base(&task->thread.tls_array[FS_TLS]); + } + case offsetof(struct user_regs_struct, gs_base): { + /* + * Exactly the same here as the %fs handling above. + */ + unsigned int seg = task->thread.gsindex; + if (task->thread.gs != 0) + return task->thread.gs; + if (task == current) + asm("movl %%gs,%0" : "=r" (seg)); + if (seg != GS_TLS_SEL) + return 0; + return get_desc_base(&task->thread.tls_array[GS_TLS]); + } +#endif + } + + return *pt_regs_access(task_pt_regs(task), offset); +} + +/* + * This function is trivial and will be inlined by the compiler. + * Having it separates the implementation details of debug + * registers from the interface details of ptrace. + */ +static unsigned long ptrace_get_debugreg(struct task_struct *child, int n) +{ + switch (n) { + case 0: return child->thread.debugreg0; + case 1: return child->thread.debugreg1; + case 2: return child->thread.debugreg2; + case 3: return child->thread.debugreg3; + case 6: return child->thread.debugreg6; + case 7: return child->thread.debugreg7; + } + return 0; +} + +static int ptrace_set_debugreg(struct task_struct *child, + int n, unsigned long data) +{ + int i; + + if (unlikely(n == 4 || n == 5)) + return -EIO; + + if (n < 4 && unlikely(data >= debugreg_addr_limit(child))) + return -EIO; + + switch (n) { + case 0: child->thread.debugreg0 = data; break; + case 1: child->thread.debugreg1 = data; break; + case 2: child->thread.debugreg2 = data; break; + case 3: child->thread.debugreg3 = data; break; + + case 6: + if ((data & ~0xffffffffUL) != 0) + return -EIO; + child->thread.debugreg6 = data; + break; + + case 7: + /* + * Sanity-check data. Take one half-byte at once with + * check = (val >> (16 + 4*i)) & 0xf. It contains the + * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits + * 2 and 3 are LENi. Given a list of invalid values, + * we do mask |= 1 << invalid_value, so that + * (mask >> check) & 1 is a correct test for invalid + * values. + * + * R/Wi contains the type of the breakpoint / + * watchpoint, LENi contains the length of the watched + * data in the watchpoint case. + * + * The invalid values are: + * - LENi == 0x10 (undefined), so mask |= 0x0f00. [32-bit] + * - R/Wi == 0x10 (break on I/O reads or writes), so + * mask |= 0x4444. + * - R/Wi == 0x00 && LENi != 0x00, so we have mask |= + * 0x1110. 
+ * + * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54. + * + * See the Intel Manual "System Programming Guide", + * 15.2.4 + * + * Note that LENi == 0x10 is defined on x86_64 in long + * mode (i.e. even for 32-bit userspace software, but + * 64-bit kernel), so the x86_64 mask value is 0x5454. + * See the AMD manual no. 24593 (AMD64 System Programming) + */ +#ifdef CONFIG_X86_32 +#define DR7_MASK 0x5f54 +#else +#define DR7_MASK 0x5554 +#endif + data &= ~DR_CONTROL_RESERVED; + for (i = 0; i < 4; i++) + if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1) + return -EIO; + child->thread.debugreg7 = data; + if (data) + set_tsk_thread_flag(child, TIF_DEBUG); + else + clear_tsk_thread_flag(child, TIF_DEBUG); + break; + } + + return 0; +} + +static int ptrace_bts_max_buffer_size(void) +{ + return PTRACE_BTS_BUFFER_MAX; +} + +static int ptrace_bts_get_buffer_size(struct task_struct *child) +{ + if (!child->thread.ds_area_msr) + return -ENXIO; + + return ds_get_bts_size((void *)child->thread.ds_area_msr); +} + +static int ptrace_bts_get_index(struct task_struct *child) +{ + if (!child->thread.ds_area_msr) + return -ENXIO; + + return ds_get_bts_index((void *)child->thread.ds_area_msr); +} + +static int ptrace_bts_read_record(struct task_struct *child, + long index, + struct bts_struct __user *out) +{ + struct bts_struct ret; + int retval; + + if (!child->thread.ds_area_msr) + return -ENXIO; + + retval = ds_read_bts((void *)child->thread.ds_area_msr, + index, &ret); + if (retval) + return retval; + + if (copy_to_user(out, &ret, sizeof(ret))) + return -EFAULT; + + return sizeof(ret); +} + +static int ptrace_bts_write_record(struct task_struct *child, + const struct bts_struct *in) +{ + int retval; + + if (!child->thread.ds_area_msr) + return -ENXIO; + + retval = ds_write_bts((void *)child->thread.ds_area_msr, in); + if (retval) + return retval; + + return sizeof(*in); +} + +static int ptrace_bts_config(struct task_struct *child, + unsigned long options) +{ + unsigned long debugctl_mask = ds_debugctl_mask(); + int retval; + + retval = ptrace_bts_get_buffer_size(child); + if (retval < 0) + return retval; + if (retval == 0) + return -ENXIO; + + if (options & PTRACE_BTS_O_TRACE_TASK) { + child->thread.debugctlmsr |= debugctl_mask; + set_tsk_thread_flag(child, TIF_DEBUGCTLMSR); + } else { + /* there is no way for us to check whether we 'own' + * the respective bits in the DEBUGCTL MSR, we're + * about to clear */ + child->thread.debugctlmsr &= ~debugctl_mask; + + if (!child->thread.debugctlmsr) + clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); + } + + if (options & PTRACE_BTS_O_TIMESTAMPS) + set_tsk_thread_flag(child, TIF_BTS_TRACE_TS); + else + clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); + + return 0; +} + +static int ptrace_bts_status(struct task_struct *child) +{ + unsigned long debugctl_mask = ds_debugctl_mask(); + int retval, status = 0; + + retval = ptrace_bts_get_buffer_size(child); + if (retval < 0) + return retval; + if (retval == 0) + return -ENXIO; + + if (ptrace_bts_get_buffer_size(child) <= 0) + return -ENXIO; + + if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) && + child->thread.debugctlmsr & debugctl_mask) + status |= PTRACE_BTS_O_TRACE_TASK; + if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS)) + status |= PTRACE_BTS_O_TIMESTAMPS; + + return status; +} + +static int ptrace_bts_allocate_bts(struct task_struct *child, + int size_in_records) +{ + int retval = 0; + void *ds; + + if (size_in_records < 0) + return -EINVAL; + + if (size_in_records > ptrace_bts_max_buffer_size()) + return 
-EINVAL; + + if (size_in_records == 0) { + /* freeing: leave ds NULL so ds_area_msr is cleared below */ + ds = NULL; + ptrace_bts_config(child, /* options = */ 0); + } else { + retval = ds_allocate(&ds, size_in_records); + if (retval) + return retval; + } + + if (child->thread.ds_area_msr) + ds_free((void **)&child->thread.ds_area_msr); + + child->thread.ds_area_msr = (unsigned long)ds; + if (child->thread.ds_area_msr) + set_tsk_thread_flag(child, TIF_DS_AREA_MSR); + else + clear_tsk_thread_flag(child, TIF_DS_AREA_MSR); + + return retval; +} + +void ptrace_bts_take_timestamp(struct task_struct *tsk, + enum bts_qualifier qualifier) +{ + struct bts_struct rec = { + .qualifier = qualifier, + .variant.timestamp = sched_clock() + }; + + if (ptrace_bts_get_buffer_size(tsk) <= 0) + return; + + ptrace_bts_write_record(tsk, &rec); +} + +/* + * Called by kernel/ptrace.c when detaching.. + * + * Make sure the single step bit is not set. + */ +void ptrace_disable(struct task_struct *child) +{ + user_disable_single_step(child); +#ifdef TIF_SYSCALL_EMU + clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); +#endif + ptrace_bts_config(child, /* options = */ 0); + if (child->thread.ds_area_msr) { + ds_free((void **)&child->thread.ds_area_msr); + clear_tsk_thread_flag(child, TIF_DS_AREA_MSR); + } +} + +long arch_ptrace(struct task_struct *child, long request, long addr, long data) +{ + int i, ret; + unsigned long __user *datap = (unsigned long __user *)data; + + switch (request) { + /* when I and D space are separate, these will need to be fixed. */ + case PTRACE_PEEKTEXT: /* read word at location addr. */ + case PTRACE_PEEKDATA: + ret = generic_ptrace_peekdata(child, addr, data); + break; + + /* read the word at location addr in the USER area. */ + case PTRACE_PEEKUSR: { + unsigned long tmp; + + ret = -EIO; + if ((addr & (sizeof(data) - 1)) || addr < 0 || + addr >= sizeof(struct user)) + break; + + tmp = 0; /* Default return condition */ + if (addr < sizeof(struct user_regs_struct)) + tmp = getreg(child, addr); + else if (addr >= offsetof(struct user, u_debugreg[0]) && + addr <= offsetof(struct user, u_debugreg[7])) { + addr -= offsetof(struct user, u_debugreg[0]); + tmp = ptrace_get_debugreg(child, addr / sizeof(data)); + } + ret = put_user(tmp, datap); + break; + } + + /* when I and D space are separate, this will have to be fixed. */ + case PTRACE_POKETEXT: /* write the word at location addr. */ + case PTRACE_POKEDATA: + ret = generic_ptrace_pokedata(child, addr, data); + break; + + case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ + ret = -EIO; + if ((addr & (sizeof(data) - 1)) || addr < 0 || + addr >= sizeof(struct user)) + break; + + if (addr < sizeof(struct user_regs_struct)) + ret = putreg(child, addr, data); + else if (addr >= offsetof(struct user, u_debugreg[0]) && + addr <= offsetof(struct user, u_debugreg[7])) { + addr -= offsetof(struct user, u_debugreg[0]); + ret = ptrace_set_debugreg(child, + addr / sizeof(data), data); + } + break; + + case PTRACE_GETREGS: { /* Get all gp regs from the child. */ + if (!access_ok(VERIFY_WRITE, datap, sizeof(struct user_regs_struct))) { + ret = -EIO; + break; + } + for (i = 0; i < sizeof(struct user_regs_struct); i += sizeof(long)) { + __put_user(getreg(child, i), datap); + datap++; + } + ret = 0; + break; + } + + case PTRACE_SETREGS: { /* Set all gp regs in the child. 
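[Illustration] ptrace_set_debugreg() above validates each R/W+LEN nibble of DR7 with one mask lookup: every invalid nibble value has its bit set in DR7_MASK, so (mask >> nibble) & 1 flags it. A standalone check using the 32-bit mask value 0x5f54 from the code above:

#include <stdio.h>

#define DR7_MASK_32 0x5f54	/* invalid R/W+LEN nibbles, 32-bit rules */

/* Return 1 if any of DR7's four breakpoint control nibbles is invalid. */
static int dr7_invalid(unsigned long data)
{
	int i;

	for (i = 0; i < 4; i++)
		if ((DR7_MASK_32 >> ((data >> (16 + 4 * i)) & 0xf)) & 1)
			return 1;
	return 0;
}

int main(void)
{
	/* nibble 0x0: execute breakpoint, length field 0: valid */
	printf("%d\n", dr7_invalid(0x0ul << 16));
	/* nibble 0x2: R/W == 2 means break on I/O, rejected here */
	printf("%d\n", dr7_invalid(0x2ul << 16));
	return 0;
}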
*/ + unsigned long tmp; + if (!access_ok(VERIFY_READ, datap, sizeof(struct user_regs_struct))) { + ret = -EIO; + break; + } + for (i = 0; i < sizeof(struct user_regs_struct); i += sizeof(long)) { + __get_user(tmp, datap); + putreg(child, i, tmp); + datap++; + } + ret = 0; + break; + } + + case PTRACE_GETFPREGS: { /* Get the child FPU state. */ + if (!access_ok(VERIFY_WRITE, datap, + sizeof(struct user_i387_struct))) { + ret = -EIO; + break; + } + ret = 0; + if (!tsk_used_math(child)) + init_fpu(child); + get_fpregs((struct user_i387_struct __user *)data, child); + break; + } + + case PTRACE_SETFPREGS: { /* Set the child FPU state. */ + if (!access_ok(VERIFY_READ, datap, + sizeof(struct user_i387_struct))) { + ret = -EIO; + break; + } + set_stopped_child_used_math(child); + set_fpregs(child, (struct user_i387_struct __user *)data); + ret = 0; + break; + } + +#ifdef CONFIG_X86_32 + case PTRACE_GETFPXREGS: { /* Get the child extended FPU state. */ + if (!access_ok(VERIFY_WRITE, datap, + sizeof(struct user_fxsr_struct))) { + ret = -EIO; + break; + } + if (!tsk_used_math(child)) + init_fpu(child); + ret = get_fpxregs((struct user_fxsr_struct __user *)data, child); + break; + } + + case PTRACE_SETFPXREGS: { /* Set the child extended FPU state. */ + if (!access_ok(VERIFY_READ, datap, + sizeof(struct user_fxsr_struct))) { + ret = -EIO; + break; + } + set_stopped_child_used_math(child); + ret = set_fpxregs(child, (struct user_fxsr_struct __user *)data); + break; + } +#endif + +#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION + case PTRACE_GET_THREAD_AREA: + if (addr < 0) + return -EIO; + ret = do_get_thread_area(child, addr, + (struct user_desc __user *) data); + break; + + case PTRACE_SET_THREAD_AREA: + if (addr < 0) + return -EIO; + ret = do_set_thread_area(child, addr, + (struct user_desc __user *) data, 0); + break; +#endif + +#ifdef CONFIG_X86_64 + /* normal 64bit interface to access TLS data. + Works just like arch_prctl, except that the arguments + are reversed. 
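[Illustration] arch_ptrace() above services PTRACE_PEEKUSR by translating a struct user byte offset into either a register (via getreg) or a debug-register slot. From the tracer's side it is a single call; a minimal example reading a stopped child's stack pointer (x86 only, error handling trimmed):

#include <stdio.h>
#include <stddef.h>
#include <signal.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/user.h>
#include <sys/wait.h>

int main(void)
{
	pid_t pid = fork();

	if (pid == 0) {				/* child: stop for the tracer */
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		raise(SIGSTOP);
		_exit(0);
	}

	waitpid(pid, NULL, 0);
#ifdef __x86_64__
	size_t off = offsetof(struct user_regs_struct, rsp);
#else
	size_t off = offsetof(struct user_regs_struct, esp);
#endif
	long sp = ptrace(PTRACE_PEEKUSER, pid, (void *)off, NULL);
	printf("child sp = %#lx\n", sp);
	ptrace(PTRACE_DETACH, pid, NULL, NULL);
	return 0;
}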
*/ + case PTRACE_ARCH_PRCTL: + ret = do_arch_prctl(child, data, addr); + break; +#endif + + case PTRACE_BTS_MAX_BUFFER_SIZE: + ret = ptrace_bts_max_buffer_size(); + break; + + case PTRACE_BTS_ALLOCATE_BUFFER: + ret = ptrace_bts_allocate_bts(child, data); + break; + + case PTRACE_BTS_GET_BUFFER_SIZE: + ret = ptrace_bts_get_buffer_size(child); + break; + + case PTRACE_BTS_GET_INDEX: + ret = ptrace_bts_get_index(child); + break; + + case PTRACE_BTS_READ_RECORD: + ret = ptrace_bts_read_record + (child, data, + (struct bts_struct __user *) addr); + break; + + case PTRACE_BTS_CONFIG: + ret = ptrace_bts_config(child, data); + break; + + case PTRACE_BTS_STATUS: + ret = ptrace_bts_status(child); + break; + + default: + ret = ptrace_request(child, request, addr, data); + break; + } + + return ret; +} + +#ifdef CONFIG_IA32_EMULATION + +#include +#include +#include +#include +#include + +#define R32(l,q) \ + case offsetof(struct user32, regs.l): \ + regs->q = value; break + +#define SEG32(rs) \ + case offsetof(struct user32, regs.rs): \ + return set_segment_reg(child, \ + offsetof(struct user_regs_struct, rs), \ + value); \ + break + +static int putreg32(struct task_struct *child, unsigned regno, u32 value) +{ + struct pt_regs *regs = task_pt_regs(child); + + switch (regno) { + + SEG32(cs); + SEG32(ds); + SEG32(es); + SEG32(fs); + SEG32(gs); + SEG32(ss); + + R32(ebx, bx); + R32(ecx, cx); + R32(edx, dx); + R32(edi, di); + R32(esi, si); + R32(ebp, bp); + R32(eax, ax); + R32(orig_eax, orig_ax); + R32(eip, ip); + R32(esp, sp); + + case offsetof(struct user32, regs.eflags): + return set_flags(child, value); + + case offsetof(struct user32, u_debugreg[0]) ... + offsetof(struct user32, u_debugreg[7]): + regno -= offsetof(struct user32, u_debugreg[0]); + return ptrace_set_debugreg(child, regno / 4, value); + + default: + if (regno > sizeof(struct user32) || (regno & 3)) + return -EIO; + + /* + * Other dummy fields in the virtual user structure + * are ignored + */ + break; + } + return 0; +} + +#undef R32 +#undef SEG32 + +#define R32(l,q) \ + case offsetof(struct user32, regs.l): \ + *val = regs->q; break + +#define SEG32(rs) \ + case offsetof(struct user32, regs.rs): \ + *val = get_segment_reg(child, \ + offsetof(struct user_regs_struct, rs)); \ + break + +static int getreg32(struct task_struct *child, unsigned regno, u32 *val) +{ + struct pt_regs *regs = task_pt_regs(child); + + switch (regno) { + + SEG32(ds); + SEG32(es); + SEG32(fs); + SEG32(gs); + + R32(cs, cs); + R32(ss, ss); + R32(ebx, bx); + R32(ecx, cx); + R32(edx, dx); + R32(edi, di); + R32(esi, si); + R32(ebp, bp); + R32(eax, ax); + R32(orig_eax, orig_ax); + R32(eip, ip); + R32(esp, sp); + + case offsetof(struct user32, regs.eflags): + *val = get_flags(child); + break; + + case offsetof(struct user32, u_debugreg[0]) ... 
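[Illustration] The BTS requests dispatched above suggest a tracer-side call order: query the limit, allocate a record buffer, then enable tracing. A sketch of that order, inferred from the handlers; note the PTRACE_BTS_* requests, the option bits and struct bts_struct are all introduced by this very series and exist in no released headers, so this fragment is illustrative only and will not build against stock toolchains:

#include <sys/types.h>
#include <sys/ptrace.h>

static long bts_start(pid_t pid)
{
	long max = ptrace(PTRACE_BTS_MAX_BUFFER_SIZE, pid, 0, 0);

	if (max < 0)
		return max;

	/* sizes are in BTS records, not bytes; data carries the size */
	if (ptrace(PTRACE_BTS_ALLOCATE_BUFFER, pid, 0, max) < 0)
		return -1;

	/* trace the task and timestamp its scheduling events */
	return ptrace(PTRACE_BTS_CONFIG, pid, 0,
		      PTRACE_BTS_O_TRACE_TASK | PTRACE_BTS_O_TIMESTAMPS);
}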
+ offsetof(struct user32, u_debugreg[7]): + regno -= offsetof(struct user32, u_debugreg[0]); + *val = ptrace_get_debugreg(child, regno / 4); + break; + + default: + if (regno > sizeof(struct user32) || (regno & 3)) + return -EIO; + + /* + * Other dummy fields in the virtual user structure + * are ignored + */ + *val = 0; + break; + } + return 0; +} + +#undef R32 +#undef SEG32 + +static long ptrace32_siginfo(unsigned request, u32 pid, u32 addr, u32 data) +{ + siginfo_t __user *si = compat_alloc_user_space(sizeof(siginfo_t)); + compat_siginfo_t __user *si32 = compat_ptr(data); + siginfo_t ssi; + int ret; + + if (request == PTRACE_SETSIGINFO) { + memset(&ssi, 0, sizeof(siginfo_t)); + ret = copy_siginfo_from_user32(&ssi, si32); + if (ret) + return ret; + if (copy_to_user(si, &ssi, sizeof(siginfo_t))) + return -EFAULT; + } + ret = sys_ptrace(request, pid, addr, (unsigned long)si); + if (ret) + return ret; + if (request == PTRACE_GETSIGINFO) { + if (copy_from_user(&ssi, si, sizeof(siginfo_t))) + return -EFAULT; + ret = copy_siginfo_to_user32(si32, &ssi); + } + return ret; +} + +asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) +{ + struct task_struct *child; + struct pt_regs *childregs; + void __user *datap = compat_ptr(data); + int ret; + __u32 val; + + switch (request) { + case PTRACE_TRACEME: + case PTRACE_ATTACH: + case PTRACE_KILL: + case PTRACE_CONT: + case PTRACE_SINGLESTEP: + case PTRACE_SINGLEBLOCK: + case PTRACE_DETACH: + case PTRACE_SYSCALL: + case PTRACE_OLDSETOPTIONS: + case PTRACE_SETOPTIONS: + case PTRACE_SET_THREAD_AREA: + case PTRACE_GET_THREAD_AREA: + case PTRACE_BTS_MAX_BUFFER_SIZE: + case PTRACE_BTS_ALLOCATE_BUFFER: + case PTRACE_BTS_GET_BUFFER_SIZE: + case PTRACE_BTS_GET_INDEX: + case PTRACE_BTS_READ_RECORD: + case PTRACE_BTS_CONFIG: + case PTRACE_BTS_STATUS: + return sys_ptrace(request, pid, addr, data); + + default: + return -EINVAL; + + case PTRACE_PEEKTEXT: + case PTRACE_PEEKDATA: + case PTRACE_POKEDATA: + case PTRACE_POKETEXT: + case PTRACE_POKEUSR: + case PTRACE_PEEKUSR: + case PTRACE_GETREGS: + case PTRACE_SETREGS: + case PTRACE_SETFPREGS: + case PTRACE_GETFPREGS: + case PTRACE_SETFPXREGS: + case PTRACE_GETFPXREGS: + case PTRACE_GETEVENTMSG: + break; + + case PTRACE_SETSIGINFO: + case PTRACE_GETSIGINFO: + return ptrace32_siginfo(request, pid, addr, data); + } + + child = ptrace_get_task_struct(pid); + if (IS_ERR(child)) + return PTR_ERR(child); + + ret = ptrace_check_attach(child, request == PTRACE_KILL); + if (ret < 0) + goto out; + + childregs = task_pt_regs(child); + + switch (request) { + case PTRACE_PEEKDATA: + case PTRACE_PEEKTEXT: + ret = 0; + if (access_process_vm(child, addr, &val, sizeof(u32), 0) != + sizeof(u32)) + ret = -EIO; + else + ret = put_user(val, (unsigned int __user *)datap); + break; + + case PTRACE_POKEDATA: + case PTRACE_POKETEXT: + ret = 0; + if (access_process_vm(child, addr, &data, sizeof(u32), 1) != + sizeof(u32)) + ret = -EIO; + break; + + case PTRACE_PEEKUSR: + ret = getreg32(child, addr, &val); + if (ret == 0) + ret = put_user(val, (__u32 __user *)datap); + break; + + case PTRACE_POKEUSR: + ret = putreg32(child, addr, data); + break; + + case PTRACE_GETREGS: { /* Get all gp regs from the child. 
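[Illustration] putreg32()/getreg32() above map i386 register names onto the 64-bit pt_regs fields with case-generating macros. The same macro shape in a standalone model (two registers only; struct names are illustrative):

#include <stdio.h>
#include <stddef.h>

struct user32_regs { unsigned int ebx, ecx; };
struct kregs { unsigned long bx, cx; };

/* Each R32() expands to a case forwarding a 32-bit slot to the
 * corresponding 64-bit field; regs and value come from the caller. */
#define R32(l, q) \
	case offsetof(struct user32_regs, l): \
		regs->q = value; break

static int putreg32_model(struct kregs *regs, unsigned regno, unsigned value)
{
	switch (regno) {
	R32(ebx, bx);
	R32(ecx, cx);
	default:
		return -1;
	}
	return 0;
}

int main(void)
{
	struct kregs r = { 0, 0 };
	putreg32_model(&r, offsetof(struct user32_regs, ecx), 7);
	printf("cx = %lu\n", r.cx);
	return 0;
}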
*/ + int i; + + if (!access_ok(VERIFY_WRITE, datap, 16*4)) { + ret = -EIO; + break; + } + ret = 0; + for (i = 0; i < sizeof(struct user_regs_struct32); i += sizeof(__u32)) { + getreg32(child, i, &val); + ret |= __put_user(val, (u32 __user *)datap); + datap += sizeof(u32); + } + break; + } + + case PTRACE_SETREGS: { /* Set all gp regs in the child. */ + unsigned long tmp; + int i; + + if (!access_ok(VERIFY_READ, datap, 16*4)) { + ret = -EIO; + break; + } + ret = 0; + for (i = 0; i < sizeof(struct user_regs_struct32); i += sizeof(u32)) { + ret |= __get_user(tmp, (u32 __user *)datap); + putreg32(child, i, tmp); + datap += sizeof(u32); + } + break; + } + + case PTRACE_GETFPREGS: + ret = -EIO; + if (!access_ok(VERIFY_READ, compat_ptr(data), + sizeof(struct user_i387_struct))) + break; + save_i387_ia32(child, datap, childregs, 1); + ret = 0; + break; + + case PTRACE_SETFPREGS: + ret = -EIO; + if (!access_ok(VERIFY_WRITE, datap, + sizeof(struct user_i387_struct))) + break; + ret = 0; + /* don't check EFAULT to be bug-to-bug compatible to i386 */ + restore_i387_ia32(child, datap, 1); + break; + + case PTRACE_GETFPXREGS: { + struct user32_fxsr_struct __user *u = datap; + + init_fpu(child); + ret = -EIO; + if (!access_ok(VERIFY_WRITE, u, sizeof(*u))) + break; + ret = -EFAULT; + if (__copy_to_user(u, &child->thread.i387.fxsave, sizeof(*u))) + break; + ret = __put_user(childregs->cs, &u->fcs); + ret |= __put_user(child->thread.ds, &u->fos); + break; + } + case PTRACE_SETFPXREGS: { + struct user32_fxsr_struct __user *u = datap; + + unlazy_fpu(child); + ret = -EIO; + if (!access_ok(VERIFY_READ, u, sizeof(*u))) + break; + /* + * no checking to be bug-to-bug compatible with i386. + * but silence warning + */ + if (__copy_from_user(&child->thread.i387.fxsave, u, sizeof(*u))) + ; + set_stopped_child_used_math(child); + child->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask; + ret = 0; + break; + } + + case PTRACE_GETEVENTMSG: + ret = put_user(child->ptrace_message, + (unsigned int __user *)compat_ptr(data)); + break; + + default: + BUG(); + } + + out: + put_task_struct(child); + return ret; +} + +#endif /* CONFIG_IA32_EMULATION */ + +#ifdef CONFIG_X86_32 + +void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) +{ + struct siginfo info; + + tsk->thread.trap_no = 1; + tsk->thread.error_code = error_code; + + memset(&info, 0, sizeof(info)); + info.si_signo = SIGTRAP; + info.si_code = TRAP_BRKPT; + + /* User-mode ip? */ + info.si_addr = user_mode_vm(regs) ? (void __user *) regs->ip : NULL; + + /* Send us the fake SIGTRAP */ + force_sig_info(SIGTRAP, &info, tsk); +} + +/* notification of system call entry/exit + * - triggered by current->work.syscall_trace + */ +__attribute__((regparm(3))) +int do_syscall_trace(struct pt_regs *regs, int entryexit) +{ + int is_sysemu = test_thread_flag(TIF_SYSCALL_EMU); + /* + * With TIF_SYSCALL_EMU set we want to ignore TIF_SINGLESTEP for syscall + * interception + */ + int is_singlestep = !is_sysemu && test_thread_flag(TIF_SINGLESTEP); + int ret = 0; + + /* do the secure computing check first */ + if (!entryexit) + secure_computing(regs->orig_ax); + + if (unlikely(current->audit_context)) { + if (entryexit) + audit_syscall_exit(AUDITSC_RESULT(regs->ax), + regs->ax); + /* Debug traps, when using PTRACE_SINGLESTEP, must be sent only + * on the syscall exit path. 
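[Illustration] send_sigtrap() above fakes a debug-trap siginfo: si_code is TRAP_BRKPT and si_addr carries the user-mode ip (or NULL for a kernel-mode trap). The tracee-side view of such a trap can be observed with a SIGTRAP handler; a minimal x86 example that triggers one with int3 (si_code values may vary by kernel, so the handler just prints them):

#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <string.h>

static void on_trap(int sig, siginfo_t *si, void *ctx)
{
	(void)sig; (void)ctx;
	printf("SIGTRAP: si_code=%d si_addr=%p\n", si->si_code, si->si_addr);
	exit(0);
}

int main(void)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_sigaction = on_trap;
	sa.sa_flags = SA_SIGINFO;
	sigaction(SIGTRAP, &sa, NULL);

	asm volatile("int3");		/* raise a breakpoint trap */
	return 0;
}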
Normally, when TIF_SYSCALL_AUDIT is + * not used, entry.S will call us only on syscall exit, not + * entry; so when TIF_SYSCALL_AUDIT is used we must avoid + * calling send_sigtrap() on syscall entry. + * + * Note that when PTRACE_SYSEMU_SINGLESTEP is used, + * is_singlestep is false, despite its name, so we will still do + * the correct thing. + */ + else if (is_singlestep) + goto out; + } + + if (!(current->ptrace & PT_PTRACED)) + goto out; + + /* If a process stops on the 1st tracepoint with SYSCALL_TRACE + * and then is resumed with SYSEMU_SINGLESTEP, it will come in + * here. We have to check this and return */ + if (is_sysemu && entryexit) + return 0; + + /* Fake a debug trap */ + if (is_singlestep) + send_sigtrap(current, regs, 0); + + if (!test_thread_flag(TIF_SYSCALL_TRACE) && !is_sysemu) + goto out; + + /* the 0x80 provides a way for the tracing parent to distinguish + between a syscall stop and SIGTRAP delivery */ + /* Note that the debugger could change the result of test_thread_flag!*/ + ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80:0)); + + /* + * this isn't the same as continuing with a signal, but it will do + * for normal use. strace only continues with a signal if the + * stopping signal is not SIGTRAP. -brl + */ + if (current->exit_code) { + send_sig(current->exit_code, current, 1); + current->exit_code = 0; + } + ret = is_sysemu; +out: + if (unlikely(current->audit_context) && !entryexit) + audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_ax, + regs->bx, regs->cx, regs->dx, regs->si); + if (ret == 0) + return 0; + + regs->orig_ax = -1; /* force skip of syscall restarting */ + if (unlikely(current->audit_context)) + audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); + return 1; +} + +#else /* CONFIG_X86_64 */ + +static void syscall_trace(struct pt_regs *regs) +{ + +#if 0 + printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n", + current->comm, + regs->ip, regs->sp, regs->ax, regs->orig_ax, __builtin_return_address(0), + current_thread_info()->flags, current->ptrace); +#endif + + ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) + ? 0x80 : 0)); + /* + * this isn't the same as continuing with a signal, but it will do + * for normal use. strace only continues with a signal if the + * stopping signal is not SIGTRAP. 
-brl + */ + if (current->exit_code) { + send_sig(current->exit_code, current, 1); + current->exit_code = 0; + } +} + +asmlinkage void syscall_trace_enter(struct pt_regs *regs) +{ + /* do the secure computing check first */ + secure_computing(regs->orig_ax); + + if (test_thread_flag(TIF_SYSCALL_TRACE) + && (current->ptrace & PT_PTRACED)) + syscall_trace(regs); + + if (unlikely(current->audit_context)) { + if (test_thread_flag(TIF_IA32)) { + audit_syscall_entry(AUDIT_ARCH_I386, + regs->orig_ax, + regs->bx, regs->cx, + regs->dx, regs->si); + } else { + audit_syscall_entry(AUDIT_ARCH_X86_64, + regs->orig_ax, + regs->di, regs->si, + regs->dx, regs->r10); + } + } +} + +asmlinkage void syscall_trace_leave(struct pt_regs *regs) +{ + if (unlikely(current->audit_context)) + audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); + + if ((test_thread_flag(TIF_SYSCALL_TRACE) + || test_thread_flag(TIF_SINGLESTEP)) + && (current->ptrace & PT_PTRACED)) + syscall_trace(regs); +} + +#endif /* CONFIG_X86_32 */ diff -puN arch/x86/kernel/ptrace_32.c~git-x86 /dev/null --- a/arch/x86/kernel/ptrace_32.c +++ /dev/null @@ -1,717 +0,0 @@ -/* By Ross Biro 1/23/92 */ -/* - * Pentium III FXSR, SSE support - * Gareth Hughes , May 2000 - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * does not yet catch signals sent when the child dies. - * in exit.c or in signal.c. - */ - -/* - * Determines which flags the user has access to [1 = access, 0 = no access]. - * Prohibits changing ID(21), VIP(20), VIF(19), VM(17), NT(14), IOPL(12-13), IF(9). - * Also masks reserved bits (31-22, 15, 5, 3, 1). - */ -#define FLAG_MASK 0x00050dd5 - -/* set's the trap flag. */ -#define TRAP_FLAG 0x100 - -/* - * Offset of eflags on child stack.. - */ -#define EFL_OFFSET offsetof(struct pt_regs, eflags) - -static inline struct pt_regs *get_child_regs(struct task_struct *task) -{ - void *stack_top = (void *)task->thread.esp0; - return stack_top - sizeof(struct pt_regs); -} - -/* - * This routine will get a word off of the processes privileged stack. - * the offset is bytes into the pt_regs structure on the stack. - * This routine assumes that all the privileged stacks are in our - * data space. - */ -static inline int get_stack_long(struct task_struct *task, int offset) -{ - unsigned char *stack; - - stack = (unsigned char *)task->thread.esp0 - sizeof(struct pt_regs); - stack += offset; - return (*((int *)stack)); -} - -/* - * This routine will put a word on the processes privileged stack. - * the offset is bytes into the pt_regs structure on the stack. - * This routine assumes that all the privileged stacks are in our - * data space. 
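[Illustration] Both the 32-bit and 64-bit trace paths above notify the tracer with SIGTRAP, OR-ing in 0x80 when PT_TRACESYSGOOD is set so the parent can tell syscall stops from genuine traps. The tracer side of that contract, as a minimal loop (signal forwarding omitted for brevity):

#include <stdio.h>
#include <signal.h>
#include <unistd.h>
#include <sys/ptrace.h>
#include <sys/wait.h>

int main(void)
{
	pid_t pid = fork();
	int status;

	if (pid == 0) {
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		raise(SIGSTOP);
		write(1, "hi\n", 3);		/* a syscall to observe */
		_exit(0);
	}

	waitpid(pid, &status, 0);
	ptrace(PTRACE_SETOPTIONS, pid, NULL, (void *)PTRACE_O_TRACESYSGOOD);

	for (;;) {
		ptrace(PTRACE_SYSCALL, pid, NULL, NULL);
		waitpid(pid, &status, 0);
		if (WIFEXITED(status))
			break;
		/* 0x80 marks a syscall stop, not a genuine SIGTRAP */
		if (WSTOPSIG(status) == (SIGTRAP | 0x80))
			printf("syscall stop\n");
	}
	return 0;
}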
- */ -static inline int put_stack_long(struct task_struct *task, int offset, - unsigned long data) -{ - unsigned char * stack; - - stack = (unsigned char *)task->thread.esp0 - sizeof(struct pt_regs); - stack += offset; - *(unsigned long *) stack = data; - return 0; -} - -static int putreg(struct task_struct *child, - unsigned long regno, unsigned long value) -{ - switch (regno >> 2) { - case GS: - if (value && (value & 3) != 3) - return -EIO; - child->thread.gs = value; - return 0; - case DS: - case ES: - case FS: - if (value && (value & 3) != 3) - return -EIO; - value &= 0xffff; - break; - case SS: - case CS: - if ((value & 3) != 3) - return -EIO; - value &= 0xffff; - break; - case EFL: - value &= FLAG_MASK; - value |= get_stack_long(child, EFL_OFFSET) & ~FLAG_MASK; - break; - } - if (regno > FS*4) - regno -= 1*4; - put_stack_long(child, regno, value); - return 0; -} - -static unsigned long getreg(struct task_struct *child, - unsigned long regno) -{ - unsigned long retval = ~0UL; - - switch (regno >> 2) { - case GS: - retval = child->thread.gs; - break; - case DS: - case ES: - case FS: - case SS: - case CS: - retval = 0xffff; - /* fall through */ - default: - if (regno > FS*4) - regno -= 1*4; - retval &= get_stack_long(child, regno); - } - return retval; -} - -#define LDT_SEGMENT 4 - -static unsigned long convert_eip_to_linear(struct task_struct *child, struct pt_regs *regs) -{ - unsigned long addr, seg; - - addr = regs->eip; - seg = regs->xcs & 0xffff; - if (regs->eflags & VM_MASK) { - addr = (addr & 0xffff) + (seg << 4); - return addr; - } - - /* - * We'll assume that the code segments in the GDT - * are all zero-based. That is largely true: the - * TLS segments are used for data, and the PNPBIOS - * and APM bios ones we just ignore here. - */ - if (seg & LDT_SEGMENT) { - u32 *desc; - unsigned long base; - - seg &= ~7UL; - - mutex_lock(&child->mm->context.lock); - if (unlikely((seg >> 3) >= child->mm->context.size)) - addr = -1L; /* bogus selector, access would fault */ - else { - desc = child->mm->context.ldt + seg; - base = ((desc[0] >> 16) | - ((desc[1] & 0xff) << 16) | - (desc[1] & 0xff000000)); - - /* 16-bit code segment? */ - if (!((desc[1] >> 22) & 1)) - addr &= 0xffff; - addr += base; - } - mutex_unlock(&child->mm->context.lock); - } - return addr; -} - -static inline int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs) -{ - int i, copied; - unsigned char opcode[15]; - unsigned long addr = convert_eip_to_linear(child, regs); - - copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0); - for (i = 0; i < copied; i++) { - switch (opcode[i]) { - /* popf and iret */ - case 0x9d: case 0xcf: - return 1; - /* opcode and address size prefixes */ - case 0x66: case 0x67: - continue; - /* irrelevant prefixes (segment overrides and repeats) */ - case 0x26: case 0x2e: - case 0x36: case 0x3e: - case 0x64: case 0x65: - case 0xf0: case 0xf2: case 0xf3: - continue; - - /* - * pushf: NOTE! We should probably not let - * the user see the TF bit being set. But - * it's more pain than it's worth to avoid - * it, and a debugger could emulate this - * all in user space if it _really_ cares. - */ - case 0x9c: - default: - return 0; - } - } - return 0; -} - -static void set_singlestep(struct task_struct *child) -{ - struct pt_regs *regs = get_child_regs(child); - - /* - * Always set TIF_SINGLESTEP - this guarantees that - * we single-step system calls etc.. This will also - * cause us to set TF when returning to user mode. 
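The EFL case in putreg() above is the usual merge-user-bits pattern: only bits in FLAG_MASK may come from the tracer, everything else is preserved from the saved kernel value. Written out on its own (mask value as defined earlier in this file):

#define FLAG_MASK 0x00050dd5    /* user-writable EFLAGS bits (i386) */

static unsigned long merge_eflags(unsigned long saved, unsigned long user)
{
        return (user & FLAG_MASK) | (saved & ~FLAG_MASK);
}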
- */ - set_tsk_thread_flag(child, TIF_SINGLESTEP); - - /* - * If TF was already set, don't do anything else - */ - if (regs->eflags & TRAP_FLAG) - return; - - /* Set TF on the kernel stack.. */ - regs->eflags |= TRAP_FLAG; - - /* - * ..but if TF is changed by the instruction we will trace, - * don't mark it as being "us" that set it, so that we - * won't clear it by hand later. - */ - if (is_setting_trap_flag(child, regs)) - return; - - child->ptrace |= PT_DTRACE; -} - -static void clear_singlestep(struct task_struct *child) -{ - /* Always clear TIF_SINGLESTEP... */ - clear_tsk_thread_flag(child, TIF_SINGLESTEP); - - /* But touch TF only if it was set by us.. */ - if (child->ptrace & PT_DTRACE) { - struct pt_regs *regs = get_child_regs(child); - regs->eflags &= ~TRAP_FLAG; - child->ptrace &= ~PT_DTRACE; - } -} - -/* - * Called by kernel/ptrace.c when detaching.. - * - * Make sure the single step bit is not set. - */ -void ptrace_disable(struct task_struct *child) -{ - clear_singlestep(child); - clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); -} - -/* - * Perform get_thread_area on behalf of the traced child. - */ -static int -ptrace_get_thread_area(struct task_struct *child, - int idx, struct user_desc __user *user_desc) -{ - struct user_desc info; - struct desc_struct *desc; - -/* - * Get the current Thread-Local Storage area: - */ - -#define GET_BASE(desc) ( \ - (((desc)->a >> 16) & 0x0000ffff) | \ - (((desc)->b << 16) & 0x00ff0000) | \ - ( (desc)->b & 0xff000000) ) - -#define GET_LIMIT(desc) ( \ - ((desc)->a & 0x0ffff) | \ - ((desc)->b & 0xf0000) ) - -#define GET_32BIT(desc) (((desc)->b >> 22) & 1) -#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3) -#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1) -#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1) -#define GET_PRESENT(desc) (((desc)->b >> 15) & 1) -#define GET_USEABLE(desc) (((desc)->b >> 20) & 1) - - if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) - return -EINVAL; - - desc = child->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; - - info.entry_number = idx; - info.base_addr = GET_BASE(desc); - info.limit = GET_LIMIT(desc); - info.seg_32bit = GET_32BIT(desc); - info.contents = GET_CONTENTS(desc); - info.read_exec_only = !GET_WRITABLE(desc); - info.limit_in_pages = GET_LIMIT_PAGES(desc); - info.seg_not_present = !GET_PRESENT(desc); - info.useable = GET_USEABLE(desc); - - if (copy_to_user(user_desc, &info, sizeof(info))) - return -EFAULT; - - return 0; -} - -/* - * Perform set_thread_area on behalf of the traced child. - */ -static int -ptrace_set_thread_area(struct task_struct *child, - int idx, struct user_desc __user *user_desc) -{ - struct user_desc info; - struct desc_struct *desc; - - if (copy_from_user(&info, user_desc, sizeof(info))) - return -EFAULT; - - if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) - return -EINVAL; - - desc = child->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; - if (LDT_empty(&info)) { - desc->a = 0; - desc->b = 0; - } else { - desc->a = LDT_entry_a(&info); - desc->b = LDT_entry_b(&info); - } - - return 0; -} - -long arch_ptrace(struct task_struct *child, long request, long addr, long data) -{ - struct user * dummy = NULL; - int i, ret; - unsigned long __user *datap = (unsigned long __user *)data; - - switch (request) { - /* when I and D space are separate, these will need to be fixed. */ - case PTRACE_PEEKTEXT: /* read word at location addr. */ - case PTRACE_PEEKDATA: - ret = generic_ptrace_peekdata(child, addr, data); - break; - - /* read the word at location addr in the USER area. 
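The GET_BASE()/GET_LIMIT() helpers in ptrace_get_thread_area() above reassemble fields that the x86 descriptor format scatters across the two 32-bit descriptor words. The same extraction written as plain functions, for readability (a and b are the low and high words, as in desc_struct):

static unsigned long desc_base(unsigned int a, unsigned int b)
{
        return ((a >> 16) & 0x0000ffff) |       /* base bits 15..0  */
               ((b << 16) & 0x00ff0000) |       /* base bits 23..16 */
               (b & 0xff000000);                /* base bits 31..24 */
}

static unsigned long desc_limit(unsigned int a, unsigned int b)
{
        return (a & 0x0ffff) | (b & 0xf0000);   /* limit bits 19..0 */
}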
*/ - case PTRACE_PEEKUSR: { - unsigned long tmp; - - ret = -EIO; - if ((addr & 3) || addr < 0 || - addr > sizeof(struct user) - 3) - break; - - tmp = 0; /* Default return condition */ - if(addr < FRAME_SIZE*sizeof(long)) - tmp = getreg(child, addr); - if(addr >= (long) &dummy->u_debugreg[0] && - addr <= (long) &dummy->u_debugreg[7]){ - addr -= (long) &dummy->u_debugreg[0]; - addr = addr >> 2; - tmp = child->thread.debugreg[addr]; - } - ret = put_user(tmp, datap); - break; - } - - /* when I and D space are separate, this will have to be fixed. */ - case PTRACE_POKETEXT: /* write the word at location addr. */ - case PTRACE_POKEDATA: - ret = generic_ptrace_pokedata(child, addr, data); - break; - - case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ - ret = -EIO; - if ((addr & 3) || addr < 0 || - addr > sizeof(struct user) - 3) - break; - - if (addr < FRAME_SIZE*sizeof(long)) { - ret = putreg(child, addr, data); - break; - } - /* We need to be very careful here. We implicitly - want to modify a portion of the task_struct, and we - have to be selective about what portions we allow someone - to modify. */ - - ret = -EIO; - if(addr >= (long) &dummy->u_debugreg[0] && - addr <= (long) &dummy->u_debugreg[7]){ - - if(addr == (long) &dummy->u_debugreg[4]) break; - if(addr == (long) &dummy->u_debugreg[5]) break; - if(addr < (long) &dummy->u_debugreg[4] && - ((unsigned long) data) >= TASK_SIZE-3) break; - - /* Sanity-check data. Take one half-byte at once with - * check = (val >> (16 + 4*i)) & 0xf. It contains the - * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits - * 2 and 3 are LENi. Given a list of invalid values, - * we do mask |= 1 << invalid_value, so that - * (mask >> check) & 1 is a correct test for invalid - * values. - * - * R/Wi contains the type of the breakpoint / - * watchpoint, LENi contains the length of the watched - * data in the watchpoint case. - * - * The invalid values are: - * - LENi == 0x10 (undefined), so mask |= 0x0f00. - * - R/Wi == 0x10 (break on I/O reads or writes), so - * mask |= 0x4444. - * - R/Wi == 0x00 && LENi != 0x00, so we have mask |= - * 0x1110. - * - * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54. - * - * See the Intel Manual "System Programming Guide", - * 15.2.4 - * - * Note that LENi == 0x10 is defined on x86_64 in long - * mode (i.e. even for 32-bit userspace software, but - * 64-bit kernel), so the x86_64 mask value is 0x5454. - * See the AMD manual no. 24593 (AMD64 System - * Programming)*/ - - if(addr == (long) &dummy->u_debugreg[7]) { - data &= ~DR_CONTROL_RESERVED; - for(i=0; i<4; i++) - if ((0x5f54 >> ((data >> (16 + 4*i)) & 0xf)) & 1) - goto out_tsk; - if (data) - set_tsk_thread_flag(child, TIF_DEBUG); - else - clear_tsk_thread_flag(child, TIF_DEBUG); - } - addr -= (long) &dummy->u_debugreg; - addr = addr >> 2; - child->thread.debugreg[addr] = data; - ret = 0; - } - break; - - case PTRACE_SYSEMU: /* continue and stop at next syscall, which will not be executed */ - case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ - case PTRACE_CONT: /* restart after signal. 
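The debug-register mask explained in the long comment above is compact but cryptic on first read. The same validity test pulled out into a standalone helper (0x5f54 is the i386 mask derived in that comment; the x86-64 variant of this check, in ptrace_64.c below, uses 0x5554 because LENi == 2 is a legal 8-byte watchpoint in long mode):

/* Non-zero if any DR7 R/Wi-LENi nibble is invalid on i386. */
static int dr7_invalid(unsigned long data)
{
        int i;

        for (i = 0; i < 4; i++) {
                unsigned int check = (data >> (16 + 4 * i)) & 0xf;

                if ((0x5f54 >> check) & 1)
                        return 1;
        }
        return 0;
}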
*/ - ret = -EIO; - if (!valid_signal(data)) - break; - if (request == PTRACE_SYSEMU) { - set_tsk_thread_flag(child, TIF_SYSCALL_EMU); - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - } else if (request == PTRACE_SYSCALL) { - set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); - } else { - clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - } - child->exit_code = data; - /* make sure the single step bit is not set. */ - clear_singlestep(child); - wake_up_process(child); - ret = 0; - break; - -/* - * make the child exit. Best I can do is send it a sigkill. - * perhaps it should be put in the status that it wants to - * exit. - */ - case PTRACE_KILL: - ret = 0; - if (child->exit_state == EXIT_ZOMBIE) /* already dead */ - break; - child->exit_code = SIGKILL; - /* make sure the single step bit is not set. */ - clear_singlestep(child); - wake_up_process(child); - break; - - case PTRACE_SYSEMU_SINGLESTEP: /* Same as SYSEMU, but singlestep if not syscall */ - case PTRACE_SINGLESTEP: /* set the trap flag. */ - ret = -EIO; - if (!valid_signal(data)) - break; - - if (request == PTRACE_SYSEMU_SINGLESTEP) - set_tsk_thread_flag(child, TIF_SYSCALL_EMU); - else - clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); - - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - set_singlestep(child); - child->exit_code = data; - /* give it a chance to run. */ - wake_up_process(child); - ret = 0; - break; - - case PTRACE_GETREGS: { /* Get all gp regs from the child. */ - if (!access_ok(VERIFY_WRITE, datap, FRAME_SIZE*sizeof(long))) { - ret = -EIO; - break; - } - for ( i = 0; i < FRAME_SIZE*sizeof(long); i += sizeof(long) ) { - __put_user(getreg(child, i), datap); - datap++; - } - ret = 0; - break; - } - - case PTRACE_SETREGS: { /* Set all gp regs in the child. */ - unsigned long tmp; - if (!access_ok(VERIFY_READ, datap, FRAME_SIZE*sizeof(long))) { - ret = -EIO; - break; - } - for ( i = 0; i < FRAME_SIZE*sizeof(long); i += sizeof(long) ) { - __get_user(tmp, datap); - putreg(child, i, tmp); - datap++; - } - ret = 0; - break; - } - - case PTRACE_GETFPREGS: { /* Get the child FPU state. */ - if (!access_ok(VERIFY_WRITE, datap, - sizeof(struct user_i387_struct))) { - ret = -EIO; - break; - } - ret = 0; - if (!tsk_used_math(child)) - init_fpu(child); - get_fpregs((struct user_i387_struct __user *)data, child); - break; - } - - case PTRACE_SETFPREGS: { /* Set the child FPU state. */ - if (!access_ok(VERIFY_READ, datap, - sizeof(struct user_i387_struct))) { - ret = -EIO; - break; - } - set_stopped_child_used_math(child); - set_fpregs(child, (struct user_i387_struct __user *)data); - ret = 0; - break; - } - - case PTRACE_GETFPXREGS: { /* Get the child extended FPU state. */ - if (!access_ok(VERIFY_WRITE, datap, - sizeof(struct user_fxsr_struct))) { - ret = -EIO; - break; - } - if (!tsk_used_math(child)) - init_fpu(child); - ret = get_fpxregs((struct user_fxsr_struct __user *)data, child); - break; - } - - case PTRACE_SETFPXREGS: { /* Set the child extended FPU state. 
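PTRACE_SYSEMU, PTRACE_SYSCALL and the SIGTRAP | 0x80 convention handled above are consumed from the tracer's side. For context, a minimal userspace tracer using the standard ptrace(2) interface (error handling omitted for brevity):

#include <stdio.h>
#include <signal.h>
#include <unistd.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>

int main(void)
{
        pid_t child = fork();
        int status;

        if (child == 0) {
                ptrace(PTRACE_TRACEME, 0, NULL, NULL);
                raise(SIGSTOP);
                write(1, "hi\n", 3);
                _exit(0);
        }
        waitpid(child, &status, 0);
        /* make syscall stops report SIGTRAP | 0x80 */
        ptrace(PTRACE_SETOPTIONS, child, NULL,
               (void *)PTRACE_O_TRACESYSGOOD);
        for (;;) {
                ptrace(PTRACE_SYSCALL, child, NULL, NULL);
                waitpid(child, &status, 0);
                if (WIFEXITED(status))
                        break;
                if (WIFSTOPPED(status) &&
                    WSTOPSIG(status) == (SIGTRAP | 0x80))
                        printf("syscall stop\n");
        }
        return 0;
}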
*/ - if (!access_ok(VERIFY_READ, datap, - sizeof(struct user_fxsr_struct))) { - ret = -EIO; - break; - } - set_stopped_child_used_math(child); - ret = set_fpxregs(child, (struct user_fxsr_struct __user *)data); - break; - } - - case PTRACE_GET_THREAD_AREA: - ret = ptrace_get_thread_area(child, addr, - (struct user_desc __user *) data); - break; - - case PTRACE_SET_THREAD_AREA: - ret = ptrace_set_thread_area(child, addr, - (struct user_desc __user *) data); - break; - - default: - ret = ptrace_request(child, request, addr, data); - break; - } - out_tsk: - return ret; -} - -void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) -{ - struct siginfo info; - - tsk->thread.trap_no = 1; - tsk->thread.error_code = error_code; - - memset(&info, 0, sizeof(info)); - info.si_signo = SIGTRAP; - info.si_code = TRAP_BRKPT; - - /* User-mode eip? */ - info.si_addr = user_mode_vm(regs) ? (void __user *) regs->eip : NULL; - - /* Send us the fake SIGTRAP */ - force_sig_info(SIGTRAP, &info, tsk); -} - -/* notification of system call entry/exit - * - triggered by current->work.syscall_trace - */ -__attribute__((regparm(3))) -int do_syscall_trace(struct pt_regs *regs, int entryexit) -{ - int is_sysemu = test_thread_flag(TIF_SYSCALL_EMU); - /* - * With TIF_SYSCALL_EMU set we want to ignore TIF_SINGLESTEP for syscall - * interception - */ - int is_singlestep = !is_sysemu && test_thread_flag(TIF_SINGLESTEP); - int ret = 0; - - /* do the secure computing check first */ - if (!entryexit) - secure_computing(regs->orig_eax); - - if (unlikely(current->audit_context)) { - if (entryexit) - audit_syscall_exit(AUDITSC_RESULT(regs->eax), - regs->eax); - /* Debug traps, when using PTRACE_SINGLESTEP, must be sent only - * on the syscall exit path. Normally, when TIF_SYSCALL_AUDIT is - * not used, entry.S will call us only on syscall exit, not - * entry; so when TIF_SYSCALL_AUDIT is used we must avoid - * calling send_sigtrap() on syscall entry. - * - * Note that when PTRACE_SYSEMU_SINGLESTEP is used, - * is_singlestep is false, despite his name, so we will still do - * the correct thing. - */ - else if (is_singlestep) - goto out; - } - - if (!(current->ptrace & PT_PTRACED)) - goto out; - - /* If a process stops on the 1st tracepoint with SYSCALL_TRACE - * and then is resumed with SYSEMU_SINGLESTEP, it will come in - * here. We have to check this and return */ - if (is_sysemu && entryexit) - return 0; - - /* Fake a debug trap */ - if (is_singlestep) - send_sigtrap(current, regs, 0); - - if (!test_thread_flag(TIF_SYSCALL_TRACE) && !is_sysemu) - goto out; - - /* the 0x80 provides a way for the tracing parent to distinguish - between a syscall stop and SIGTRAP delivery */ - /* Note that the debugger could change the result of test_thread_flag!*/ - ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80:0)); - - /* - * this isn't the same as continuing with a signal, but it will do - * for normal use. strace only continues with a signal if the - * stopping signal is not SIGTRAP. 
-brl - */ - if (current->exit_code) { - send_sig(current->exit_code, current, 1); - current->exit_code = 0; - } - ret = is_sysemu; -out: - if (unlikely(current->audit_context) && !entryexit) - audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_eax, - regs->ebx, regs->ecx, regs->edx, regs->esi); - if (ret == 0) - return 0; - - regs->orig_eax = -1; /* force skip of syscall restarting */ - if (unlikely(current->audit_context)) - audit_syscall_exit(AUDITSC_RESULT(regs->eax), regs->eax); - return 1; -} diff -puN arch/x86/kernel/ptrace_64.c~git-x86 /dev/null --- a/arch/x86/kernel/ptrace_64.c +++ /dev/null @@ -1,621 +0,0 @@ -/* By Ross Biro 1/23/92 */ -/* - * Pentium III FXSR, SSE support - * Gareth Hughes , May 2000 - * - * x86-64 port 2000-2002 Andi Kleen - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * does not yet catch signals sent when the child dies. - * in exit.c or in signal.c. - */ - -/* - * Determines which flags the user has access to [1 = access, 0 = no access]. - * Prohibits changing ID(21), VIP(20), VIF(19), VM(17), IOPL(12-13), IF(9). - * Also masks reserved bits (63-22, 15, 5, 3, 1). - */ -#define FLAG_MASK 0x54dd5UL - -/* set's the trap flag. */ -#define TRAP_FLAG 0x100UL - -/* - * eflags and offset of eflags on child stack.. - */ -#define EFLAGS offsetof(struct pt_regs, eflags) -#define EFL_OFFSET ((int)(EFLAGS-sizeof(struct pt_regs))) - -/* - * this routine will get a word off of the processes privileged stack. - * the offset is how far from the base addr as stored in the TSS. - * this routine assumes that all the privileged stacks are in our - * data space. - */ -static inline unsigned long get_stack_long(struct task_struct *task, int offset) -{ - unsigned char *stack; - - stack = (unsigned char *)task->thread.rsp0; - stack += offset; - return (*((unsigned long *)stack)); -} - -/* - * this routine will put a word on the processes privileged stack. - * the offset is how far from the base addr as stored in the TSS. - * this routine assumes that all the privileged stacks are in our - * data space. - */ -static inline long put_stack_long(struct task_struct *task, int offset, - unsigned long data) -{ - unsigned char * stack; - - stack = (unsigned char *) task->thread.rsp0; - stack += offset; - *(unsigned long *) stack = data; - return 0; -} - -#define LDT_SEGMENT 4 - -unsigned long convert_rip_to_linear(struct task_struct *child, struct pt_regs *regs) -{ - unsigned long addr, seg; - - addr = regs->rip; - seg = regs->cs & 0xffff; - - /* - * We'll assume that the code segments in the GDT - * are all zero-based. That is largely true: the - * TLS segments are used for data, and the PNPBIOS - * and APM bios ones we just ignore here. - */ - if (seg & LDT_SEGMENT) { - u32 *desc; - unsigned long base; - - seg &= ~7UL; - - mutex_lock(&child->mm->context.lock); - if (unlikely((seg >> 3) >= child->mm->context.size)) - addr = -1L; /* bogus selector, access would fault */ - else { - desc = child->mm->context.ldt + seg; - base = ((desc[0] >> 16) | - ((desc[1] & 0xff) << 16) | - (desc[1] & 0xff000000)); - - /* 16-bit code segment? 
*/ - if (!((desc[1] >> 22) & 1)) - addr &= 0xffff; - addr += base; - } - mutex_unlock(&child->mm->context.lock); - } - - return addr; -} - -static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs) -{ - int i, copied; - unsigned char opcode[15]; - unsigned long addr = convert_rip_to_linear(child, regs); - - copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0); - for (i = 0; i < copied; i++) { - switch (opcode[i]) { - /* popf and iret */ - case 0x9d: case 0xcf: - return 1; - - /* CHECKME: 64 65 */ - - /* opcode and address size prefixes */ - case 0x66: case 0x67: - continue; - /* irrelevant prefixes (segment overrides and repeats) */ - case 0x26: case 0x2e: - case 0x36: case 0x3e: - case 0x64: case 0x65: - case 0xf2: case 0xf3: - continue; - - case 0x40 ... 0x4f: - if (regs->cs != __USER_CS) - /* 32-bit mode: register increment */ - return 0; - /* 64-bit mode: REX prefix */ - continue; - - /* CHECKME: f2, f3 */ - - /* - * pushf: NOTE! We should probably not let - * the user see the TF bit being set. But - * it's more pain than it's worth to avoid - * it, and a debugger could emulate this - * all in user space if it _really_ cares. - */ - case 0x9c: - default: - return 0; - } - } - return 0; -} - -static void set_singlestep(struct task_struct *child) -{ - struct pt_regs *regs = task_pt_regs(child); - - /* - * Always set TIF_SINGLESTEP - this guarantees that - * we single-step system calls etc.. This will also - * cause us to set TF when returning to user mode. - */ - set_tsk_thread_flag(child, TIF_SINGLESTEP); - - /* - * If TF was already set, don't do anything else - */ - if (regs->eflags & TRAP_FLAG) - return; - - /* Set TF on the kernel stack.. */ - regs->eflags |= TRAP_FLAG; - - /* - * ..but if TF is changed by the instruction we will trace, - * don't mark it as being "us" that set it, so that we - * won't clear it by hand later. - */ - if (is_setting_trap_flag(child, regs)) - return; - - child->ptrace |= PT_DTRACE; -} - -static void clear_singlestep(struct task_struct *child) -{ - /* Always clear TIF_SINGLESTEP... */ - clear_tsk_thread_flag(child, TIF_SINGLESTEP); - - /* But touch TF only if it was set by us.. */ - if (child->ptrace & PT_DTRACE) { - struct pt_regs *regs = task_pt_regs(child); - regs->eflags &= ~TRAP_FLAG; - child->ptrace &= ~PT_DTRACE; - } -} - -/* - * Called by kernel/ptrace.c when detaching.. - * - * Make sure the single step bit is not set. 
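is_setting_trap_flag() just above is a bounded instruction scanner: skip prefixes, classify the first real opcode, and report whether it can pop TF off the stack. Condensed into a standalone sketch (32-bit prefix set only; the REX handling of the 64-bit copy is left out):

/* Sketch: 1 if the instruction in buf may restore TF (popf/iret). */
static int may_set_tf(const unsigned char *buf, int len)
{
        int i;

        for (i = 0; i < len; i++) {
                switch (buf[i]) {
                case 0x9d: case 0xcf:           /* popf, iret */
                        return 1;
                case 0x66: case 0x67:           /* size prefixes */
                case 0x26: case 0x2e:           /* segment overrides */
                case 0x36: case 0x3e:
                case 0x64: case 0x65:
                case 0xf0: case 0xf2: case 0xf3: /* lock, rep */
                        continue;
                default:                        /* incl. pushf, 0x9c */
                        return 0;
                }
        }
        return 0;
}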
- */ -void ptrace_disable(struct task_struct *child) -{ - clear_singlestep(child); -} - -static int putreg(struct task_struct *child, - unsigned long regno, unsigned long value) -{ - unsigned long tmp; - - switch (regno) { - case offsetof(struct user_regs_struct,fs): - if (value && (value & 3) != 3) - return -EIO; - child->thread.fsindex = value & 0xffff; - return 0; - case offsetof(struct user_regs_struct,gs): - if (value && (value & 3) != 3) - return -EIO; - child->thread.gsindex = value & 0xffff; - return 0; - case offsetof(struct user_regs_struct,ds): - if (value && (value & 3) != 3) - return -EIO; - child->thread.ds = value & 0xffff; - return 0; - case offsetof(struct user_regs_struct,es): - if (value && (value & 3) != 3) - return -EIO; - child->thread.es = value & 0xffff; - return 0; - case offsetof(struct user_regs_struct,ss): - if ((value & 3) != 3) - return -EIO; - value &= 0xffff; - return 0; - case offsetof(struct user_regs_struct,fs_base): - if (value >= TASK_SIZE_OF(child)) - return -EIO; - child->thread.fs = value; - return 0; - case offsetof(struct user_regs_struct,gs_base): - if (value >= TASK_SIZE_OF(child)) - return -EIO; - child->thread.gs = value; - return 0; - case offsetof(struct user_regs_struct, eflags): - value &= FLAG_MASK; - tmp = get_stack_long(child, EFL_OFFSET); - tmp &= ~FLAG_MASK; - value |= tmp; - break; - case offsetof(struct user_regs_struct,cs): - if ((value & 3) != 3) - return -EIO; - value &= 0xffff; - break; - } - put_stack_long(child, regno - sizeof(struct pt_regs), value); - return 0; -} - -static unsigned long getreg(struct task_struct *child, unsigned long regno) -{ - unsigned long val; - switch (regno) { - case offsetof(struct user_regs_struct, fs): - return child->thread.fsindex; - case offsetof(struct user_regs_struct, gs): - return child->thread.gsindex; - case offsetof(struct user_regs_struct, ds): - return child->thread.ds; - case offsetof(struct user_regs_struct, es): - return child->thread.es; - case offsetof(struct user_regs_struct, fs_base): - return child->thread.fs; - case offsetof(struct user_regs_struct, gs_base): - return child->thread.gs; - default: - regno = regno - sizeof(struct pt_regs); - val = get_stack_long(child, regno); - if (test_tsk_thread_flag(child, TIF_IA32)) - val &= 0xffffffff; - return val; - } - -} - -long arch_ptrace(struct task_struct *child, long request, long addr, long data) -{ - long i, ret; - unsigned ui; - - switch (request) { - /* when I and D space are separate, these will need to be fixed. */ - case PTRACE_PEEKTEXT: /* read word at location addr. */ - case PTRACE_PEEKDATA: - ret = generic_ptrace_peekdata(child, addr, data); - break; - - /* read the word at location addr in the USER area. */ - case PTRACE_PEEKUSR: { - unsigned long tmp; - - ret = -EIO; - if ((addr & 7) || - addr > sizeof(struct user) - 7) - break; - - switch (addr) { - case 0 ... 
sizeof(struct user_regs_struct) - sizeof(long): - tmp = getreg(child, addr); - break; - case offsetof(struct user, u_debugreg[0]): - tmp = child->thread.debugreg0; - break; - case offsetof(struct user, u_debugreg[1]): - tmp = child->thread.debugreg1; - break; - case offsetof(struct user, u_debugreg[2]): - tmp = child->thread.debugreg2; - break; - case offsetof(struct user, u_debugreg[3]): - tmp = child->thread.debugreg3; - break; - case offsetof(struct user, u_debugreg[6]): - tmp = child->thread.debugreg6; - break; - case offsetof(struct user, u_debugreg[7]): - tmp = child->thread.debugreg7; - break; - default: - tmp = 0; - break; - } - ret = put_user(tmp,(unsigned long __user *) data); - break; - } - - /* when I and D space are separate, this will have to be fixed. */ - case PTRACE_POKETEXT: /* write the word at location addr. */ - case PTRACE_POKEDATA: - ret = generic_ptrace_pokedata(child, addr, data); - break; - - case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ - { - int dsize = test_tsk_thread_flag(child, TIF_IA32) ? 3 : 7; - ret = -EIO; - if ((addr & 7) || - addr > sizeof(struct user) - 7) - break; - - switch (addr) { - case 0 ... sizeof(struct user_regs_struct) - sizeof(long): - ret = putreg(child, addr, data); - break; - /* Disallows to set a breakpoint into the vsyscall */ - case offsetof(struct user, u_debugreg[0]): - if (data >= TASK_SIZE_OF(child) - dsize) break; - child->thread.debugreg0 = data; - ret = 0; - break; - case offsetof(struct user, u_debugreg[1]): - if (data >= TASK_SIZE_OF(child) - dsize) break; - child->thread.debugreg1 = data; - ret = 0; - break; - case offsetof(struct user, u_debugreg[2]): - if (data >= TASK_SIZE_OF(child) - dsize) break; - child->thread.debugreg2 = data; - ret = 0; - break; - case offsetof(struct user, u_debugreg[3]): - if (data >= TASK_SIZE_OF(child) - dsize) break; - child->thread.debugreg3 = data; - ret = 0; - break; - case offsetof(struct user, u_debugreg[6]): - if (data >> 32) - break; - child->thread.debugreg6 = data; - ret = 0; - break; - case offsetof(struct user, u_debugreg[7]): - /* See arch/i386/kernel/ptrace.c for an explanation of - * this awkward check.*/ - data &= ~DR_CONTROL_RESERVED; - for(i=0; i<4; i++) - if ((0x5554 >> ((data >> (16 + 4*i)) & 0xf)) & 1) - break; - if (i == 4) { - child->thread.debugreg7 = data; - if (data) - set_tsk_thread_flag(child, TIF_DEBUG); - else - clear_tsk_thread_flag(child, TIF_DEBUG); - ret = 0; - } - break; - } - break; - } - case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ - case PTRACE_CONT: /* restart after signal. */ - - ret = -EIO; - if (!valid_signal(data)) - break; - if (request == PTRACE_SYSCALL) - set_tsk_thread_flag(child,TIF_SYSCALL_TRACE); - else - clear_tsk_thread_flag(child,TIF_SYSCALL_TRACE); - clear_tsk_thread_flag(child, TIF_SINGLESTEP); - child->exit_code = data; - /* make sure the single step bit is not set. */ - clear_singlestep(child); - wake_up_process(child); - ret = 0; - break; - -#ifdef CONFIG_IA32_EMULATION - /* This makes only sense with 32bit programs. Allow a - 64bit debugger to fully examine them too. Better - don't use it against 64bit processes, use - PTRACE_ARCH_PRCTL instead. 
*/ - case PTRACE_SET_THREAD_AREA: { - struct user_desc __user *p; - int old; - p = (struct user_desc __user *)data; - get_user(old, &p->entry_number); - put_user(addr, &p->entry_number); - ret = do_set_thread_area(&child->thread, p); - put_user(old, &p->entry_number); - break; - case PTRACE_GET_THREAD_AREA: - p = (struct user_desc __user *)data; - get_user(old, &p->entry_number); - put_user(addr, &p->entry_number); - ret = do_get_thread_area(&child->thread, p); - put_user(old, &p->entry_number); - break; - } -#endif - /* normal 64bit interface to access TLS data. - Works just like arch_prctl, except that the arguments - are reversed. */ - case PTRACE_ARCH_PRCTL: - ret = do_arch_prctl(child, data, addr); - break; - -/* - * make the child exit. Best I can do is send it a sigkill. - * perhaps it should be put in the status that it wants to - * exit. - */ - case PTRACE_KILL: - ret = 0; - if (child->exit_state == EXIT_ZOMBIE) /* already dead */ - break; - clear_tsk_thread_flag(child, TIF_SINGLESTEP); - child->exit_code = SIGKILL; - /* make sure the single step bit is not set. */ - clear_singlestep(child); - wake_up_process(child); - break; - - case PTRACE_SINGLESTEP: /* set the trap flag. */ - ret = -EIO; - if (!valid_signal(data)) - break; - clear_tsk_thread_flag(child,TIF_SYSCALL_TRACE); - set_singlestep(child); - child->exit_code = data; - /* give it a chance to run. */ - wake_up_process(child); - ret = 0; - break; - - case PTRACE_GETREGS: { /* Get all gp regs from the child. */ - if (!access_ok(VERIFY_WRITE, (unsigned __user *)data, - sizeof(struct user_regs_struct))) { - ret = -EIO; - break; - } - ret = 0; - for (ui = 0; ui < sizeof(struct user_regs_struct); ui += sizeof(long)) { - ret |= __put_user(getreg(child, ui),(unsigned long __user *) data); - data += sizeof(long); - } - break; - } - - case PTRACE_SETREGS: { /* Set all gp regs in the child. */ - unsigned long tmp; - if (!access_ok(VERIFY_READ, (unsigned __user *)data, - sizeof(struct user_regs_struct))) { - ret = -EIO; - break; - } - ret = 0; - for (ui = 0; ui < sizeof(struct user_regs_struct); ui += sizeof(long)) { - ret = __get_user(tmp, (unsigned long __user *) data); - if (ret) - break; - ret = putreg(child, ui, tmp); - if (ret) - break; - data += sizeof(long); - } - break; - } - - case PTRACE_GETFPREGS: { /* Get the child extended FPU state. */ - if (!access_ok(VERIFY_WRITE, (unsigned __user *)data, - sizeof(struct user_i387_struct))) { - ret = -EIO; - break; - } - ret = get_fpregs((struct user_i387_struct __user *)data, child); - break; - } - - case PTRACE_SETFPREGS: { /* Set the child extended FPU state. */ - if (!access_ok(VERIFY_READ, (unsigned __user *)data, - sizeof(struct user_i387_struct))) { - ret = -EIO; - break; - } - set_stopped_child_used_math(child); - ret = set_fpregs(child, (struct user_i387_struct __user *)data); - break; - } - - default: - ret = ptrace_request(child, request, addr, data); - break; - } - return ret; -} - -static void syscall_trace(struct pt_regs *regs) -{ - -#if 0 - printk("trace %s rip %lx rsp %lx rax %d origrax %d caller %lx tiflags %x ptrace %x\n", - current->comm, - regs->rip, regs->rsp, regs->rax, regs->orig_rax, __builtin_return_address(0), - current_thread_info()->flags, current->ptrace); -#endif - - ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) - ? 0x80 : 0)); - /* - * this isn't the same as continuing with a signal, but it will do - * for normal use. strace only continues with a signal if the - * stopping signal is not SIGTRAP. 
-brl - */ - if (current->exit_code) { - send_sig(current->exit_code, current, 1); - current->exit_code = 0; - } -} - -asmlinkage void syscall_trace_enter(struct pt_regs *regs) -{ - /* do the secure computing check first */ - secure_computing(regs->orig_rax); - - if (test_thread_flag(TIF_SYSCALL_TRACE) - && (current->ptrace & PT_PTRACED)) - syscall_trace(regs); - - if (unlikely(current->audit_context)) { - if (test_thread_flag(TIF_IA32)) { - audit_syscall_entry(AUDIT_ARCH_I386, - regs->orig_rax, - regs->rbx, regs->rcx, - regs->rdx, regs->rsi); - } else { - audit_syscall_entry(AUDIT_ARCH_X86_64, - regs->orig_rax, - regs->rdi, regs->rsi, - regs->rdx, regs->r10); - } - } -} - -asmlinkage void syscall_trace_leave(struct pt_regs *regs) -{ - if (unlikely(current->audit_context)) - audit_syscall_exit(AUDITSC_RESULT(regs->rax), regs->rax); - - if ((test_thread_flag(TIF_SYSCALL_TRACE) - || test_thread_flag(TIF_SINGLESTEP)) - && (current->ptrace & PT_PTRACED)) - syscall_trace(regs); -} diff -puN arch/x86/kernel/reboot_fixups_32.c~git-x86 arch/x86/kernel/reboot_fixups_32.c --- a/arch/x86/kernel/reboot_fixups_32.c~git-x86 +++ a/arch/x86/kernel/reboot_fixups_32.c @@ -30,6 +30,19 @@ static void cs5536_warm_reset(struct pci udelay(50); /* shouldn't get here but be safe and spin a while */ } +static void rdc321x_reset(struct pci_dev *dev) +{ + unsigned i; + /* Voluntarily reset the watchdog timer */ + outl(0x80003840, 0xCF8); + /* Generate a CPU reset on next tick */ + i = inl(0xCFC); + /* Use the minimum timer resolution */ + i |= 0x1600; + outl(i, 0xCFC); + outb(1, 0x92); +} + struct device_fixup { unsigned int vendor; unsigned int device; @@ -40,6 +53,7 @@ static struct device_fixup fixups_table[ { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset }, { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, cs5536_warm_reset }, { PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE, cs5530a_warm_reset }, +{ PCI_VENDOR_ID_RDC, PCI_DEVICE_ID_RDC_R6030, rdc321x_reset }, }; /* diff -puN /dev/null arch/x86/kernel/rtc.c --- /dev/null +++ a/arch/x86/kernel/rtc.c @@ -0,0 +1,196 @@ +/* + * RTC related functions + */ +#include +#include +#include + +#include + +#ifdef CONFIG_X86_32 +# define CMOS_YEARS_OFFS 1900 +/* + * This is a special lock that is owned by the CPU and holds the index + * register we are working with. It is required for NMI access to the + * CMOS/RTC registers. See include/asm-i386/mc146818rtc.h for details. + */ +volatile unsigned long cmos_lock = 0; +EXPORT_SYMBOL(cmos_lock); +#else +/* + * x86-64 systems have only existed since 2002. + * This will work up to Dec 31, 2100 + */ +# define CMOS_YEARS_OFFS 2000 +#endif + +DEFINE_SPINLOCK(rtc_lock); +EXPORT_SYMBOL(rtc_lock); + +/* + * In order to set the CMOS clock precisely, set_rtc_mmss has to be + * called 500 ms after the start of the second denoted by nowtime, + * because when nowtime is written into the registers of the CMOS + * clock, it will jump to the next second precisely 500 ms later. + * Check the Motorola MC146818A or Dallas DS12887 data sheet for + * details. + * + * BUG: This routine does not handle hour overflow properly; it just + * sets the minutes. Usually you'll only notice that after reboot!
+ */ +int mach_set_rtc_mmss(unsigned long nowtime) +{ + int retval = 0; + int real_seconds, real_minutes, cmos_minutes; + unsigned char save_control, save_freq_select; + + /* tell the clock it's being set */ + save_control = CMOS_READ(RTC_CONTROL); + CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); + + /* stop and reset prescaler */ + save_freq_select = CMOS_READ(RTC_FREQ_SELECT); + CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); + + cmos_minutes = CMOS_READ(RTC_MINUTES); + if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) + BCD_TO_BIN(cmos_minutes); + + /* + * since we're only adjusting minutes and seconds, + * don't interfere with hour overflow. This avoids + * messing with unknown time zones but requires your + * RTC not to be off by more than 15 minutes + */ + real_seconds = nowtime % 60; + real_minutes = nowtime / 60; + /* correct for half hour time zone */ + if (((abs(real_minutes - cmos_minutes) + 15)/30) & 1) + real_minutes += 30; + real_minutes %= 60; + + if (abs(real_minutes - cmos_minutes) < 30) { + if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { + BIN_TO_BCD(real_seconds); + BIN_TO_BCD(real_minutes); + } + CMOS_WRITE(real_seconds,RTC_SECONDS); + CMOS_WRITE(real_minutes,RTC_MINUTES); + } else { + printk(KERN_WARNING + "set_rtc_mmss: can't update from %d to %d\n", + cmos_minutes, real_minutes); + retval = -1; + } + + /* The following flags have to be released exactly in this order, + * otherwise the DS12887 (popular MC146818A clone with integrated + * battery and quartz) will not reset the oscillator and will not + * update precisely 500 ms later. You won't find this mentioned in + * the Dallas Semiconductor data sheets, but who believes data + * sheets anyway ... -- Markus Kuhn + */ + CMOS_WRITE(save_control, RTC_CONTROL); + CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); + + return retval; +} + +unsigned long mach_get_cmos_time(void) +{ + unsigned int year, mon, day, hour, min, sec, century = 0; + + /* + * If UIP is clear, then we have >= 244 microseconds before + * RTC registers will be updated. The spec sheet says that this + * is the reliable way to read the RTC registers. If UIP is set + * then the register access might be invalid. + */ + while ((CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP)) + cpu_relax(); + + sec = CMOS_READ(RTC_SECONDS); + min = CMOS_READ(RTC_MINUTES); + hour = CMOS_READ(RTC_HOURS); + day = CMOS_READ(RTC_DAY_OF_MONTH); + mon = CMOS_READ(RTC_MONTH); + year = CMOS_READ(RTC_YEAR); + +#if defined(CONFIG_ACPI) && defined(CONFIG_X86_64) + /* CHECKME: Is this really 64bit only ??? */ + if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID && + acpi_gbl_FADT.century) + century = CMOS_READ(acpi_gbl_FADT.century); +#endif + + if (RTC_ALWAYS_BCD || !(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY)) { + BCD_TO_BIN(sec); + BCD_TO_BIN(min); + BCD_TO_BIN(hour); + BCD_TO_BIN(day); + BCD_TO_BIN(mon); + BCD_TO_BIN(year); + } + + if (century) { + BCD_TO_BIN(century); + year += century * 100; + printk(KERN_INFO "Extended CMOS year: %d\n", century * 100); + } else { + year += CMOS_YEARS_OFFS; + if (year < 1970) + year += 100; + } + + return mktime(year, mon, day, hour, min, sec); +} + +/* Routines for accessing the CMOS RAM/RTC.
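The BCD_TO_BIN()/BIN_TO_BCD() conversions used throughout the RTC code above are simple nibble arithmetic; written out as functions (equivalent to the classic mc146818rtc.h macros):

static unsigned int bcd_to_bin(unsigned int x)
{
        return (x & 0x0f) + (x >> 4) * 10;      /* 0x59 -> 59 */
}

static unsigned int bin_to_bcd(unsigned int x)
{
        return ((x / 10) << 4) | (x % 10);      /* 59 -> 0x59 */
}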
*/ +unsigned char rtc_cmos_read(unsigned char addr) +{ + unsigned char val; + + lock_cmos_prefix(addr); + outb_p(addr, RTC_PORT(0)); + val = inb_p(RTC_PORT(1)); + lock_cmos_suffix(addr); + return val; +} +EXPORT_SYMBOL(rtc_cmos_read); + +void rtc_cmos_write(unsigned char val, unsigned char addr) +{ + lock_cmos_prefix(addr); + outb_p(addr, RTC_PORT(0)); + outb_p(val, RTC_PORT(1)); + lock_cmos_suffix(addr); +} +EXPORT_SYMBOL(rtc_cmos_write); + +static int set_rtc_mmss(unsigned long nowtime) +{ + int retval; + unsigned long flags; + + spin_lock_irqsave(&rtc_lock, flags); + retval = set_wallclock(nowtime); + spin_unlock_irqrestore(&rtc_lock, flags); + + return retval; +} + +/* not static: needed by APM */ +unsigned long read_persistent_clock(void) +{ + unsigned long retval, flags; + + spin_lock_irqsave(&rtc_lock, flags); + retval = get_wallclock(); + spin_unlock_irqrestore(&rtc_lock, flags); + + return retval; +} + +int update_persistent_clock(struct timespec now) +{ + return set_rtc_mmss(now.tv_sec); +} diff -puN arch/x86/kernel/setup64.c~git-x86 arch/x86/kernel/setup64.c --- a/arch/x86/kernel/setup64.c~git-x86 +++ a/arch/x86/kernel/setup64.c @@ -169,7 +169,8 @@ void syscall_init(void) #endif /* Flags to clear on syscall */ - wrmsrl(MSR_SYSCALL_MASK, EF_TF|EF_DF|EF_IE|0x3000); + wrmsrl(MSR_SYSCALL_MASK, + X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); } void __cpuinit check_efer(void) diff -puN arch/x86/kernel/setup_32.c~git-x86 arch/x86/kernel/setup_32.c --- a/arch/x86/kernel/setup_32.c~git-x86 +++ a/arch/x86/kernel/setup_32.c @@ -44,6 +44,7 @@ #include #include #include +#include #include
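On the setup64.c hunk above: assuming the old EF_* constants carried the usual EFLAGS values, the named-constant form clears exactly the same bits as the old literal, since X86_EFLAGS_TF is 0x100, X86_EFLAGS_IF 0x200, X86_EFLAGS_DF 0x400 and X86_EFLAGS_IOPL 0x3000. A tiny standalone check (constants restated here rather than pulled from kernel headers):

#include <assert.h>

#define X86_EFLAGS_TF   0x00000100      /* trap flag */
#define X86_EFLAGS_IF   0x00000200      /* interrupt enable */
#define X86_EFLAGS_DF   0x00000400      /* direction flag */
#define X86_EFLAGS_IOPL 0x00003000      /* I/O privilege level */

int main(void)
{
        /* same mask the old EF_TF|EF_DF|EF_IE|0x3000 produced */
        assert((X86_EFLAGS_TF | X86_EFLAGS_DF |
                X86_EFLAGS_IF | X86_EFLAGS_IOPL) == 0x3700);
        return 0;
}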