GIT 4698d61211da22213bbff28ac71e499d24a9714f git+ssh://master.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86.git#mm commit 4698d61211da22213bbff28ac71e499d24a9714f Author: Jiri Kosina Date: Mon Nov 19 00:03:56 2007 +0100 x86: randomize brk Randomize the location of the heap (brk) for i386 and x86_64. The range is randomized in the range starting at current brk location up to 0x02000000 offset for both architectures. This, together with pie-executable-randomization.patch and pie-executable-randomization-fix.patch, should make the address space randomization on i386 and x86_64 complete. Arjan says: This is known to break older versions of some emacs variants, whose dumper code assumed that the last variable declared in the program is equal to the start of the dynamically allocated memory region. (The dumper is the code where emacs effectively dumps core at the end of it's compilation stage; this coredump is then loaded as the main program during normal use) iirc this was 5 years or so; we found this way back when I was at RH and we first did the security stuff there (including this brk randomization). It wasn't all variants of emacs, and it got fixed as a result (I vaguely remember that emacs already had code to deal with it for other archs/oses, just ifdeffed wrongly). It's a rare and wrong assumption as a general thing, just on x86 it mostly happened to be true (but to be honest, it'll break too if gcc does something fancy or if the linker does a non-standard order). Still its something we should at least document. Note 2: afaik it only broke the emacs *build*. I'm not 100% sure about that (it IS 5 years ago) though. [akpm@linux-foundation.org: deuglification] Signed-off-by: Jiri Kosina Cc: Arjan van de Ven Cc: Roland McGrath Cc: Jakub Jelinek Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 7ceea029489eeb20ab2bb82171a4eb2dffd57e6f Author: Christoph Lameter Date: Mon Nov 19 00:03:55 2007 +0100 x86: make stack size configurable Make the stack size configurable necessary. SGI NUMA configurations may need more stack because cpumasks and nodemasks are at times kept on the stack. This patch allows to run with 16k or 32k kernel stacks. [tglx@linutronix.de: add range check and dependencies] Signed-off-by: Christoph Lameter Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Andi Kleen Cc: Mike Travis Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit f8e557046e2193fb122a6c942295f4a1690dd2d2 Author: Andrew Morton Date: Mon Nov 19 00:03:55 2007 +0100 x86-arch_register_cpu-section-fix i386 allnoconfig: WARNING: vmlinux.o(.text+0x6f2e): Section mismatch: reference to .init.text:register_cpu (between 'arch_register_cpu' and 'text_poke') Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit e59e0b407f0f4ac3a0a00eb06dc6e59114c80208 Author: Aaron Durbin Date: Mon Nov 19 00:03:55 2007 +0100 x86: add acpi reboot option Add the ability to reboot an x86_64 based machine using the RESET_REG in the FADT ACPI table. Signed-off-by: Aaron Durbin Cc: Len Brown Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 142caaa8e5908fcf2c3778f1a4840bb092275110 Author: Barry Kasindorf Date: Mon Nov 19 00:03:55 2007 +0100 oprofile-op_model_athalonc-support-for-amd-family10h-barcelona-performance-counters This patch is for controlling the upper 32bits of the event ctrl msrs. This includes the upper 4 bits of the event select and the Guest Only and Host Only bits This patch is necessary to make Event Based Profiling work reliably on a Family 10h processor {akpm@linux-foundation.org: checkpatch.pl fixes] Signed-off-by: Barry Kasindorf Signed-off-by: Robert Richter Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 820da741c656b4f788eebc4853fd83fb245ff2a1 Author: Florian Fainelli Date: Thu Oct 18 15:59:00 2007 +0200 x86: Add the RDC machine specific reboot fixup The RDC R-321x SoC needs a reboot fixup which uses its internal hardware watchdog set to reset the CPU on next tick. Signed-off-by: Florian Fainelli Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 5c41981f8cb219c1bd504844034730fb408ab461 Author: Florian Fainelli Date: Thu Oct 18 15:51:31 2007 +0200 x86: Add support for the RDC R-321x SoC This patch adds support for the RDC R-321x system-on-chip, also known as R-861x-(G). It uses the generic GPIO API and has support for the on-chip hardware watchdog. Signed-off-by: Florian Fainelli Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit dc58b5d15af5bfbd7a2ccceaebafe7e47c60b26f Author: Florian Fainelli Date: Thu Oct 18 15:57:55 2007 +0200 pci: Add PCI identifiers for the RDC devices This patch defines the PCI identifiers found in the RDC R-321x System-on-Chip. Signed-off-by: Florian Fainelli Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit d785bf666030e34b411e6f797c4f7a4fdcb301f8 Author: Florian Fainelli Date: Thu Oct 18 15:51:24 2007 +0200 x86: Add generic GPIO support to x86 This patch adds the generic GPIO support to the x86 architecture. We do the same as for MIPS, we let the machine override the gpio callbacks and provide defaults one in mach-generic. Signed-off-by: Florian Fainelli Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 2391bb43affb647c103712e4519ca5fe03fc25e3 Author: Andres Salomon Date: Mon Nov 19 00:03:55 2007 +0100 x86: GEODE: update GPIO API to support setting multiple GPIOs at once The existing Geode GPIO API only allows for updating one GPIO at once. There are instances where users want to update multiple GPIOs at once. With the current API, they are given two choices; either ignore the GPIO API: outl(0xc000, gpio_base + GPIO_OUTPUT_VAL); outl(0xc000, gpio_base + GPIO_OUTPUT_ENABLE); Alternatively, call each GPIO update separately: geode_gpio_set(14, GPIO_OUTPUT_VAL); geode_gpio_set(15, GPIO_OUTPUT_VAL); geode_gpio_set(14, GPIO_OUTPUT_ENABLE); geode_gpio_set(15, GPIO_OUTPUT_ENABLE); Neither are desirable. This patch changes the GPIO API to allow for setting of multiple GPIOs at once; rather than being passed an integer, we pass a bitmask and provide a translation function. The above code would now look like this: geode_gpio_set(geode_gpio(14)|geode_gpio(15), GPIO_OUTPUT_VAL); geode_gpio_set(geode_gpio(14)|geode_gpio(15), GPIO_OUTPUT_ENABLE); Since there are no upstream users of the GPIO API yet (afaik), best to change this now. This also adds a bit of sanity checking; it is no longer possible to use a GPIO above 28. Note the semantics of geode_gpio_isset() have changed: geode_gpio_isset(geode_gpio(3)|geode_gpio(4), ...) will only return true iff both GPIOs are set. Signed-off-by: Andres Salomon Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit a5fee89ab53126c15e58ab8b6bc7e1996175139a Author: Vladimir Berezniker Date: Mon Nov 19 00:03:55 2007 +0100 x86_64: sanitize user specified e820 memmap values Sanitize user specified e820 memory ranges, using the same logic that is applied to the values returned by the BIOS. This ensures consistent handling regardless of the source of the memory mappings. Allows overriding portions of the memory map without specifying one in it's entirety (memmap=exactmap). E.g. marking a range of bad RAM as reserved with memmap=48M$528M BIOS supplied range BIOS-e820: 0000000000100000 - 000000007fe80000 (usable) becomes user: 0000000000100000 - 0000000021000000 (usable) user: 0000000021000000 - 0000000024000000 (reserved) user: 0000000024000000 - 000000007fe80000 (usable) Previously this did not work, as the original BIOS range was left untouched while the user defined range was appended to the end of the memory map. [ tglx: arch/x86 adaptation ] Signed-off-by: Vladimir Berezniker Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit b98f9a859ed350c049fd490f5a73aa668bc4df68 Author: Andi Kleen Date: Mon Nov 19 00:03:55 2007 +0100 x86: replace nvidia timer override quirk with pci id list and unify quirks This replaces the old NF3/NF4 reference BIOS timer override quirk with a device ID list. We need to ignore the timer override on these systems, but not ignore it on NF5 based systems. Previously this was distingushed by checking for HPET, but a lot of BIOS vendors didn't enable HPET in their pre Vista BIOSes. Replace the old "for all of nvidia" quirk with a quirk containing pci device ID. I goobled this list together from pci.ids and googling and it may be incomplete. I'm still not 100% sure the list is correct, but the only way to find out is to do testing in mainline. So let's do that. [ tglx: arch/x86 adaptation ] Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit db70f4c144b83da2c7ef2d3fe3d31a9ad45af28f Author: Yinghai Lu Date: Mon Nov 19 00:03:54 2007 +0100 x86: check and enable MMCONFIG for AMD Family 10h Opteron check and enable MMCONFIG for AMD Family 10h Opteron. [akpm@linux-foundation.org: section fix] Signed-off-by: Yinghai Lu Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit dca2bb18deb7a148d935273827e6b4701af89311 Author: Yinghai Lu Date: Mon Nov 19 00:03:54 2007 +0100 x86: set cfg_size for AMD Family 10h in case MMCONFIG is used reuse pci_cfg_space_size but skip check pci express and pci-x CAP ID. Signed-off-by: Yinghai Lu Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 16b2a4281fe1772ab61e870d556e7481c45c12d0 Author: Yinghai Lu Date: Mon Nov 19 00:03:54 2007 +0100 x86: check MSR to get mmconfig for amd family 10h opterons So even MCFG is not there, we still can use MMCONFIG. Signed-off-by: Yinghai Lu Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 4b978a1a849db60a85cfed91519736f7c651d1d7 Author: Robert Hancock Date: Mon Nov 19 00:03:54 2007 +0100 x86: validate against ACPI motherboard resources This path adds validation of the MMCONFIG table against the ACPI reserved motherboard resources. If the MMCONFIG table is found to be reserved in ACPI, we don't bother checking the E820 table. The PCI Express firmware spec apparently tells BIOS developers that reservation in ACPI is required and E820 reservation is optional, so checking against ACPI first makes sense. Many BIOSes don't reserve the MMCONFIG region in E820 even though it is perfectly functional, the existing check needlessly disables MMCONFIG in these cases. In order to do this, MMCONFIG setup has been split into two phases. If PCI configuration type 1 is not available then MMCONFIG is enabled early as before. Otherwise, it is enabled later after the ACPI interpreter is enabled, since we need to be able to execute control methods in order to check the ACPI reserved resources. Presently this is just triggered off the end of ACPI interpreter initialization. There are a few other behavioral changes here: - Validate all MMCONFIG configurations provided, not just the first one. - Validate the entire required length of each configuration according to the provided ending bus number is reserved, not just the minimum required allocation. - Validate that the area is reserved even if we read it from the chipset directly and not from the MCFG table. This catches the case where the BIOS didn't set the location properly in the chipset and has mapped it over other things it shouldn't have. This also cleans up the MMCONFIG initialization functions so that they simply do nothing if MMCONFIG is not compiled in. Based on an original patch by Rajesh Shah from Intel. [akpm@linux-foundation.org: many fixes and cleanups] Signed-off-by: Robert Hancock Signed-off-by: Andi Kleen Cc: Rajesh Shah Cc: Jesse Barnes Acked-by: Linus Torvalds Cc: Andi Kleen Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 5507058614e92d490169fab4cf3210e589ad7a12 Author: Andi Kleen Date: Mon Nov 19 00:03:54 2007 +0100 x86: untable __init references between IO data Earlier patch added IO APIC setup into local APIC setup. This caused modpost warnings. Fix them by untangling setup_local_APIC() and splitting it into smaller functions. The IO APIC initialization is only called for the BP init. Also removed some outdated debugging code and minor cleanup. [ tglx: arch/x86 adaptation ] Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 7d36304dfafb811065d6cb621fd638dd7a649e82 Author: Yinghai Lu Date: Mon Nov 19 00:03:54 2007 +0100 x86: use core id bits for apicid_to_node initialization We shoud use core id bits instead of max cores, in case later with AMD downcores Quad core Opteron. [ tglx: arch/x86 adaptation ] Signed-off-by: Yinghai Lu Signed-off-by: Andi Kleen Cc: Christoph Lameter Cc: Len Brown Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 744b08d9a738cfa08f42ece699960e6d517eb5d6 Author: Yinghai Lu Date: Mon Nov 19 00:03:54 2007 +0100 x86: store core id bits in cpuinfo_x8 We need to store core id bits to cpuinfo_x86 in early_identify_cpu. So we use it to create acpiid_to_node array in k8topolgy.c [ tglx: arch/x86 adaptation ] Signed-off-by: Yinghai Lu Signed-off-by: Andi Kleen Cc: Christoph Lameter Cc: Len Brown Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 007a0ac922c1feba2f538cf4f5492fdf4d12b292 Author: Adrian Bunk Date: Mon Nov 19 00:03:53 2007 +0100 i386: remove -maccumulate-outgoing-args Contrary to the comment "newer gccs do it by default", newer gcc versions default to -maccumulate-outgoing-args only with CONFIG_CC_OPTIMIZE_FOR_SIZE=n, and then only with some CPU settings. Measured with an i386 defconfig, gcc 4.2.1 and kernel 2.6.23-rc1 ("orig" is the plain kernel, "changed is with -maccumulate-outgoing-args removed): $ ls -la vmlinux* -rwxrwxr-x 1 bunk bunk 6269713 2007-07-24 22:19 vmlinux.changed -rwxrwxr-x 1 bunk bunk 6425361 2007-07-24 22:19 vmlinux.orig $ size vmlinux.* text data bss dec hex filename 4493465 504108 614400 5611973 55a1c5 vmlinux.changed 4646160 504108 614400 5764668 57f63c vmlinux.orig $ That's a 2.5% size increase that does for sure hurt small systems. If the stack unwinder ever comes back and needs this as indicated in the comment, adding it to the cflags when the user enabled the unwinder should be a better option. [ tglx: arch/x86 adaptation ] Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit d883088698ff9cec2d040686652a6e833df23299 Author: Yinghai Lu Date: Mon Nov 19 00:03:53 2007 +0100 x86: clear IO_APIC before enabing apic error vector. some apic id lifting system: 4 socket quad core, 8 socket quad core will do apic id lifting for BSP. but io-apic regs for ExtINT still use 0 as dest. so when we enable apic error vector in BSP, we will get one APIC error. CPU: L1 I Cache: 64K (64 bytes/line), D cache 64K (64 bytes/line) CPU: L2 Cache: 512K (64 bytes/line) CPU 0/4 -> Node 0 CPU: Physical Processor ID: 1 CPU: Processor Core ID: 0 SMP alternatives: switching to UP code ACPI: Core revision 20070126 enabled ExtINT on CPU#0 ESR value after enabling vector: 00000000, after 0000000c APIC error on CPU0: 0c(08) ENABLING IO-APIC IRQs Synchronizing Arb IDs. So move enable_IO_APIC from setup_IO_APIC into setup_local_APIC and call it before enabling apic error vector. [ tglx: arch/x86 adaptation ] Signed-off-by: Yinghai Lu Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 6a2062fea5edf53279f020f7e82413a1a83cd702 Author: Thomas Gleixner Date: Sun Nov 18 23:23:11 2007 +0100 x86: cleanup smp.h variants Bring the smp.h variants into sync to prepare merging and paravirt support. Signed-off-by: Thomas Gleixner commit 19a3cf3e7319a3b778465016cc74e69acc1e6fe1 Author: Thomas Gleixner Date: Sun Nov 18 23:16:14 2007 +0100 x86: merge mpspec variants The delta is now minimal. Merge them Signed-off-by: Thomas Gleixner commit 2d83c85ed0255665898f2cf8d7899f62a111f5d9 Author: Thomas Gleixner Date: Sun Nov 18 23:13:04 2007 +0100 x86: cleanup mpspec variants Bring the mpspec variants into sync to prepare merging and paravirt support. Signed-off-by: Thomas Gleixner commit 3cf5dd71a2946cb940d2375a8559cd3a955a3af3 Author: Thomas Gleixner Date: Sun Nov 18 22:56:28 2007 +0100 x86: merge tlbflush.h variants The delta is now minimal. Merge them Signed-off-by: Thomas Gleixner commit 73b0f2e6f1e855f9bcff5c3fce985cf6eb3a816f Author: Thomas Gleixner Date: Sun Nov 18 22:55:22 2007 +0100 x86: cleanup tlbflush.h variants Bring the tlbflush.h variants into sync to prepare merging and paravirt support. Signed-off-by: Thomas Gleixner commit 09034e3d3b7555ef5709bf559b92b6aa42243d08 Author: Thomas Gleixner Date: Sun Nov 18 23:18:49 2007 +0100 x86 cleanup boot_ioreamp_32.c Coding style cleanup before modifying the file. Signed-off-by: Thomas Gleixner commit 1c826ab29824a86550ecb06eaffd40ad57ae41ff Author: Glauber de Oliveira Costa Date: Mon Oct 29 07:40:40 2007 -0300 x86: consolidate spinlock.h The cli and sti instructions need to be replaced by paravirt hooks. For the i386 architecture, this is already done. The code requirements aren't much different from x86_64 POV, so this part is consolidated in the common header Signed-off-by: Glauber de Oliveira Costa Signed-off-by: Steven Rostedt Acked-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner commit b701d2e28552bc259510a30d8c333a2361151d99 Author: Glauber de Oliveira Costa Date: Tue Nov 13 12:35:44 2007 -0200 irqflags consolidation This patch consolidates the irqflags include files containing common paravirt definitions. The native definition for interrupt handling, halt, and such, are the same for 32 and 64 bit, and they are kept in irqflags.h. the differences are split in the arch-specific files. The syscall function, irq_enable_sysexit, has a very specific i386 naming, and its name is then changed to a more general one. Signed-off-by: Glauber de Oliveira Costa Signed-off-by: Steven Rostedt Acked-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner commit a75645b16c3d911e6842f0a0d558ac2a674e8ae3 Author: Hiroshi Shimamoto Date: Thu Nov 15 14:14:31 2007 -0800 x86: clean up nmi_32/64.c clean up and make nmi_32/64.c more similar. - white space and coding style clean up. - nmi_cpu_busy is available on CONFIG_SMP. - move functions __acpi_nmi_enable, acpi_nmi_enable, __acpi_nmi_disable and acpi_nmi_disable. - make variables name more similar. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 50006c7c08262713e3e2ad424debf3eeba69633a Author: clameter@sgi.com Date: Mon Nov 19 00:03:52 2007 +0100 x86: clean up stack allocation and free Clean up the allocation and freeing of stacks a bit by using a __GFP_ZERO flag instead of memset. Signed-off-by: Christoph Lameter Cc: Ingo Molnar Cc: Andi Kleen Cc: Mike Travis Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner commit 26015465fb36b22bfd90dedb2ad6730d03c841e5 Author: Randy Dunlap Date: Mon Nov 19 00:03:52 2007 +0100 x86: bitops_32.h style cleanups Coding style cleanups in x86/bitops_32.h: - drop space in "* addr" - whitespace & indentation fixes - spello fixes Signed-off-by: Randy Dunlap Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner commit 9fbc2ec808a166e3cdbdd9b11273625e1e23030a Author: Bernhard Walle Date: Mon Nov 19 00:03:52 2007 +0100 x86: remove extern declarations for code, data, bss resources This patch removes the extern struct resource declarations for data_resource, code_resource and bss_resource on x86 and declares that three structures as static as done on other architectures like IA64. On i386, these structures are moved to setup_32.c (from e820_32.c) because that's code that is not specific to e820 and also required on EFI systems. That makes the "extern" reference superfluous. On x86_64, data_resource, code_resource and bss_resource are passed to e820_reserve_resources() as arguments just as done on i386 and IA64. That also avoids the "extern" reference and it's possible to make it static. Signed-off-by: Bernhard Walle Cc: Ingo Molnar Cc: "Luck, Tony" Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner commit 4023518f525c34a3010ddf519095365c0d63672d Author: Cyrill Gorcunov Date: Mon Nov 19 00:03:52 2007 +0100 x86: remove dead code in ia32-emu Remove useless second time checking of fsave argument in save_i387_ia32() routine. It's possible the compiler is doing the same but that is much better to remove the dead code explicitly. Signed-off-by: Cyrill Gorcunov Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Andrew Morton commit f9d361fde5200a4c303b74493c9e1639d1133210 Author: Lucas Woods Date: Mon Nov 19 00:03:52 2007 +0100 x86: remove duplicate includes Signed-off-by: Lucas Woods Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner commit cefe311e31ffc74ce494d3e05b5050b1a2d9d6f0 Author: Paul Jimenez Date: Mon Nov 12 23:42:02 2007 -0600 x86: mtrr use type bool [RESEND AGAIN] This is a janitorish patch to 1) remove private TRUE/FALSE #def's in favor of using the standard enum from linux/stddef.h and 2) switch the variables holding those values to type 'bool' (from linux/types.h) since it both seems more appropriate and allows for potentially better optimization. As a truly minor aside, I removed a couple of comments documenting a 'do_safe' parameter that seems to no longer exist. Signed-off-by: Paul Jimenez Signed-off-by: Thomas Gleixner commit 849514b748578c76528b4cbb059e925fa7dc67b2 Author: Adrian Bunk Date: Fri Nov 9 07:03:13 2007 +0100 x86: remove acpi_pci_link_exit() acpi_pci_link_exit() is both unused and empty. Signed-off-by: Adrian Bunk Signed-off-by: Thomas Gleixner commit 1a325f3a233c73a427e4c264180e320fa6c911fe Author: Adrian Bunk Date: Fri Nov 9 07:03:38 2007 +0100 x86: pci-dma_64.c: cleanups This patch contains the following cleanups: - make the needlessly global iommu_setup() static - remove the unused EXPORT_SYMBOL(iommu_merge) Signed-off-by: Adrian Bunk Signed-off-by: Thomas Gleixner commit c35e8c5d64afdcebe63d437852040bed0eef39f3 Author: Adrian Bunk Date: Fri Nov 9 07:03:30 2007 +0100 x86: pci-calgary_64.c: make a variable static "debugging" is a horrible name for a global variable - thankfully it can become static. Also put it out of __read_mostly so that gcc no longer has to emit it at all. Signed-off-by: Adrian Bunk Signed-off-by: Thomas Gleixner commit d6c5f07b27e33d2b5a21f5e50dce55fefa4b6338 Author: Adrian Bunk Date: Fri Nov 9 07:03:28 2007 +0100 x86: nmi_64.c: make code static This patch makes the following needlessly global code static: - panic_on_timeout - setup_nmi_watchdog() Signed-off-by: Adrian Bunk Signed-off-by: Thomas Gleixner commit f42ee9122c4c7fa2f10635637f925cbef91157d7 Author: Adrian Bunk Date: Fri Nov 9 07:03:16 2007 +0100 x86 mce_64.c: make struct mcelog static This patch makes the needlessly global struct mcelog static. Signed-off-by: Adrian Bunk Signed-off-by: Thomas Gleixner commit 1eddbb314acf80941bfb58152de30022ac8f9fee Author: Hiroshi Shimamoto Date: Fri Nov 9 10:34:44 2007 -0800 x86: io_apic_64.c: remove unused config check CONFIG_IRQBALANCE doesn't exist on x86_64. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Thomas Gleixner commit 77463f45acb8e23c82768539d8bb4001d7b00447 Author: Adrian Bunk Date: Mon Nov 19 00:03:51 2007 +0100 x86 e820_64.c: make 2 functions static This patch makes the following needlessly global functions static: - e820_print_map() - early_panic() Signed-off-by: Adrian Bunk Signed-off-by: Thomas Gleixner commit 559782c7d7676d048afd0350cbf017f83fffc9a4 Author: Adrian Bunk Date: Fri Nov 9 07:02:11 2007 +0100 x86: acpi_pciprobe_dmi_table[] must be __devinitdata This patch fixes the following section mismatches with CONFIG_HOTPLUG=n: <-- snip --> ... WARNING: vmlinux.o(.data+0x23640): Section mismatch: reference to .init.text.20:can_skip_ioresource_align (between 'acpi_pciprobe_dmi_table' and 'pcibios_irq_mask') WARNING: vmlinux.o(.data+0x2366c): Section mismatch: reference to .init.text.20:can_skip_ioresource_align (between 'acpi_pciprobe_dmi_table' and 'pcibios_irq_mask') WARNING: vmlinux.o(.data+0x23698): Section mismatch: reference to .init.text.20:can_skip_ioresource_align (between 'acpi_pciprobe_dmi_table' and 'pcibios_irq_mask') ... <-- snip --> Signed-off-by: Adrian Bunk Signed-off-by: Thomas Gleixner commit af33c840608e541e2a6e10daa8f9072ae6f5295c Author: H. Peter Anvin Date: Fri Nov 2 14:43:23 2007 -0700 x86: actually merge This actually merges into . Signed-off-by: H. Peter Anvin Signed-off-by: Thomas Gleixner commit 54abcb98bcdaa9ea35687e0813c15096a807be17 Author: H. Peter Anvin Date: Fri Nov 2 14:40:17 2007 -0700 x86: prepare merger of Prepare for merging by making the 32- and 64-bit versions textually identical. This involves: - removing arbitrary header inclusion differences - reorganizing the 32-bit version slightly to match the 64-bit version - using to unify the assembly code - renaming struct paravirt_patch to struct paravirt_patch_site in the 64-bit version to match the 32-bit version; there are no references to struct paravirt_patch elsewhere in the tree. Signed-off-by: H. Peter Anvin Signed-off-by: Thomas Gleixner commit fe3d00adc0bd68ea2ae202542e2e71d4b4cbb586 Author: Paul Jimenez Date: Tue Oct 30 10:29:57 2007 -0500 x86: Make i8259_64 more _32-like Howdy! Here's a simple janitorish patch for you: This patch mainly hinges around two includes and their ramifications: #include which provides cached_{slave,master}_mask #include which provides PIC_{MASTER,SLAVE}_{IMR,CMD} Adding these two includes and using those half dozen or so definitions removed 140+ lines of diffs between i8259_32.c and i8259_64.c, thus making it easier for the real substantitive differences between them to show up, and hopefully therefore making it easier to eventually merge the two. All the warnings that checkpatch.pl throws (missing spaces after commas and >80 character lines) exist intentionally to match i8259_32.c. Signed-off-by: Paul Jimenez Signed-off-by: Thomas Gleixner commit d310e88e8f79261abf0c48a24431b81b9d2fe178 Author: Thomas Gleixner Date: Mon Nov 19 00:03:50 2007 +0100 x86: move 8259 defines to i8259.h Move the i8259 defines and remove the now io_ports.h Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 6386b817f88f1aed61a8f4e3f8c8afa8ccbf6c3c Author: Adrian Bunk Date: Mon Nov 5 18:08:34 2007 +0100 x86: unexport __{read,write}_lock_failed This patch removes the unused exports for __{read,write}_lock_failed. Signed-off-by: Adrian Bunk commit 72c829b131ccfcc2fe8d1f67b4d841ca903da97f Author: Dave Jones Date: Mon Oct 29 18:49:36 2007 -0400 Remove more bogus filenames in comments. Signed-off-by: Dave Jones Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit feadc4a39922a6a55fa1615b7d7726b7f787b363 Author: Thomas Gleixner Date: Mon Nov 19 00:03:50 2007 +0100 x86: Nuke a ton of unused exports Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 3826c6bc20e019f1537985329f3699b6b4c62f7d Author: Thomas Gleixner Date: Mon Nov 19 00:03:50 2007 +0100 x86: Remove dead code and exports No users. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit ddc4131c179e1a5359e69a42d7fca97d974aa75e Author: Thomas Gleixner Date: Mon Nov 19 00:03:50 2007 +0100 x86: nuke a ton of dead hpet code No users, just ballast Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 346d0a3126cfe2442f47392c3640cce58ebf31c4 Author: Thomas Gleixner Date: Mon Nov 19 00:03:50 2007 +0100 x86: smp_64.c: Remove unused exports and cleanup while at it The exports are nowhere used. There is even no reason why they were ever introduced. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 27dc299b844bf9bcec260b69a075531d3b1cd88f Author: Thomas Gleixner Date: Mon Nov 19 00:03:50 2007 +0100 x86: clean up arch/x86/kernel/time_64.c includes Reduce the lets include all to the minimum. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit df870e743022a0880daa8707320b1198461dba80 Author: Thomas Gleixner Date: Mon Nov 19 00:03:50 2007 +0100 x86: share rtc code Remove the rtc code from time_64.c and add the extra bits to the i386 path. The ACPI century check is probably valid for i386 as well, but this is material for a separate patch. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit fd7e412cb8d5c9ad695db698c81085cb70c4c034 Author: Thomas Gleixner Date: Mon Nov 19 00:03:50 2007 +0100 x86: isolate the rtc code for sharing The mach-default/mach_time.h code inline is moved to arch/x86/kernel/rtc.c and the header files are adjusted. Shrink the 3 dozen includes to the ones we really need. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit eafca2a20e18400737aab4ee38d299810416b198 Author: Thomas Gleixner Date: Mon Nov 19 00:03:49 2007 +0100 x86: unify mc146818rtc.h - prepare for sharing rtc code Unify mc146818rtc.h by adding the rtc_cmos_read/write functions to time_64.c. This is a preparatory patch to finaly share the rtc code, which is unsurprisingly similar. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit d3456538b9941d7ea887c7284597d4005c06e5f9 Author: Thomas Gleixner Date: Mon Nov 19 00:03:49 2007 +0100 x86: remove the duplicated arch/x86/ia32/mmap32.c Use mmap_32.c in arch/x86/mm instead Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 0148cc185fe9e525b84f2e485b57760e0784d0dd Author: Thomas Gleixner Date: Mon Nov 19 00:03:49 2007 +0100 x86: clean up arch/x86/mm/mmap_32/64.c White space and coding style clenaup. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 4cdcc96f6f2b80d3b57b5644a8bc897118595545 Author: Thomas Gleixner Date: Mon Nov 19 00:03:49 2007 +0100 x86: clean up arch/x86/kernel/vsmp_64.c White space and coding style clenaup. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 95665bce6b82b810d215bf044a4b892cd23831e1 Author: Thomas Gleixner Date: Mon Nov 19 00:03:49 2007 +0100 x86: clean up ioport_32.c Remove unused variables, rename the "unused" argument to regp. It is used ! Codingstyle fixes. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 149de042fba8ec31383962f958047e10d980eb64 Author: Thomas Gleixner Date: Mon Nov 19 00:03:49 2007 +0100 x86: simplify set_bitmap in ioport_32.c Simplify set_bitmap(). This is not in a hotpath and we really can use the straight forward loop through those bits. A similar implementation is used in the 64 bit code as well. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 3dee071a6d07d488d823c70422a470c973da653e Author: Thomas Gleixner Date: Mon Nov 19 00:03:49 2007 +0100 x86: merge include/asm-x86/scatterlist.h Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit f33ca1878b99f356a15d730e1fa5598910e95b7d Author: Thomas Gleixner Date: Mon Nov 19 00:03:49 2007 +0100 x86: merge include/asm-x86/dma.h Almost identical. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit f34d35d9e13f271354bcd01289a508cb6d8253c8 Author: Thomas Gleixner Date: Mon Nov 19 00:03:48 2007 +0100 x86: merge futex_32/64.h Finally merge them together. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit d50599835d11496e790f81f37d79fd255e63252a Author: Thomas Gleixner Date: Mon Nov 19 00:03:48 2007 +0100 x86: prepare merging futex_32/64.h Replace .quad/.long with a define and use the same asm syntax for i386 and x86. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 65031cc080f147ef4104a1a08d36adef48c8a8bf Author: Thomas Gleixner Date: Mon Nov 19 00:03:48 2007 +0100 x86: prepare merging arch/x86/kernel/apic_32/64.c Shuffle code around, so we get a readable diff. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 27a300e4490143b81b05f04cdbe975e7076fd1f1 Author: Thomas Gleixner Date: Mon Nov 19 00:03:48 2007 +0100 x86: make smp_local_timer_interrupt() static Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit bbba96d17e76b3eaf426bd669cb8a980b6257291 Author: Thomas Gleixner Date: Mon Nov 19 00:03:48 2007 +0100 x86: move ack_bad_irq into irq code Match i386, where we have this in the irq code. It belongs there. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 529b035bb11dfcbcff9e024694c25efb14706a03 Author: Thomas Gleixner Date: Mon Nov 19 00:03:48 2007 +0100 x86: move ioapic code where it belongs The commit 399287229c775a8962a852a761d65dc9475dec7c hacked the ioapic resource mapping into apic.c for no good reason. Move the code into io_apic_64.c where it belongs. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 3d8e8e87d112f39eac5083e83dabb17dd670cb96 Author: Thomas Gleixner Date: Mon Nov 19 00:03:48 2007 +0100 x86: remove obsolte declarations from proto.h Nuke duplicate and obsolete crap from this ugly dump bin. There are still some entries left which need to be sorted out, but I'm tired of that puzzle game right now. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 53f5d1bb9014a95405a62c49ba0974e90edc88f4 Author: Thomas Gleixner Date: Mon Nov 19 00:03:47 2007 +0100 x86: remove duplicate start_kernel declaration start_kernel is already declared in a generic header file. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit a4cdb785faf28e7f949f139da0eb46d2efa4aa11 Author: Thomas Gleixner Date: Mon Nov 19 00:03:47 2007 +0100 x86: remove obsolete nohpet declaration Lonely user is hpet.c, so no need to declare it elsewhere. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 9a872ede54b10ffc39480a90e59b0a85f2ce4b5f Author: Thomas Gleixner Date: Mon Nov 19 00:03:47 2007 +0100 x86: move pmtmr related declarations Move more stuff out of proto.h Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 1ec25f6908b2a84293a7ed49de5a2bd1cecfda5f Author: Thomas Gleixner Date: Mon Nov 19 00:03:47 2007 +0100 x86: move tsc related declarations tsc has also it's own header file. Nuke the stupid 64 bit ifdef while at it. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 8e875a8923a531b04c9d40fda46f8e438b78e84e Author: Thomas Gleixner Date: Mon Nov 19 00:03:47 2007 +0100 x86: move pda related declaration pda has its own header file as well. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 6a052d05ac50caf6974549f873c3bbdfd841ee80 Author: Thomas Gleixner Date: Mon Nov 19 00:03:47 2007 +0100 x86: move page related declaration end_pfn is in page.h, so end_pfn_map has a place there as well Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 222a929f87134cfe8c4e0edbf4ba0460191b58db Author: Thomas Gleixner Date: Mon Nov 19 00:03:47 2007 +0100 x86: move numa related declarations More stuff shuffeled to the correct place Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 353ee5c3f45d9188e2ec9f6391ddd7a96d93b09b Author: Thomas Gleixner Date: Mon Nov 19 00:03:47 2007 +0100 x86: move mce related declarations Move the mce related declarations where they belong, fix the users and remove 32bit dependency in mce.h Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit bc5bc17f670047fbcf886edd83a4ee0e5ebca112 Author: Thomas Gleixner Date: Mon Nov 19 00:03:47 2007 +0100 x86: move debug related declarations to kdebug.h Move them and fixup some users. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit fdaecf0bb3408508e2f573086677684c327d8056 Author: Thomas Gleixner Date: Mon Nov 19 00:03:47 2007 +0100 x86: move k8 related declarations Move k8 related declarations to k8.h and fix numa_64.c Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 0ed75405cba3bc9267e80d2116562c3a4a379e83 Author: Thomas Gleixner Date: Mon Nov 19 00:03:47 2007 +0100 x86: move idle related declarations Move idle related declarations to processor_64.h, where the the others are as well. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 9d1353cff3416d7b589aeb6f54118545f849c31b Author: Thomas Gleixner Date: Mon Nov 19 00:03:46 2007 +0100 x86: make early_indentify_cpu static early_indentify_cpu is only used in setup_64.c Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 430ea2782a181655160fcffbee763ee98d5acdf9 Author: Thomas Gleixner Date: Mon Nov 19 00:03:46 2007 +0100 x86: move acpi and pci declarations Move acpi/pci related declarations to the correct headers and remove the duplicate. Build fix from: Andrew Morton Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit da49f999fb6f9bc83d07086624e13f9b3ff9ded0 Author: Thomas Gleixner Date: Mon Nov 19 00:03:46 2007 +0100 x86: remove duplicated declarations Remove declarations which are made already in the appropriate header file. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 5592059921ae4e0dd0ab7a0de784b00efc246d87 Author: Thomas Gleixner Date: Mon Nov 19 00:03:46 2007 +0100 x86: merge apic_32/64.h Unify apic.h variants. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 5ce6778f1820e87ec372d906faad44c4ba22901b Author: Thomas Gleixner Date: Mon Nov 19 00:03:46 2007 +0100 x86: use u32 for some lapic functions Use u32 so 32 and 64bit have the same interface. Andrew Morton: xen, lguest build fixes Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit cf5a0efd0c701c75d948ef2a508df8a30edf9596 Author: Thomas Gleixner Date: Mon Nov 19 00:03:46 2007 +0100 x86: use u32 for safe_apic_wait_icr_idle() Preperatory patch for merging apic headers. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit e861bfdf64beff183a281b6815682a3ac1ce9c8b Author: Thomas Gleixner Date: Mon Nov 19 00:03:46 2007 +0100 x86: rename get_maxlvt to lapic_get_maxlvt Use the same name for the 32 and 64 bit variant. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 8caa5484ca62f125723954462fb0cf1a7279440c Author: Thomas Gleixner Date: Mon Nov 19 00:03:46 2007 +0100 x86: prepare unification of include/asm-x86/apic_32/64.h White space and coding style clenaup. Move the K8 local apic defines to apicdef.h, where they belong Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 651c0411cfa9c46a90241172cd2b8a513e61ed1b Author: Thomas Gleixner Date: Mon Nov 19 00:03:46 2007 +0100 x86: Unify include/asm-x86/apicdef_32/64.h Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit c723f4e81a524015e80bb9c1cf0de9410f4f80fa Author: Thomas Gleixner Date: Mon Nov 19 00:03:46 2007 +0100 x86: merge arch/x86/kernel/ldt_32/64.c Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 14c57a00573fca22648500fa96d95f0e8cd5d877 Author: Thomas Gleixner Date: Mon Nov 19 00:03:45 2007 +0100 x86: prepare arch/x86/kernel/ldt_32/64.c for merging White space and coding style cleanups. Change unsigned to int. There is no win when we compare mincount against pc->size, which is an int as well. Casting pc->size to unsigned just might hide real problems. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit d7503ad21bc466dbc9dcc389724b38440fa14ed1 Author: Thomas Gleixner Date: Mon Nov 19 00:03:45 2007 +0100 x86: introduce ldt_write accessor Create a ldt write accessor like the 32 bit one. Preparatory patch for merging ldt.c and anyway necessary for 64bit paravirt ops. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 4c42ca3d513df4ff8cc4e200c455e809ee81f26a Author: Thomas Gleixner Date: Mon Nov 19 00:03:45 2007 +0100 x86: clean up include/asm-x86/desc_64.h White space and coding style clenaup. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 6eb06af716bd95c256a05d037235b571d78dc80c Author: Thomas Gleixner Date: Mon Nov 19 00:03:45 2007 +0100 x86: clean up arch/x86/kernel/ldt_32/64.c White space and coding style clenaup. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 797f783a4da88f68ebac58919881b9ac62d11afb Author: Thomas Gleixner Date: Mon Nov 19 00:03:45 2007 +0100 x86: clean up arch/x86/kernel/e820_64.c White space and coding style cleanup. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 86ad6e164b538f2cf8017c5e2c91ce61ce462994 Author: Ingo Molnar Date: Mon Nov 19 00:03:45 2007 +0100 x86: code cleanups in arch/x86/kernel/pci-gart_64.c code cleanups: errors lines of code errors/KLOC arch/x86/kernel/pci-gart_64.c 183 748 244.6 arch/x86/kernel/pci-gart_64.c 0 790 0 Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 755a95d7b2a26b429bb5cb02883f127847bfa679 Author: Ingo Molnar Date: Mon Nov 19 00:03:45 2007 +0100 x86: lindent arch/i386/math-emu, cleanup manually clean up some of the damage that lindent caused. (this is a separate commit so that in the unlikely case of a typo we can bisect it down to the manual edits.) Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit c1eb1f2367c629d849c528188a9f4f6f0738a788 Author: Ingo Molnar Date: Mon Nov 19 00:03:45 2007 +0100 x86: lindent arch/i386/math-emu lindent these files: errors lines of code errors/KLOC arch/x86/math-emu/ 2236 9424 237.2 arch/x86/math-emu/ 128 8706 14.7 no other changes. No code changed: text data bss dec hex filename 5589802 612739 3833856 10036397 9924ad vmlinux.before 5589802 612739 3833856 10036397 9924ad vmlinux.after the intent of this patch is to ease the automated tracking of kernel code quality - it's just much easier for us to maintain it if every file in arch/x86 is supposed to be clean. NOTE: it is a known problem of lindent that it causes some style damage of its own, but it's a safe tool (well, except for the gcc array range initializers extension), so we did the bulk of the changes via lindent, and did the manual fixups in a followup patch. the resulting math-emu code has been tested by Thomas Gleixner on a real 386 DX CPU as well, and it works fine. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit bd64935c82728d1f09778e901ae1a15f5d5f6fab Author: Ingo Molnar Date: Mon Nov 19 00:03:45 2007 +0100 x86: mach-voyager, lindent lindent the mach-voyager files to get rid of more than 300 style errors: errors lines of code errors/KLOC arch/x86/mach-voyager/ [old] 409 3729 109.6 arch/x86/mach-voyager/ [new] 71 3678 19.3 Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit ee08bdc34706b61e5839b3252a479e33d5cc5514 Author: Ingo Molnar Date: Mon Nov 19 00:03:45 2007 +0100 x86: clean up arch/x86/kernel/aperture_64.c printk()s clean up arch/x86/kernel/aperture_64.c printk()s. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 7f89bb760b65b4056f2d1e6a4fe482ef56c05050 Author: Ingo Molnar Date: Mon Nov 19 00:03:44 2007 +0100 x86: clean up arch/x86/kernel/aperture_64.c whitespace cleanup. No code changed: text data bss dec hex filename 2080 76 4 2160 870 aperture_64.o.before 2080 76 4 2160 870 aperture_64.o.after errors lines of code errors/KLOC arch/x86/kernel/aperture_64.c 114 299 381.2 arch/x86/kernel/aperture_64.c 0 315 0 Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 6631190d80187694e41cc0cdab12e4c19ab363e1 Author: Thomas Gleixner Date: Mon Nov 19 00:03:44 2007 +0100 x86: clean up arch/x86/ia32/mmap32.c White space and coding style clenaup. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 4fffb008ecff95f3332c1c86a4c021099bd2d40a Author: Thomas Gleixner Date: Mon Nov 19 00:03:44 2007 +0100 x86: clean up arch/x86/ia32/syscall32.c White space and coding style clenaup. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit ba327686a36dfc6cf361110c267d432e522ac5d8 Author: Thomas Gleixner Date: Mon Nov 19 00:03:44 2007 +0100 x86: clean up arch/x86/ia32/sys_ia32.c White space and coding style clenaup. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 5d110d781c430ae72ac449dc102a9b20c3cfbe1e Author: Thomas Gleixner Date: Mon Nov 19 00:03:44 2007 +0100 x86: clean up arch/x86/ia32/ptrace32.c White space and coding style clenaup. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit dfc8c22829137fdd5bfa204a1511ccf8568bf3ce Author: Thomas Gleixner Date: Mon Nov 19 00:03:44 2007 +0100 x86: clean up arch/x86/ia32/ipc32.c White space and coding style cleanup. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 6e883b5438eb2f3ba0222e8cb06d88006b94ed9e Author: Thomas Gleixner Date: Mon Nov 19 00:03:44 2007 +0100 x86: clean up arch/x86/ia32/ia32_signal.c White space and coding style clenaup. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit 4b57771d4972b55061cefcb0eedcfafaca25427d Author: Thomas Gleixner Date: Mon Nov 19 00:03:44 2007 +0100 x86: clean up arch/x86/ia32/aout32.c White space and coding style clenaup. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit dfbdeac517dd04b3df23eb592f18ed1f258adc08 Author: Thomas Gleixner Date: Mon Nov 19 00:03:44 2007 +0100 x86: clean up arch/x86/ia32/fpu32.c White space and coding style clenaup. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit e05735adfbb76427ba62fbbad0da6cfe214a6b9f Author: Ingo Molnar Date: Mon Nov 19 00:03:44 2007 +0100 x86: clean up arch/x86/mm/pageattr_64.c clean up arch/x86/mm/pageattr_64.c. no code changed: text data bss dec hex filename 1751 16 0 1767 6e7 pageattr_64.o.before 1751 16 0 1767 6e7 pageattr_64.o.after Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit b8dc9841e60a5fe3f4c3dceb520915c1cf0637df Author: Ingo Molnar Date: Mon Nov 19 00:03:44 2007 +0100 x86: clean up arch/x86/mm/pageattr_32.c clean up arch/x86/mm/pageattr_32.c. no code changed: text data bss dec hex filename 1255 40 0 1295 50f pageattr_32.o.before 1255 40 0 1295 50f pageattr_32.o.after Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 745ba7d25bccd4f9479cf861747fd100ccfe3aee Author: Thomas Gleixner Date: Mon Nov 19 00:03:43 2007 +0100 x86: unify arch/x86/crypto/twofish_32/64.c Get rid of another duplicate file. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar commit c7078184515d424a6a635ba73f3aa331db5a182c Author: H. Peter Anvin Date: Tue Oct 23 14:22:58 2007 -0700 x86: unify asm/cpufeature.h asm/cpufeature.h was already almost unified; this completes the job. Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit 13213b1d645246c0a80afee2e24a0c05cb2ebc9d Author: H. Peter Anvin Date: Fri Nov 2 13:59:47 2007 -0700 x86: add Create , with common definitions suitable for assembly unification. Signed-off-by: H. Peter Anvin commit 2a6b2f497d06e7d6ec15ff718bfd5d0ec77befe3 Author: John Stultz Date: Mon Nov 19 00:03:43 2007 +0100 time: add ADJ_OFFSET_SS_READ Michael Kerrisk reported that a long standing bug in the adjtimex() system call causes glibc's adjtime(3) function to deliver the wrong results if 'delta' is NULL. add the ADJ_OFFSET_SS_READ API detail, which will be used by glibc to fix this API compatibility bug. Also see: http://bugzilla.kernel.org/show_bug.cgi?id=6761 [ mingo@elte.hu: added patch description ] Must-be-acked-by: Ulrich Drepper Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit a537a28be7b35b40be78b0421ca950aef9a0f1a7 Author: Arjan van de Ven Date: Sat Nov 17 11:35:01 2007 -0800 x86: printk kernel version in WARN_ON and other dump_stack users today, all oopses contain a version number of the kernel, which is nice because the people who actually do bother to read the oops get this vital bit of information always without having to ask the reporter in another round trip. However, WARN_ON() and many other dump_stack() users right now lack this information; the patch below adds this. This information is essential for getting people to use their time effectively when looking at these things; in addition, it's essential for tools that try to collect statistics about defects. Please consider, maybe even for 2.6.24 since its so simple and important for long term quality processes The code is identical between 32/64 bit; a lot of this code should be unified over time, the patch keeps the identical-ness in tact. Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner commit fd0aa9e78065a7b98d99191998d03c3e0b1c74b0 Author: Alexey Starikovskiy Date: Mon Nov 19 00:03:43 2007 +0100 ACPI: Set max_cstate to 1 for early Opterons. AMD Opteron processors before CG revision don't like C-states > 1. Refer to bugzilla.kernel.org #5303. [tglx: reworked the patch so it does not wreck ia64] This solves the long standing bugzilla #5303 and probably some more on affected machines. Signed-off-by: Thomas Gleixner commit 3400fa5bd6ed57090a28840960332ca041fc8f3c Author: Maciej W. Rozycki Date: Mon Nov 19 00:03:43 2007 +0100 x86: fix NMI watchdog & 'stopped time' problem More than 3 years ago Niclas Gustafsson reported a 'stopped time' problem: > Watching the /proc/interrupts with 10s apart after the "stop". > > [root@s151 root]# more /proc/interrupts > CPU0 > 0: 66413955 local-APIC-edge timer [...] > LOC: 67355837 > ERR: 0 > MIS: 0 > [root@s151 root]# more /proc/interrupts > CPU0 > 0: 66413955 local-APIC-edge timer [...] > LOC: 67379568 > ERR: 0 > MIS: 0 This may be because buggy SMM firmware messes with the 8259A (configured for a transparent mode -- yes that rare "local-APIC-edge" mode is tricky ;-) ) insanely. this should resolve: http://bugzilla.kernel.org/show_bug.cgi?id=2544 http://bugzilla.kernel.org/show_bug.cgi?id=6296 Patch-dusted-off-by: Ingo Molnar Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Signed-off-by: Andrew Morton --- Documentation/x86_64/boot-options.txt | 5 arch/x86/Kconfig | 25 arch/x86/Makefile_32 | 9 arch/x86/crypto/Makefile | 4 arch/x86/crypto/twofish.c | 101 arch/x86/crypto/twofish_32.c | 97 arch/x86/crypto/twofish_64.c | 97 arch/x86/ia32/Makefile | 3 arch/x86/ia32/fpu32.c | 132 arch/x86/ia32/ia32_aout.c | 210 - arch/x86/ia32/ia32_signal.c | 373 +- arch/x86/ia32/ipc32.c | 30 arch/x86/ia32/mmap32.c | 79 arch/x86/ia32/ptrace32.c | 190 - arch/x86/ia32/sys_ia32.c | 496 +-- arch/x86/ia32/syscall32.c | 45 arch/x86/kernel/Makefile_32 | 4 arch/x86/kernel/Makefile_64 | 4 arch/x86/kernel/aperture_64.c | 280 - arch/x86/kernel/apic_32.c | 98 arch/x86/kernel/apic_64.c | 1458 ++++---- arch/x86/kernel/apm_32.c | 2 arch/x86/kernel/asm-offsets_32.c | 2 arch/x86/kernel/cpu/mcheck/mce_64.c | 2 arch/x86/kernel/cpu/mtrr/amd.c | 2 arch/x86/kernel/cpu/mtrr/generic.c | 16 arch/x86/kernel/cpu/mtrr/if.c | 15 arch/x86/kernel/cpu/mtrr/main.c | 8 arch/x86/kernel/cpu/mtrr/mtrr.h | 6 arch/x86/kernel/cpu/perfctr-watchdog.c | 1 arch/x86/kernel/e820_32.c | 110 arch/x86/kernel/e820_64.c | 330 +- arch/x86/kernel/early-quirks.c | 54 arch/x86/kernel/entry_32.S | 8 arch/x86/kernel/geode_32.c | 48 arch/x86/kernel/head64.c | 2 arch/x86/kernel/hpet.c | 1 arch/x86/kernel/i386_ksyms_32.c | 7 arch/x86/kernel/i8259_32.c | 2 arch/x86/kernel/i8259_64.c | 154 arch/x86/kernel/init_task.c | 1 arch/x86/kernel/io_apic_32.c | 14 arch/x86/kernel/io_apic_64.c | 12 arch/x86/kernel/ioport_32.c | 50 arch/x86/kernel/irq_64.c | 20 arch/x86/kernel/ldt.c | 264 + arch/x86/kernel/ldt_32.c | 248 - arch/x86/kernel/ldt_64.c | 250 - arch/x86/kernel/mpparse_32.c | 18 arch/x86/kernel/nmi_32.c | 24 arch/x86/kernel/nmi_64.c | 99 arch/x86/kernel/paravirt_32.c | 10 arch/x86/kernel/pci-calgary_64.c | 5 arch/x86/kernel/pci-dma_64.c | 3 arch/x86/kernel/pci-gart_64.c | 489 +-- arch/x86/kernel/pci-swiotlb_64.c | 1 arch/x86/kernel/pmtimer_64.c | 4 arch/x86/kernel/process_32.c | 8 arch/x86/kernel/process_64.c | 14 arch/x86/kernel/reboot_64.c | 11 arch/x86/kernel/reboot_fixups_32.c | 14 arch/x86/kernel/rtc.c | 196 + arch/x86/kernel/setup_32.c | 109 arch/x86/kernel/setup_64.c | 111 arch/x86/kernel/smp_64.c | 86 arch/x86/kernel/smpboot_32.c | 1 arch/x86/kernel/smpboot_64.c | 23 arch/x86/kernel/stacktrace.c | 1 arch/x86/kernel/time_32.c | 106 arch/x86/kernel/time_64.c | 166 - arch/x86/kernel/topology.c | 5 arch/x86/kernel/traps_32.c | 5 arch/x86/kernel/traps_64.c | 15 arch/x86/kernel/vmi_32.c | 4 arch/x86/kernel/vmiclock_32.c | 1 arch/x86/kernel/vsmp_64.c | 11 arch/x86/kernel/x8664_ksyms_64.c | 9 arch/x86/lguest/boot.c | 4 arch/x86/mach-rdc321x/Makefile | 5 arch/x86/mach-rdc321x/gpio.c | 91 arch/x86/mach-rdc321x/platform.c | 68 arch/x86/mach-rdc321x/wdt.c | 275 + arch/x86/mach-voyager/setup.c | 32 arch/x86/mach-voyager/voyager_basic.c | 132 arch/x86/mach-voyager/voyager_cat.c | 601 +-- arch/x86/mach-voyager/voyager_smp.c | 680 +--- arch/x86/mach-voyager/voyager_thread.c | 52 arch/x86/math-emu/errors.c | 880 ++--- arch/x86/math-emu/exception.h | 9 arch/x86/math-emu/fpu_arith.c | 150 arch/x86/math-emu/fpu_asm.h | 1 arch/x86/math-emu/fpu_aux.c | 211 - arch/x86/math-emu/fpu_emu.h | 67 arch/x86/math-emu/fpu_entry.c | 1220 +++---- arch/x86/math-emu/fpu_etc.c | 185 - arch/x86/math-emu/fpu_proto.h | 28 arch/x86/math-emu/fpu_tags.c | 92 arch/x86/math-emu/fpu_trig.c | 2930 ++++++++---------- arch/x86/math-emu/get_address.c | 646 +-- arch/x86/math-emu/load_store.c | 452 +- arch/x86/math-emu/poly.h | 69 arch/x86/math-emu/poly_2xm1.c | 197 - arch/x86/math-emu/poly_atan.c | 347 +- arch/x86/math-emu/poly_l2.c | 386 +- arch/x86/math-emu/poly_sin.c | 643 +-- arch/x86/math-emu/poly_tan.c | 334 -- arch/x86/math-emu/reg_add_sub.c | 565 +-- arch/x86/math-emu/reg_compare.c | 573 +-- arch/x86/math-emu/reg_constant.c | 71 arch/x86/math-emu/reg_convert.c | 51 arch/x86/math-emu/reg_divide.c | 319 - arch/x86/math-emu/reg_ld_str.c | 2155 ++++++------- arch/x86/math-emu/reg_mul.c | 171 - arch/x86/math-emu/status_w.h | 8 arch/x86/mm/Makefile_64 | 2 arch/x86/mm/boot_ioremap_32.c | 24 arch/x86/mm/init_64.c | 2 arch/x86/mm/k8topology_64.c | 17 arch/x86/mm/mmap_32.c | 8 arch/x86/mm/mmap_64.c | 11 arch/x86/mm/numa_64.c | 2 arch/x86/mm/pageattr_32.c | 151 arch/x86/mm/pageattr_64.c | 143 arch/x86/mm/srat_64.c | 3 arch/x86/oprofile/op_model_athlon.c | 22 arch/x86/pci/fixup.c | 13 arch/x86/pci/init.c | 4 arch/x86/pci/mmconfig-shared.c | 209 + arch/x86/pci/pci.h | 1 arch/x86/vdso/vclock_gettime.c | 1 arch/x86/xen/enlighten.c | 6 drivers/acpi/bus.c | 2 drivers/acpi/processor_idle.c | 1 drivers/char/hpet.c | 75 drivers/pci/probe.c | 11 fs/binfmt_elf.c | 6 include/asm-ia64/acpi.h | 1 include/asm-x86/acpi.h | 27 include/asm-x86/acpi_32.h | 6 include/asm-x86/acpi_64.h | 4 include/asm-x86/alternative.h | 162 include/asm-x86/alternative_32.h | 154 include/asm-x86/alternative_64.h | 159 include/asm-x86/apic.h | 140 include/asm-x86/apic_32.h | 126 include/asm-x86/apic_64.h | 102 include/asm-x86/apicdef.h | 407 ++ include/asm-x86/apicdef_32.h | 375 -- include/asm-x86/apicdef_64.h | 392 -- include/asm-x86/arch_hooks.h | 5 include/asm-x86/asm.h | 18 include/asm-x86/bitops_32.h | 48 include/asm-x86/bug.h | 3 include/asm-x86/checksum_64.h | 2 include/asm-x86/cpufeature.h | 198 + include/asm-x86/cpufeature_32.h | 176 - include/asm-x86/cpufeature_64.h | 30 include/asm-x86/desc_64.h | 104 include/asm-x86/dma.h | 318 + include/asm-x86/dma_32.h | 297 - include/asm-x86/dma_64.h | 304 - include/asm-x86/e820_32.h | 6 include/asm-x86/e820_64.h | 6 include/asm-x86/elf.h | 3 include/asm-x86/futex.h | 138 include/asm-x86/futex_32.h | 135 include/asm-x86/futex_64.h | 125 include/asm-x86/geode.h | 12 include/asm-x86/gpio.h | 6 include/asm-x86/hw_irq_64.h | 1 include/asm-x86/i387_32.h | 2 include/asm-x86/i387_64.h | 2 include/asm-x86/i8259.h | 17 include/asm-x86/ia32_unistd.h | 2 include/asm-x86/ide.h | 2 include/asm-x86/idle.h | 1 include/asm-x86/io_apic_64.h | 8 include/asm-x86/irqflags.h | 246 + include/asm-x86/irqflags_32.h | 197 - include/asm-x86/irqflags_64.h | 176 - include/asm-x86/k8.h | 1 include/asm-x86/kdebug.h | 5 include/asm-x86/mach-bigsmp/mach_apic.h | 12 include/asm-x86/mach-default/apm.h | 2 include/asm-x86/mach-default/io_ports.h | 25 include/asm-x86/mach-default/mach_apic.h | 16 include/asm-x86/mach-default/mach_time.h | 111 include/asm-x86/mach-default/mach_timer.h | 2 include/asm-x86/mach-default/mach_traps.h | 2 include/asm-x86/mach-es7000/mach_apic.h | 10 include/asm-x86/mach-generic/gpio.h | 15 include/asm-x86/mach-rdc321x/gpio.h | 56 include/asm-x86/mach-rdc321x/rdc321x_defs.h | 6 include/asm-x86/mach-summit/mach_apic.h | 18 include/asm-x86/mc146818rtc.h | 101 include/asm-x86/mc146818rtc_32.h | 97 include/asm-x86/mc146818rtc_64.h | 29 include/asm-x86/mce.h | 14 include/asm-x86/mmu_context_64.h | 2 include/asm-x86/mpspec.h | 96 include/asm-x86/mpspec_32.h | 81 include/asm-x86/mpspec_64.h | 233 - include/asm-x86/mpspec_def.h | 87 include/asm-x86/mtrr.h | 8 include/asm-x86/nmi_32.h | 3 include/asm-x86/nmi_64.h | 5 include/asm-x86/numa_64.h | 6 include/asm-x86/page_64.h | 4 include/asm-x86/paravirt.h | 21 include/asm-x86/pci.h | 4 include/asm-x86/pci_64.h | 1 include/asm-x86/pda.h | 1 include/asm-x86/processor_32.h | 3 include/asm-x86/processor_64.h | 7 include/asm-x86/proto.h | 67 include/asm-x86/rio.h | 4 include/asm-x86/rwsem.h | 2 include/asm-x86/scatterlist.h | 34 include/asm-x86/scatterlist_32.h | 28 include/asm-x86/scatterlist_64.h | 29 include/asm-x86/smp_32.h | 117 include/asm-x86/smp_64.h | 133 include/asm-x86/spinlock.h | 14 include/asm-x86/spinlock_32.h | 9 include/asm-x86/spinlock_64.h | 8 include/asm-x86/system_32.h | 1 include/asm-x86/thread_info_64.h | 18 include/asm-x86/time.h | 2 include/asm-x86/timex.h | 2 include/asm-x86/tlbflush.h | 155 include/asm-x86/tlbflush_32.h | 168 - include/asm-x86/tlbflush_64.h | 100 include/asm-x86/topology_32.h | 2 include/asm-x86/topology_64.h | 2 include/asm-x86/tsc.h | 3 include/asm-x86/xor_32.h | 2 include/asm-x86/xor_64.h | 2 include/linux/acpi_pmtmr.h | 2 include/linux/compat.h | 4 include/linux/hpet.h | 3 include/linux/ioport.h | 2 include/linux/pci.h | 9 include/linux/pci_ids.h | 7 include/linux/timex.h | 1 kernel/time/ntp.c | 3 mm/mmap.c | 3 246 files changed, 13495 insertions(+), 15500 deletions(-) diff -puN Documentation/x86_64/boot-options.txt~git-x86 Documentation/x86_64/boot-options.txt --- a/Documentation/x86_64/boot-options.txt~git-x86 +++ a/Documentation/x86_64/boot-options.txt @@ -110,12 +110,15 @@ Idle loop Rebooting - reboot=b[ios] | t[riple] | k[bd] [, [w]arm | [c]old] + reboot=b[ios] | t[riple] | k[bd] | a[cpi] [, [w]arm | [c]old] bios Use the CPU reboot vector for warm reset warm Don't set the cold reboot flag cold Set the cold reboot flag triple Force a triple fault (init) kbd Use the keyboard controller. cold reset (default) + acpi Use the ACPI RESET_REG in the FADT. If ACPI is not configured or the + ACPI reset does not work, the reboot path attempts the reset using + the keyboard controller. Using warm reset will be much faster especially on big memory systems because the BIOS will not go through the memory check. diff -puN arch/x86/Kconfig~git-x86 arch/x86/Kconfig --- a/arch/x86/Kconfig~git-x86 +++ a/arch/x86/Kconfig @@ -81,6 +81,10 @@ config GENERIC_BUG default y depends on BUG +config GENERIC_GPIO + bool + default n + config GENERIC_HWEIGHT bool default y @@ -290,6 +294,17 @@ config X86_ES7000 Only choose this option if you have such a system, otherwise you should say N here. +config X86_RDC321X + bool "RDC R-321x SoC" + select M486 + select X86_REBOOTFIXUPS + select GENERIC_GPIO + select LEDS_GPIO + help + This option is needed for RDC R-321x system-on-chip, also known + as R-8610-(G). + If you don't have one of these chips, you should say N here. + config X86_VSMP bool "Support for ScaleMP vSMP" depends on X86_64 && PCI @@ -646,7 +661,7 @@ config X86_REBOOTFIXUPS system. Currently, the only fixup is for the Geode machines using - CS5530A and CS5536 chipsets. + CS5530A and CS5536 chipsets and the RDC R-321x SoC. Say Y if you want to enable the fixup. Currently, it's safe to enable this option even if you don't need it. @@ -852,6 +867,14 @@ config X86_64_ACPI_NUMA help Enable ACPI SRAT based node topology detection. +config THREAD_ORDER + int "Kernel stack size (in page order)" + range 1 3 + default "1" + depends on X86_64 && NUMA + help + Page order for the thread stack. + config NUMA_EMU bool "NUMA emulation" depends on X86_64 && NUMA diff -puN arch/x86/Makefile_32~git-x86 arch/x86/Makefile_32 --- a/arch/x86/Makefile_32~git-x86 +++ a/arch/x86/Makefile_32 @@ -48,10 +48,6 @@ include $(srctree)/arch/x86/Makefile_32. # temporary until string.h is fixed cflags-y += -ffreestanding -# this works around some issues with generating unwind tables in older gccs -# newer gccs do it by default -cflags-y += -maccumulate-outgoing-args - # Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use # a lot more stack due to the lack of sharing of stacklots: KBUILD_CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then echo $(call cc-option,-fno-unit-at-a-time); fi ;) @@ -85,6 +81,11 @@ mcore-$(CONFIG_X86_NUMAQ) := arch/x86/ma mflags-$(CONFIG_X86_BIGSMP) := -Iinclude/asm-x86/mach-bigsmp mcore-$(CONFIG_X86_BIGSMP) := arch/x86/mach-default +# RDC R-321x subarch support +mflags-$(CONFIG_X86_RDC321X) := -Iinclude/asm-x86/mach-rdc321x +mcore-$(CONFIG_X86_RDC321X) := arch/x86/mach-default +core-$(CONFIG_X86_RDC321X) += arch/x86/mach-rdc321x/ + #Summit subarch support mflags-$(CONFIG_X86_SUMMIT) := -Iinclude/asm-x86/mach-summit mcore-$(CONFIG_X86_SUMMIT) := arch/x86/mach-default diff -puN arch/x86/crypto/Makefile~git-x86 arch/x86/crypto/Makefile --- a/arch/x86/crypto/Makefile~git-x86 +++ a/arch/x86/crypto/Makefile @@ -9,7 +9,7 @@ obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o aes-i586-y := aes-i586-asm_32.o aes_32.o -twofish-i586-y := twofish-i586-asm_32.o twofish_32.o +twofish-i586-y := twofish-i586-asm_32.o twofish.o aes-x86_64-y := aes-x86_64-asm_64.o aes_64.o -twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_64.o +twofish-x86_64-y := twofish-x86_64-asm_64.o twofish.o diff -puN /dev/null arch/x86/crypto/twofish.c --- /dev/null +++ a/arch/x86/crypto/twofish.c @@ -0,0 +1,101 @@ +/* + * Glue Code for optimized x86 assembler version of TWOFISH + * + * Originally Twofish for GPG + * By Matthew Skala , July 26, 1998 + * 256-bit key length added March 20, 1999 + * Some modifications to reduce the text size by Werner Koch, April, 1998 + * Ported to the kerneli patch by Marc Mutz + * Ported to CryptoAPI by Colin Slater + * + * The original author has disclaimed all copyright interest in this + * code and thus put it in the public domain. The subsequent authors + * have put this under the GNU General Public License. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + * This code is a "clean room" implementation, written from the paper + * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey, + * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available + * through http://www.counterpane.com/twofish.html + * + * For background information on multiplication in finite fields, used for + * the matrix operations in the key schedule, see the book _Contemporary + * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the + * Third Edition. + */ + +#include +#include +#include +#include +#include + + +asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); +asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); + +static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + twofish_enc_blk(tfm, dst, src); +} + +static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + twofish_dec_blk(tfm, dst, src); +} + +static struct crypto_alg alg = { + .cra_name = "twofish", +#ifdef CONFIG_X86_32 + .cra_driver_name = "twofish-i586", +#else + .cra_driver_name = "twofish-x86_64", +#endif + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = TF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct twofish_ctx), + .cra_alignmask = 3, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(alg.cra_list), + .cra_u = { + .cipher = { + .cia_min_keysize = TF_MIN_KEY_SIZE, + .cia_max_keysize = TF_MAX_KEY_SIZE, + .cia_setkey = twofish_setkey, + .cia_encrypt = twofish_encrypt, + .cia_decrypt = twofish_decrypt + } + } +}; + +static int __init init(void) +{ + return crypto_register_alg(&alg); +} + +static void __exit fini(void) +{ + crypto_unregister_alg(&alg); +} + +module_init(init); +module_exit(fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Twofish Cipher Algorithm, x86 asm optimized"); +MODULE_ALIAS("twofish"); diff -puN arch/x86/crypto/twofish_32.c~git-x86 /dev/null --- a/arch/x86/crypto/twofish_32.c +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Glue Code for optimized 586 assembler version of TWOFISH - * - * Originally Twofish for GPG - * By Matthew Skala , July 26, 1998 - * 256-bit key length added March 20, 1999 - * Some modifications to reduce the text size by Werner Koch, April, 1998 - * Ported to the kerneli patch by Marc Mutz - * Ported to CryptoAPI by Colin Slater - * - * The original author has disclaimed all copyright interest in this - * code and thus put it in the public domain. The subsequent authors - * have put this under the GNU General Public License. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 - * USA - * - * This code is a "clean room" implementation, written from the paper - * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey, - * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available - * through http://www.counterpane.com/twofish.html - * - * For background information on multiplication in finite fields, used for - * the matrix operations in the key schedule, see the book _Contemporary - * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the - * Third Edition. - */ - -#include -#include -#include -#include -#include - - -asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); -asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); - -static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - twofish_enc_blk(tfm, dst, src); -} - -static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - twofish_dec_blk(tfm, dst, src); -} - -static struct crypto_alg alg = { - .cra_name = "twofish", - .cra_driver_name = "twofish-i586", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_CIPHER, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 3, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(alg.cra_list), - .cra_u = { - .cipher = { - .cia_min_keysize = TF_MIN_KEY_SIZE, - .cia_max_keysize = TF_MAX_KEY_SIZE, - .cia_setkey = twofish_setkey, - .cia_encrypt = twofish_encrypt, - .cia_decrypt = twofish_decrypt - } - } -}; - -static int __init init(void) -{ - return crypto_register_alg(&alg); -} - -static void __exit fini(void) -{ - crypto_unregister_alg(&alg); -} - -module_init(init); -module_exit(fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION ("Twofish Cipher Algorithm, i586 asm optimized"); -MODULE_ALIAS("twofish"); diff -puN arch/x86/crypto/twofish_64.c~git-x86 /dev/null --- a/arch/x86/crypto/twofish_64.c +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Glue Code for optimized x86_64 assembler version of TWOFISH - * - * Originally Twofish for GPG - * By Matthew Skala , July 26, 1998 - * 256-bit key length added March 20, 1999 - * Some modifications to reduce the text size by Werner Koch, April, 1998 - * Ported to the kerneli patch by Marc Mutz - * Ported to CryptoAPI by Colin Slater - * - * The original author has disclaimed all copyright interest in this - * code and thus put it in the public domain. The subsequent authors - * have put this under the GNU General Public License. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 - * USA - * - * This code is a "clean room" implementation, written from the paper - * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey, - * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available - * through http://www.counterpane.com/twofish.html - * - * For background information on multiplication in finite fields, used for - * the matrix operations in the key schedule, see the book _Contemporary - * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the - * Third Edition. - */ - -#include -#include -#include -#include -#include -#include - -asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); -asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); - -static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - twofish_enc_blk(tfm, dst, src); -} - -static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - twofish_dec_blk(tfm, dst, src); -} - -static struct crypto_alg alg = { - .cra_name = "twofish", - .cra_driver_name = "twofish-x86_64", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_CIPHER, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 3, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(alg.cra_list), - .cra_u = { - .cipher = { - .cia_min_keysize = TF_MIN_KEY_SIZE, - .cia_max_keysize = TF_MAX_KEY_SIZE, - .cia_setkey = twofish_setkey, - .cia_encrypt = twofish_encrypt, - .cia_decrypt = twofish_decrypt - } - } -}; - -static int __init init(void) -{ - return crypto_register_alg(&alg); -} - -static void __exit fini(void) -{ - crypto_unregister_alg(&alg); -} - -module_init(init); -module_exit(fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION ("Twofish Cipher Algorithm, x86_64 asm optimized"); -MODULE_ALIAS("twofish"); diff -puN arch/x86/ia32/Makefile~git-x86 arch/x86/ia32/Makefile --- a/arch/x86/ia32/Makefile~git-x86 +++ a/arch/x86/ia32/Makefile @@ -3,8 +3,7 @@ # obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o tls32.o \ - ia32_binfmt.o fpu32.o ptrace32.o syscall32.o syscall32_syscall.o \ - mmap32.o + ia32_binfmt.o fpu32.o ptrace32.o syscall32.o syscall32_syscall.o sysv-$(CONFIG_SYSVIPC) := ipc32.o obj-$(CONFIG_IA32_EMULATION) += $(sysv-y) diff -puN arch/x86/ia32/fpu32.c~git-x86 arch/x86/ia32/fpu32.c --- a/arch/x86/ia32/fpu32.c~git-x86 +++ a/arch/x86/ia32/fpu32.c @@ -1,8 +1,8 @@ -/* +/* * Copyright 2002 Andi Kleen, SuSE Labs. * FXSAVE<->i387 conversion support. Based on code by Gareth Hughes. * This is used for ptrace, signals and coredumps in 32bit emulation. - */ + */ #include #include @@ -13,96 +13,97 @@ static inline unsigned short twd_i387_to_fxsr(unsigned short twd) { unsigned int tmp; /* to avoid 16 bit prefixes in the code */ - + /* Transform each pair of bits into 01 (valid) or 00 (empty) */ - tmp = ~twd; - tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ - /* and move the valid bits to the lower byte. */ - tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ - tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ - tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ - return tmp; + tmp = ~twd; + tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ + /* and move the valid bits to the lower byte. */ + tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ + tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ + tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ + return tmp; } +#define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16); +#define FP_EXP_TAG_VALID 0 +#define FP_EXP_TAG_ZERO 1 +#define FP_EXP_TAG_SPECIAL 2 +#define FP_EXP_TAG_EMPTY 3 + static inline unsigned long twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave) { - struct _fpxreg *st = NULL; + struct _fpxreg *st; unsigned long tos = (fxsave->swd >> 11) & 7; unsigned long twd = (unsigned long) fxsave->twd; unsigned long tag; unsigned long ret = 0xffff0000; int i; -#define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16); - - for (i = 0 ; i < 8 ; i++) { + for (i = 0; i < 8; i++, twd >>= 1) { if (twd & 0x1) { - st = FPREG_ADDR( fxsave, (i - tos) & 7 ); + st = FPREG_ADDR(fxsave, (i - tos) & 7); switch (st->exponent & 0x7fff) { case 0x7fff: - tag = 2; /* Special */ + tag = FP_EXP_TAG_SPECIAL; break; case 0x0000: - if ( !st->significand[0] && - !st->significand[1] && - !st->significand[2] && - !st->significand[3] ) { - tag = 1; /* Zero */ - } else { - tag = 2; /* Special */ - } + if (!st->significand[0] && + !st->significand[1] && + !st->significand[2] && + !st->significand[3]) + tag = FP_EXP_TAG_ZERO; + else + tag = FP_EXP_TAG_SPECIAL; break; default: - if (st->significand[3] & 0x8000) { - tag = 0; /* Valid */ - } else { - tag = 2; /* Special */ - } + if (st->significand[3] & 0x8000) + tag = FP_EXP_TAG_VALID; + else + tag = FP_EXP_TAG_SPECIAL; break; } } else { - tag = 3; /* Empty */ + tag = FP_EXP_TAG_EMPTY; } - ret |= (tag << (2 * i)); - twd = twd >> 1; + ret |= tag << (2 * i); } return ret; } +#define G(num, val) err |= __get_user(val, num + (u32 __user *)buf) static inline int convert_fxsr_from_user(struct i387_fxsave_struct *fxsave, struct _fpstate_ia32 __user *buf) { struct _fpxreg *to; struct _fpreg __user *from; - int i; + int i, err = 0; u32 v; - int err = 0; -#define G(num,val) err |= __get_user(val, num + (u32 __user *)buf) G(0, fxsave->cwd); G(1, fxsave->swd); G(2, fxsave->twd); fxsave->twd = twd_i387_to_fxsr(fxsave->twd); G(3, fxsave->rip); G(4, v); - fxsave->fop = v>>16; /* cs ignored */ + /* cs ignored */ + fxsave->fop = v>>16; G(5, fxsave->rdp); /* 6: ds ignored */ -#undef G - if (err) - return -1; + if (err) + return -1; to = (struct _fpxreg *)&fxsave->st_space[0]; from = &buf->_st[0]; - for (i = 0 ; i < 8 ; i++, to++, from++) { + for (i = 0; i < 8; i++, to++, from++) { if (__copy_from_user(to, from, sizeof(*from))) return -1; } return 0; } +#define P(num, val) err |= __put_user(val, num + (u32 __user *)buf) static inline int convert_fxsr_to_user(struct _fpstate_ia32 __user *buf, struct i387_fxsave_struct *fxsave, @@ -111,60 +112,59 @@ static inline int convert_fxsr_to_user(s { struct _fpreg __user *to; struct _fpxreg *from; - int i; - u16 cs,ds; - int err = 0; + int i, err = 0; + u16 cs, ds; if (tsk == current) { - /* should be actually ds/cs at fpu exception time, - but that information is not available in 64bit mode. */ - asm("movw %%ds,%0 " : "=r" (ds)); - asm("movw %%cs,%0 " : "=r" (cs)); - } else { /* ptrace. task has stopped. */ + /* + * should be actually ds/cs at fpu exception time, but + * that information is not available in 64bit mode. + */ + asm("movw %%ds,%0 " : "=r" (ds)); + asm("movw %%cs,%0 " : "=r" (cs)); + } else { + /* ptrace. task has stopped. */ ds = tsk->thread.ds; cs = regs->cs; - } + } -#define P(num,val) err |= __put_user(val, num + (u32 __user *)buf) P(0, (u32)fxsave->cwd | 0xffff0000); P(1, (u32)fxsave->swd | 0xffff0000); P(2, twd_fxsr_to_i387(fxsave)); P(3, (u32)fxsave->rip); - P(4, cs | ((u32)fxsave->fop) << 16); + P(4, cs | ((u32)fxsave->fop) << 16); P(5, fxsave->rdp); P(6, 0xffff0000 | ds); -#undef P - if (err) - return -1; + if (err) + return -1; to = &buf->_st[0]; from = (struct _fpxreg *) &fxsave->st_space[0]; - for ( i = 0 ; i < 8 ; i++, to++, from++ ) { + for (i = 0; i < 8; i++, to++, from++) { if (__copy_to_user(to, from, sizeof(*to))) return -1; } return 0; } -int restore_i387_ia32(struct task_struct *tsk, struct _fpstate_ia32 __user *buf, int fsave) -{ +int restore_i387_ia32(struct task_struct *tsk, + struct _fpstate_ia32 __user *buf, int fsave) +{ clear_fpu(tsk); - if (!fsave) { - if (__copy_from_user(&tsk->thread.i387.fxsave, + if (!fsave) { + if (__copy_from_user(&tsk->thread.i387.fxsave, &buf->_fxsr_env[0], sizeof(struct i387_fxsave_struct))) return -1; tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask; set_stopped_child_used_math(tsk); - } + } return convert_fxsr_from_user(&tsk->thread.i387.fxsave, buf); -} +} -int save_i387_ia32(struct task_struct *tsk, - struct _fpstate_ia32 __user *buf, - struct pt_regs *regs, - int fsave) +int save_i387_ia32(struct task_struct *tsk, struct _fpstate_ia32 __user *buf, + struct pt_regs *regs, int fsave) { int err = 0; @@ -174,8 +174,6 @@ int save_i387_ia32(struct task_struct *t if (fsave) return 0; err |= __put_user(tsk->thread.i387.fxsave.swd, &buf->status); - if (fsave) - return err ? -1 : 1; err |= __put_user(X86_FXSR_MAGIC, &buf->magic); err |= __copy_to_user(&buf->_fxsr_env[0], &tsk->thread.i387.fxsave, sizeof(struct i387_fxsave_struct)); diff -puN arch/x86/ia32/ia32_aout.c~git-x86 arch/x86/ia32/ia32_aout.c --- a/arch/x86/ia32/ia32_aout.c~git-x86 +++ a/arch/x86/ia32/ia32_aout.c @@ -36,38 +36,44 @@ #undef WARN_OLD #undef CORE_DUMP /* probably broken */ -static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs); -static int load_aout_library(struct file*); +static int load_aout_binary(struct linux_binprm *, struct pt_regs *regs); +static int load_aout_library(struct file *); #ifdef CORE_DUMP -static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit); +static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, + unsigned long limit); /* * fill in the user structure for a core dump.. */ -static void dump_thread32(struct pt_regs * regs, struct user32 * dump) +static void dump_thread32(struct pt_regs *regs, struct user32 *dump) { - u32 fs,gs; + u32 fs, gs; /* changed the size calculations - should hopefully work better. lbt */ dump->magic = CMAGIC; dump->start_code = 0; dump->start_stack = regs->rsp & ~(PAGE_SIZE - 1); dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT; - dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT; + dump->u_dsize = ((unsigned long) + (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT; dump->u_dsize -= dump->u_tsize; dump->u_ssize = 0; - dump->u_debugreg[0] = current->thread.debugreg0; - dump->u_debugreg[1] = current->thread.debugreg1; - dump->u_debugreg[2] = current->thread.debugreg2; - dump->u_debugreg[3] = current->thread.debugreg3; - dump->u_debugreg[4] = 0; - dump->u_debugreg[5] = 0; - dump->u_debugreg[6] = current->thread.debugreg6; - dump->u_debugreg[7] = current->thread.debugreg7; + dump->u_debugreg[0] = current->thread.debugreg0; + dump->u_debugreg[1] = current->thread.debugreg1; + dump->u_debugreg[2] = current->thread.debugreg2; + dump->u_debugreg[3] = current->thread.debugreg3; + dump->u_debugreg[4] = 0; + dump->u_debugreg[5] = 0; + dump->u_debugreg[6] = current->thread.debugreg6; + dump->u_debugreg[7] = current->thread.debugreg7; - if (dump->start_stack < 0xc0000000) - dump->u_ssize = ((unsigned long) (0xc0000000 - dump->start_stack)) >> PAGE_SHIFT; + if (dump->start_stack < 0xc0000000) { + unsigned long tmp; + + tmp = (unsigned long) (0xc0000000 - dump->start_stack); + dump->u_ssize = tmp >> PAGE_SHIFT; + } dump->regs.ebx = regs->rbx; dump->regs.ecx = regs->rcx; @@ -79,7 +85,7 @@ static void dump_thread32(struct pt_regs dump->regs.ds = current->thread.ds; dump->regs.es = current->thread.es; asm("movl %%fs,%0" : "=r" (fs)); dump->regs.fs = fs; - asm("movl %%gs,%0" : "=r" (gs)); dump->regs.gs = gs; + asm("movl %%gs,%0" : "=r" (gs)); dump->regs.gs = gs; dump->regs.orig_eax = regs->orig_rax; dump->regs.eip = regs->rip; dump->regs.cs = regs->cs; @@ -90,7 +96,7 @@ static void dump_thread32(struct pt_regs #if 1 /* FIXME */ dump->u_fpvalid = 0; #else - dump->u_fpvalid = dump_fpu (regs, &dump->i387); + dump->u_fpvalid = dump_fpu(regs, &dump->i387); #endif } @@ -128,15 +134,19 @@ static int dump_write(struct file *file, return file->f_op->write(file, addr, nr, &file->f_pos) == nr; } -#define DUMP_WRITE(addr, nr) \ +#define DUMP_WRITE(addr, nr) \ if (!dump_write(file, (void *)(addr), (nr))) \ goto end_coredump; -#define DUMP_SEEK(offset) \ -if (file->f_op->llseek) { \ - if (file->f_op->llseek(file,(offset),0) != (offset)) \ - goto end_coredump; \ -} else file->f_pos = (offset) +#define DUMP_SEEK(offset) \ + if (file->f_op->llseek) { \ + if (file->f_op->llseek(file, (offset), 0) != (offset)) \ + goto end_coredump; \ + } else \ + file->f_pos = (offset) + +#define START_DATA() (u.u_tsize << PAGE_SHIFT) +#define START_STACK(u) (u.start_stack) /* * Routine writes a core dump image in the current directory. @@ -148,62 +158,70 @@ if (file->f_op->llseek) { \ * dumping of the process results in another error.. */ -static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit) +static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, + unsigned long limit) { mm_segment_t fs; int has_dumped = 0; unsigned long dump_start, dump_size; struct user32 dump; -# define START_DATA(u) (u.u_tsize << PAGE_SHIFT) -# define START_STACK(u) (u.start_stack) fs = get_fs(); set_fs(KERNEL_DS); has_dumped = 1; current->flags |= PF_DUMPCORE; - strncpy(dump.u_comm, current->comm, sizeof(current->comm)); - dump.u_ar0 = (u32)(((unsigned long)(&dump.regs)) - ((unsigned long)(&dump))); + strncpy(dump.u_comm, current->comm, sizeof(current->comm)); + dump.u_ar0 = (u32)(((unsigned long)(&dump.regs)) - + ((unsigned long)(&dump))); dump.signal = signr; dump_thread32(regs, &dump); -/* If the size of the dump file exceeds the rlimit, then see what would happen - if we wrote the stack, but not the data area. */ + /* + * If the size of the dump file exceeds the rlimit, then see + * what would happen if we wrote the stack, but not the data + * area. + */ if ((dump.u_dsize + dump.u_ssize + 1) * PAGE_SIZE > limit) dump.u_dsize = 0; -/* Make sure we have enough room to write the stack and data areas. */ + /* Make sure we have enough room to write the stack and data areas. */ if ((dump.u_ssize + 1) * PAGE_SIZE > limit) dump.u_ssize = 0; -/* make sure we actually have a data and stack area to dump */ + /* make sure we actually have a data and stack area to dump */ set_fs(USER_DS); - if (!access_ok(VERIFY_READ, (void *) (unsigned long)START_DATA(dump), dump.u_dsize << PAGE_SHIFT)) + if (!access_ok(VERIFY_READ, (void *) (unsigned long)START_DATA(dump), + dump.u_dsize << PAGE_SHIFT)) dump.u_dsize = 0; - if (!access_ok(VERIFY_READ, (void *) (unsigned long)START_STACK(dump), dump.u_ssize << PAGE_SHIFT)) + if (!access_ok(VERIFY_READ, (void *) (unsigned long)START_STACK(dump), + dump.u_ssize << PAGE_SHIFT)) dump.u_ssize = 0; set_fs(KERNEL_DS); -/* struct user */ - DUMP_WRITE(&dump,sizeof(dump)); -/* Now dump all of the user data. Include malloced stuff as well */ + /* struct user */ + DUMP_WRITE(&dump, sizeof(dump)); + /* Now dump all of the user data. Include malloced stuff as well */ DUMP_SEEK(PAGE_SIZE); -/* now we start writing out the user space info */ + /* now we start writing out the user space info */ set_fs(USER_DS); -/* Dump the data area */ + /* Dump the data area */ if (dump.u_dsize != 0) { dump_start = START_DATA(dump); dump_size = dump.u_dsize << PAGE_SHIFT; - DUMP_WRITE(dump_start,dump_size); + DUMP_WRITE(dump_start, dump_size); } -/* Now prepare to dump the stack area */ + /* Now prepare to dump the stack area */ if (dump.u_ssize != 0) { dump_start = START_STACK(dump); dump_size = dump.u_ssize << PAGE_SHIFT; - DUMP_WRITE(dump_start,dump_size); + DUMP_WRITE(dump_start, dump_size); } -/* Finally dump the task struct. Not be used by gdb, but could be useful */ + /* + * Finally dump the task struct. Not be used by gdb, but + * could be useful + */ set_fs(KERNEL_DS); - DUMP_WRITE(current,sizeof(*current)); + DUMP_WRITE(current, sizeof(*current)); end_coredump: set_fs(fs); return has_dumped; @@ -217,35 +235,34 @@ end_coredump: */ static u32 __user *create_aout_tables(char __user *p, struct linux_binprm *bprm) { - u32 __user *argv; - u32 __user *envp; - u32 __user *sp; - int argc = bprm->argc; - int envc = bprm->envc; + u32 __user *argv, *envp, *sp; + int argc = bprm->argc, envc = bprm->envc; sp = (u32 __user *) ((-(unsigned long)sizeof(u32)) & (unsigned long) p); sp -= envc+1; envp = sp; sp -= argc+1; argv = sp; - put_user((unsigned long) envp,--sp); - put_user((unsigned long) argv,--sp); - put_user(argc,--sp); + put_user((unsigned long) envp, --sp); + put_user((unsigned long) argv, --sp); + put_user(argc, --sp); current->mm->arg_start = (unsigned long) p; - while (argc-->0) { + while (argc-- > 0) { char c; - put_user((u32)(unsigned long)p,argv++); + + put_user((u32)(unsigned long)p, argv++); do { - get_user(c,p++); + get_user(c, p++); } while (c); } put_user(0, argv); current->mm->arg_end = current->mm->env_start = (unsigned long) p; - while (envc-->0) { + while (envc-- > 0) { char c; - put_user((u32)(unsigned long)p,envp++); + + put_user((u32)(unsigned long)p, envp++); do { - get_user(c,p++); + get_user(c, p++); } while (c); } put_user(0, envp); @@ -257,20 +274,18 @@ static u32 __user *create_aout_tables(ch * These are the functions used to load a.out style executables and shared * libraries. There is no binary dependent code anywhere else. */ - -static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) +static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) { + unsigned long error, fd_offset, rlim; struct exec ex; - unsigned long error; - unsigned long fd_offset; - unsigned long rlim; int retval; ex = *((struct exec *) bprm->buf); /* exec-header */ if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC && N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) || N_TRSIZE(ex) || N_DRSIZE(ex) || - i_size_read(bprm->file->f_path.dentry->d_inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { + i_size_read(bprm->file->f_path.dentry->d_inode) < + ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { return -ENOEXEC; } @@ -291,13 +306,13 @@ static int load_aout_binary(struct linux if (retval) return retval; - regs->cs = __USER32_CS; + regs->cs = __USER32_CS; regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0; /* OK, This is the point of no return */ set_personality(PER_LINUX); - set_thread_flag(TIF_IA32); + set_thread_flag(TIF_IA32); clear_thread_flag(TIF_ABI_PENDING); current->mm->end_code = ex.a_text + @@ -311,7 +326,7 @@ static int load_aout_binary(struct linux current->mm->mmap = NULL; compute_creds(bprm); - current->flags &= ~PF_FORKNOEXEC; + current->flags &= ~PF_FORKNOEXEC; if (N_MAGIC(ex) == OMAGIC) { unsigned long text_addr, map_size; @@ -338,30 +353,30 @@ static int load_aout_binary(struct linux send_sig(SIGKILL, current, 0); return error; } - + flush_icache_range(text_addr, text_addr+ex.a_text+ex.a_data); } else { #ifdef WARN_OLD static unsigned long error_time, error_time2; if ((ex.a_text & 0xfff || ex.a_data & 0xfff) && - (N_MAGIC(ex) != NMAGIC) && (jiffies-error_time2) > 5*HZ) - { + (N_MAGIC(ex) != NMAGIC) && (jiffies-error_time2) > 5*HZ) { printk(KERN_NOTICE "executable not page aligned\n"); error_time2 = jiffies; } if ((fd_offset & ~PAGE_MASK) != 0 && - (jiffies-error_time) > 5*HZ) - { - printk(KERN_WARNING - "fd_offset is not page aligned. Please convert program: %s\n", + (jiffies - error_time) > 5*HZ) { + printk(KERN_WARNING + "fd_offset is not page aligned. Please convert " + "program: %s\n", bprm->file->f_path.dentry->d_name.name); error_time = jiffies; } #endif - if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) { + if (!bprm->file->f_op->mmap || (fd_offset & ~PAGE_MASK) != 0) { loff_t pos = fd_offset; + down_write(¤t->mm->mmap_sem); do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data); up_write(¤t->mm->mmap_sem); @@ -376,9 +391,10 @@ static int load_aout_binary(struct linux down_write(¤t->mm->mmap_sem); error = do_mmap(bprm->file, N_TXTADDR(ex), ex.a_text, - PROT_READ | PROT_EXEC, - MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE | MAP_32BIT, - fd_offset); + PROT_READ | PROT_EXEC, + MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | + MAP_EXECUTABLE | MAP_32BIT, + fd_offset); up_write(¤t->mm->mmap_sem); if (error != N_TXTADDR(ex)) { @@ -387,9 +403,10 @@ static int load_aout_binary(struct linux } down_write(¤t->mm->mmap_sem); - error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data, + error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data, PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE | MAP_32BIT, + MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | + MAP_EXECUTABLE | MAP_32BIT, fd_offset + ex.a_text); up_write(¤t->mm->mmap_sem); if (error != N_DATADDR(ex)) { @@ -403,9 +420,9 @@ beyond_if: set_brk(current->mm->start_brk, current->mm->brk); retval = setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT); - if (retval < 0) { - /* Someone check-me: is this error path enough? */ - send_sig(SIGKILL, current, 0); + if (retval < 0) { + /* Someone check-me: is this error path enough? */ + send_sig(SIGKILL, current, 0); return retval; } @@ -414,7 +431,7 @@ beyond_if: /* start thread */ asm volatile("movl %0,%%fs" :: "r" (0)); \ asm volatile("movl %0,%%es; movl %0,%%ds": :"r" (__USER32_DS)); - load_gs_index(0); + load_gs_index(0); (regs)->rip = ex.a_entry; (regs)->rsp = current->mm->start_stack; (regs)->eflags = 0x200; @@ -425,7 +442,7 @@ beyond_if: set_fs(USER_DS); if (unlikely(current->ptrace & PT_PTRACED)) { if (current->ptrace & PT_TRACE_EXEC) - ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP); + ptrace_notify((PTRACE_EVENT_EXEC << 8) | SIGTRAP); else send_sig(SIGTRAP, current, 0); } @@ -434,9 +451,8 @@ beyond_if: static int load_aout_library(struct file *file) { - struct inode * inode; - unsigned long bss, start_addr, len; - unsigned long error; + struct inode *inode; + unsigned long bss, start_addr, len, error; int retval; struct exec ex; @@ -450,7 +466,8 @@ static int load_aout_library(struct file /* We come in here for the regular a.out style of shared libraries */ if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != QMAGIC) || N_TRSIZE(ex) || N_DRSIZE(ex) || ((ex.a_entry & 0xfff) && N_MAGIC(ex) == ZMAGIC) || - i_size_read(inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { + i_size_read(inode) < + ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { goto out; } @@ -467,10 +484,10 @@ static int load_aout_library(struct file #ifdef WARN_OLD static unsigned long error_time; - if ((jiffies-error_time) > 5*HZ) - { - printk(KERN_WARNING - "N_TXTOFF is not page aligned. Please convert library: %s\n", + if ((jiffies-error_time) > 5*HZ) { + printk(KERN_WARNING + "N_TXTOFF is not page aligned. Please convert " + "library: %s\n", file->f_path.dentry->d_name.name); error_time = jiffies; } @@ -478,11 +495,12 @@ static int load_aout_library(struct file down_write(¤t->mm->mmap_sem); do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); up_write(¤t->mm->mmap_sem); - + file->f_op->read(file, (char __user *)start_addr, ex.a_text + ex.a_data, &pos); flush_icache_range((unsigned long) start_addr, - (unsigned long) start_addr + ex.a_text + ex.a_data); + (unsigned long) start_addr + ex.a_text + + ex.a_data); retval = 0; goto out; diff -puN arch/x86/ia32/ia32_signal.c~git-x86 arch/x86/ia32/ia32_signal.c --- a/arch/x86/ia32/ia32_signal.c~git-x86 +++ a/arch/x86/ia32/ia32_signal.c @@ -43,7 +43,8 @@ void signal_fault(struct pt_regs *regs, int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) { int err; - if (!access_ok (VERIFY_WRITE, to, sizeof(compat_siginfo_t))) + + if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t))) return -EFAULT; /* If you change siginfo_t structure, please make sure that @@ -53,16 +54,19 @@ int copy_siginfo_to_user32(compat_siginf 3 ints plus the relevant union member. */ err = __put_user(from->si_signo, &to->si_signo); err |= __put_user(from->si_errno, &to->si_errno); - err |= __put_user((short)from->si_code, &to->si_code); + err |= __put_user((short)from->si_code, &to->si_code); if (from->si_code < 0) { err |= __put_user(from->si_pid, &to->si_pid); - err |= __put_user(from->si_uid, &to->si_uid); - err |= __put_user(ptr_to_compat(from->si_ptr), &to->si_ptr); + err |= __put_user(from->si_uid, &to->si_uid); + err |= __put_user(ptr_to_compat(from->si_ptr), &to->si_ptr); } else { - /* First 32bits of unions are always present: - * si_pid === si_band === si_tid === si_addr(LS half) */ - err |= __put_user(from->_sifields._pad[0], &to->_sifields._pad[0]); + /* + * First 32bits of unions are always present: + * si_pid === si_band === si_tid === si_addr(LS half) + */ + err |= __put_user(from->_sifields._pad[0], + &to->_sifields._pad[0]); switch (from->si_code >> 16) { case __SI_FAULT >> 16: break; @@ -76,14 +80,15 @@ int copy_siginfo_to_user32(compat_siginf err |= __put_user(from->si_uid, &to->si_uid); break; case __SI_POLL >> 16: - err |= __put_user(from->si_fd, &to->si_fd); + err |= __put_user(from->si_fd, &to->si_fd); break; case __SI_TIMER >> 16: - err |= __put_user(from->si_overrun, &to->si_overrun); + err |= __put_user(from->si_overrun, &to->si_overrun); err |= __put_user(ptr_to_compat(from->si_ptr), - &to->si_ptr); + &to->si_ptr); break; - case __SI_RT >> 16: /* This is not generated by the kernel as of now. */ + /* This is not generated by the kernel as of now. */ + case __SI_RT >> 16: case __SI_MESGQ >> 16: err |= __put_user(from->si_uid, &to->si_uid); err |= __put_user(from->si_int, &to->si_int); @@ -97,7 +102,8 @@ int copy_siginfo_from_user32(siginfo_t * { int err; u32 ptr32; - if (!access_ok (VERIFY_READ, from, sizeof(compat_siginfo_t))) + + if (!access_ok(VERIFY_READ, from, sizeof(compat_siginfo_t))) return -EFAULT; err = __get_user(to->si_signo, &from->si_signo); @@ -112,8 +118,7 @@ int copy_siginfo_from_user32(siginfo_t * return err; } -asmlinkage long -sys32_sigsuspend(int history0, int history1, old_sigset_t mask) +asmlinkage long sys32_sigsuspend(int history0, int history1, old_sigset_t mask) { mask &= _BLOCKABLE; spin_lock_irq(¤t->sighand->siglock); @@ -128,36 +133,37 @@ sys32_sigsuspend(int history0, int histo return -ERESTARTNOHAND; } -asmlinkage long -sys32_sigaltstack(const stack_ia32_t __user *uss_ptr, - stack_ia32_t __user *uoss_ptr, - struct pt_regs *regs) +asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr, + stack_ia32_t __user *uoss_ptr, + struct pt_regs *regs) { - stack_t uss,uoss; + stack_t uss, uoss; int ret; - mm_segment_t seg; - if (uss_ptr) { + mm_segment_t seg; + + if (uss_ptr) { u32 ptr; - memset(&uss,0,sizeof(stack_t)); - if (!access_ok(VERIFY_READ,uss_ptr,sizeof(stack_ia32_t)) || + + memset(&uss, 0, sizeof(stack_t)); + if (!access_ok(VERIFY_READ, uss_ptr, sizeof(stack_ia32_t)) || __get_user(ptr, &uss_ptr->ss_sp) || __get_user(uss.ss_flags, &uss_ptr->ss_flags) || __get_user(uss.ss_size, &uss_ptr->ss_size)) return -EFAULT; uss.ss_sp = compat_ptr(ptr); } - seg = get_fs(); - set_fs(KERNEL_DS); + seg = get_fs(); + set_fs(KERNEL_DS); ret = do_sigaltstack(uss_ptr ? &uss : NULL, &uoss, regs->rsp); - set_fs(seg); + set_fs(seg); if (ret >= 0 && uoss_ptr) { - if (!access_ok(VERIFY_WRITE,uoss_ptr,sizeof(stack_ia32_t)) || + if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(stack_ia32_t)) || __put_user(ptr_to_compat(uoss.ss_sp), &uoss_ptr->ss_sp) || __put_user(uoss.ss_flags, &uoss_ptr->ss_flags) || __put_user(uoss.ss_size, &uoss_ptr->ss_size)) ret = -EFAULT; - } - return ret; + } + return ret; } /* @@ -186,18 +192,6 @@ struct rt_sigframe char retcode[8]; }; -static int -ia32_restore_sigcontext(struct pt_regs *regs, struct sigcontext_ia32 __user *sc, unsigned int *peax) -{ - unsigned int err = 0; - - /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; - -#if DEBUG_SIG - printk("SIG restore_sigcontext: sc=%p err(%x) eip(%x) cs(%x) flg(%x)\n", - sc, sc->err, sc->eip, sc->cs, sc->eflags); -#endif #define COPY(x) { \ unsigned int reg; \ err |= __get_user(reg, &sc->e ##x); \ @@ -205,68 +199,78 @@ ia32_restore_sigcontext(struct pt_regs * } #define RELOAD_SEG(seg,mask) \ - { unsigned int cur; \ + { unsigned int cur; \ unsigned short pre; \ err |= __get_user(pre, &sc->seg); \ - asm volatile("movl %%" #seg ",%0" : "=r" (cur)); \ - pre |= mask; \ - if (pre != cur) loadsegment(seg,pre); } - - /* Reload fs and gs if they have changed in the signal handler. - This does not handle long fs/gs base changes in the handler, but - does not clobber them at least in the normal case. */ - - { - unsigned gs, oldgs; - err |= __get_user(gs, &sc->gs); - gs |= 3; - asm("movl %%gs,%0" : "=r" (oldgs)); - if (gs != oldgs) - load_gs_index(gs); - } - RELOAD_SEG(fs,3); - RELOAD_SEG(ds,3); - RELOAD_SEG(es,3); + asm volatile("movl %%" #seg ",%0" : "=r" (cur)); \ + pre |= mask; \ + if (pre != cur) loadsegment(seg, pre); } + +static int ia32_restore_sigcontext(struct pt_regs *regs, + struct sigcontext_ia32 __user *sc, + unsigned int *peax) +{ + unsigned int tmpflags, gs, oldgs, err = 0; + struct _fpstate_ia32 __user *buf; + u32 tmp; + + /* Always make any pending restarted system calls return -EINTR */ + current_thread_info()->restart_block.fn = do_no_restart_syscall; + +#if DEBUG_SIG + printk(KERN_DEBUG "SIG restore_sigcontext: " + "sc=%p err(%x) eip(%x) cs(%x) flg(%x)\n", + sc, sc->err, sc->eip, sc->cs, sc->eflags); +#endif + + /* + * Reload fs and gs if they have changed in the signal + * handler. This does not handle long fs/gs base changes in + * the handler, but does not clobber them at least in the + * normal case. + */ + err |= __get_user(gs, &sc->gs); + gs |= 3; + asm("movl %%gs,%0" : "=r" (oldgs)); + if (gs != oldgs) + load_gs_index(gs); + + RELOAD_SEG(fs, 3); + RELOAD_SEG(ds, 3); + RELOAD_SEG(es, 3); COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); COPY(dx); COPY(cx); COPY(ip); - /* Don't touch extended registers */ - - err |= __get_user(regs->cs, &sc->cs); - regs->cs |= 3; - err |= __get_user(regs->ss, &sc->ss); - regs->ss |= 3; - - { - unsigned int tmpflags; - err |= __get_user(tmpflags, &sc->eflags); - regs->eflags = (regs->eflags & ~0x40DD5) | (tmpflags & 0x40DD5); - regs->orig_rax = -1; /* disable syscall checks */ - } + /* Don't touch extended registers */ - { - u32 tmp; - struct _fpstate_ia32 __user * buf; - err |= __get_user(tmp, &sc->fpstate); - buf = compat_ptr(tmp); - if (buf) { - if (!access_ok(VERIFY_READ, buf, sizeof(*buf))) - goto badframe; - err |= restore_i387_ia32(current, buf, 0); - } else { - struct task_struct *me = current; - if (used_math()) { - clear_fpu(me); - clear_used_math(); - } + err |= __get_user(regs->cs, &sc->cs); + regs->cs |= 3; + err |= __get_user(regs->ss, &sc->ss); + regs->ss |= 3; + + err |= __get_user(tmpflags, &sc->eflags); + regs->eflags = (regs->eflags & ~0x40DD5) | (tmpflags & 0x40DD5); + /* disable syscall checks */ + regs->orig_rax = -1; + + err |= __get_user(tmp, &sc->fpstate); + buf = compat_ptr(tmp); + if (buf) { + if (!access_ok(VERIFY_READ, buf, sizeof(*buf))) + goto badframe; + err |= restore_i387_ia32(current, buf, 0); + } else { + struct task_struct *me = current; + + if (used_math()) { + clear_fpu(me); + clear_used_math(); } } - { - u32 tmp; - err |= __get_user(tmp, &sc->eax); - *peax = tmp; - } + err |= __get_user(tmp, &sc->eax); + *peax = tmp; + return err; badframe: @@ -283,7 +287,8 @@ asmlinkage long sys32_sigreturn(struct p goto badframe; if (__get_user(set.sig[0], &frame->sc.oldmask) || (_COMPAT_NSIG_WORDS > 1 - && __copy_from_user((((char *) &set.sig) + 4), &frame->extramask, + && __copy_from_user((((char *) &set.sig) + 4), + &frame->extramask, sizeof(frame->extramask)))) goto badframe; @@ -292,7 +297,7 @@ asmlinkage long sys32_sigreturn(struct p current->blocked = set; recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); - + if (ia32_restore_sigcontext(regs, &frame->sc, &eax)) goto badframe; return eax; @@ -300,7 +305,7 @@ asmlinkage long sys32_sigreturn(struct p badframe: signal_fault(regs, frame, "32bit sigreturn"); return 0; -} +} asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs) { @@ -321,7 +326,7 @@ asmlinkage long sys32_rt_sigreturn(struc current->blocked = set; recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); - + if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext, &eax)) goto badframe; @@ -332,17 +337,17 @@ asmlinkage long sys32_rt_sigreturn(struc return eax; badframe: - signal_fault(regs,frame,"32bit rt sigreturn"); + signal_fault(regs, frame, "32bit rt sigreturn"); return 0; -} +} /* * Set up a signal frame. */ -static int -ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, struct _fpstate_ia32 __user *fpstate, - struct pt_regs *regs, unsigned int mask) +static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, + struct _fpstate_ia32 __user *fpstate, + struct pt_regs *regs, unsigned int mask) { int tmp, err = 0; @@ -375,7 +380,7 @@ ia32_setup_sigcontext(struct sigcontext_ tmp = save_i387_ia32(current, fpstate, regs, 0); if (tmp < 0) err = -EFAULT; - else { + else { clear_used_math(); stts(); err |= __put_user(ptr_to_compat(tmp ? fpstate : NULL), @@ -392,8 +397,8 @@ ia32_setup_sigcontext(struct sigcontext_ /* * Determine which stack to use.. */ -static void __user * -get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) +static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, + size_t frame_size) { unsigned long rsp; @@ -409,9 +414,8 @@ get_sigframe(struct k_sigaction *ka, str /* This is the legacy signal stack switching. */ else if ((regs->ss & 0xffff) != __USER_DS && !(ka->sa.sa_flags & SA_RESTORER) && - ka->sa.sa_restorer) { + ka->sa.sa_restorer) rsp = (unsigned long) ka->sa.sa_restorer; - } rsp -= frame_size; /* Align the stack pointer according to the i386 ABI, @@ -421,11 +425,25 @@ get_sigframe(struct k_sigaction *ka, str } int ia32_setup_frame(int sig, struct k_sigaction *ka, - compat_sigset_t *set, struct pt_regs * regs) + compat_sigset_t *set, struct pt_regs *regs) { struct sigframe __user *frame; + void __user *restorer; int err = 0; + /* copy_to_user optimizes that into a single 8 byte store */ + static const struct { + u16 poplmovl; + u32 val; + u16 int80; + u16 pad; + } __attribute__((packed)) code = { + 0xb858, /* popl %eax ; movl $...,%eax */ + __NR_ia32_sigreturn, + 0x80cd, /* int $0x80 */ + 0, + }; + frame = get_sigframe(ka, regs, sizeof(*frame)); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) @@ -443,38 +461,24 @@ int ia32_setup_frame(int sig, struct k_s if (_COMPAT_NSIG_WORDS > 1) { err |= __copy_to_user(frame->extramask, &set->sig[1], sizeof(frame->extramask)); + if (err) + goto give_sigsegv; } - if (err) - goto give_sigsegv; /* Return stub is in 32bit vsyscall page */ - { - void __user *restorer; - if (current->binfmt->hasvdso) - restorer = VSYSCALL32_SIGRETURN; - else - restorer = (void *)&frame->retcode; - if (ka->sa.sa_flags & SA_RESTORER) - restorer = ka->sa.sa_restorer; - err |= __put_user(ptr_to_compat(restorer), &frame->pretcode); - } - /* These are actually not used anymore, but left because some - gdb versions depend on them as a marker. */ - { - /* copy_to_user optimizes that into a single 8 byte store */ - static const struct { - u16 poplmovl; - u32 val; - u16 int80; - u16 pad; - } __attribute__((packed)) code = { - 0xb858, /* popl %eax ; movl $...,%eax */ - __NR_ia32_sigreturn, - 0x80cd, /* int $0x80 */ - 0, - }; - err |= __copy_to_user(frame->retcode, &code, 8); - } + if (current->binfmt->hasvdso) + restorer = VSYSCALL32_SIGRETURN; + else + restorer = (void *)&frame->retcode; + if (ka->sa.sa_flags & SA_RESTORER) + restorer = ka->sa.sa_restorer; + err |= __put_user(ptr_to_compat(restorer), &frame->pretcode); + + /* + * These are actually not used anymore, but left because some + * gdb versions depend on them as a marker. + */ + err |= __copy_to_user(frame->retcode, &code, 8); if (err) goto give_sigsegv; @@ -487,11 +491,11 @@ int ia32_setup_frame(int sig, struct k_s regs->rdx = 0; regs->rcx = 0; - asm volatile("movl %0,%%ds" :: "r" (__USER32_DS)); - asm volatile("movl %0,%%es" :: "r" (__USER32_DS)); + asm volatile("movl %0,%%ds" :: "r" (__USER32_DS)); + asm volatile("movl %0,%%es" :: "r" (__USER32_DS)); - regs->cs = __USER32_CS; - regs->ss = __USER32_DS; + regs->cs = __USER32_CS; + regs->ss = __USER32_DS; set_fs(USER_DS); regs->eflags &= ~TF_MASK; @@ -499,8 +503,8 @@ int ia32_setup_frame(int sig, struct k_s ptrace_notify(SIGTRAP); #if DEBUG_SIG - printk("SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n", - current->comm, current->pid, frame, regs->rip, frame->pretcode); + printk(KERN_DEBUG "SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n", + current->comm, current->pid, frame, regs->rip, frame->pretcode); #endif return 0; @@ -511,25 +515,34 @@ give_sigsegv: } int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, - compat_sigset_t *set, struct pt_regs * regs) + compat_sigset_t *set, struct pt_regs *regs) { struct rt_sigframe __user *frame; + struct exec_domain *ed = current_thread_info()->exec_domain; + void __user *restorer = VSYSCALL32_RTSIGRETURN; int err = 0; + /* __copy_to_user optimizes that into a single 8 byte store */ + static const struct { + u8 movl; + u32 val; + u16 int80; + u16 pad; + u8 pad2; + } __attribute__((packed)) code = { + 0xb8, + __NR_ia32_rt_sigreturn, + 0x80cd, + 0, + }; + frame = get_sigframe(ka, regs, sizeof(*frame)); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; - { - struct exec_domain *ed = current_thread_info()->exec_domain; - err |= __put_user((ed - && ed->signal_invmap - && sig < 32 - ? ed->signal_invmap[sig] - : sig), - &frame->sig); - } + err |= __put_user((ed && ed->signal_invmap && sig < 32 + ? ed->signal_invmap[sig] : sig), &frame->sig); err |= __put_user(ptr_to_compat(&frame->info), &frame->pinfo); err |= __put_user(ptr_to_compat(&frame->uc), &frame->puc); err |= copy_siginfo_to_user32(&frame->info, info); @@ -544,38 +557,20 @@ int ia32_setup_rt_frame(int sig, struct &frame->uc.uc_stack.ss_flags); err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate, - regs, set->sig[0]); + regs, set->sig[0]); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); if (err) goto give_sigsegv; - - { - void __user *restorer = VSYSCALL32_RTSIGRETURN; - if (ka->sa.sa_flags & SA_RESTORER) - restorer = ka->sa.sa_restorer; - err |= __put_user(ptr_to_compat(restorer), &frame->pretcode); - } - - /* This is movl $,%eax ; int $0x80 */ - /* Not actually used anymore, but left because some gdb versions - need it. */ - { - /* __copy_to_user optimizes that into a single 8 byte store */ - static const struct { - u8 movl; - u32 val; - u16 int80; - u16 pad; - u8 pad2; - } __attribute__((packed)) code = { - 0xb8, - __NR_ia32_rt_sigreturn, - 0x80cd, - 0, - }; - err |= __copy_to_user(frame->retcode, &code, 8); - } + if (ka->sa.sa_flags & SA_RESTORER) + restorer = ka->sa.sa_restorer; + err |= __put_user(ptr_to_compat(restorer), &frame->pretcode); + + /* + * Not actually used anymore, but left because some gdb + * versions need it. + */ + err |= __copy_to_user(frame->retcode, &code, 8); if (err) goto give_sigsegv; @@ -593,11 +588,11 @@ int ia32_setup_rt_frame(int sig, struct regs->rdx = (unsigned long) &frame->info; regs->rcx = (unsigned long) &frame->uc; - asm volatile("movl %0,%%ds" :: "r" (__USER32_DS)); - asm volatile("movl %0,%%es" :: "r" (__USER32_DS)); - - regs->cs = __USER32_CS; - regs->ss = __USER32_DS; + asm volatile("movl %0,%%ds" :: "r" (__USER32_DS)); + asm volatile("movl %0,%%es" :: "r" (__USER32_DS)); + + regs->cs = __USER32_CS; + regs->ss = __USER32_DS; set_fs(USER_DS); regs->eflags &= ~TF_MASK; @@ -605,8 +600,8 @@ int ia32_setup_rt_frame(int sig, struct ptrace_notify(SIGTRAP); #if DEBUG_SIG - printk("SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n", - current->comm, current->pid, frame, regs->rip, frame->pretcode); + printk(KERN_DEBUG "SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n", + current->comm, current->pid, frame, regs->rip, frame->pretcode); #endif return 0; diff -puN arch/x86/ia32/ipc32.c~git-x86 arch/x86/ia32/ipc32.c --- a/arch/x86/ia32/ipc32.c~git-x86 +++ a/arch/x86/ia32/ipc32.c @@ -9,9 +9,8 @@ #include #include -asmlinkage long -sys32_ipc(u32 call, int first, int second, int third, - compat_uptr_t ptr, u32 fifth) +asmlinkage long sys32_ipc(u32 call, int first, int second, int third, + compat_uptr_t ptr, u32 fifth) { int version; @@ -19,36 +18,35 @@ sys32_ipc(u32 call, int first, int secon call &= 0xffff; switch (call) { - case SEMOP: + case SEMOP: /* struct sembuf is the same on 32 and 64bit :)) */ return sys_semtimedop(first, compat_ptr(ptr), second, NULL); - case SEMTIMEDOP: + case SEMTIMEDOP: return compat_sys_semtimedop(first, compat_ptr(ptr), second, compat_ptr(fifth)); - case SEMGET: + case SEMGET: return sys_semget(first, second, third); - case SEMCTL: + case SEMCTL: return compat_sys_semctl(first, second, third, compat_ptr(ptr)); - case MSGSND: + case MSGSND: return compat_sys_msgsnd(first, second, third, compat_ptr(ptr)); - case MSGRCV: + case MSGRCV: return compat_sys_msgrcv(first, second, fifth, third, version, compat_ptr(ptr)); - case MSGGET: + case MSGGET: return sys_msgget((key_t) first, second); - case MSGCTL: + case MSGCTL: return compat_sys_msgctl(first, second, compat_ptr(ptr)); - case SHMAT: + case SHMAT: return compat_sys_shmat(first, second, third, version, compat_ptr(ptr)); - break; - case SHMDT: + case SHMDT: return sys_shmdt(compat_ptr(ptr)); - case SHMGET: + case SHMGET: return sys_shmget(first, (unsigned)second, third); - case SHMCTL: + case SHMCTL: return compat_sys_shmctl(first, second, compat_ptr(ptr)); } return -ENOSYS; diff -puN arch/x86/ia32/mmap32.c~git-x86 /dev/null --- a/arch/x86/ia32/mmap32.c +++ /dev/null @@ -1,79 +0,0 @@ -/* - * linux/arch/x86_64/ia32/mm/mmap.c - * - * flexible mmap layout support - * - * Based on the i386 version which was - * - * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * - * Started by Ingo Molnar - */ - -#include -#include -#include -#include - -/* - * Top of mmap area (just below the process stack). - * - * Leave an at least ~128 MB hole. - */ -#define MIN_GAP (128*1024*1024) -#define MAX_GAP (TASK_SIZE/6*5) - -static inline unsigned long mmap_base(struct mm_struct *mm) -{ - unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur; - unsigned long random_factor = 0; - - if (current->flags & PF_RANDOMIZE) - random_factor = get_random_int() % (1024*1024); - - if (gap < MIN_GAP) - gap = MIN_GAP; - else if (gap > MAX_GAP) - gap = MAX_GAP; - - return PAGE_ALIGN(TASK_SIZE - gap - random_factor); -} - -/* - * This function, called very early during the creation of a new - * process VM image, sets up which VM layout function to use: - */ -void ia32_pick_mmap_layout(struct mm_struct *mm) -{ - /* - * Fall back to the standard layout if the personality - * bit is set, or if the expected stack growth is unlimited: - */ - if (sysctl_legacy_va_layout || - (current->personality & ADDR_COMPAT_LAYOUT) || - current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) { - mm->mmap_base = TASK_UNMAPPED_BASE; - mm->get_unmapped_area = arch_get_unmapped_area; - mm->unmap_area = arch_unmap_area; - } else { - mm->mmap_base = mmap_base(mm); - mm->get_unmapped_area = arch_get_unmapped_area_topdown; - mm->unmap_area = arch_unmap_area_topdown; - } -} diff -puN arch/x86/ia32/ptrace32.c~git-x86 arch/x86/ia32/ptrace32.c --- a/arch/x86/ia32/ptrace32.c~git-x86 +++ a/arch/x86/ia32/ptrace32.c @@ -1,13 +1,13 @@ -/* +/* * 32bit ptrace for x86-64. * * Copyright 2001,2002 Andi Kleen, SuSE Labs. - * Some parts copied from arch/i386/kernel/ptrace.c. See that file for earlier + * Some parts copied from arch/i386/kernel/ptrace.c. See that file for earlier * copyright. - * - * This allows to access 64bit processes too; but there is no way to see the extended - * register contents. - */ + * + * This allows to access 64bit processes too; but there is no way to + * see the extended register contents. + */ #include #include @@ -35,8 +35,9 @@ */ #define FLAG_MASK 0x54dd5UL -#define R32(l,q) \ - case offsetof(struct user32, regs.l): stack[offsetof(struct pt_regs, q)/8] = val; break +#define R32(l,q) \ + case offsetof(struct user32, regs.l): \ + stack[offsetof(struct pt_regs, q) / 8] = val; break static int putreg32(struct task_struct *child, unsigned regno, u32 val) { @@ -45,30 +46,35 @@ static int putreg32(struct task_struct * switch (regno) { case offsetof(struct user32, regs.fs): - if (val && (val & 3) != 3) return -EIO; + if (val && (val & 3) != 3) + return -EIO; child->thread.fsindex = val & 0xffff; break; case offsetof(struct user32, regs.gs): - if (val && (val & 3) != 3) return -EIO; + if (val && (val & 3) != 3) + return -EIO; child->thread.gsindex = val & 0xffff; break; case offsetof(struct user32, regs.ds): - if (val && (val & 3) != 3) return -EIO; + if (val && (val & 3) != 3) + return -EIO; child->thread.ds = val & 0xffff; break; case offsetof(struct user32, regs.es): child->thread.es = val & 0xffff; break; - case offsetof(struct user32, regs.ss): - if ((val & 3) != 3) return -EIO; - stack[offsetof(struct pt_regs, ss)/8] = val & 0xffff; + case offsetof(struct user32, regs.ss): + if ((val & 3) != 3) + return -EIO; + stack[offsetof(struct pt_regs, ss)/8] = val & 0xffff; break; - case offsetof(struct user32, regs.cs): - if ((val & 3) != 3) return -EIO; + case offsetof(struct user32, regs.cs): + if ((val & 3) != 3) + return -EIO; stack[offsetof(struct pt_regs, cs)/8] = val & 0xffff; break; - R32(ebx, rbx); + R32(ebx, rbx); R32(ecx, rcx); R32(edx, rdx); R32(edi, rdi); @@ -81,12 +87,13 @@ static int putreg32(struct task_struct * case offsetof(struct user32, regs.eflags): { __u64 *flags = &stack[offsetof(struct pt_regs, eflags)/8]; + val &= FLAG_MASK; *flags = val | (*flags & ~FLAG_MASK); break; } - case offsetof(struct user32, u_debugreg[4]): + case offsetof(struct user32, u_debugreg[4]): case offsetof(struct user32, u_debugreg[5]): return -EIO; @@ -108,36 +115,40 @@ static int putreg32(struct task_struct * case offsetof(struct user32, u_debugreg[6]): child->thread.debugreg6 = val; - break; + break; case offsetof(struct user32, u_debugreg[7]): val &= ~DR_CONTROL_RESERVED; /* See arch/i386/kernel/ptrace.c for an explanation of * this awkward check.*/ - for(i=0; i<4; i++) + for (i = 0; i < 4; i++) if ((0x5454 >> ((val >> (16 + 4*i)) & 0xf)) & 1) return -EIO; - child->thread.debugreg7 = val; + child->thread.debugreg7 = val; if (val) set_tsk_thread_flag(child, TIF_DEBUG); else clear_tsk_thread_flag(child, TIF_DEBUG); - break; - + break; + default: if (regno > sizeof(struct user32) || (regno & 3)) return -EIO; - - /* Other dummy fields in the virtual user structure are ignored */ - break; + + /* + * Other dummy fields in the virtual user structure + * are ignored + */ + break; } return 0; } #undef R32 -#define R32(l,q) \ - case offsetof(struct user32, regs.l): *val = stack[offsetof(struct pt_regs, q)/8]; break +#define R32(l,q) \ + case offsetof(struct user32, regs.l): \ + *val = stack[offsetof(struct pt_regs, q)/8]; break static int getreg32(struct task_struct *child, unsigned regno, u32 *val) { @@ -145,7 +156,7 @@ static int getreg32(struct task_struct * switch (regno) { case offsetof(struct user32, regs.fs): - *val = child->thread.fsindex; + *val = child->thread.fsindex; break; case offsetof(struct user32, regs.gs): *val = child->thread.gsindex; @@ -159,7 +170,7 @@ static int getreg32(struct task_struct * R32(cs, cs); R32(ss, ss); - R32(ebx, rbx); + R32(ebx, rbx); R32(ecx, rcx); R32(edx, rdx); R32(edi, rdi); @@ -171,32 +182,35 @@ static int getreg32(struct task_struct * R32(eflags, eflags); R32(esp, rsp); - case offsetof(struct user32, u_debugreg[0]): - *val = child->thread.debugreg0; - break; - case offsetof(struct user32, u_debugreg[1]): - *val = child->thread.debugreg1; - break; - case offsetof(struct user32, u_debugreg[2]): - *val = child->thread.debugreg2; - break; - case offsetof(struct user32, u_debugreg[3]): - *val = child->thread.debugreg3; - break; - case offsetof(struct user32, u_debugreg[6]): - *val = child->thread.debugreg6; - break; - case offsetof(struct user32, u_debugreg[7]): - *val = child->thread.debugreg7; - break; - + case offsetof(struct user32, u_debugreg[0]): + *val = child->thread.debugreg0; + break; + case offsetof(struct user32, u_debugreg[1]): + *val = child->thread.debugreg1; + break; + case offsetof(struct user32, u_debugreg[2]): + *val = child->thread.debugreg2; + break; + case offsetof(struct user32, u_debugreg[3]): + *val = child->thread.debugreg3; + break; + case offsetof(struct user32, u_debugreg[6]): + *val = child->thread.debugreg6; + break; + case offsetof(struct user32, u_debugreg[7]): + *val = child->thread.debugreg7; + break; + default: if (regno > sizeof(struct user32) || (regno & 3)) return -EIO; - /* Other dummy fields in the virtual user structure are ignored */ + /* + * Other dummy fields in the virtual user structure + * are ignored + */ *val = 0; - break; + break; } return 0; } @@ -205,10 +219,11 @@ static int getreg32(struct task_struct * static long ptrace32_siginfo(unsigned request, u32 pid, u32 addr, u32 data) { - int ret; - compat_siginfo_t __user *si32 = compat_ptr(data); - siginfo_t ssi; siginfo_t __user *si = compat_alloc_user_space(sizeof(siginfo_t)); + compat_siginfo_t __user *si32 = compat_ptr(data); + siginfo_t ssi; + int ret; + if (request == PTRACE_SETSIGINFO) { memset(&ssi, 0, sizeof(siginfo_t)); ret = copy_siginfo_from_user32(&ssi, si32); @@ -231,12 +246,12 @@ static long ptrace32_siginfo(unsigned re asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) { struct task_struct *child; - struct pt_regs *childregs; + struct pt_regs *childregs; void __user *datap = compat_ptr(data); int ret; __u32 val; - switch (request) { + switch (request) { case PTRACE_TRACEME: case PTRACE_ATTACH: case PTRACE_KILL: @@ -248,7 +263,7 @@ asmlinkage long sys32_ptrace(long reques case PTRACE_SETOPTIONS: case PTRACE_SET_THREAD_AREA: case PTRACE_GET_THREAD_AREA: - return sys_ptrace(request, pid, addr, data); + return sys_ptrace(request, pid, addr, data); default: return -EINVAL; @@ -257,7 +272,7 @@ asmlinkage long sys32_ptrace(long reques case PTRACE_PEEKDATA: case PTRACE_POKEDATA: case PTRACE_POKETEXT: - case PTRACE_POKEUSR: + case PTRACE_POKEUSR: case PTRACE_PEEKUSR: case PTRACE_GETREGS: case PTRACE_SETREGS: @@ -287,17 +302,19 @@ asmlinkage long sys32_ptrace(long reques case PTRACE_PEEKDATA: case PTRACE_PEEKTEXT: ret = 0; - if (access_process_vm(child, addr, &val, sizeof(u32), 0)!=sizeof(u32)) + if (access_process_vm(child, addr, &val, sizeof(u32), 0) != + sizeof(u32)) ret = -EIO; else - ret = put_user(val, (unsigned int __user *)datap); - break; + ret = put_user(val, (unsigned int __user *)datap); + break; case PTRACE_POKEDATA: case PTRACE_POKETEXT: ret = 0; - if (access_process_vm(child, addr, &data, sizeof(u32), 1)!=sizeof(u32)) - ret = -EIO; + if (access_process_vm(child, addr, &data, sizeof(u32), 1) != + sizeof(u32)) + ret = -EIO; break; case PTRACE_PEEKUSR: @@ -312,14 +329,15 @@ asmlinkage long sys32_ptrace(long reques case PTRACE_GETREGS: { /* Get all gp regs from the child. */ int i; - if (!access_ok(VERIFY_WRITE, datap, 16*4)) { + + if (!access_ok(VERIFY_WRITE, datap, 16*4)) { ret = -EIO; break; } ret = 0; - for ( i = 0; i <= 16*4 ; i += sizeof(__u32) ) { + for (i = 0; i <= 16*4; i += sizeof(__u32)) { getreg32(child, i, &val); - ret |= __put_user(val,(u32 __user *)datap); + ret |= __put_user(val, (u32 __user *)datap); datap += sizeof(u32); } break; @@ -328,12 +346,13 @@ asmlinkage long sys32_ptrace(long reques case PTRACE_SETREGS: { /* Set all gp regs in the child. */ unsigned long tmp; int i; - if (!access_ok(VERIFY_READ, datap, 16*4)) { + + if (!access_ok(VERIFY_READ, datap, 16*4)) { ret = -EIO; break; } - ret = 0; - for ( i = 0; i <= 16*4; i += sizeof(u32) ) { + ret = 0; + for (i = 0; i <= 16*4; i += sizeof(u32)) { ret |= __get_user(tmp, (u32 __user *)datap); putreg32(child, i, tmp); datap += sizeof(u32); @@ -342,17 +361,17 @@ asmlinkage long sys32_ptrace(long reques } case PTRACE_GETFPREGS: - ret = -EIO; - if (!access_ok(VERIFY_READ, compat_ptr(data), + ret = -EIO; + if (!access_ok(VERIFY_READ, compat_ptr(data), sizeof(struct user_i387_struct))) break; save_i387_ia32(child, datap, childregs, 1); - ret = 0; + ret = 0; break; case PTRACE_SETFPREGS: ret = -EIO; - if (!access_ok(VERIFY_WRITE, datap, + if (!access_ok(VERIFY_WRITE, datap, sizeof(struct user_i387_struct))) break; ret = 0; @@ -360,9 +379,10 @@ asmlinkage long sys32_ptrace(long reques restore_i387_ia32(child, datap, 1); break; - case PTRACE_GETFPXREGS: { + case PTRACE_GETFPXREGS: { struct user32_fxsr_struct __user *u = datap; - init_fpu(child); + + init_fpu(child); ret = -EIO; if (!access_ok(VERIFY_WRITE, u, sizeof(*u))) break; @@ -370,27 +390,31 @@ asmlinkage long sys32_ptrace(long reques if (__copy_to_user(u, &child->thread.i387.fxsave, sizeof(*u))) break; ret = __put_user(childregs->cs, &u->fcs); - ret |= __put_user(child->thread.ds, &u->fos); - break; - } - case PTRACE_SETFPXREGS: { + ret |= __put_user(child->thread.ds, &u->fos); + break; + } + case PTRACE_SETFPXREGS: { struct user32_fxsr_struct __user *u = datap; + unlazy_fpu(child); ret = -EIO; if (!access_ok(VERIFY_READ, u, sizeof(*u))) break; - /* no checking to be bug-to-bug compatible with i386. */ - /* but silence warning */ + /* + * no checking to be bug-to-bug compatible with i386. + * but silence warning + */ if (__copy_from_user(&child->thread.i387.fxsave, u, sizeof(*u))) ; set_stopped_child_used_math(child); child->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask; - ret = 0; + ret = 0; break; } case PTRACE_GETEVENTMSG: - ret = put_user(child->ptrace_message,(unsigned int __user *)compat_ptr(data)); + ret = put_user(child->ptrace_message, + (unsigned int __user *)compat_ptr(data)); break; default: diff -puN arch/x86/ia32/sys_ia32.c~git-x86 arch/x86/ia32/sys_ia32.c --- a/arch/x86/ia32/sys_ia32.c~git-x86 +++ a/arch/x86/ia32/sys_ia32.c @@ -1,29 +1,29 @@ /* * sys_ia32.c: Conversion between 32bit and 64bit native syscalls. Based on - * sys_sparc32 + * sys_sparc32 * * Copyright (C) 2000 VA Linux Co * Copyright (C) 2000 Don Dugger - * Copyright (C) 1999 Arun Sharma - * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) - * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) + * Copyright (C) 1999 Arun Sharma + * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) * Copyright (C) 2000 Hewlett-Packard Co. * Copyright (C) 2000 David Mosberger-Tang - * Copyright (C) 2000,2001,2002 Andi Kleen, SuSE Labs (x86-64 port) + * Copyright (C) 2000,2001,2002 Andi Kleen, SuSE Labs (x86-64 port) * * These routines maintain argument size conversion between 32bit and 64bit - * environment. In 2.5 most of this should be moved to a generic directory. + * environment. In 2.5 most of this should be moved to a generic directory. * * This file assumes that there is a hole at the end of user address space. - * - * Some of the functions are LE specific currently. These are hopefully all marked. - * This should be fixed. + * + * Some of the functions are LE specific currently. These are + * hopefully all marked. This should be fixed. */ #include #include -#include -#include +#include +#include #include #include #include @@ -90,43 +90,44 @@ int cp_compat_stat(struct kstat *kbuf, s if (sizeof(ino) < sizeof(kbuf->ino) && ino != kbuf->ino) return -EOVERFLOW; if (!access_ok(VERIFY_WRITE, ubuf, sizeof(struct compat_stat)) || - __put_user (old_encode_dev(kbuf->dev), &ubuf->st_dev) || - __put_user (ino, &ubuf->st_ino) || - __put_user (kbuf->mode, &ubuf->st_mode) || - __put_user (kbuf->nlink, &ubuf->st_nlink) || - __put_user (uid, &ubuf->st_uid) || - __put_user (gid, &ubuf->st_gid) || - __put_user (old_encode_dev(kbuf->rdev), &ubuf->st_rdev) || - __put_user (kbuf->size, &ubuf->st_size) || - __put_user (kbuf->atime.tv_sec, &ubuf->st_atime) || - __put_user (kbuf->atime.tv_nsec, &ubuf->st_atime_nsec) || - __put_user (kbuf->mtime.tv_sec, &ubuf->st_mtime) || - __put_user (kbuf->mtime.tv_nsec, &ubuf->st_mtime_nsec) || - __put_user (kbuf->ctime.tv_sec, &ubuf->st_ctime) || - __put_user (kbuf->ctime.tv_nsec, &ubuf->st_ctime_nsec) || - __put_user (kbuf->blksize, &ubuf->st_blksize) || - __put_user (kbuf->blocks, &ubuf->st_blocks)) + __put_user(old_encode_dev(kbuf->dev), &ubuf->st_dev) || + __put_user(ino, &ubuf->st_ino) || + __put_user(kbuf->mode, &ubuf->st_mode) || + __put_user(kbuf->nlink, &ubuf->st_nlink) || + __put_user(uid, &ubuf->st_uid) || + __put_user(gid, &ubuf->st_gid) || + __put_user(old_encode_dev(kbuf->rdev), &ubuf->st_rdev) || + __put_user(kbuf->size, &ubuf->st_size) || + __put_user(kbuf->atime.tv_sec, &ubuf->st_atime) || + __put_user(kbuf->atime.tv_nsec, &ubuf->st_atime_nsec) || + __put_user(kbuf->mtime.tv_sec, &ubuf->st_mtime) || + __put_user(kbuf->mtime.tv_nsec, &ubuf->st_mtime_nsec) || + __put_user(kbuf->ctime.tv_sec, &ubuf->st_ctime) || + __put_user(kbuf->ctime.tv_nsec, &ubuf->st_ctime_nsec) || + __put_user(kbuf->blksize, &ubuf->st_blksize) || + __put_user(kbuf->blocks, &ubuf->st_blocks)) return -EFAULT; return 0; } -asmlinkage long -sys32_truncate64(char __user * filename, unsigned long offset_low, unsigned long offset_high) +asmlinkage long sys32_truncate64(char __user *filename, + unsigned long offset_low, + unsigned long offset_high) { return sys_truncate(filename, ((loff_t) offset_high << 32) | offset_low); } -asmlinkage long -sys32_ftruncate64(unsigned int fd, unsigned long offset_low, unsigned long offset_high) +asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long offset_low, + unsigned long offset_high) { return sys_ftruncate(fd, ((loff_t) offset_high << 32) | offset_low); } -/* Another set for IA32/LFS -- x86_64 struct stat is different due to - support for 64bit inode numbers. */ - -static int -cp_stat64(struct stat64 __user *ubuf, struct kstat *stat) +/* + * Another set for IA32/LFS -- x86_64 struct stat is different due to + * support for 64bit inode numbers. + */ +static int cp_stat64(struct stat64 __user *ubuf, struct kstat *stat) { typeof(ubuf->st_uid) uid = 0; typeof(ubuf->st_gid) gid = 0; @@ -134,38 +135,39 @@ cp_stat64(struct stat64 __user *ubuf, st SET_GID(gid, stat->gid); if (!access_ok(VERIFY_WRITE, ubuf, sizeof(struct stat64)) || __put_user(huge_encode_dev(stat->dev), &ubuf->st_dev) || - __put_user (stat->ino, &ubuf->__st_ino) || - __put_user (stat->ino, &ubuf->st_ino) || - __put_user (stat->mode, &ubuf->st_mode) || - __put_user (stat->nlink, &ubuf->st_nlink) || - __put_user (uid, &ubuf->st_uid) || - __put_user (gid, &ubuf->st_gid) || - __put_user (huge_encode_dev(stat->rdev), &ubuf->st_rdev) || - __put_user (stat->size, &ubuf->st_size) || - __put_user (stat->atime.tv_sec, &ubuf->st_atime) || - __put_user (stat->atime.tv_nsec, &ubuf->st_atime_nsec) || - __put_user (stat->mtime.tv_sec, &ubuf->st_mtime) || - __put_user (stat->mtime.tv_nsec, &ubuf->st_mtime_nsec) || - __put_user (stat->ctime.tv_sec, &ubuf->st_ctime) || - __put_user (stat->ctime.tv_nsec, &ubuf->st_ctime_nsec) || - __put_user (stat->blksize, &ubuf->st_blksize) || - __put_user (stat->blocks, &ubuf->st_blocks)) + __put_user(stat->ino, &ubuf->__st_ino) || + __put_user(stat->ino, &ubuf->st_ino) || + __put_user(stat->mode, &ubuf->st_mode) || + __put_user(stat->nlink, &ubuf->st_nlink) || + __put_user(uid, &ubuf->st_uid) || + __put_user(gid, &ubuf->st_gid) || + __put_user(huge_encode_dev(stat->rdev), &ubuf->st_rdev) || + __put_user(stat->size, &ubuf->st_size) || + __put_user(stat->atime.tv_sec, &ubuf->st_atime) || + __put_user(stat->atime.tv_nsec, &ubuf->st_atime_nsec) || + __put_user(stat->mtime.tv_sec, &ubuf->st_mtime) || + __put_user(stat->mtime.tv_nsec, &ubuf->st_mtime_nsec) || + __put_user(stat->ctime.tv_sec, &ubuf->st_ctime) || + __put_user(stat->ctime.tv_nsec, &ubuf->st_ctime_nsec) || + __put_user(stat->blksize, &ubuf->st_blksize) || + __put_user(stat->blocks, &ubuf->st_blocks)) return -EFAULT; return 0; } -asmlinkage long -sys32_stat64(char __user * filename, struct stat64 __user *statbuf) +asmlinkage long sys32_stat64(char __user *filename, + struct stat64 __user *statbuf) { struct kstat stat; int ret = vfs_stat(filename, &stat); + if (!ret) ret = cp_stat64(statbuf, &stat); return ret; } -asmlinkage long -sys32_lstat64(char __user * filename, struct stat64 __user *statbuf) +asmlinkage long sys32_lstat64(char __user *filename, + struct stat64 __user *statbuf) { struct kstat stat; int ret = vfs_lstat(filename, &stat); @@ -174,8 +176,7 @@ sys32_lstat64(char __user * filename, st return ret; } -asmlinkage long -sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf) +asmlinkage long sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf) { struct kstat stat; int ret = vfs_fstat(fd, &stat); @@ -184,9 +185,8 @@ sys32_fstat64(unsigned int fd, struct st return ret; } -asmlinkage long -sys32_fstatat(unsigned int dfd, char __user *filename, - struct stat64 __user* statbuf, int flag) +asmlinkage long sys32_fstatat(unsigned int dfd, char __user *filename, + struct stat64 __user *statbuf, int flag) { struct kstat stat; int error = -EINVAL; @@ -221,8 +221,7 @@ struct mmap_arg_struct { unsigned int offset; }; -asmlinkage long -sys32_mmap(struct mmap_arg_struct __user *arg) +asmlinkage long sys32_mmap(struct mmap_arg_struct __user *arg) { struct mmap_arg_struct a; struct file *file = NULL; @@ -233,33 +232,33 @@ sys32_mmap(struct mmap_arg_struct __user return -EFAULT; if (a.offset & ~PAGE_MASK) - return -EINVAL; + return -EINVAL; if (!(a.flags & MAP_ANONYMOUS)) { file = fget(a.fd); if (!file) return -EBADF; } - - mm = current->mm; - down_write(&mm->mmap_sem); - retval = do_mmap_pgoff(file, a.addr, a.len, a.prot, a.flags, a.offset>>PAGE_SHIFT); + + mm = current->mm; + down_write(&mm->mmap_sem); + retval = do_mmap_pgoff(file, a.addr, a.len, a.prot, a.flags, + a.offset>>PAGE_SHIFT); if (file) fput(file); - up_write(&mm->mmap_sem); + up_write(&mm->mmap_sem); return retval; } -asmlinkage long -sys32_mprotect(unsigned long start, size_t len, unsigned long prot) +asmlinkage long sys32_mprotect(unsigned long start, size_t len, + unsigned long prot) { - return sys_mprotect(start,len,prot); + return sys_mprotect(start, len, prot); } -asmlinkage long -sys32_pipe(int __user *fd) +asmlinkage long sys32_pipe(int __user *fd) { int retval; int fds[2]; @@ -269,13 +268,13 @@ sys32_pipe(int __user *fd) goto out; if (copy_to_user(fd, fds, sizeof(fds))) retval = -EFAULT; - out: +out: return retval; } -asmlinkage long -sys32_rt_sigaction(int sig, struct sigaction32 __user *act, - struct sigaction32 __user *oact, unsigned int sigsetsize) +asmlinkage long sys32_rt_sigaction(int sig, struct sigaction32 __user *act, + struct sigaction32 __user *oact, + unsigned int sigsetsize) { struct k_sigaction new_ka, old_ka; int ret; @@ -291,12 +290,17 @@ sys32_rt_sigaction(int sig, struct sigac if (!access_ok(VERIFY_READ, act, sizeof(*act)) || __get_user(handler, &act->sa_handler) || __get_user(new_ka.sa.sa_flags, &act->sa_flags) || - __get_user(restorer, &act->sa_restorer)|| - __copy_from_user(&set32, &act->sa_mask, sizeof(compat_sigset_t))) + __get_user(restorer, &act->sa_restorer) || + __copy_from_user(&set32, &act->sa_mask, + sizeof(compat_sigset_t))) return -EFAULT; new_ka.sa.sa_handler = compat_ptr(handler); new_ka.sa.sa_restorer = compat_ptr(restorer); - /* FIXME: here we rely on _COMPAT_NSIG_WORS to be >= than _NSIG_WORDS << 1 */ + + /* + * FIXME: here we rely on _COMPAT_NSIG_WORS to be >= + * than _NSIG_WORDS << 1 + */ switch (_NSIG_WORDS) { case 4: new_ka.sa.sa_mask.sig[3] = set32.sig[6] | (((long)set32.sig[7]) << 32); @@ -312,7 +316,10 @@ sys32_rt_sigaction(int sig, struct sigac ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); if (!ret && oact) { - /* FIXME: here we rely on _COMPAT_NSIG_WORS to be >= than _NSIG_WORDS << 1 */ + /* + * FIXME: here we rely on _COMPAT_NSIG_WORS to be >= + * than _NSIG_WORDS << 1 + */ switch (_NSIG_WORDS) { case 4: set32.sig[7] = (old_ka.sa.sa_mask.sig[3] >> 32); @@ -328,23 +335,26 @@ sys32_rt_sigaction(int sig, struct sigac set32.sig[0] = old_ka.sa.sa_mask.sig[0]; } if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || - __put_user(ptr_to_compat(old_ka.sa.sa_handler), &oact->sa_handler) || - __put_user(ptr_to_compat(old_ka.sa.sa_restorer), &oact->sa_restorer) || + __put_user(ptr_to_compat(old_ka.sa.sa_handler), + &oact->sa_handler) || + __put_user(ptr_to_compat(old_ka.sa.sa_restorer), + &oact->sa_restorer) || __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || - __copy_to_user(&oact->sa_mask, &set32, sizeof(compat_sigset_t))) + __copy_to_user(&oact->sa_mask, &set32, + sizeof(compat_sigset_t))) return -EFAULT; } return ret; } -asmlinkage long -sys32_sigaction (int sig, struct old_sigaction32 __user *act, struct old_sigaction32 __user *oact) +asmlinkage long sys32_sigaction(int sig, struct old_sigaction32 __user *act, + struct old_sigaction32 __user *oact) { - struct k_sigaction new_ka, old_ka; - int ret; + struct k_sigaction new_ka, old_ka; + int ret; - if (act) { + if (act) { compat_old_sigset_t mask; compat_uptr_t handler, restorer; @@ -359,33 +369,35 @@ sys32_sigaction (int sig, struct old_sig new_ka.sa.sa_restorer = compat_ptr(restorer); siginitset(&new_ka.sa.sa_mask, mask); - } + } - ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); if (!ret && oact) { if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || - __put_user(ptr_to_compat(old_ka.sa.sa_handler), &oact->sa_handler) || - __put_user(ptr_to_compat(old_ka.sa.sa_restorer), &oact->sa_restorer) || + __put_user(ptr_to_compat(old_ka.sa.sa_handler), + &oact->sa_handler) || + __put_user(ptr_to_compat(old_ka.sa.sa_restorer), + &oact->sa_restorer) || __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask)) return -EFAULT; - } + } return ret; } -asmlinkage long -sys32_rt_sigprocmask(int how, compat_sigset_t __user *set, - compat_sigset_t __user *oset, unsigned int sigsetsize) +asmlinkage long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set, + compat_sigset_t __user *oset, + unsigned int sigsetsize) { sigset_t s; compat_sigset_t s32; int ret; mm_segment_t old_fs = get_fs(); - + if (set) { - if (copy_from_user (&s32, set, sizeof(compat_sigset_t))) + if (copy_from_user(&s32, set, sizeof(compat_sigset_t))) return -EFAULT; switch (_NSIG_WORDS) { case 4: s.sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32); @@ -394,13 +406,14 @@ sys32_rt_sigprocmask(int how, compat_sig case 1: s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32); } } - set_fs (KERNEL_DS); + set_fs(KERNEL_DS); ret = sys_rt_sigprocmask(how, set ? (sigset_t __user *)&s : NULL, oset ? (sigset_t __user *)&s : NULL, - sigsetsize); - set_fs (old_fs); - if (ret) return ret; + sigsetsize); + set_fs(old_fs); + if (ret) + return ret; if (oset) { switch (_NSIG_WORDS) { case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3]; @@ -408,52 +421,49 @@ sys32_rt_sigprocmask(int how, compat_sig case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1]; case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0]; } - if (copy_to_user (oset, &s32, sizeof(compat_sigset_t))) + if (copy_to_user(oset, &s32, sizeof(compat_sigset_t))) return -EFAULT; } return 0; } -static inline long -get_tv32(struct timeval *o, struct compat_timeval __user *i) +static inline long get_tv32(struct timeval *o, struct compat_timeval __user *i) { - int err = -EFAULT; - if (access_ok(VERIFY_READ, i, sizeof(*i))) { + int err = -EFAULT; + + if (access_ok(VERIFY_READ, i, sizeof(*i))) { err = __get_user(o->tv_sec, &i->tv_sec); err |= __get_user(o->tv_usec, &i->tv_usec); } - return err; + return err; } -static inline long -put_tv32(struct compat_timeval __user *o, struct timeval *i) +static inline long put_tv32(struct compat_timeval __user *o, struct timeval *i) { int err = -EFAULT; - if (access_ok(VERIFY_WRITE, o, sizeof(*o))) { + + if (access_ok(VERIFY_WRITE, o, sizeof(*o))) { err = __put_user(i->tv_sec, &o->tv_sec); err |= __put_user(i->tv_usec, &o->tv_usec); - } - return err; + } + return err; } -extern unsigned int alarm_setitimer(unsigned int seconds); - -asmlinkage long -sys32_alarm(unsigned int seconds) +asmlinkage long sys32_alarm(unsigned int seconds) { return alarm_setitimer(seconds); } -/* Translations due to time_t size differences. Which affects all - sorts of things, like timeval and itimerval. */ - -extern struct timezone sys_tz; - -asmlinkage long -sys32_gettimeofday(struct compat_timeval __user *tv, struct timezone __user *tz) +/* + * Translations due to time_t size differences. Which affects all + * sorts of things, like timeval and itimerval. + */ +asmlinkage long sys32_gettimeofday(struct compat_timeval __user *tv, + struct timezone __user *tz) { if (tv) { struct timeval ktv; + do_gettimeofday(&ktv); if (put_tv32(tv, &ktv)) return -EFAULT; @@ -465,14 +475,14 @@ sys32_gettimeofday(struct compat_timeval return 0; } -asmlinkage long -sys32_settimeofday(struct compat_timeval __user *tv, struct timezone __user *tz) +asmlinkage long sys32_settimeofday(struct compat_timeval __user *tv, + struct timezone __user *tz) { struct timeval ktv; struct timespec kts; struct timezone ktz; - if (tv) { + if (tv) { if (get_tv32(&ktv, tv)) return -EFAULT; kts.tv_sec = ktv.tv_sec; @@ -494,8 +504,7 @@ struct sel_arg_struct { unsigned int tvp; }; -asmlinkage long -sys32_old_select(struct sel_arg_struct __user *arg) +asmlinkage long sys32_old_select(struct sel_arg_struct __user *arg) { struct sel_arg_struct a; @@ -505,50 +514,45 @@ sys32_old_select(struct sel_arg_struct _ compat_ptr(a.exp), compat_ptr(a.tvp)); } -extern asmlinkage long -compat_sys_wait4(compat_pid_t pid, compat_uint_t * stat_addr, int options, - struct compat_rusage *ru); - -asmlinkage long -sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr, int options) +asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr, + int options) { return compat_sys_wait4(pid, stat_addr, options, NULL); } /* 32-bit timeval and related flotsam. */ -asmlinkage long -sys32_sysfs(int option, u32 arg1, u32 arg2) +asmlinkage long sys32_sysfs(int option, u32 arg1, u32 arg2) { return sys_sysfs(option, arg1, arg2); } -asmlinkage long -sys32_sched_rr_get_interval(compat_pid_t pid, struct compat_timespec __user *interval) +asmlinkage long sys32_sched_rr_get_interval(compat_pid_t pid, + struct compat_timespec __user *interval) { struct timespec t; int ret; - mm_segment_t old_fs = get_fs (); - - set_fs (KERNEL_DS); + mm_segment_t old_fs = get_fs(); + + set_fs(KERNEL_DS); ret = sys_sched_rr_get_interval(pid, (struct timespec __user *)&t); - set_fs (old_fs); + set_fs(old_fs); if (put_compat_timespec(&t, interval)) return -EFAULT; return ret; } -asmlinkage long -sys32_rt_sigpending(compat_sigset_t __user *set, compat_size_t sigsetsize) +asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *set, + compat_size_t sigsetsize) { sigset_t s; compat_sigset_t s32; int ret; mm_segment_t old_fs = get_fs(); - - set_fs (KERNEL_DS); + + set_fs(KERNEL_DS); ret = sys_rt_sigpending((sigset_t __user *)&s, sigsetsize); - set_fs (old_fs); + set_fs(old_fs); if (!ret) { switch (_NSIG_WORDS) { case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3]; @@ -556,30 +560,29 @@ sys32_rt_sigpending(compat_sigset_t __us case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1]; case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0]; } - if (copy_to_user (set, &s32, sizeof(compat_sigset_t))) + if (copy_to_user(set, &s32, sizeof(compat_sigset_t))) return -EFAULT; } return ret; } -asmlinkage long -sys32_rt_sigqueueinfo(int pid, int sig, compat_siginfo_t __user *uinfo) +asmlinkage long sys32_rt_sigqueueinfo(int pid, int sig, + compat_siginfo_t __user *uinfo) { siginfo_t info; int ret; mm_segment_t old_fs = get_fs(); - + if (copy_siginfo_from_user32(&info, uinfo)) return -EFAULT; - set_fs (KERNEL_DS); + set_fs(KERNEL_DS); ret = sys_rt_sigqueueinfo(pid, sig, (siginfo_t __user *)&info); - set_fs (old_fs); + set_fs(old_fs); return ret; } /* These are here just in case some old ia32 binary calls it. */ -asmlinkage long -sys32_pause(void) +asmlinkage long sys32_pause(void) { current->state = TASK_INTERRUPTIBLE; schedule(); @@ -599,25 +602,25 @@ struct sysctl_ia32 { }; -asmlinkage long -sys32_sysctl(struct sysctl_ia32 __user *args32) +asmlinkage long sys32_sysctl(struct sysctl_ia32 __user *args32) { struct sysctl_ia32 a32; - mm_segment_t old_fs = get_fs (); + mm_segment_t old_fs = get_fs(); void __user *oldvalp, *newvalp; size_t oldlen; int __user *namep; long ret; - if (copy_from_user(&a32, args32, sizeof (a32))) + if (copy_from_user(&a32, args32, sizeof(a32))) return -EFAULT; /* - * We need to pre-validate these because we have to disable address checking - * before calling do_sysctl() because of OLDLEN but we can't run the risk of the - * user specifying bad addresses here. Well, since we're dealing with 32 bit - * addresses, we KNOW that access_ok() will always succeed, so this is an - * expensive NOP, but so what... + * We need to pre-validate these because we have to disable + * address checking before calling do_sysctl() because of + * OLDLEN but we can't run the risk of the user specifying bad + * addresses here. Well, since we're dealing with 32 bit + * addresses, we KNOW that access_ok() will always succeed, so + * this is an expensive NOP, but so what... */ namep = compat_ptr(a32.name); oldvalp = compat_ptr(a32.oldval); @@ -636,34 +639,34 @@ sys32_sysctl(struct sysctl_ia32 __user * unlock_kernel(); set_fs(old_fs); - if (oldvalp && put_user (oldlen, (int __user *)compat_ptr(a32.oldlenp))) + if (oldvalp && put_user(oldlen, (int __user *)compat_ptr(a32.oldlenp))) return -EFAULT; return ret; } #endif -/* warning: next two assume little endian */ -asmlinkage long -sys32_pread(unsigned int fd, char __user *ubuf, u32 count, u32 poslo, u32 poshi) +/* warning: next two assume little endian */ +asmlinkage long sys32_pread(unsigned int fd, char __user *ubuf, u32 count, + u32 poslo, u32 poshi) { return sys_pread64(fd, ubuf, count, ((loff_t)AA(poshi) << 32) | AA(poslo)); } -asmlinkage long -sys32_pwrite(unsigned int fd, char __user *ubuf, u32 count, u32 poslo, u32 poshi) +asmlinkage long sys32_pwrite(unsigned int fd, char __user *ubuf, u32 count, + u32 poslo, u32 poshi) { return sys_pwrite64(fd, ubuf, count, ((loff_t)AA(poshi) << 32) | AA(poslo)); } -asmlinkage long -sys32_personality(unsigned long personality) +asmlinkage long sys32_personality(unsigned long personality) { int ret; - if (personality(current->personality) == PER_LINUX32 && + + if (personality(current->personality) == PER_LINUX32 && personality == PER_LINUX) personality = PER_LINUX32; ret = sys_personality(personality); @@ -672,34 +675,33 @@ sys32_personality(unsigned long personal return ret; } -asmlinkage long -sys32_sendfile(int out_fd, int in_fd, compat_off_t __user *offset, s32 count) +asmlinkage long sys32_sendfile(int out_fd, int in_fd, + compat_off_t __user *offset, s32 count) { mm_segment_t old_fs = get_fs(); int ret; off_t of; - + if (offset && get_user(of, offset)) return -EFAULT; - + set_fs(KERNEL_DS); ret = sys_sendfile(out_fd, in_fd, offset ? (off_t __user *)&of : NULL, count); set_fs(old_fs); - + if (offset && put_user(of, offset)) return -EFAULT; - return ret; } asmlinkage long sys32_mmap2(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff) { struct mm_struct *mm = current->mm; unsigned long error; - struct file * file = NULL; + struct file *file = NULL; flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); if (!(flags & MAP_ANONYMOUS)) { @@ -717,36 +719,35 @@ asmlinkage long sys32_mmap2(unsigned lon return error; } -asmlinkage long sys32_olduname(struct oldold_utsname __user * name) +asmlinkage long sys32_olduname(struct oldold_utsname __user *name) { + char *arch = "x86_64"; int err; if (!name) return -EFAULT; if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname))) return -EFAULT; - - down_read(&uts_sem); - err = __copy_to_user(&name->sysname,&utsname()->sysname, - __OLD_UTS_LEN); - err |= __put_user(0,name->sysname+__OLD_UTS_LEN); - err |= __copy_to_user(&name->nodename,&utsname()->nodename, - __OLD_UTS_LEN); - err |= __put_user(0,name->nodename+__OLD_UTS_LEN); - err |= __copy_to_user(&name->release,&utsname()->release, - __OLD_UTS_LEN); - err |= __put_user(0,name->release+__OLD_UTS_LEN); - err |= __copy_to_user(&name->version,&utsname()->version, - __OLD_UTS_LEN); - err |= __put_user(0,name->version+__OLD_UTS_LEN); - { - char *arch = "x86_64"; - if (personality(current->personality) == PER_LINUX32) - arch = "i686"; - - err |= __copy_to_user(&name->machine, arch, strlen(arch)+1); - } + down_read(&uts_sem); + + err = __copy_to_user(&name->sysname, &utsname()->sysname, + __OLD_UTS_LEN); + err |= __put_user(0, name->sysname+__OLD_UTS_LEN); + err |= __copy_to_user(&name->nodename, &utsname()->nodename, + __OLD_UTS_LEN); + err |= __put_user(0, name->nodename+__OLD_UTS_LEN); + err |= __copy_to_user(&name->release, &utsname()->release, + __OLD_UTS_LEN); + err |= __put_user(0, name->release+__OLD_UTS_LEN); + err |= __copy_to_user(&name->version, &utsname()->version, + __OLD_UTS_LEN); + err |= __put_user(0, name->version+__OLD_UTS_LEN); + + if (personality(current->personality) == PER_LINUX32) + arch = "i686"; + + err |= __copy_to_user(&name->machine, arch, strlen(arch) + 1); up_read(&uts_sem); @@ -755,17 +756,19 @@ asmlinkage long sys32_olduname(struct ol return err; } -long sys32_uname(struct old_utsname __user * name) +long sys32_uname(struct old_utsname __user *name) { int err; + if (!name) return -EFAULT; down_read(&uts_sem); - err = copy_to_user(name, utsname(), sizeof (*name)); + err = copy_to_user(name, utsname(), sizeof(*name)); up_read(&uts_sem); - if (personality(current->personality) == PER_LINUX32) + if (personality(current->personality) == PER_LINUX32) err |= copy_to_user(&name->machine, "i686", 5); - return err?-EFAULT:0; + + return err ? -EFAULT : 0; } long sys32_ustat(unsigned dev, struct ustat32 __user *u32p) @@ -773,27 +776,28 @@ long sys32_ustat(unsigned dev, struct us struct ustat u; mm_segment_t seg; int ret; - - seg = get_fs(); - set_fs(KERNEL_DS); + + seg = get_fs(); + set_fs(KERNEL_DS); ret = sys_ustat(dev, (struct ustat __user *)&u); set_fs(seg); - if (ret >= 0) { - if (!access_ok(VERIFY_WRITE,u32p,sizeof(struct ustat32)) || - __put_user((__u32) u.f_tfree, &u32p->f_tfree) || - __put_user((__u32) u.f_tinode, &u32p->f_tfree) || - __copy_to_user(&u32p->f_fname, u.f_fname, sizeof(u.f_fname)) || - __copy_to_user(&u32p->f_fpack, u.f_fpack, sizeof(u.f_fpack))) - ret = -EFAULT; - } + if (ret < 0) + return ret; + + if (!access_ok(VERIFY_WRITE, u32p, sizeof(struct ustat32)) || + __put_user((__u32) u.f_tfree, &u32p->f_tfree) || + __put_user((__u32) u.f_tinode, &u32p->f_tfree) || + __copy_to_user(&u32p->f_fname, u.f_fname, sizeof(u.f_fname)) || + __copy_to_user(&u32p->f_fpack, u.f_fpack, sizeof(u.f_fpack))) + ret = -EFAULT; return ret; -} +} asmlinkage long sys32_execve(char __user *name, compat_uptr_t __user *argv, compat_uptr_t __user *envp, struct pt_regs *regs) { long error; - char * filename; + char *filename; filename = getname(name); error = PTR_ERR(filename); @@ -814,16 +818,17 @@ asmlinkage long sys32_clone(unsigned int { void __user *parent_tid = (void __user *)regs->rdx; void __user *child_tid = (void __user *)regs->rdi; + if (!newsp) newsp = regs->rsp; - return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid); + return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid); } /* - * Some system calls that need sign extended arguments. This could be done by a generic wrapper. - */ - -long sys32_lseek (unsigned int fd, int offset, unsigned int whence) + * Some system calls that need sign extended arguments. This could be + * done by a generic wrapper. + */ +long sys32_lseek(unsigned int fd, int offset, unsigned int whence) { return sys_lseek(fd, offset, whence); } @@ -832,49 +837,52 @@ long sys32_kill(int pid, int sig) { return sys_kill(pid, sig); } - -long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high, + +long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high, __u32 len_low, __u32 len_high, int advice) -{ +{ return sys_fadvise64_64(fd, (((u64)offset_high)<<32) | offset_low, (((u64)len_high)<<32) | len_low, - advice); -} + advice); +} long sys32_vm86_warning(void) -{ +{ struct task_struct *me = current; static char lastcomm[sizeof(me->comm)]; + if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) { - compat_printk(KERN_INFO "%s: vm86 mode not supported on 64 bit kernel\n", - me->comm); + compat_printk(KERN_INFO + "%s: vm86 mode not supported on 64 bit kernel\n", + me->comm); strncpy(lastcomm, me->comm, sizeof(lastcomm)); - } + } return -ENOSYS; -} +} long sys32_lookup_dcookie(u32 addr_low, u32 addr_high, - char __user * buf, size_t len) + char __user *buf, size_t len) { return sys_lookup_dcookie(((u64)addr_high << 32) | addr_low, buf, len); } -asmlinkage ssize_t sys32_readahead(int fd, unsigned off_lo, unsigned off_hi, size_t count) +asmlinkage ssize_t sys32_readahead(int fd, unsigned off_lo, unsigned off_hi, + size_t count) { return sys_readahead(fd, ((u64)off_hi << 32) | off_lo, count); } asmlinkage long sys32_sync_file_range(int fd, unsigned off_low, unsigned off_hi, - unsigned n_low, unsigned n_hi, int flags) + unsigned n_low, unsigned n_hi, int flags) { return sys_sync_file_range(fd, ((u64)off_hi << 32) | off_low, ((u64)n_hi << 32) | n_low, flags); } -asmlinkage long sys32_fadvise64(int fd, unsigned offset_lo, unsigned offset_hi, size_t len, - int advice) +asmlinkage long sys32_fadvise64(int fd, unsigned offset_lo, unsigned offset_hi, + size_t len, int advice) { return sys_fadvise64_64(fd, ((u64)offset_hi << 32) | offset_lo, len, advice); diff -puN arch/x86/ia32/syscall32.c~git-x86 arch/x86/ia32/syscall32.c --- a/arch/x86/ia32/syscall32.c~git-x86 +++ a/arch/x86/ia32/syscall32.c @@ -1,8 +1,9 @@ -/* Copyright 2002,2003 Andi Kleen, SuSE Labs */ - -/* vsyscall handling for 32bit processes. Map a stub page into it - on demand because 32bit cannot reach the kernel's fixmaps */ - +/* + * Copyright 2002,2003 Andi Kleen, SuSE Labs + * + * vsyscall handling for 32bit processes. Map a stub page into it on + * demand because 32bit cannot reach the kernel's fixmaps + */ #include #include #include @@ -50,31 +51,33 @@ int syscall32_setup_pages(struct linux_b } static int __init init_syscall32(void) -{ +{ char *syscall32_page = (void *)get_zeroed_page(GFP_KERNEL); - if (!syscall32_page) - panic("Cannot allocate syscall32 page"); + + if (!syscall32_page) + panic("Cannot allocate syscall32 page"); syscall32_pages[0] = virt_to_page(syscall32_page); - if (use_sysenter > 0) { - memcpy(syscall32_page, syscall32_sysenter, - syscall32_sysenter_end - syscall32_sysenter); - } else { - memcpy(syscall32_page, syscall32_syscall, - syscall32_syscall_end - syscall32_syscall); - } + if (use_sysenter > 0) { + memcpy(syscall32_page, syscall32_sysenter, + syscall32_sysenter_end - syscall32_sysenter); + } else { + memcpy(syscall32_page, syscall32_syscall, + syscall32_syscall_end - syscall32_syscall); + } return 0; -} - -__initcall(init_syscall32); +} +__initcall(init_syscall32); /* May not be __init: called during resume */ void syscall32_cpu_init(void) { if (use_sysenter < 0) - use_sysenter = (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL); + use_sysenter = (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL); - /* Load these always in case some future AMD CPU supports - SYSENTER from compat mode too. */ + /* + * Load these always in case some future AMD CPU supports + * SYSENTER from compat mode too. + */ checking_wrmsrl(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); checking_wrmsrl(MSR_IA32_SYSENTER_ESP, 0ULL); checking_wrmsrl(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target); diff -puN arch/x86/kernel/Makefile_32~git-x86 arch/x86/kernel/Makefile_32 --- a/arch/x86/kernel/Makefile_32~git-x86 +++ a/arch/x86/kernel/Makefile_32 @@ -6,9 +6,9 @@ extra-y := head_32.o init_task.o vmlinux CPPFLAGS_vmlinux.lds += -Ui386 obj-y := process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \ - ptrace_32.o time_32.o ioport_32.o ldt_32.o setup_32.o i8259_32.o sys_i386_32.o \ + ptrace_32.o time_32.o ioport_32.o ldt.o setup_32.o i8259_32.o sys_i386_32.o \ pci-dma_32.o i386_ksyms_32.o i387_32.o bootflag.o e820_32.o\ - quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o + quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o rtc.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += cpu/ diff -puN arch/x86/kernel/Makefile_64~git-x86 arch/x86/kernel/Makefile_64 --- a/arch/x86/kernel/Makefile_64~git-x86 +++ a/arch/x86/kernel/Makefile_64 @@ -7,11 +7,11 @@ CPPFLAGS_vmlinux.lds += -Ux86_64 EXTRA_AFLAGS := -traditional obj-y := process_64.o signal_64.o entry_64.o traps_64.o irq_64.o \ - ptrace_64.o time_64.o ioport_64.o ldt_64.o setup_64.o i8259_64.o sys_x86_64.o \ + ptrace_64.o time_64.o ioport_64.o ldt.o setup_64.o i8259_64.o sys_x86_64.o \ x8664_ksyms_64.o i387_64.o syscall_64.o vsyscall_64.o \ setup64.o bootflag.o e820_64.o reboot_64.o quirks.o i8237.o \ pci-dma_64.o pci-nommu_64.o alternative.o hpet.o tsc_64.o bugs_64.o \ - i8253.o + i8253.o rtc.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += cpu/ diff -puN arch/x86/kernel/acpi/boot.c~git-x86 arch/x86/kernel/acpi/boot.c diff -puN arch/x86/kernel/acpi/sleep_64.c~git-x86 arch/x86/kernel/acpi/sleep_64.c diff -puN arch/x86/kernel/aperture_64.c~git-x86 arch/x86/kernel/aperture_64.c --- a/arch/x86/kernel/aperture_64.c~git-x86 +++ a/arch/x86/kernel/aperture_64.c @@ -1,12 +1,12 @@ -/* +/* * Firmware replacement code. - * + * * Work around broken BIOSes that don't set an aperture or only set the - * aperture in the AGP bridge. - * If all fails map the aperture over some low memory. This is cheaper than - * doing bounce buffering. The memory is lost. This is done at early boot - * because only the bootmem allocator can allocate 32+MB. - * + * aperture in the AGP bridge. + * If all fails map the aperture over some low memory. This is cheaper than + * doing bounce buffering. The memory is lost. This is done at early boot + * because only the bootmem allocator can allocate 32+MB. + * * Copyright 2002 Andi Kleen, SuSE Labs. */ #include @@ -30,7 +30,7 @@ int gart_iommu_aperture_disabled __initd int gart_iommu_aperture_allowed __initdata = 0; int fallback_aper_order __initdata = 1; /* 64MB */ -int fallback_aper_force __initdata = 0; +int fallback_aper_force __initdata = 0; int fix_aperture __initdata = 1; @@ -49,167 +49,180 @@ static void __init insert_aperture_resou /* This code runs before the PCI subsystem is initialized, so just access the northbridge directly. */ -static u32 __init allocate_aperture(void) +static u32 __init allocate_aperture(void) { u32 aper_size; - void *p; + void *p; - if (fallback_aper_order > 7) - fallback_aper_order = 7; - aper_size = (32 * 1024 * 1024) << fallback_aper_order; - - /* - * Aperture has to be naturally aligned. This means an 2GB aperture won't - * have much chance of finding a place in the lower 4GB of memory. - * Unfortunately we cannot move it up because that would make the - * IOMMU useless. + if (fallback_aper_order > 7) + fallback_aper_order = 7; + aper_size = (32 * 1024 * 1024) << fallback_aper_order; + + /* + * Aperture has to be naturally aligned. This means a 2GB aperture + * won't have much chance of finding a place in the lower 4GB of + * memory. Unfortunately we cannot move it up because that would + * make the IOMMU useless. */ p = __alloc_bootmem_nopanic(aper_size, aper_size, 0); if (!p || __pa(p)+aper_size > 0xffffffff) { - printk("Cannot allocate aperture memory hole (%p,%uK)\n", - p, aper_size>>10); + printk(KERN_ERR + "Cannot allocate aperture memory hole (%p,%uK)\n", + p, aper_size>>10); if (p) free_bootmem(__pa(p), aper_size); return 0; } - printk("Mapping aperture over %d KB of RAM @ %lx\n", - aper_size >> 10, __pa(p)); + printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n", + aper_size >> 10, __pa(p)); insert_aperture_resource((u32)__pa(p), aper_size); - return (u32)__pa(p); + + return (u32)__pa(p); } static int __init aperture_valid(u64 aper_base, u32 aper_size) -{ - if (!aper_base) +{ + if (!aper_base) return 0; - if (aper_size < 64*1024*1024) { - printk("Aperture too small (%d MB)\n", aper_size>>20); + + if (aper_size < 64*1024*1024) { + printk(KERN_ERR "Aperture too small (%d MB)\n", aper_size>>20); return 0; } if (aper_base + aper_size > 0x100000000UL) { - printk("Aperture beyond 4GB. Ignoring.\n"); - return 0; + printk(KERN_ERR "Aperture beyond 4GB. Ignoring.\n"); + return 0; } if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) { - printk("Aperture pointing to e820 RAM. Ignoring.\n"); - return 0; - } + printk(KERN_ERR "Aperture pointing to e820 RAM. Ignoring.\n"); + return 0; + } + return 1; -} +} /* Find a PCI capability */ -static __u32 __init find_cap(int num, int slot, int func, int cap) -{ - u8 pos; +static __u32 __init find_cap(int num, int slot, int func, int cap) +{ int bytes; - if (!(read_pci_config_16(num,slot,func,PCI_STATUS) & PCI_STATUS_CAP_LIST)) + u8 pos; + + if (!(read_pci_config_16(num, slot, func, PCI_STATUS) & + PCI_STATUS_CAP_LIST)) return 0; - pos = read_pci_config_byte(num,slot,func,PCI_CAPABILITY_LIST); - for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) { + + pos = read_pci_config_byte(num, slot, func, PCI_CAPABILITY_LIST); + for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) { u8 id; - pos &= ~3; - id = read_pci_config_byte(num,slot,func,pos+PCI_CAP_LIST_ID); + + pos &= ~3; + id = read_pci_config_byte(num, slot, func, pos+PCI_CAP_LIST_ID); if (id == 0xff) break; - if (id == cap) - return pos; - pos = read_pci_config_byte(num,slot,func,pos+PCI_CAP_LIST_NEXT); - } + if (id == cap) + return pos; + pos = read_pci_config_byte(num, slot, func, + pos+PCI_CAP_LIST_NEXT); + } return 0; -} +} /* Read a standard AGPv3 bridge header */ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order) -{ +{ u32 apsize; u32 apsizereg; int nbits; u32 aper_low, aper_hi; u64 aper; - printk("AGP bridge at %02x:%02x:%02x\n", num, slot, func); - apsizereg = read_pci_config_16(num,slot,func, cap + 0x14); + printk(KERN_INFO "AGP bridge at %02x:%02x:%02x\n", num, slot, func); + apsizereg = read_pci_config_16(num, slot, func, cap + 0x14); if (apsizereg == 0xffffffff) { - printk("APSIZE in AGP bridge unreadable\n"); + printk(KERN_ERR "APSIZE in AGP bridge unreadable\n"); return 0; } apsize = apsizereg & 0xfff; /* Some BIOS use weird encodings not in the AGPv3 table. */ - if (apsize & 0xff) - apsize |= 0xf00; + if (apsize & 0xff) + apsize |= 0xf00; nbits = hweight16(apsize); *order = 7 - nbits; if ((int)*order < 0) /* < 32MB */ *order = 0; - - aper_low = read_pci_config(num,slot,func, 0x10); - aper_hi = read_pci_config(num,slot,func,0x14); + + aper_low = read_pci_config(num, slot, func, 0x10); + aper_hi = read_pci_config(num, slot, func, 0x14); aper = (aper_low & ~((1<<22)-1)) | ((u64)aper_hi << 32); - printk("Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n", - aper, 32 << *order, apsizereg); + printk(KERN_INFO "Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n", + aper, 32 << *order, apsizereg); if (!aperture_valid(aper, (32*1024*1024) << *order)) - return 0; - return (u32)aper; -} - -/* Look for an AGP bridge. Windows only expects the aperture in the - AGP bridge and some BIOS forget to initialize the Northbridge too. - Work around this here. - - Do an PCI bus scan by hand because we're running before the PCI - subsystem. - - All K8 AGP bridges are AGPv3 compliant, so we can do this scan - generically. It's probably overkill to always scan all slots because - the AGP bridges should be always an own bus on the HT hierarchy, - but do it here for future safety. */ + return 0; + return (u32)aper; +} + +/* + * Look for an AGP bridge. Windows only expects the aperture in the + * AGP bridge and some BIOS forget to initialize the Northbridge too. + * Work around this here. + * + * Do an PCI bus scan by hand because we're running before the PCI + * subsystem. + * + * All K8 AGP bridges are AGPv3 compliant, so we can do this scan + * generically. It's probably overkill to always scan all slots because + * the AGP bridges should be always an own bus on the HT hierarchy, + * but do it here for future safety. + */ static __u32 __init search_agp_bridge(u32 *order, int *valid_agp) { int num, slot, func; /* Poor man's PCI discovery */ - for (num = 0; num < 256; num++) { - for (slot = 0; slot < 32; slot++) { - for (func = 0; func < 8; func++) { + for (num = 0; num < 256; num++) { + for (slot = 0; slot < 32; slot++) { + for (func = 0; func < 8; func++) { u32 class, cap; u8 type; - class = read_pci_config(num,slot,func, + class = read_pci_config(num, slot, func, PCI_CLASS_REVISION); if (class == 0xffffffff) - break; - - switch (class >> 16) { + break; + + switch (class >> 16) { case PCI_CLASS_BRIDGE_HOST: case PCI_CLASS_BRIDGE_OTHER: /* needed? */ /* AGP bridge? */ - cap = find_cap(num,slot,func,PCI_CAP_ID_AGP); + cap = find_cap(num, slot, func, + PCI_CAP_ID_AGP); if (!cap) break; - *valid_agp = 1; - return read_agp(num,slot,func,cap,order); - } - + *valid_agp = 1; + return read_agp(num, slot, func, cap, + order); + } + /* No multi-function device? */ - type = read_pci_config_byte(num,slot,func, + type = read_pci_config_byte(num, slot, func, PCI_HEADER_TYPE); if (!(type & 0x80)) break; - } - } + } + } } - printk("No AGP bridge found\n"); + printk(KERN_INFO "No AGP bridge found\n"); + return 0; } void __init gart_iommu_hole_init(void) -{ - int fix, num; +{ u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0; u64 aper_base, last_aper_base = 0; - int valid_agp = 0; + int fix, num, valid_agp = 0; if (gart_iommu_aperture_disabled || !fix_aperture || !early_pci_allowed()) @@ -218,24 +231,24 @@ void __init gart_iommu_hole_init(void) printk(KERN_INFO "Checking aperture...\n"); fix = 0; - for (num = 24; num < 32; num++) { + for (num = 24; num < 32; num++) { if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00))) continue; iommu_detected = 1; gart_iommu_aperture = 1; - aper_order = (read_pci_config(0, num, 3, 0x90) >> 1) & 7; - aper_size = (32 * 1024 * 1024) << aper_order; + aper_order = (read_pci_config(0, num, 3, 0x90) >> 1) & 7; + aper_size = (32 * 1024 * 1024) << aper_order; aper_base = read_pci_config(0, num, 3, 0x94) & 0x7fff; - aper_base <<= 25; + aper_base <<= 25; + + printk(KERN_INFO "CPU %d: aperture @ %Lx size %u MB\n", + num-24, aper_base, aper_size>>20); - printk("CPU %d: aperture @ %Lx size %u MB\n", num-24, - aper_base, aper_size>>20); - if (!aperture_valid(aper_base, aper_size)) { - fix = 1; - break; + fix = 1; + break; } if ((last_aper_order && aper_order != last_aper_order) || @@ -245,55 +258,64 @@ void __init gart_iommu_hole_init(void) } last_aper_order = aper_order; last_aper_base = aper_base; - } + } if (!fix && !fallback_aper_force) { if (last_aper_base) { unsigned long n = (32 * 1024 * 1024) << last_aper_order; + insert_aperture_resource((u32)last_aper_base, n); } - return; + return; } if (!fallback_aper_force) - aper_alloc = search_agp_bridge(&aper_order, &valid_agp); - - if (aper_alloc) { + aper_alloc = search_agp_bridge(&aper_order, &valid_agp); + + if (aper_alloc) { /* Got the aperture from the AGP bridge */ } else if (swiotlb && !valid_agp) { /* Do nothing */ } else if ((!no_iommu && end_pfn > MAX_DMA32_PFN) || force_iommu || valid_agp || - fallback_aper_force) { - printk("Your BIOS doesn't leave a aperture memory hole\n"); - printk("Please enable the IOMMU option in the BIOS setup\n"); - printk("This costs you %d MB of RAM\n", - 32 << fallback_aper_order); + fallback_aper_force) { + printk(KERN_ERR + "Your BIOS doesn't leave a aperture memory hole\n"); + printk(KERN_ERR + "Please enable the IOMMU option in the BIOS setup\n"); + printk(KERN_ERR + "This costs you %d MB of RAM\n", + 32 << fallback_aper_order); aper_order = fallback_aper_order; aper_alloc = allocate_aperture(); - if (!aper_alloc) { - /* Could disable AGP and IOMMU here, but it's probably - not worth it. But the later users cannot deal with - bad apertures and turning on the aperture over memory - causes very strange problems, so it's better to - panic early. */ + if (!aper_alloc) { + /* + * Could disable AGP and IOMMU here, but it's + * probably not worth it. But the later users + * cannot deal with bad apertures and turning + * on the aperture over memory causes very + * strange problems, so it's better to panic + * early. + */ panic("Not enough memory for aperture"); } - } else { - return; - } + } else { + return; + } /* Fix up the north bridges */ - for (num = 24; num < 32; num++) { + for (num = 24; num < 32; num++) { if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00))) - continue; + continue; - /* Don't enable translation yet. That is done later. - Assume this BIOS didn't initialise the GART so - just overwrite all previous bits */ - write_pci_config(0, num, 3, 0x90, aper_order<<1); - write_pci_config(0, num, 3, 0x94, aper_alloc>>25); - } -} + /* + * Don't enable translation yet. That is done later. + * Assume this BIOS didn't initialise the GART so + * just overwrite all previous bits + */ + write_pci_config(0, num, 3, 0x90, aper_order<<1); + write_pci_config(0, num, 3, 0x94, aper_alloc>>25); + } +} diff -puN arch/x86/kernel/apic_32.c~git-x86 arch/x86/kernel/apic_32.c --- a/arch/x86/kernel/apic_32.c~git-x86 +++ a/arch/x86/kernel/apic_32.c @@ -43,8 +43,6 @@ #include #include -#include "io_ports.h" - /* * Sanity check */ @@ -135,9 +133,9 @@ void apic_wait_icr_idle(void) cpu_relax(); } -unsigned long safe_apic_wait_icr_idle(void) +u32 safe_apic_wait_icr_idle(void) { - unsigned long send_status; + u32 send_status; int timeout; timeout = 0; @@ -563,6 +561,9 @@ static void local_apic_timer_interrupt(v return; } + /* + * the NMI deadlock-detector uses this. + */ per_cpu(irq_stat, cpu).apic_timer_irqs++; evt->event_handler(evt); @@ -617,7 +618,7 @@ int setup_profiling_timer(unsigned int m void clear_local_APIC(void) { int maxlvt = lapic_get_maxlvt(); - unsigned long v; + u32 v; /* * Masking an LVT entry can trigger a local APIC error @@ -1210,50 +1211,6 @@ int __init APIC_init_uniprocessor (void) } /* - * APIC command line parameters - */ -static int __init parse_lapic(char *arg) -{ - enable_local_apic = 1; - return 0; -} -early_param("lapic", parse_lapic); - -static int __init parse_nolapic(char *arg) -{ - enable_local_apic = -1; - clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); - return 0; -} -early_param("nolapic", parse_nolapic); - -static int __init parse_disable_lapic_timer(char *arg) -{ - local_apic_timer_disabled = 1; - return 0; -} -early_param("nolapic_timer", parse_disable_lapic_timer); - -static int __init parse_lapic_timer_c2_ok(char *arg) -{ - local_apic_timer_c2_ok = 1; - return 0; -} -early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); - -static int __init apic_set_verbosity(char *str) -{ - if (strcmp("debug", str) == 0) - apic_verbosity = APIC_DEBUG; - else if (strcmp("verbose", str) == 0) - apic_verbosity = APIC_VERBOSE; - return 1; -} - -__setup("apic=", apic_set_verbosity); - - -/* * Local APIC interrupts */ @@ -1565,3 +1522,46 @@ device_initcall(init_lapic_sysfs); static void apic_pm_activate(void) { } #endif /* CONFIG_PM */ + +/* + * APIC command line parameters + */ +static int __init parse_lapic(char *arg) +{ + enable_local_apic = 1; + return 0; +} +early_param("lapic", parse_lapic); + +static int __init parse_nolapic(char *arg) +{ + enable_local_apic = -1; + clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); + return 0; +} +early_param("nolapic", parse_nolapic); + +static int __init parse_disable_lapic_timer(char *arg) +{ + local_apic_timer_disabled = 1; + return 0; +} +early_param("nolapic_timer", parse_disable_lapic_timer); + +static int __init parse_lapic_timer_c2_ok(char *arg) +{ + local_apic_timer_c2_ok = 1; + return 0; +} +early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); + +static int __init apic_set_verbosity(char *str) +{ + if (strcmp("debug", str) == 0) + apic_verbosity = APIC_DEBUG; + else if (strcmp("verbose", str) == 0) + apic_verbosity = APIC_VERBOSE; + return 1; +} +__setup("apic=", apic_set_verbosity); + diff -puN arch/x86/kernel/apic_64.c~git-x86 arch/x86/kernel/apic_64.c --- a/arch/x86/kernel/apic_64.c~git-x86 +++ a/arch/x86/kernel/apic_64.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -43,12 +44,12 @@ int apic_verbosity; int disable_apic_timer __cpuinitdata; static int apic_calibrate_pmtmr __initdata; +int disable_apic; /* Local APIC timer works in C2? */ int local_apic_timer_c2_ok; EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); -static struct resource *ioapic_resources; static struct resource lapic_resource = { .name = "Local APIC", .flags = IORESOURCE_MEM | IORESOURCE_BUSY, @@ -60,10 +61,8 @@ static int lapic_next_event(unsigned lon struct clock_event_device *evt); static void lapic_timer_setup(enum clock_event_mode mode, struct clock_event_device *evt); - static void lapic_timer_broadcast(cpumask_t mask); - -static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen); +static void apic_pm_activate(void); static struct clock_event_device lapic_clockevent = { .name = "lapic", @@ -78,66 +77,43 @@ static struct clock_event_device lapic_c }; static DEFINE_PER_CPU(struct clock_event_device, lapic_events); -static int lapic_next_event(unsigned long delta, - struct clock_event_device *evt) +/* + * Get the LAPIC version + */ +static inline int lapic_get_version(void) { - apic_write(APIC_TMICT, delta); - return 0; + return GET_APIC_VERSION(apic_read(APIC_LVR)); } -static void lapic_timer_setup(enum clock_event_mode mode, - struct clock_event_device *evt) +/* + * Check, if the APIC is integrated or a seperate chip + */ +static inline int lapic_is_integrated(void) { - unsigned long flags; - unsigned int v; - - /* Lapic used as dummy for broadcast ? */ - if (evt->features & CLOCK_EVT_FEAT_DUMMY) - return; - - local_irq_save(flags); - - switch (mode) { - case CLOCK_EVT_MODE_PERIODIC: - case CLOCK_EVT_MODE_ONESHOT: - __setup_APIC_LVTT(calibration_result, - mode != CLOCK_EVT_MODE_PERIODIC, 1); - break; - case CLOCK_EVT_MODE_UNUSED: - case CLOCK_EVT_MODE_SHUTDOWN: - v = apic_read(APIC_LVTT); - v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); - apic_write(APIC_LVTT, v); - break; - case CLOCK_EVT_MODE_RESUME: - /* Nothing to do here */ - break; - } - - local_irq_restore(flags); + return 1; } /* - * Local APIC timer broadcast function + * Check, whether this is a modern or a first generation APIC */ -static void lapic_timer_broadcast(cpumask_t mask) +static int modern_apic(void) { -#ifdef CONFIG_SMP - send_IPI_mask(mask, LOCAL_TIMER_VECTOR); -#endif + /* AMD systems use old APIC versions, so check the CPU */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && + boot_cpu_data.x86 >= 0xf) + return 1; + return lapic_get_version() >= 0x14; } -static void apic_pm_activate(void); - void apic_wait_icr_idle(void) { while (apic_read(APIC_ICR) & APIC_ICR_BUSY) cpu_relax(); } -unsigned int safe_apic_wait_icr_idle(void) +u32 safe_apic_wait_icr_idle(void) { - unsigned int send_status; + u32 send_status; int timeout; timeout = 0; @@ -151,7 +127,10 @@ unsigned int safe_apic_wait_icr_idle(voi return send_status; } -void enable_NMI_through_LVT0 (void * dummy) +/** + * enable_NMI_through_LVT0 - enable NMI through local vector table 0 + */ +void enable_NMI_through_LVT0(void *dummy) { unsigned int v; @@ -160,7 +139,10 @@ void enable_NMI_through_LVT0 (void * dum apic_write(APIC_LVT0, v); } -int get_maxlvt(void) +/** + * lapic_get_maxlvt - get the maximum number of local vector table entries + */ +int lapic_get_maxlvt(void) { unsigned int v, maxlvt; @@ -170,203 +152,475 @@ int get_maxlvt(void) } /* - * 'what should we do if we get a hw irq event on an illegal vector'. - * each architecture has to answer this themselves. + * This function sets up the local APIC timer, with a timeout of + * 'clocks' APIC bus clock. During calibration we actually call + * this function twice on the boot CPU, once with a bogus timeout + * value, second time for real. The other (noncalibrating) CPUs + * call this function only once, with the real, calibrated value. + * + * We do reads before writes even if unnecessary, to get around the + * P5 APIC double write bug. */ -void ack_bad_irq(unsigned int irq) -{ - printk("unexpected IRQ trap at vector %02x\n", irq); - /* - * Currently unexpected vectors happen only on SMP and APIC. - * We _must_ ack these because every local APIC has only N - * irq slots per priority level, and a 'hanging, unacked' IRQ - * holds up an irq slot - in excessive cases (when multiple - * unexpected vectors occur) that might lock up the APIC - * completely. - * But don't ack when the APIC is disabled. -AK - */ - if (!disable_apic) - ack_APIC_irq(); -} -void clear_local_APIC(void) +static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) { - int maxlvt; - unsigned int v; + unsigned int lvtt_value, tmp_value; - maxlvt = get_maxlvt(); + lvtt_value = LOCAL_TIMER_VECTOR; + if (!oneshot) + lvtt_value |= APIC_LVT_TIMER_PERIODIC; + if (!irqen) + lvtt_value |= APIC_LVT_MASKED; - /* - * Masking an LVT entry can trigger a local APIC error - * if the vector is zero. Mask LVTERR first to prevent this. - */ - if (maxlvt >= 3) { - v = ERROR_APIC_VECTOR; /* any non-zero vector will do */ - apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); - } - /* - * Careful: we have to set masks only first to deassert - * any level-triggered sources. - */ - v = apic_read(APIC_LVTT); - apic_write(APIC_LVTT, v | APIC_LVT_MASKED); - v = apic_read(APIC_LVT0); - apic_write(APIC_LVT0, v | APIC_LVT_MASKED); - v = apic_read(APIC_LVT1); - apic_write(APIC_LVT1, v | APIC_LVT_MASKED); - if (maxlvt >= 4) { - v = apic_read(APIC_LVTPC); - apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); - } + apic_write(APIC_LVTT, lvtt_value); /* - * Clean APIC state for other OSs: + * Divide PICLK by 16 */ - apic_write(APIC_LVTT, APIC_LVT_MASKED); - apic_write(APIC_LVT0, APIC_LVT_MASKED); - apic_write(APIC_LVT1, APIC_LVT_MASKED); - if (maxlvt >= 3) - apic_write(APIC_LVTERR, APIC_LVT_MASKED); - if (maxlvt >= 4) - apic_write(APIC_LVTPC, APIC_LVT_MASKED); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); + tmp_value = apic_read(APIC_TDCR); + apic_write(APIC_TDCR, (tmp_value + & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) + | APIC_TDR_DIV_16); + + if (!oneshot) + apic_write(APIC_TMICT, clocks); } -void disconnect_bsp_APIC(int virt_wire_setup) +/* + * Setup extended LVT (K8 specific) + */ +void setup_APIC_extended_lvt(unsigned char lvt_off, unsigned char vector, + unsigned char msg_type, unsigned char mask) { - /* Go back to Virtual Wire compatibility mode */ - unsigned long value; - - /* For the spurious interrupt use vector F, and enable it */ - value = apic_read(APIC_SPIV); - value &= ~APIC_VECTOR_MASK; - value |= APIC_SPIV_APIC_ENABLED; - value |= 0xf; - apic_write(APIC_SPIV, value); - - if (!virt_wire_setup) { - /* - * For LVT0 make it edge triggered, active high, - * external and enabled - */ - value = apic_read(APIC_LVT0); - value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | - APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | - APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED ); - value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; - value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); - apic_write(APIC_LVT0, value); - } else { - /* Disable LVT0 */ - apic_write(APIC_LVT0, APIC_LVT_MASKED); - } + unsigned long reg = (lvt_off << 4) + K8_APIC_EXT_LVT_BASE; + unsigned int v = (mask << 16) | (msg_type << 8) | vector; - /* For LVT1 make it edge triggered, active high, nmi and enabled */ - value = apic_read(APIC_LVT1); - value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | - APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | - APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); - value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; - value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); - apic_write(APIC_LVT1, value); + apic_write(reg, v); } -void disable_local_APIC(void) +/* + * Program the next event, relative to now + */ +static int lapic_next_event(unsigned long delta, + struct clock_event_device *evt) { - unsigned int value; - - clear_local_APIC(); - - /* - * Disable APIC (implies clearing of registers - * for 82489DX!). - */ - value = apic_read(APIC_SPIV); - value &= ~APIC_SPIV_APIC_ENABLED; - apic_write(APIC_SPIV, value); + apic_write(APIC_TMICT, delta); + return 0; } -void lapic_shutdown(void) +/* + * Setup the lapic timer in periodic or oneshot mode + */ +static void lapic_timer_setup(enum clock_event_mode mode, + struct clock_event_device *evt) { unsigned long flags; + unsigned int v; - if (!cpu_has_apic) + /* Lapic used as dummy for broadcast ? */ + if (evt->features & CLOCK_EVT_FEAT_DUMMY) return; local_irq_save(flags); - disable_local_APIC(); + switch (mode) { + case CLOCK_EVT_MODE_PERIODIC: + case CLOCK_EVT_MODE_ONESHOT: + __setup_APIC_LVTT(calibration_result, + mode != CLOCK_EVT_MODE_PERIODIC, 1); + break; + case CLOCK_EVT_MODE_UNUSED: + case CLOCK_EVT_MODE_SHUTDOWN: + v = apic_read(APIC_LVTT); + v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); + apic_write(APIC_LVTT, v); + break; + case CLOCK_EVT_MODE_RESUME: + /* Nothing to do here */ + break; + } local_irq_restore(flags); } /* - * This is to verify that we're looking at a real local APIC. - * Check these against your board if the CPUs aren't getting - * started for no apparent reason. + * Local APIC timer broadcast function */ -int __init verify_local_APIC(void) +static void lapic_timer_broadcast(cpumask_t mask) { - unsigned int reg0, reg1; - - /* - * The version register is read-only in a real APIC. - */ - reg0 = apic_read(APIC_LVR); - apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0); - apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK); - reg1 = apic_read(APIC_LVR); - apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1); +#ifdef CONFIG_SMP + send_IPI_mask(mask, LOCAL_TIMER_VECTOR); +#endif +} - /* - * The two version reads above should print the same - * numbers. If the second one is different, then we - * poke at a non-APIC. - */ - if (reg1 != reg0) - return 0; +/* + * Setup the local APIC timer for this CPU. Copy the initilized values + * of the boot CPU and register the clock event in the framework. + */ +static void setup_APIC_timer(void) +{ + struct clock_event_device *levt = &__get_cpu_var(lapic_events); - /* - * Check if the version looks reasonably. - */ - reg1 = GET_APIC_VERSION(reg0); - if (reg1 == 0x00 || reg1 == 0xff) - return 0; - reg1 = get_maxlvt(); - if (reg1 < 0x02 || reg1 == 0xff) - return 0; + memcpy(levt, &lapic_clockevent, sizeof(*levt)); + levt->cpumask = cpumask_of_cpu(smp_processor_id()); - /* - * The ID register is read/write in a real APIC. - */ - reg0 = apic_read(APIC_ID); - apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); - apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); - reg1 = apic_read(APIC_ID); - apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); - apic_write(APIC_ID, reg0); - if (reg1 != (reg0 ^ APIC_ID_MASK)) - return 0; + clockevents_register_device(levt); +} - /* +/* + * In this function we calibrate APIC bus clocks to the external + * timer. Unfortunately we cannot use jiffies and the timer irq + * to calibrate, since some later bootup code depends on getting + * the first irq? Ugh. + * + * We want to do the calibration only once since we + * want to have local timer irqs syncron. CPUs connected + * by the same APIC bus have the very same bus frequency. + * And we want to have irqs off anyways, no accidental + * APIC irq that way. + */ + +#define TICK_COUNT 100000000 + +static void __init calibrate_APIC_clock(void) +{ + unsigned apic, apic_start; + unsigned long tsc, tsc_start; + int result; + + local_irq_disable(); + + /* + * Put whatever arbitrary (but long enough) timeout + * value into the APIC clock, we just want to get the + * counter running for calibration. + * + * No interrupt enable ! + */ + __setup_APIC_LVTT(250000000, 0, 0); + + apic_start = apic_read(APIC_TMCCT); +#ifdef CONFIG_X86_PM_TIMER + if (apic_calibrate_pmtmr && pmtmr_ioport) { + pmtimer_wait(5000); /* 5ms wait */ + apic = apic_read(APIC_TMCCT); + result = (apic_start - apic) * 1000L / 5; + } else +#endif + { + rdtscll(tsc_start); + + do { + apic = apic_read(APIC_TMCCT); + rdtscll(tsc); + } while ((tsc - tsc_start) < TICK_COUNT && + (apic_start - apic) < TICK_COUNT); + + result = (apic_start - apic) * 1000L * tsc_khz / + (tsc - tsc_start); + } + + local_irq_enable(); + + printk(KERN_DEBUG "APIC timer calibration result %d\n", result); + + printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n", + result / 1000 / 1000, result / 1000 % 1000); + + /* Calculate the scaled math multiplication factor */ + lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC, 32); + lapic_clockevent.max_delta_ns = + clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); + lapic_clockevent.min_delta_ns = + clockevent_delta2ns(0xF, &lapic_clockevent); + + calibration_result = result / HZ; +} + +void __init setup_boot_APIC_clock(void) +{ + /* + * The local apic timer can be disabled via the kernel commandline. + * Register the lapic timer as a dummy clock event source on SMP + * systems, so the broadcast mechanism is used. On UP systems simply + * ignore it. + */ + if (disable_apic_timer) { + printk(KERN_INFO "Disabling APIC timer\n"); + /* No broadcast on UP ! */ + if (num_possible_cpus() > 1) + setup_APIC_timer(); + return; + } + + printk(KERN_INFO "Using local APIC timer interrupts.\n"); + calibrate_APIC_clock(); + + /* + * If nmi_watchdog is set to IO_APIC, we need the + * PIT/HPET going. Otherwise register lapic as a dummy + * device. + */ + if (nmi_watchdog != NMI_IO_APIC) + lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; + else + printk(KERN_WARNING "APIC timer registered as dummy," + " due to nmi_watchdog=1!\n"); + + setup_APIC_timer(); +} + +/* + * AMD C1E enabled CPUs have a real nasty problem: Some BIOSes set the + * C1E flag only in the secondary CPU, so when we detect the wreckage + * we already have enabled the boot CPU local apic timer. Check, if + * disable_apic_timer is set and the DUMMY flag is cleared. If yes, + * set the DUMMY flag again and force the broadcast mode in the + * clockevents layer. + */ +void __cpuinit check_boot_apic_timer_broadcast(void) +{ + if (!disable_apic_timer || + (lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY)) + return; + + printk(KERN_INFO "AMD C1E detected late. Force timer broadcast.\n"); + lapic_clockevent.features |= CLOCK_EVT_FEAT_DUMMY; + + local_irq_enable(); + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, &boot_cpu_id); + local_irq_disable(); +} + +void __cpuinit setup_secondary_APIC_clock(void) +{ + check_boot_apic_timer_broadcast(); + setup_APIC_timer(); +} + +/* + * The guts of the apic timer interrupt + */ +static void local_apic_timer_interrupt(void) +{ + int cpu = smp_processor_id(); + struct clock_event_device *evt = &per_cpu(lapic_events, cpu); + + /* + * Normally we should not be here till LAPIC has been initialized but + * in some cases like kdump, its possible that there is a pending LAPIC + * timer interrupt from previous kernel's context and is delivered in + * new kernel the moment interrupts are enabled. + * + * Interrupts are enabled early and LAPIC is setup much later, hence + * its possible that when we get here evt->event_handler is NULL. + * Check for event_handler being NULL and discard the interrupt as + * spurious. + */ + if (!evt->event_handler) { + printk(KERN_WARNING + "Spurious LAPIC timer interrupt on cpu %d\n", cpu); + /* Switch it off */ + lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt); + return; + } + + /* + * the NMI deadlock-detector uses this. + */ + add_pda(apic_timer_irqs, 1); + + evt->event_handler(evt); +} + +/* + * Local APIC timer interrupt. This is the most natural way for doing + * local interrupts, but local timer interrupts can be emulated by + * broadcast interrupts too. [in case the hw doesn't support APIC timers] + * + * [ if a single-CPU system runs an SMP kernel then we call the local + * interrupt as well. Thus we cannot inline the local irq ... ] + */ +void smp_apic_timer_interrupt(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + /* + * NOTE! We'd better ACK the irq immediately, + * because timer handling can be slow. + */ + ack_APIC_irq(); + /* + * update_process_times() expects us to have done irq_enter(). + * Besides, if we don't timer interrupts ignore the global + * interrupt lock, which is the WrongThing (tm) to do. + */ + exit_idle(); + irq_enter(); + local_apic_timer_interrupt(); + irq_exit(); + set_irq_regs(old_regs); +} + +int setup_profiling_timer(unsigned int multiplier) +{ + return -EINVAL; +} + + +/* + * Local APIC start and shutdown + */ + +/** + * clear_local_APIC - shutdown the local APIC + * + * This is called, when a CPU is disabled and before rebooting, so the state of + * the local APIC has no dangling leftovers. Also used to cleanout any BIOS + * leftovers during boot. + */ +void clear_local_APIC(void) +{ + int maxlvt = lapic_get_maxlvt(); + u32 v; + + /* + * Masking an LVT entry can trigger a local APIC error + * if the vector is zero. Mask LVTERR first to prevent this. + */ + if (maxlvt >= 3) { + v = ERROR_APIC_VECTOR; /* any non-zero vector will do */ + apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); + } + /* + * Careful: we have to set masks only first to deassert + * any level-triggered sources. + */ + v = apic_read(APIC_LVTT); + apic_write(APIC_LVTT, v | APIC_LVT_MASKED); + v = apic_read(APIC_LVT0); + apic_write(APIC_LVT0, v | APIC_LVT_MASKED); + v = apic_read(APIC_LVT1); + apic_write(APIC_LVT1, v | APIC_LVT_MASKED); + if (maxlvt >= 4) { + v = apic_read(APIC_LVTPC); + apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); + } + + /* + * Clean APIC state for other OSs: + */ + apic_write(APIC_LVTT, APIC_LVT_MASKED); + apic_write(APIC_LVT0, APIC_LVT_MASKED); + apic_write(APIC_LVT1, APIC_LVT_MASKED); + if (maxlvt >= 3) + apic_write(APIC_LVTERR, APIC_LVT_MASKED); + if (maxlvt >= 4) + apic_write(APIC_LVTPC, APIC_LVT_MASKED); + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); +} + +/** + * disable_local_APIC - clear and disable the local APIC + */ +void disable_local_APIC(void) +{ + unsigned int value; + + clear_local_APIC(); + + /* + * Disable APIC (implies clearing of registers + * for 82489DX!). + */ + value = apic_read(APIC_SPIV); + value &= ~APIC_SPIV_APIC_ENABLED; + apic_write(APIC_SPIV, value); +} + +void lapic_shutdown(void) +{ + unsigned long flags; + + if (!cpu_has_apic) + return; + + local_irq_save(flags); + + disable_local_APIC(); + + local_irq_restore(flags); +} + +/* + * This is to verify that we're looking at a real local APIC. + * Check these against your board if the CPUs aren't getting + * started for no apparent reason. + */ +int __init verify_local_APIC(void) +{ + unsigned int reg0, reg1; + + /* + * The version register is read-only in a real APIC. + */ + reg0 = apic_read(APIC_LVR); + apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0); + apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK); + reg1 = apic_read(APIC_LVR); + apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1); + + /* + * The two version reads above should print the same + * numbers. If the second one is different, then we + * poke at a non-APIC. + */ + if (reg1 != reg0) + return 0; + + /* + * Check if the version looks reasonably. + */ + reg1 = GET_APIC_VERSION(reg0); + if (reg1 == 0x00 || reg1 == 0xff) + return 0; + reg1 = lapic_get_maxlvt(); + if (reg1 < 0x02 || reg1 == 0xff) + return 0; + + /* + * The ID register is read/write in a real APIC. + */ + reg0 = apic_read(APIC_ID); + apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); + apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); + reg1 = apic_read(APIC_ID); + apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); + apic_write(APIC_ID, reg0); + if (reg1 != (reg0 ^ APIC_ID_MASK)) + return 0; + + /* * The next two are just to see if we have sane values. * They're only really relevant if we're in Virtual Wire * compatibility mode, but most boxes are anymore. */ reg0 = apic_read(APIC_LVT0); - apic_printk(APIC_DEBUG,"Getting LVT0: %x\n", reg0); + apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0); reg1 = apic_read(APIC_LVT1); apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1); return 1; } +/** + * sync_Arb_IDs - synchronize APIC bus arbitration IDs + */ void __init sync_Arb_IDs(void) { /* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 */ - unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); - if (ver >= 0x14) /* P4 or higher */ + if (modern_apic()) return; /* @@ -418,9 +672,12 @@ void __init init_bsp_APIC(void) apic_write(APIC_LVT1, value); } -void __cpuinit setup_local_APIC (void) +/** + * setup_local_APIC - setup the local APIC + */ +void __cpuinit setup_local_APIC(void) { - unsigned int value, maxlvt; + unsigned int value; int i, j; value = apic_read(APIC_LVR); @@ -516,183 +773,27 @@ void __cpuinit setup_local_APIC (void) else value = APIC_DM_NMI | APIC_LVT_MASKED; apic_write(APIC_LVT1, value); - - { - unsigned oldvalue; - maxlvt = get_maxlvt(); - oldvalue = apic_read(APIC_ESR); - value = ERROR_APIC_VECTOR; // enables sending errors - apic_write(APIC_LVTERR, value); - /* - * spec says clear errors after enabling vector. - */ - if (maxlvt > 3) - apic_write(APIC_ESR, 0); - value = apic_read(APIC_ESR); - if (value != oldvalue) - apic_printk(APIC_VERBOSE, - "ESR value after enabling vector: %08x, after %08x\n", - oldvalue, value); - } - - nmi_watchdog_default(); - setup_apic_nmi_watchdog(NULL); - apic_pm_activate(); } -#ifdef CONFIG_PM - -static struct { - /* 'active' is true if the local APIC was enabled by us and - not the BIOS; this signifies that we are also responsible - for disabling it before entering apm/acpi suspend */ - int active; - /* r/w apic fields */ - unsigned int apic_id; - unsigned int apic_taskpri; - unsigned int apic_ldr; - unsigned int apic_dfr; - unsigned int apic_spiv; - unsigned int apic_lvtt; - unsigned int apic_lvtpc; - unsigned int apic_lvt0; - unsigned int apic_lvt1; - unsigned int apic_lvterr; - unsigned int apic_tmict; - unsigned int apic_tdcr; - unsigned int apic_thmr; -} apic_pm_state; - -static int lapic_suspend(struct sys_device *dev, pm_message_t state) +void __cpuinit lapic_setup_esr(void) { - unsigned long flags; - int maxlvt; - - if (!apic_pm_state.active) - return 0; + unsigned maxlvt = lapic_get_maxlvt(); - maxlvt = get_maxlvt(); - - apic_pm_state.apic_id = apic_read(APIC_ID); - apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); - apic_pm_state.apic_ldr = apic_read(APIC_LDR); - apic_pm_state.apic_dfr = apic_read(APIC_DFR); - apic_pm_state.apic_spiv = apic_read(APIC_SPIV); - apic_pm_state.apic_lvtt = apic_read(APIC_LVTT); - if (maxlvt >= 4) - apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); - apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); - apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); - apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); - apic_pm_state.apic_tmict = apic_read(APIC_TMICT); - apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); -#ifdef CONFIG_X86_MCE_INTEL - if (maxlvt >= 5) - apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); -#endif - local_irq_save(flags); - disable_local_APIC(); - local_irq_restore(flags); - return 0; -} - -static int lapic_resume(struct sys_device *dev) -{ - unsigned int l, h; - unsigned long flags; - int maxlvt; - - if (!apic_pm_state.active) - return 0; - - maxlvt = get_maxlvt(); - - local_irq_save(flags); - rdmsr(MSR_IA32_APICBASE, l, h); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; - wrmsr(MSR_IA32_APICBASE, l, h); - apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); - apic_write(APIC_ID, apic_pm_state.apic_id); - apic_write(APIC_DFR, apic_pm_state.apic_dfr); - apic_write(APIC_LDR, apic_pm_state.apic_ldr); - apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri); - apic_write(APIC_SPIV, apic_pm_state.apic_spiv); - apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); - apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); -#ifdef CONFIG_X86_MCE_INTEL - if (maxlvt >= 5) - apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); -#endif - if (maxlvt >= 4) - apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); - apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); - apic_write(APIC_TDCR, apic_pm_state.apic_tdcr); - apic_write(APIC_TMICT, apic_pm_state.apic_tmict); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - local_irq_restore(flags); - return 0; -} - -static struct sysdev_class lapic_sysclass = { - set_kset_name("lapic"), - .resume = lapic_resume, - .suspend = lapic_suspend, -}; - -static struct sys_device device_lapic = { - .id = 0, - .cls = &lapic_sysclass, -}; - -static void __cpuinit apic_pm_activate(void) -{ - apic_pm_state.active = 1; -} - -static int __init init_lapic_sysfs(void) -{ - int error; - if (!cpu_has_apic) - return 0; - /* XXX: remove suspend/resume procs if !apic_pm_state.active? */ - error = sysdev_class_register(&lapic_sysclass); - if (!error) - error = sysdev_register(&device_lapic); - return error; + apic_write(APIC_LVTERR, ERROR_APIC_VECTOR); + /* + * spec says clear errors after enabling vector. + */ + if (maxlvt > 3) + apic_write(APIC_ESR, 0); } -device_initcall(init_lapic_sysfs); - -#else /* CONFIG_PM */ -static void apic_pm_activate(void) { } - -#endif /* CONFIG_PM */ - -static int __init apic_set_verbosity(char *str) +void __cpuinit end_local_APIC_setup(void) { - if (str == NULL) { - skip_ioapic_setup = 0; - ioapic_force = 1; - return 0; - } - if (strcmp("debug", str) == 0) - apic_verbosity = APIC_DEBUG; - else if (strcmp("verbose", str) == 0) - apic_verbosity = APIC_VERBOSE; - else { - printk(KERN_WARNING "APIC Verbosity level %s not recognised" - " use apic=verbose or apic=debug\n", str); - return -EINVAL; - } - - return 0; + lapic_setup_esr(); + nmi_watchdog_default(); + setup_apic_nmi_watchdog(NULL); + apic_pm_activate(); } -early_param("apic", apic_set_verbosity); /* * Detect and enable local APICs on non-SMP boards. @@ -700,77 +801,21 @@ early_param("apic", apic_set_verbosity); * On AMD64 we trust the BIOS - if it says no APIC it is likely * not correctly set up (usually the APIC timer won't work etc.) */ - -static int __init detect_init_APIC (void) +static int __init detect_init_APIC(void) { if (!cpu_has_apic) { printk(KERN_INFO "No local APIC present\n"); - return -1; - } - - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; - boot_cpu_id = 0; - return 0; -} - -#ifdef CONFIG_X86_IO_APIC -static struct resource * __init ioapic_setup_resources(void) -{ -#define IOAPIC_RESOURCE_NAME_SIZE 11 - unsigned long n; - struct resource *res; - char *mem; - int i; - - if (nr_ioapics <= 0) - return NULL; - - n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); - n *= nr_ioapics; - - mem = alloc_bootmem(n); - res = (void *)mem; - - if (mem != NULL) { - memset(mem, 0, n); - mem += sizeof(struct resource) * nr_ioapics; - - for (i = 0; i < nr_ioapics; i++) { - res[i].name = mem; - res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; - sprintf(mem, "IOAPIC %u", i); - mem += IOAPIC_RESOURCE_NAME_SIZE; - } - } - - ioapic_resources = res; - - return res; -} - -static int __init ioapic_insert_resources(void) -{ - int i; - struct resource *r = ioapic_resources; - - if (!r) { - printk("IO APIC resources could be not be allocated.\n"); - return -1; - } - - for (i = 0; i < nr_ioapics; i++) { - insert_resource(&iomem_resource, r); - r++; + return -1; } + mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; + boot_cpu_id = 0; return 0; } -/* Insert the IO APIC resources after PCI initialization has occured to handle - * IO APICS that are mapped in on a BAR in PCI space. */ -late_initcall(ioapic_insert_resources); -#endif - +/** + * init_apic_mappings - initialize APIC mappings + */ void __init init_apic_mappings(void) { unsigned long apic_phys; @@ -800,295 +845,279 @@ void __init init_apic_mappings(void) * default configuration (or the MP table is broken). */ boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID)); - - { - unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; - int i; - struct resource *ioapic_res; - - ioapic_res = ioapic_setup_resources(); - for (i = 0; i < nr_ioapics; i++) { - if (smp_found_config) { - ioapic_phys = mp_ioapics[i].mpc_apicaddr; - } else { - ioapic_phys = (unsigned long) - alloc_bootmem_pages(PAGE_SIZE); - ioapic_phys = __pa(ioapic_phys); - } - set_fixmap_nocache(idx, ioapic_phys); - apic_printk(APIC_VERBOSE, - "mapped IOAPIC to %016lx (%016lx)\n", - __fix_to_virt(idx), ioapic_phys); - idx++; - - if (ioapic_res != NULL) { - ioapic_res->start = ioapic_phys; - ioapic_res->end = ioapic_phys + (4 * 1024) - 1; - ioapic_res++; - } - } - } } /* - * This function sets up the local APIC timer, with a timeout of - * 'clocks' APIC bus clock. During calibration we actually call - * this function twice on the boot CPU, once with a bogus timeout - * value, second time for real. The other (noncalibrating) CPUs - * call this function only once, with the real, calibrated value. - * - * We do reads before writes even if unnecessary, to get around the - * P5 APIC double write bug. + * This initializes the IO-APIC and APIC hardware if this is + * a UP kernel. */ - -static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) +int __init APIC_init_uniprocessor(void) { - unsigned int lvtt_value, tmp_value; + if (disable_apic) { + printk(KERN_INFO "Apic disabled\n"); + return -1; + } + if (!cpu_has_apic) { + disable_apic = 1; + printk(KERN_INFO "Apic disabled by BIOS\n"); + return -1; + } - lvtt_value = LOCAL_TIMER_VECTOR; - if (!oneshot) - lvtt_value |= APIC_LVT_TIMER_PERIODIC; - if (!irqen) - lvtt_value |= APIC_LVT_MASKED; + verify_local_APIC(); - apic_write(APIC_LVTT, lvtt_value); + phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id); + apic_write(APIC_ID, SET_APIC_ID(boot_cpu_id)); + + setup_local_APIC(); /* - * Divide PICLK by 16 + * Now enable IO-APICs, actually call clear_IO_APIC + * We need clear_IO_APIC before enabling vector on BP */ - tmp_value = apic_read(APIC_TDCR); - apic_write(APIC_TDCR, (tmp_value - & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) - | APIC_TDR_DIV_16); + if (!skip_ioapic_setup && nr_ioapics) + enable_IO_APIC(); - if (!oneshot) - apic_write(APIC_TMICT, clocks); -} - -static void setup_APIC_timer(void) -{ - struct clock_event_device *levt = &__get_cpu_var(lapic_events); - - memcpy(levt, &lapic_clockevent, sizeof(*levt)); - levt->cpumask = cpumask_of_cpu(smp_processor_id()); + end_local_APIC_setup(); - clockevents_register_device(levt); + if (smp_found_config && !skip_ioapic_setup && nr_ioapics) + setup_IO_APIC(); + else + nr_ioapics = 0; + setup_boot_APIC_clock(); + check_nmi_watchdog(); + return 0; } /* - * In this function we calibrate APIC bus clocks to the external - * timer. Unfortunately we cannot use jiffies and the timer irq - * to calibrate, since some later bootup code depends on getting - * the first irq? Ugh. - * - * We want to do the calibration only once since we - * want to have local timer irqs syncron. CPUs connected - * by the same APIC bus have the very same bus frequency. - * And we want to have irqs off anyways, no accidental - * APIC irq that way. + * Local APIC interrupts */ -#define TICK_COUNT 100000000 - -static void __init calibrate_APIC_clock(void) +/* + * This interrupt should _never_ happen with our APIC/SMP architecture + */ +asmlinkage void smp_spurious_interrupt(void) { - unsigned apic, apic_start; - unsigned long tsc, tsc_start; - int result; - - local_irq_disable(); - + unsigned int v; + exit_idle(); + irq_enter(); /* - * Put whatever arbitrary (but long enough) timeout - * value into the APIC clock, we just want to get the - * counter running for calibration. - * - * No interrupt enable ! + * Check if this really is a spurious interrupt and ACK it + * if it is a vectored one. Just in case... + * Spurious interrupts should not be ACKed. */ - __setup_APIC_LVTT(250000000, 0, 0); - - apic_start = apic_read(APIC_TMCCT); -#ifdef CONFIG_X86_PM_TIMER - if (apic_calibrate_pmtmr && pmtmr_ioport) { - pmtimer_wait(5000); /* 5ms wait */ - apic = apic_read(APIC_TMCCT); - result = (apic_start - apic) * 1000L / 5; - } else -#endif - { - rdtscll(tsc_start); - - do { - apic = apic_read(APIC_TMCCT); - rdtscll(tsc); - } while ((tsc - tsc_start) < TICK_COUNT && - (apic_start - apic) < TICK_COUNT); - - result = (apic_start - apic) * 1000L * tsc_khz / - (tsc - tsc_start); - } - - local_irq_enable(); + v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1)); + if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) + ack_APIC_irq(); - printk(KERN_DEBUG "APIC timer calibration result %d\n", result); + add_pda(irq_spurious_count, 1); + irq_exit(); +} - printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n", - result / 1000 / 1000, result / 1000 % 1000); +/* + * This interrupt should never happen with our APIC/SMP architecture + */ +asmlinkage void smp_error_interrupt(void) +{ + unsigned int v, v1; - /* Calculate the scaled math multiplication factor */ - lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC, 32); - lapic_clockevent.max_delta_ns = - clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); - lapic_clockevent.min_delta_ns = - clockevent_delta2ns(0xF, &lapic_clockevent); + exit_idle(); + irq_enter(); + /* First tickle the hardware, only then report what went on. -- REW */ + v = apic_read(APIC_ESR); + apic_write(APIC_ESR, 0); + v1 = apic_read(APIC_ESR); + ack_APIC_irq(); + atomic_inc(&irq_err_count); - calibration_result = result / HZ; + /* Here is what the APIC error bits mean: + 0: Send CS error + 1: Receive CS error + 2: Send accept error + 3: Receive accept error + 4: Reserved + 5: Send illegal vector + 6: Received illegal vector + 7: Illegal register address + */ + printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n", + smp_processor_id(), v , v1); + irq_exit(); } -void __init setup_boot_APIC_clock (void) +void disconnect_bsp_APIC(int virt_wire_setup) { - /* - * The local apic timer can be disabled via the kernel commandline. - * Register the lapic timer as a dummy clock event source on SMP - * systems, so the broadcast mechanism is used. On UP systems simply - * ignore it. - */ - if (disable_apic_timer) { - printk(KERN_INFO "Disabling APIC timer\n"); - /* No broadcast on UP ! */ - if (num_possible_cpus() > 1) - setup_APIC_timer(); - return; - } + /* Go back to Virtual Wire compatibility mode */ + unsigned long value; - printk(KERN_INFO "Using local APIC timer interrupts.\n"); - calibrate_APIC_clock(); + /* For the spurious interrupt use vector F, and enable it */ + value = apic_read(APIC_SPIV); + value &= ~APIC_VECTOR_MASK; + value |= APIC_SPIV_APIC_ENABLED; + value |= 0xf; + apic_write(APIC_SPIV, value); - /* - * If nmi_watchdog is set to IO_APIC, we need the - * PIT/HPET going. Otherwise register lapic as a dummy - * device. - */ - if (nmi_watchdog != NMI_IO_APIC) - lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; - else - printk(KERN_WARNING "APIC timer registered as dummy," - " due to nmi_watchdog=1!\n"); + if (!virt_wire_setup) { + /* + * For LVT0 make it edge triggered, active high, + * external and enabled + */ + value = apic_read(APIC_LVT0); + value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | + APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | + APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); + value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; + value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); + apic_write(APIC_LVT0, value); + } else { + /* Disable LVT0 */ + apic_write(APIC_LVT0, APIC_LVT_MASKED); + } - setup_APIC_timer(); + /* For LVT1 make it edge triggered, active high, nmi and enabled */ + value = apic_read(APIC_LVT1); + value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | + APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | + APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); + value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; + value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); + apic_write(APIC_LVT1, value); } /* - * AMD C1E enabled CPUs have a real nasty problem: Some BIOSes set the - * C1E flag only in the secondary CPU, so when we detect the wreckage - * we already have enabled the boot CPU local apic timer. Check, if - * disable_apic_timer is set and the DUMMY flag is cleared. If yes, - * set the DUMMY flag again and force the broadcast mode in the - * clockevents layer. + * Power management */ -void __cpuinit check_boot_apic_timer_broadcast(void) +#ifdef CONFIG_PM + +static struct { + /* 'active' is true if the local APIC was enabled by us and + not the BIOS; this signifies that we are also responsible + for disabling it before entering apm/acpi suspend */ + int active; + /* r/w apic fields */ + unsigned int apic_id; + unsigned int apic_taskpri; + unsigned int apic_ldr; + unsigned int apic_dfr; + unsigned int apic_spiv; + unsigned int apic_lvtt; + unsigned int apic_lvtpc; + unsigned int apic_lvt0; + unsigned int apic_lvt1; + unsigned int apic_lvterr; + unsigned int apic_tmict; + unsigned int apic_tdcr; + unsigned int apic_thmr; +} apic_pm_state; + +static int lapic_suspend(struct sys_device *dev, pm_message_t state) { - if (!disable_apic_timer || - (lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY)) - return; + unsigned long flags; + int maxlvt; - printk(KERN_INFO "AMD C1E detected late. Force timer broadcast.\n"); - lapic_clockevent.features |= CLOCK_EVT_FEAT_DUMMY; + if (!apic_pm_state.active) + return 0; - local_irq_enable(); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, &boot_cpu_id); - local_irq_disable(); -} + maxlvt = lapic_get_maxlvt(); -void __cpuinit setup_secondary_APIC_clock(void) -{ - check_boot_apic_timer_broadcast(); - setup_APIC_timer(); + apic_pm_state.apic_id = apic_read(APIC_ID); + apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); + apic_pm_state.apic_ldr = apic_read(APIC_LDR); + apic_pm_state.apic_dfr = apic_read(APIC_DFR); + apic_pm_state.apic_spiv = apic_read(APIC_SPIV); + apic_pm_state.apic_lvtt = apic_read(APIC_LVTT); + if (maxlvt >= 4) + apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); + apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); + apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); + apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); + apic_pm_state.apic_tmict = apic_read(APIC_TMICT); + apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); +#ifdef CONFIG_X86_MCE_INTEL + if (maxlvt >= 5) + apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); +#endif + local_irq_save(flags); + disable_local_APIC(); + local_irq_restore(flags); + return 0; } -int setup_profiling_timer(unsigned int multiplier) +static int lapic_resume(struct sys_device *dev) { - return -EINVAL; -} + unsigned int l, h; + unsigned long flags; + int maxlvt; -void setup_APIC_extended_lvt(unsigned char lvt_off, unsigned char vector, - unsigned char msg_type, unsigned char mask) -{ - unsigned long reg = (lvt_off << 4) + K8_APIC_EXT_LVT_BASE; - unsigned int v = (mask << 16) | (msg_type << 8) | vector; - apic_write(reg, v); -} + if (!apic_pm_state.active) + return 0; -/* - * Local timer interrupt handler. It does both profiling and - * process statistics/rescheduling. - * - * We do profiling in every local tick, statistics/rescheduling - * happen only every 'profiling multiplier' ticks. The default - * multiplier is 1 and it can be changed by writing the new multiplier - * value into /proc/profile. - */ + maxlvt = lapic_get_maxlvt(); -void smp_local_timer_interrupt(void) -{ - int cpu = smp_processor_id(); - struct clock_event_device *evt = &per_cpu(lapic_events, cpu); + local_irq_save(flags); + rdmsr(MSR_IA32_APICBASE, l, h); + l &= ~MSR_IA32_APICBASE_BASE; + l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; + wrmsr(MSR_IA32_APICBASE, l, h); + apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); + apic_write(APIC_ID, apic_pm_state.apic_id); + apic_write(APIC_DFR, apic_pm_state.apic_dfr); + apic_write(APIC_LDR, apic_pm_state.apic_ldr); + apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri); + apic_write(APIC_SPIV, apic_pm_state.apic_spiv); + apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); + apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); +#ifdef CONFIG_X86_MCE_INTEL + if (maxlvt >= 5) + apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); +#endif + if (maxlvt >= 4) + apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); + apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); + apic_write(APIC_TDCR, apic_pm_state.apic_tdcr); + apic_write(APIC_TMICT, apic_pm_state.apic_tmict); + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); + apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); + local_irq_restore(flags); + return 0; +} - /* - * Normally we should not be here till LAPIC has been initialized but - * in some cases like kdump, its possible that there is a pending LAPIC - * timer interrupt from previous kernel's context and is delivered in - * new kernel the moment interrupts are enabled. - * - * Interrupts are enabled early and LAPIC is setup much later, hence - * its possible that when we get here evt->event_handler is NULL. - * Check for event_handler being NULL and discard the interrupt as - * spurious. - */ - if (!evt->event_handler) { - printk(KERN_WARNING - "Spurious LAPIC timer interrupt on cpu %d\n", cpu); - /* Switch it off */ - lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt); - return; - } +static struct sysdev_class lapic_sysclass = { + set_kset_name("lapic"), + .resume = lapic_resume, + .suspend = lapic_suspend, +}; - /* - * the NMI deadlock-detector uses this. - */ - add_pda(apic_timer_irqs, 1); +static struct sys_device device_lapic = { + .id = 0, + .cls = &lapic_sysclass, +}; - evt->event_handler(evt); +static void __cpuinit apic_pm_activate(void) +{ + apic_pm_state.active = 1; } -/* - * Local APIC timer interrupt. This is the most natural way for doing - * local interrupts, but local timer interrupts can be emulated by - * broadcast interrupts too. [in case the hw doesn't support APIC timers] - * - * [ if a single-CPU system runs an SMP kernel then we call the local - * interrupt as well. Thus we cannot inline the local irq ... ] - */ -void smp_apic_timer_interrupt(struct pt_regs *regs) +static int __init init_lapic_sysfs(void) { - struct pt_regs *old_regs = set_irq_regs(regs); - - /* - * NOTE! We'd better ACK the irq immediately, - * because timer handling can be slow. - */ - ack_APIC_irq(); - /* - * update_process_times() expects us to have done irq_enter(). - * Besides, if we don't timer interrupts ignore the global - * interrupt lock, which is the WrongThing (tm) to do. - */ - exit_idle(); - irq_enter(); - smp_local_timer_interrupt(); - irq_exit(); - set_irq_regs(old_regs); + int error; + if (!cpu_has_apic) + return 0; + /* XXX: remove suspend/resume procs if !apic_pm_state.active? */ + error = sysdev_class_register(&lapic_sysclass); + if (!error) + error = sysdev_register(&device_lapic); + return error; } +device_initcall(init_lapic_sysfs); + +#else /* CONFIG_PM */ + +static void apic_pm_activate(void) { } + +#endif /* CONFIG_PM */ /* * apic_is_clustered_box() -- Check if we can expect good TSC @@ -1138,91 +1167,28 @@ __cpuinit int apic_is_clustered_box(void } /* - * This interrupt should _never_ happen with our APIC/SMP architecture - */ -asmlinkage void smp_spurious_interrupt(void) -{ - unsigned int v; - exit_idle(); - irq_enter(); - /* - * Check if this really is a spurious interrupt and ACK it - * if it is a vectored one. Just in case... - * Spurious interrupts should not be ACKed. - */ - v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1)); - if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) - ack_APIC_irq(); - - add_pda(irq_spurious_count, 1); - irq_exit(); -} - -/* - * This interrupt should never happen with our APIC/SMP architecture - */ - -asmlinkage void smp_error_interrupt(void) -{ - unsigned int v, v1; - - exit_idle(); - irq_enter(); - /* First tickle the hardware, only then report what went on. -- REW */ - v = apic_read(APIC_ESR); - apic_write(APIC_ESR, 0); - v1 = apic_read(APIC_ESR); - ack_APIC_irq(); - atomic_inc(&irq_err_count); - - /* Here is what the APIC error bits mean: - 0: Send CS error - 1: Receive CS error - 2: Send accept error - 3: Receive accept error - 4: Reserved - 5: Send illegal vector - 6: Received illegal vector - 7: Illegal register address - */ - printk (KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n", - smp_processor_id(), v , v1); - irq_exit(); -} - -int disable_apic; - -/* - * This initializes the IO-APIC and APIC hardware if this is - * a UP kernel. + * APIC command line parameters */ -int __init APIC_init_uniprocessor (void) +static int __init apic_set_verbosity(char *str) { - if (disable_apic) { - printk(KERN_INFO "Apic disabled\n"); - return -1; + if (str == NULL) { + skip_ioapic_setup = 0; + ioapic_force = 1; + return 0; } - if (!cpu_has_apic) { - disable_apic = 1; - printk(KERN_INFO "Apic disabled by BIOS\n"); - return -1; + if (strcmp("debug", str) == 0) + apic_verbosity = APIC_DEBUG; + else if (strcmp("verbose", str) == 0) + apic_verbosity = APIC_VERBOSE; + else { + printk(KERN_WARNING "APIC Verbosity level %s not recognised" + " use apic=verbose or apic=debug\n", str); + return -EINVAL; } - verify_local_APIC(); - - phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id); - apic_write(APIC_ID, SET_APIC_ID(boot_cpu_id)); - - setup_local_APIC(); - - if (smp_found_config && !skip_ioapic_setup && nr_ioapics) - setup_IO_APIC(); - else - nr_ioapics = 0; - setup_boot_APIC_clock(); - check_nmi_watchdog(); return 0; } +early_param("apic", apic_set_verbosity); static __init int setup_disableapic(char *str) { diff -puN arch/x86/kernel/apm_32.c~git-x86 arch/x86/kernel/apm_32.c --- a/arch/x86/kernel/apm_32.c~git-x86 +++ a/arch/x86/kernel/apm_32.c @@ -235,8 +235,6 @@ #include #include -#include "io_ports.h" - #if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) extern int (*console_blank_hook)(int); #endif diff -puN arch/x86/kernel/asm-offsets_32.c~git-x86 arch/x86/kernel/asm-offsets_32.c --- a/arch/x86/kernel/asm-offsets_32.c~git-x86 +++ a/arch/x86/kernel/asm-offsets_32.c @@ -123,7 +123,7 @@ void foo(void) OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable); OFFSET(PV_CPU_iret, pv_cpu_ops, iret); - OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit); + OFFSET(PV_CPU_irq_enable_syscall_ret, pv_cpu_ops, irq_enable_syscall_ret); OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0); #endif diff -puN arch/x86/kernel/cpu/mcheck/mce_64.c~git-x86 arch/x86/kernel/cpu/mcheck/mce_64.c --- a/arch/x86/kernel/cpu/mcheck/mce_64.c~git-x86 +++ a/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -63,7 +63,7 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait) * separate MCEs from kernel messages to avoid bogus bug reports. */ -struct mce_log mcelog = { +static struct mce_log mcelog = { MCE_LOG_SIGNATURE, MCE_LOG_LEN, }; diff -puN arch/x86/kernel/cpu/mtrr/amd.c~git-x86 arch/x86/kernel/cpu/mtrr/amd.c --- a/arch/x86/kernel/cpu/mtrr/amd.c~git-x86 +++ a/arch/x86/kernel/cpu/mtrr/amd.c @@ -53,8 +53,6 @@ static void amd_set_mtrr(unsigned int re The base address of the region. The size of the region. If this is 0 the region is disabled. The type of the region. - If TRUE, do the change safely. If FALSE, safety measures should - be done externally. [RETURNS] Nothing. */ { diff -puN arch/x86/kernel/cpu/mtrr/generic.c~git-x86 arch/x86/kernel/cpu/mtrr/generic.c --- a/arch/x86/kernel/cpu/mtrr/generic.c~git-x86 +++ a/arch/x86/kernel/cpu/mtrr/generic.c @@ -188,7 +188,7 @@ static inline void k8_enable_fixed_iorrs * \param changed pointer which indicates whether the MTRR needed to be changed * \param msrwords pointer to the MSR values which the MSR should have */ -static void set_fixed_range(int msr, int * changed, unsigned int * msrwords) +static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords) { unsigned lo, hi; @@ -200,7 +200,7 @@ static void set_fixed_range(int msr, int ((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK)) k8_enable_fixed_iorrs(); mtrr_wrmsr(msr, msrwords[0], msrwords[1]); - *changed = TRUE; + *changed = true; } } @@ -260,7 +260,7 @@ static void generic_get_mtrr(unsigned in static int set_fixed_ranges(mtrr_type * frs) { unsigned long long *saved = (unsigned long long *) frs; - int changed = FALSE; + bool changed = false; int block=-1, range; while (fixed_range_blocks[++block].ranges) @@ -273,17 +273,17 @@ static int set_fixed_ranges(mtrr_type * /* Set the MSR pair relating to a var range. Returns TRUE if changes are made */ -static int set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr) +static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr) { unsigned int lo, hi; - int changed = FALSE; + bool changed = false; rdmsr(MTRRphysBase_MSR(index), lo, hi); if ((vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL) || (vr->base_hi & (size_and_mask >> (32 - PAGE_SHIFT))) != (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) { mtrr_wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi); - changed = TRUE; + changed = true; } rdmsr(MTRRphysMask_MSR(index), lo, hi); @@ -292,7 +292,7 @@ static int set_mtrr_var_ranges(unsigned || (vr->mask_hi & (size_and_mask >> (32 - PAGE_SHIFT))) != (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) { mtrr_wrmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); - changed = TRUE; + changed = true; } return changed; } @@ -417,8 +417,6 @@ static void generic_set_mtrr(unsigned in The base address of the region. The size of the region. If this is 0 the region is disabled. The type of the region. - If TRUE, do the change safely. If FALSE, safety measures should - be done externally. [RETURNS] Nothing. */ { diff -puN arch/x86/kernel/cpu/mtrr/if.c~git-x86 arch/x86/kernel/cpu/mtrr/if.c --- a/arch/x86/kernel/cpu/mtrr/if.c~git-x86 +++ a/arch/x86/kernel/cpu/mtrr/if.c @@ -37,7 +37,7 @@ const char *mtrr_attrib_to_str(int x) static int mtrr_file_add(unsigned long base, unsigned long size, - unsigned int type, char increment, struct file *file, int page) + unsigned int type, bool increment, struct file *file, int page) { int reg, max; unsigned int *fcount = FILE_FCOUNT(file); @@ -55,7 +55,7 @@ mtrr_file_add(unsigned long base, unsign base >>= PAGE_SHIFT; size >>= PAGE_SHIFT; } - reg = mtrr_add_page(base, size, type, 1); + reg = mtrr_add_page(base, size, type, true); if (reg >= 0) ++fcount[reg]; return reg; @@ -141,7 +141,7 @@ mtrr_write(struct file *file, const char size >>= PAGE_SHIFT; err = mtrr_add_page((unsigned long) base, (unsigned long) size, i, - 1); + true); if (err < 0) return err; return len; @@ -217,7 +217,7 @@ mtrr_ioctl(struct file *file, unsigned i if (!capable(CAP_SYS_ADMIN)) return -EPERM; err = - mtrr_file_add(sentry.base, sentry.size, sentry.type, 1, + mtrr_file_add(sentry.base, sentry.size, sentry.type, true, file, 0); break; case MTRRIOC_SET_ENTRY: @@ -226,7 +226,7 @@ mtrr_ioctl(struct file *file, unsigned i #endif if (!capable(CAP_SYS_ADMIN)) return -EPERM; - err = mtrr_add(sentry.base, sentry.size, sentry.type, 0); + err = mtrr_add(sentry.base, sentry.size, sentry.type, false); break; case MTRRIOC_DEL_ENTRY: #ifdef CONFIG_COMPAT @@ -270,7 +270,7 @@ mtrr_ioctl(struct file *file, unsigned i if (!capable(CAP_SYS_ADMIN)) return -EPERM; err = - mtrr_file_add(sentry.base, sentry.size, sentry.type, 1, + mtrr_file_add(sentry.base, sentry.size, sentry.type, true, file, 1); break; case MTRRIOC_SET_PAGE_ENTRY: @@ -279,7 +279,8 @@ mtrr_ioctl(struct file *file, unsigned i #endif if (!capable(CAP_SYS_ADMIN)) return -EPERM; - err = mtrr_add_page(sentry.base, sentry.size, sentry.type, 0); + err = + mtrr_add_page(sentry.base, sentry.size, sentry.type, false); break; case MTRRIOC_DEL_PAGE_ENTRY: #ifdef CONFIG_COMPAT diff -puN arch/x86/kernel/cpu/mtrr/main.c~git-x86 arch/x86/kernel/cpu/mtrr/main.c --- a/arch/x86/kernel/cpu/mtrr/main.c~git-x86 +++ a/arch/x86/kernel/cpu/mtrr/main.c @@ -311,7 +311,7 @@ static void set_mtrr(unsigned int reg, u */ int mtrr_add_page(unsigned long base, unsigned long size, - unsigned int type, char increment) + unsigned int type, bool increment) { int i, replace, error; mtrr_type ltype; @@ -394,7 +394,9 @@ int mtrr_add_page(unsigned long base, un if (likely(replace < 0)) usage_table[i] = 1; else { - usage_table[i] = usage_table[replace] + !!increment; + usage_table[i] = usage_table[replace]; + if (increment) + usage_table[i]++; if (unlikely(replace != i)) { set_mtrr(replace, 0, 0, 0); usage_table[replace] = 0; @@ -460,7 +462,7 @@ static int mtrr_check(unsigned long base int mtrr_add(unsigned long base, unsigned long size, unsigned int type, - char increment) + bool increment) { if (mtrr_check(base, size)) return -EINVAL; diff -puN arch/x86/kernel/cpu/mtrr/mtrr.h~git-x86 arch/x86/kernel/cpu/mtrr/mtrr.h --- a/arch/x86/kernel/cpu/mtrr/mtrr.h~git-x86 +++ a/arch/x86/kernel/cpu/mtrr/mtrr.h @@ -2,10 +2,8 @@ * local mtrr defines. */ -#ifndef TRUE -#define TRUE 1 -#define FALSE 0 -#endif +#include +#include #define MTRRcap_MSR 0x0fe #define MTRRdefType_MSR 0x2ff diff -puN arch/x86/kernel/cpu/perfctr-watchdog.c~git-x86 arch/x86/kernel/cpu/perfctr-watchdog.c --- a/arch/x86/kernel/cpu/perfctr-watchdog.c~git-x86 +++ a/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -167,7 +167,6 @@ void release_evntsel_nmi(unsigned int ms clear_bit(counter, evntsel_nmi_owner); } -EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); EXPORT_SYMBOL(reserve_perfctr_nmi); EXPORT_SYMBOL(release_perfctr_nmi); diff -puN arch/x86/kernel/e820_32.c~git-x86 arch/x86/kernel/e820_32.c --- a/arch/x86/kernel/e820_32.c~git-x86 +++ a/arch/x86/kernel/e820_32.c @@ -37,26 +37,6 @@ unsigned long pci_mem_start = 0x10000000 EXPORT_SYMBOL(pci_mem_start); #endif extern int user_defined_memmap; -struct resource data_resource = { - .name = "Kernel data", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; - -struct resource code_resource = { - .name = "Kernel code", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; - -struct resource bss_resource = { - .name = "Kernel bss", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; static struct resource system_rom_resource = { .name = "System ROM", @@ -111,60 +91,6 @@ static struct resource video_rom_resourc .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM }; -static struct resource video_ram_resource = { - .name = "Video RAM area", - .start = 0xa0000, - .end = 0xbffff, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; - -static struct resource standard_io_resources[] = { { - .name = "dma1", - .start = 0x0000, - .end = 0x001f, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "pic1", - .start = 0x0020, - .end = 0x0021, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "timer0", - .start = 0x0040, - .end = 0x0043, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "timer1", - .start = 0x0050, - .end = 0x0053, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "keyboard", - .start = 0x0060, - .end = 0x006f, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "dma page reg", - .start = 0x0080, - .end = 0x008f, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "pic2", - .start = 0x00a0, - .end = 0x00a1, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "dma2", - .start = 0x00c0, - .end = 0x00df, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "fpu", - .start = 0x00f0, - .end = 0x00ff, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -} }; - #define ROMSIGNATURE 0xaa55 static int __init romsignature(const unsigned char *rom) @@ -260,10 +186,9 @@ static void __init probe_roms(void) * Request address space for all standard RAM and ROM resources * and also for regions reported as reserved by the e820. */ -static void __init -legacy_init_iomem_resources(struct resource *code_resource, - struct resource *data_resource, - struct resource *bss_resource) +void __init legacy_init_iomem_resources(struct resource *code_resource, + struct resource *data_resource, + struct resource *bss_resource) { int i; @@ -305,35 +230,6 @@ legacy_init_iomem_resources(struct resou } } -/* - * Request address space for all standard resources - * - * This is called just before pcibios_init(), which is also a - * subsys_initcall, but is linked in later (in arch/i386/pci/common.c). - */ -static int __init request_standard_resources(void) -{ - int i; - - printk("Setting up standard PCI resources\n"); - if (efi_enabled) - efi_initialize_iomem_resources(&code_resource, - &data_resource, &bss_resource); - else - legacy_init_iomem_resources(&code_resource, - &data_resource, &bss_resource); - - /* EFI systems may still have VGA */ - request_resource(&iomem_resource, &video_ram_resource); - - /* request I/O space for devices used on all i[345]86 PCs */ - for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) - request_resource(&ioport_resource, &standard_io_resources[i]); - return 0; -} - -subsys_initcall(request_standard_resources); - #if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION) /** * e820_mark_nosave_regions - Find the ranges of physical addresses that do not diff -puN arch/x86/kernel/e820_64.c~git-x86 arch/x86/kernel/e820_64.c --- a/arch/x86/kernel/e820_64.c~git-x86 +++ a/arch/x86/kernel/e820_64.c @@ -1,4 +1,4 @@ -/* +/* * Handle the memory map. * The functions here do the job until bootmem takes over. * @@ -26,47 +26,45 @@ #include #include #include +#include struct e820map e820; -/* +/* * PFN of last memory page. */ -unsigned long end_pfn; -EXPORT_SYMBOL(end_pfn); +unsigned long end_pfn; -/* +/* * end_pfn only includes RAM, while end_pfn_map includes all e820 entries. * The direct mapping extends to end_pfn_map, so that we can directly access * apertures, ACPI and other tables without having to play with fixmaps. - */ -unsigned long end_pfn_map; + */ +unsigned long end_pfn_map; -/* +/* * Last pfn which the user wants to use. */ static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT; -extern struct resource code_resource, data_resource, bss_resource; - -/* Check for some hardcoded bad areas that early boot is not allowed to touch */ +/* Check for some hardcoded bad areas that early boot is not allowed to touch */ static inline int bad_addr(unsigned long *addrp, unsigned long size) -{ - unsigned long addr = *addrp, last = addr + size; +{ + unsigned long addr = *addrp, last = addr + size; /* various gunk below that needed for SMP startup */ - if (addr < 0x8000) { + if (addr < 0x8000) { *addrp = PAGE_ALIGN(0x8000); - return 1; + return 1; } /* direct mapping tables of the kernel */ - if (last >= table_start<= table_start<= __pa_symbol(&_text) && addr < __pa_symbol(&_end)) { @@ -97,9 +95,9 @@ static inline int bad_addr(unsigned long return 1; } #endif - /* XXX ramdisk image here? */ + /* XXX ramdisk image here? */ return 0; -} +} /* * This function checks if any part of the range is mapped @@ -107,16 +105,18 @@ static inline int bad_addr(unsigned long */ int e820_any_mapped(unsigned long start, unsigned long end, unsigned type) -{ +{ int i; - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - if (type && ei->type != type) + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + + if (type && ei->type != type) continue; if (ei->addr >= end || ei->addr + ei->size <= start) - continue; - return 1; - } + continue; + return 1; + } return 0; } EXPORT_SYMBOL_GPL(e820_any_mapped); @@ -127,11 +127,14 @@ EXPORT_SYMBOL_GPL(e820_any_mapped); * Note: this function only works correct if the e820 table is sorted and * not-overlapping, which is the case */ -int __init e820_all_mapped(unsigned long start, unsigned long end, unsigned type) +int __init e820_all_mapped(unsigned long start, unsigned long end, + unsigned type) { int i; + for (i = 0; i < e820.nr_map; i++) { struct e820entry *ei = &e820.map[i]; + if (type && ei->type != type) continue; /* is the region (part) in overlap with the current region ?*/ @@ -143,65 +146,73 @@ int __init e820_all_mapped(unsigned long */ if (ei->addr <= start) start = ei->addr + ei->size; - /* if start is now at or beyond end, we're done, full coverage */ + /* + * if start is now at or beyond end, we're done, full + * coverage + */ if (start >= end) - return 1; /* we're done */ + return 1; } return 0; } -/* - * Find a free area in a specific range. - */ -unsigned long __init find_e820_area(unsigned long start, unsigned long end, unsigned size) -{ - int i; - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - unsigned long addr = ei->addr, last; - if (ei->type != E820_RAM) - continue; - if (addr < start) +/* + * Find a free area in a specific range. + */ +unsigned long __init find_e820_area(unsigned long start, unsigned long end, + unsigned size) +{ + int i; + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + unsigned long addr = ei->addr, last; + + if (ei->type != E820_RAM) + continue; + if (addr < start) addr = start; - if (addr > ei->addr + ei->size) - continue; + if (addr > ei->addr + ei->size) + continue; while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size) ; last = PAGE_ALIGN(addr) + size; if (last > ei->addr + ei->size) continue; - if (last > end) + if (last > end) continue; - return addr; - } - return -1UL; -} + return addr; + } + return -1UL; +} /* * Find the highest page frame number we have available */ unsigned long __init e820_end_of_ram(void) { - unsigned long end_pfn = 0; + unsigned long end_pfn; + end_pfn = find_max_pfn_with_active_regions(); - - if (end_pfn > end_pfn_map) + + if (end_pfn > end_pfn_map) end_pfn_map = end_pfn; if (end_pfn_map > MAXMEM>>PAGE_SHIFT) end_pfn_map = MAXMEM>>PAGE_SHIFT; if (end_pfn > end_user_pfn) end_pfn = end_user_pfn; - if (end_pfn > end_pfn_map) - end_pfn = end_pfn_map; + if (end_pfn > end_pfn_map) + end_pfn = end_pfn_map; - printk("end_pfn_map = %lu\n", end_pfn_map); - return end_pfn; + printk(KERN_INFO "end_pfn_map = %lu\n", end_pfn_map); + return end_pfn; } /* * Mark e820 reserved areas as busy for the resource manager. */ -void __init e820_reserve_resources(void) +void __init e820_reserve_resources(struct resource *code_resource, + struct resource *data_resource, struct resource *bss_resource) { int i; for (i = 0; i < e820.nr_map; i++) { @@ -219,13 +230,13 @@ void __init e820_reserve_resources(void) request_resource(&iomem_resource, res); if (e820.map[i].type == E820_RAM) { /* - * We don't know which RAM region contains kernel data, - * so we try it repeatedly and let the resource manager - * test it. + * We don't know which RAM region contains kernel data, + * so we try it repeatedly and let the resource manager + * test it. */ - request_resource(res, &code_resource); - request_resource(res, &data_resource); - request_resource(res, &bss_resource); + request_resource(res, code_resource); + request_resource(res, data_resource); + request_resource(res, bss_resource); #ifdef CONFIG_KEXEC if (crashk_res.start != crashk_res.end) request_resource(res, &crashk_res); @@ -322,9 +333,9 @@ e820_register_active_regions(int nid, un add_active_range(nid, ei_startpfn, ei_endpfn); } -/* +/* * Add a memory region to the kernel e820 map. - */ + */ void __init add_memory_region(unsigned long start, unsigned long size, int type) { int x = e820.nr_map; @@ -349,9 +360,7 @@ unsigned long __init e820_hole_size(unsi { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long end_pfn = end >> PAGE_SHIFT; - unsigned long ei_startpfn; - unsigned long ei_endpfn; - unsigned long ram = 0; + unsigned long ei_startpfn, ei_endpfn, ram = 0; int i; for (i = 0; i < e820.nr_map; i++) { @@ -363,28 +372,31 @@ unsigned long __init e820_hole_size(unsi return end - start - (ram << PAGE_SHIFT); } -void __init e820_print_map(char *who) +static void __init e820_print_map(char *who) { int i; for (i = 0; i < e820.nr_map; i++) { printk(KERN_INFO " %s: %016Lx - %016Lx ", who, - (unsigned long long) e820.map[i].addr, - (unsigned long long) (e820.map[i].addr + e820.map[i].size)); + (unsigned long long) e820.map[i].addr, + (unsigned long long) + (e820.map[i].addr + e820.map[i].size)); switch (e820.map[i].type) { - case E820_RAM: printk("(usable)\n"); - break; + case E820_RAM: + printk(KERN_CONT "(usable)\n"); + break; case E820_RESERVED: - printk("(reserved)\n"); - break; + printk(KERN_CONT "(reserved)\n"); + break; case E820_ACPI: - printk("(ACPI data)\n"); - break; + printk(KERN_CONT "(ACPI data)\n"); + break; case E820_NVS: - printk("(ACPI NVS)\n"); - break; - default: printk("type %u\n", e820.map[i].type); - break; + printk(KERN_CONT "(ACPI NVS)\n"); + break; + default: + printk(KERN_CONT "type %u\n", e820.map[i].type); + break; } } } @@ -392,11 +404,11 @@ void __init e820_print_map(char *who) /* * Sanitize the BIOS e820 map. * - * Some e820 responses include overlapping entries. The following + * Some e820 responses include overlapping entries. The following * replaces the original e820 map with a new one, removing overlaps. * */ -static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) +static int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map) { struct change_member { struct e820entry *pbios; /* pointer to original bios entry */ @@ -416,7 +428,8 @@ static int __init sanitize_e820_map(stru int i; /* - Visually we're performing the following (1,2,3,4 = memory types)... + Visually we're performing the following + (1,2,3,4 = memory types)... Sample memory map (w/overlaps): ____22__________________ @@ -458,22 +471,23 @@ static int __init sanitize_e820_map(stru old_nr = *pnr_map; /* bail out if we find any unreasonable addresses in bios map */ - for (i=0; iaddr = biosmap[i].addr; change_point[chgidx++]->pbios = &biosmap[i]; - change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size; + change_point[chgidx]->addr = biosmap[i].addr + + biosmap[i].size; change_point[chgidx++]->pbios = &biosmap[i]; } } @@ -483,75 +497,106 @@ static int __init sanitize_e820_map(stru still_changing = 1; while (still_changing) { still_changing = 0; - for (i=1; i < chg_nr; i++) { - /* if > , swap */ - /* or, if current= & last=, swap */ - if ((change_point[i]->addr < change_point[i-1]->addr) || - ((change_point[i]->addr == change_point[i-1]->addr) && - (change_point[i]->addr == change_point[i]->pbios->addr) && - (change_point[i-1]->addr != change_point[i-1]->pbios->addr)) - ) - { + for (i = 1; i < chg_nr; i++) { + unsigned long long curaddr, lastaddr; + unsigned long long curpbaddr, lastpbaddr; + + curaddr = change_point[i]->addr; + lastaddr = change_point[i - 1]->addr; + curpbaddr = change_point[i]->pbios->addr; + lastpbaddr = change_point[i - 1]->pbios->addr; + + /* + * swap entries, when: + * + * curaddr > lastaddr or + * curaddr == lastaddr and curaddr == curpbaddr and + * lastaddr != lastpbaddr + */ + if (curaddr < lastaddr || + (curaddr == lastaddr && curaddr == curpbaddr && + lastaddr != lastpbaddr)) { change_tmp = change_point[i]; change_point[i] = change_point[i-1]; change_point[i-1] = change_tmp; - still_changing=1; + still_changing = 1; } } } /* create a new bios memory map, removing overlaps */ - overlap_entries=0; /* number of entries in the overlap table */ - new_bios_entry=0; /* index for creating new bios map entries */ + overlap_entries = 0; /* number of entries in the overlap table */ + new_bios_entry = 0; /* index for creating new bios map entries */ last_type = 0; /* start with undefined memory type */ last_addr = 0; /* start with 0 as last starting address */ + /* loop through change-points, determining affect on the new bios map */ - for (chgidx=0; chgidx < chg_nr; chgidx++) - { + for (chgidx = 0; chgidx < chg_nr; chgidx++) { /* keep track of all overlapping bios entries */ - if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr) - { - /* add map entry to overlap list (> 1 entry implies an overlap) */ - overlap_list[overlap_entries++]=change_point[chgidx]->pbios; - } - else - { - /* remove entry from list (order independent, so swap with last) */ - for (i=0; ipbios) - overlap_list[i] = overlap_list[overlap_entries-1]; + if (change_point[chgidx]->addr == + change_point[chgidx]->pbios->addr) { + /* + * add map entry to overlap list (> 1 entry + * implies an overlap) + */ + overlap_list[overlap_entries++] = + change_point[chgidx]->pbios; + } else { + /* + * remove entry from list (order independent, + * so swap with last) + */ + for (i = 0; i < overlap_entries; i++) { + if (overlap_list[i] == + change_point[chgidx]->pbios) + overlap_list[i] = + overlap_list[overlap_entries-1]; } overlap_entries--; } - /* if there are overlapping entries, decide which "type" to use */ - /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */ + /* + * if there are overlapping entries, decide which + * "type" to use (larger value takes precedence -- + * 1=usable, 2,3,4,4+=unusable) + */ current_type = 0; - for (i=0; itype > current_type) current_type = overlap_list[i]->type; - /* continue building up new bios map based on this information */ + /* + * continue building up new bios map based on this + * information + */ if (current_type != last_type) { if (last_type != 0) { new_bios[new_bios_entry].size = change_point[chgidx]->addr - last_addr; - /* move forward only if the new size was non-zero */ + /* + * move forward only if the new size + * was non-zero + */ if (new_bios[new_bios_entry].size != 0) + /* + * no more space left for new + * bios entries ? + */ if (++new_bios_entry >= E820MAX) - break; /* no more space left for new bios entries */ + break; } if (current_type != 0) { - new_bios[new_bios_entry].addr = change_point[chgidx]->addr; + new_bios[new_bios_entry].addr = + change_point[chgidx]->addr; new_bios[new_bios_entry].type = current_type; - last_addr=change_point[chgidx]->addr; + last_addr = change_point[chgidx]->addr; } last_type = current_type; } } - new_nr = new_bios_entry; /* retain count for new bios entries */ + /* retain count for new bios entries */ + new_nr = new_bios_entry; /* copy new bios mapping into original location */ - memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry)); + memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry)); *pnr_map = new_nr; return 0; @@ -566,7 +611,7 @@ static int __init sanitize_e820_map(stru * will have given us a memory map that we can use to properly * set up memory. If we aren't, we'll fake a memory map. */ -static int __init copy_e820_map(struct e820entry * biosmap, int nr_map) +static int __init copy_e820_map(struct e820entry *biosmap, int nr_map) { /* Only one memory region (or negative)? Ignore it */ if (nr_map < 2) @@ -583,11 +628,11 @@ static int __init copy_e820_map(struct e return -1; add_memory_region(start, size, type); - } while (biosmap++,--nr_map); + } while (biosmap++, --nr_map); return 0; } -void early_panic(char *msg) +static void early_panic(char *msg) { early_printk(msg); panic(msg); @@ -613,9 +658,9 @@ static int __init parse_memopt(char *p) if (!p) return -EINVAL; end_user_pfn = memparse(p, &p); - end_user_pfn >>= PAGE_SHIFT; + end_user_pfn >>= PAGE_SHIFT; return 0; -} +} early_param("mem", parse_memopt); static int userdef __initdata; @@ -627,9 +672,9 @@ static int __init parse_memmap_opt(char if (!strcmp(p, "exactmap")) { #ifdef CONFIG_CRASH_DUMP - /* If we are doing a crash dump, we - * still need to know the real mem - * size before original memory map is + /* + * If we are doing a crash dump, we still need to know + * the real mem size before original memory map is * reset. */ e820_register_active_regions(0, 0, -1UL); @@ -646,6 +691,8 @@ static int __init parse_memmap_opt(char mem_size = memparse(p, &p); if (p == oldp) return -EINVAL; + + userdef = 1; if (*p == '@') { start_at = memparse(p+1, &p); add_memory_region(start_at, mem_size, E820_RAM); @@ -665,6 +712,12 @@ early_param("memmap", parse_memmap_opt); void __init finish_e820_parsing(void) { if (userdef) { + char nr = e820.nr_map; + + if (sanitize_e820_map(e820.map, &nr) < 0) + early_panic("Invalid user supplied memory map"); + e820.nr_map = nr; + printk(KERN_INFO "user-defined physical RAM map:\n"); e820_print_map("user"); } @@ -713,8 +766,10 @@ __init void e820_setup_gap(void) if (!found) { gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024; - printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit address range\n" - KERN_ERR "PCI: Unassigned devices with 32bit resource registers may break!\n"); + printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit " + "address range\n" + KERN_ERR "PCI: Unassigned devices with 32bit resource " + "registers may break!\n"); } /* @@ -727,8 +782,9 @@ __init void e820_setup_gap(void) /* Fun with two's complement */ pci_mem_start = (gapstart + round) & -round; - printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", - pci_mem_start, gapstart, gapsize); + printk(KERN_INFO + "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", + pci_mem_start, gapstart, gapsize); } int __init arch_get_ram_range(int slot, u64 *addr, u64 *size) diff -puN arch/x86/kernel/early-quirks.c~git-x86 arch/x86/kernel/early-quirks.c --- a/arch/x86/kernel/early-quirks.c~git-x86 +++ a/arch/x86/kernel/early-quirks.c @@ -34,38 +34,20 @@ static void __init via_bugs(void) #endif } -#ifdef CONFIG_ACPI -#ifdef CONFIG_X86_IO_APIC - -static int __init nvidia_hpet_check(struct acpi_table_header *header) -{ - return 0; -} -#endif /* CONFIG_X86_IO_APIC */ -#endif /* CONFIG_ACPI */ - static void __init nvidia_bugs(void) { #ifdef CONFIG_ACPI #ifdef CONFIG_X86_IO_APIC /* - * All timer overrides on Nvidia are - * wrong unless HPET is enabled. - * Unfortunately that's not true on many Asus boards. - * We don't know yet how to detect this automatically, but - * at least allow a command line override. + * All timer overrides on Nvidia NF3/NF4 are + * wrong. */ if (acpi_use_timer_override) return; - if (acpi_table_parse(ACPI_SIG_HPET, nvidia_hpet_check)) { - acpi_skip_timer_override = 1; - printk(KERN_INFO "Nvidia board " - "detected. Ignoring ACPI " - "timer override.\n"); - printk(KERN_INFO "If you got timer trouble " - "try acpi_use_timer_override\n"); - } + acpi_skip_timer_override = 1; + printk(KERN_INFO "Nvidia board detected. Ignoring ACPI timer override.\n"); + printk(KERN_INFO "If you got timer trouble try acpi_use_timer_override\n"); #endif #endif /* RED-PEN skip them on mptables too? */ @@ -86,10 +68,19 @@ static void __init ati_bugs(void) struct chipset { u16 vendor; void (*f)(void); + int id; }; static struct chipset early_qrk[] __initdata = { - { PCI_VENDOR_ID_NVIDIA, nvidia_bugs }, + /* This list should cover at least one PCI ID from each NF3 or NF4 + mainboard to handle a bug in their reference BIOS. May be incomplete. */ + { PCI_VENDOR_ID_NVIDIA, nvidia_bugs, 0x00dd }, /* nforce 3 */ + { PCI_VENDOR_ID_NVIDIA, nvidia_bugs, 0x00e1 }, /* nforce 3 */ + { PCI_VENDOR_ID_NVIDIA, nvidia_bugs, 0x00ed }, /* nforce 3 */ + { PCI_VENDOR_ID_NVIDIA, nvidia_bugs, 0x003d }, /* mcp 04 ?? */ + { PCI_VENDOR_ID_NVIDIA, nvidia_bugs, 0x005c }, /* ck 804 */ + { PCI_VENDOR_ID_NVIDIA, nvidia_bugs, 0x026f }, /* mcp 51 / nf4 ? */ + { PCI_VENDOR_ID_NVIDIA, nvidia_bugs, 0x02f0 }, /* mcp 51 / nf4 ? */ { PCI_VENDOR_ID_VIA, via_bugs }, { PCI_VENDOR_ID_ATI, ati_bugs }, {} @@ -102,12 +93,13 @@ void __init early_quirks(void) if (!early_pci_allowed()) return; - /* Poor man's PCI discovery */ + /* Poor man's PCI discovery. + We just look for a chipset unique PCI bridge; not scan all devices */ for (num = 0; num < 32; num++) { for (slot = 0; slot < 32; slot++) { for (func = 0; func < 8; func++) { u32 class; - u32 vendor; + u32 vendor, device; u8 type; int i; class = read_pci_config(num,slot,func, @@ -120,13 +112,19 @@ void __init early_quirks(void) vendor = read_pci_config(num, slot, func, PCI_VENDOR_ID); + device = vendor >> 16; + vendor &= 0xffff; - for (i = 0; early_qrk[i].f; i++) - if (early_qrk[i].vendor == vendor) { + for (i = 0; early_qrk[i].f; i++) { + struct chipset *c = &early_qrk[i]; + if (c->vendor == vendor && + (!c->id || + (c->id && c->id == device))) { early_qrk[i].f(); return; } + } type = read_pci_config_byte(num, slot, func, PCI_HEADER_TYPE); diff -puN arch/x86/kernel/entry_32.S~git-x86 arch/x86/kernel/entry_32.S --- a/arch/x86/kernel/entry_32.S~git-x86 +++ a/arch/x86/kernel/entry_32.S @@ -58,7 +58,7 @@ * for paravirtualization. The following will never clobber any registers: * INTERRUPT_RETURN (aka. "iret") * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax") - * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit"). + * ENABLE_INTERRUPTS_SYSCALL_RET (aka "sti; sysexit"). * * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY). @@ -351,7 +351,7 @@ sysenter_past_esp: xorl %ebp,%ebp TRACE_IRQS_ON 1: mov PT_FS(%esp), %fs - ENABLE_INTERRUPTS_SYSEXIT + ENABLE_INTERRUPTS_SYSCALL_RET CFI_ENDPROC .pushsection .fixup,"ax" 2: movl $0,PT_FS(%esp) @@ -882,10 +882,10 @@ ENTRY(native_iret) .previous END(native_iret) -ENTRY(native_irq_enable_sysexit) +ENTRY(native_irq_enable_syscall_ret) sti sysexit -END(native_irq_enable_sysexit) +END(native_irq_enable_syscall_ret) #endif KPROBE_ENTRY(int3) diff -puN arch/x86/kernel/geode_32.c~git-x86 arch/x86/kernel/geode_32.c --- a/arch/x86/kernel/geode_32.c~git-x86 +++ a/arch/x86/kernel/geode_32.c @@ -1,6 +1,7 @@ /* * AMD Geode southbridge support code * Copyright (C) 2006, Advanced Micro Devices, Inc. + * Copyright (C) 2007, Andres Salomon * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public License @@ -51,45 +52,62 @@ EXPORT_SYMBOL_GPL(geode_get_dev_base); /* === GPIO API === */ -void geode_gpio_set(unsigned int gpio, unsigned int reg) +void geode_gpio_set(u32 gpio, unsigned int reg) { u32 base = geode_get_dev_base(GEODE_DEV_GPIO); if (!base) return; - if (gpio < 16) - outl(1 << gpio, base + reg); - else - outl(1 << (gpio - 16), base + 0x80 + reg); + /* low bank register */ + if (gpio & 0xFFFF) + outl(gpio & 0xFFFF, base + reg); + /* high bank register */ + gpio >>= 16; + if (gpio) + outl(gpio, base + 0x80 + reg); } EXPORT_SYMBOL_GPL(geode_gpio_set); -void geode_gpio_clear(unsigned int gpio, unsigned int reg) +void geode_gpio_clear(u32 gpio, unsigned int reg) { u32 base = geode_get_dev_base(GEODE_DEV_GPIO); if (!base) return; - if (gpio < 16) - outl(1 << (gpio + 16), base + reg); - else - outl(1 << gpio, base + 0x80 + reg); + /* low bank register */ + if (gpio & 0xFFFF) + outl((gpio & 0xFFFF) << 16, base + reg); + /* high bank register */ + gpio &= (0xFFFF << 16); + if (gpio) + outl(gpio, base + 0x80 + reg); } EXPORT_SYMBOL_GPL(geode_gpio_clear); -int geode_gpio_isset(unsigned int gpio, unsigned int reg) +int geode_gpio_isset(u32 gpio, unsigned int reg) { u32 base = geode_get_dev_base(GEODE_DEV_GPIO); + u32 val; if (!base) return 0; - if (gpio < 16) - return (inl(base + reg) & (1 << gpio)) ? 1 : 0; - else - return (inl(base + 0x80 + reg) & (1 << (gpio - 16))) ? 1 : 0; + /* low bank register */ + if (gpio & 0xFFFF) { + val = inl(base + reg) & (gpio & 0xFFFF); + if ((gpio & 0xFFFF) == val) + return 1; + } + /* high bank register */ + gpio >>= 16; + if (gpio) { + val = inl(base + 0x80 + reg) & gpio; + if (gpio == val) + return 1; + } + return 0; } EXPORT_SYMBOL_GPL(geode_gpio_isset); diff -puN arch/x86/kernel/head64.c~git-x86 arch/x86/kernel/head64.c --- a/arch/x86/kernel/head64.c~git-x86 +++ a/arch/x86/kernel/head64.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -19,6 +20,7 @@ #include #include #include +#include static void __init zap_identity_mappings(void) { diff -puN arch/x86/kernel/hpet.c~git-x86 arch/x86/kernel/hpet.c --- a/arch/x86/kernel/hpet.c~git-x86 +++ a/arch/x86/kernel/hpet.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include diff -puN arch/x86/kernel/i386_ksyms_32.c~git-x86 arch/x86/kernel/i386_ksyms_32.c --- a/arch/x86/kernel/i386_ksyms_32.c~git-x86 +++ a/arch/x86/kernel/i386_ksyms_32.c @@ -21,11 +21,4 @@ EXPORT_SYMBOL(__put_user_8); EXPORT_SYMBOL(strstr); -#ifdef CONFIG_SMP -extern void FASTCALL( __write_lock_failed(rwlock_t *rw)); -extern void FASTCALL( __read_lock_failed(rwlock_t *rw)); -EXPORT_SYMBOL(__write_lock_failed); -EXPORT_SYMBOL(__read_lock_failed); -#endif - EXPORT_SYMBOL(csum_partial); diff -puN arch/x86/kernel/i8259_32.c~git-x86 arch/x86/kernel/i8259_32.c --- a/arch/x86/kernel/i8259_32.c~git-x86 +++ a/arch/x86/kernel/i8259_32.c @@ -21,8 +21,6 @@ #include #include -#include - /* * This is the 'legacy' 8259A Programmable Interrupt Controller, * present in the majority of PC/AT boxes. diff -puN arch/x86/kernel/i8259_64.c~git-x86 arch/x86/kernel/i8259_64.c --- a/arch/x86/kernel/i8259_64.c~git-x86 +++ a/arch/x86/kernel/i8259_64.c @@ -21,6 +21,7 @@ #include #include #include +#include /* * Common place to define all x86 IRQ vectors @@ -48,7 +49,7 @@ */ /* - * The IO-APIC gives us many more interrupt sources. Most of these + * The IO-APIC gives us many more interrupt sources. Most of these * are unused but an SMP system is supposed to have enough memory ... * sometimes (mostly wrt. hw bugs) we get corrupted vectors all * across the spectrum, so we really want to be prepared to get all @@ -114,11 +115,7 @@ static struct irq_chip i8259A_chip = { /* * This contains the irq mask for both 8259A irq controllers, */ -static unsigned int cached_irq_mask = 0xffff; - -#define __byte(x,y) (((unsigned char *)&(y))[x]) -#define cached_21 (__byte(0,cached_irq_mask)) -#define cached_A1 (__byte(1,cached_irq_mask)) +unsigned int cached_irq_mask = 0xffff; /* * Not all IRQs can be routed through the IO-APIC, eg. on certain (older) @@ -139,9 +136,9 @@ void disable_8259A_irq(unsigned int irq) spin_lock_irqsave(&i8259A_lock, flags); cached_irq_mask |= mask; if (irq & 8) - outb(cached_A1,0xA1); + outb(cached_slave_mask, PIC_SLAVE_IMR); else - outb(cached_21,0x21); + outb(cached_master_mask, PIC_MASTER_IMR); spin_unlock_irqrestore(&i8259A_lock, flags); } @@ -153,9 +150,9 @@ void enable_8259A_irq(unsigned int irq) spin_lock_irqsave(&i8259A_lock, flags); cached_irq_mask &= mask; if (irq & 8) - outb(cached_A1,0xA1); + outb(cached_slave_mask, PIC_SLAVE_IMR); else - outb(cached_21,0x21); + outb(cached_master_mask, PIC_MASTER_IMR); spin_unlock_irqrestore(&i8259A_lock, flags); } @@ -167,9 +164,9 @@ int i8259A_irq_pending(unsigned int irq) spin_lock_irqsave(&i8259A_lock, flags); if (irq < 8) - ret = inb(0x20) & mask; + ret = inb(PIC_MASTER_CMD) & mask; else - ret = inb(0xA0) & (mask >> 8); + ret = inb(PIC_SLAVE_CMD) & (mask >> 8); spin_unlock_irqrestore(&i8259A_lock, flags); return ret; @@ -196,14 +193,14 @@ static inline int i8259A_irq_real(unsign int irqmask = 1<> 8); - outb(0x0A,0xA0); /* back to the IRR register */ + outb(0x0B,PIC_SLAVE_CMD); /* ISR register */ + value = inb(PIC_SLAVE_CMD) & (irqmask >> 8); + outb(0x0A,PIC_SLAVE_CMD); /* back to the IRR register */ return value; } @@ -240,14 +237,17 @@ static void mask_and_ack_8259A(unsigned handle_real_irq: if (irq & 8) { - inb(0xA1); /* DUMMY - (do we need this?) */ - outb(cached_A1,0xA1); - outb(0x60+(irq&7),0xA0);/* 'Specific EOI' to slave */ - outb(0x62,0x20); /* 'Specific EOI' to master-IRQ2 */ + inb(PIC_SLAVE_IMR); /* DUMMY - (do we need this?) */ + outb(cached_slave_mask, PIC_SLAVE_IMR); + /* 'Specific EOI' to slave */ + outb(0x60+(irq&7),PIC_SLAVE_CMD); + /* 'Specific EOI' to master-IRQ2 */ + outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); } else { - inb(0x21); /* DUMMY - (do we need this?) */ - outb(cached_21,0x21); - outb(0x60+irq,0x20); /* 'Specific EOI' to master */ + inb(PIC_MASTER_IMR); /* DUMMY - (do we need this?) */ + outb(cached_master_mask, PIC_MASTER_IMR); + /* 'Specific EOI' to master */ + outb(0x60+irq,PIC_MASTER_CMD); } spin_unlock_irqrestore(&i8259A_lock, flags); return; @@ -270,7 +270,8 @@ spurious_8259A_irq: * lets ACK and report it. [once per IRQ] */ if (!(spurious_irq_mask & irqmask)) { - printk(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq); + printk(KERN_DEBUG + "spurious 8259A interrupt: IRQ%d.\n", irq); spurious_irq_mask |= irqmask; } atomic_inc(&irq_err_count); @@ -283,51 +284,6 @@ spurious_8259A_irq: } } -void init_8259A(int auto_eoi) -{ - unsigned long flags; - - i8259A_auto_eoi = auto_eoi; - - spin_lock_irqsave(&i8259A_lock, flags); - - outb(0xff, 0x21); /* mask all of 8259A-1 */ - outb(0xff, 0xA1); /* mask all of 8259A-2 */ - - /* - * outb_p - this has to work on a wide range of PC hardware. - */ - outb_p(0x11, 0x20); /* ICW1: select 8259A-1 init */ - outb_p(IRQ0_VECTOR, 0x21); /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */ - outb_p(0x04, 0x21); /* 8259A-1 (the master) has a slave on IR2 */ - if (auto_eoi) - outb_p(0x03, 0x21); /* master does Auto EOI */ - else - outb_p(0x01, 0x21); /* master expects normal EOI */ - - outb_p(0x11, 0xA0); /* ICW1: select 8259A-2 init */ - outb_p(IRQ8_VECTOR, 0xA1); /* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */ - outb_p(0x02, 0xA1); /* 8259A-2 is a slave on master's IR2 */ - outb_p(0x01, 0xA1); /* (slave's support for AEOI in flat mode - is to be investigated) */ - - if (auto_eoi) - /* - * in AEOI mode we just have to mask the interrupt - * when acking. - */ - i8259A_chip.mask_ack = disable_8259A_irq; - else - i8259A_chip.mask_ack = mask_and_ack_8259A; - - udelay(100); /* wait for 8259A to initialize */ - - outb(cached_21, 0x21); /* restore master IRQ mask */ - outb(cached_A1, 0xA1); /* restore slave IRQ mask */ - - spin_unlock_irqrestore(&i8259A_lock, flags); -} - static char irq_trigger[2]; /** * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ @@ -364,8 +320,8 @@ static int i8259A_shutdown(struct sys_de * the kernel initialization code can get it * out of. */ - outb(0xff, 0x21); /* mask all of 8259A-1 */ - outb(0xff, 0xA1); /* mask all of 8259A-1 */ + outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ + outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */ return 0; } @@ -391,6 +347,58 @@ static int __init i8259A_init_sysfs(void device_initcall(i8259A_init_sysfs); +void init_8259A(int auto_eoi) +{ + unsigned long flags; + + i8259A_auto_eoi = auto_eoi; + + spin_lock_irqsave(&i8259A_lock, flags); + + outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ + outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ + + /* + * outb_p - this has to work on a wide range of PC hardware. + */ + outb_p(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ + /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */ + outb_p(IRQ0_VECTOR, PIC_MASTER_IMR); + /* 8259A-1 (the master) has a slave on IR2 */ + outb_p(0x04, PIC_MASTER_IMR); + if (auto_eoi) /* master does Auto EOI */ + outb_p(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR); + else /* master expects normal EOI */ + outb_p(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR); + + outb_p(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ + /* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */ + outb_p(IRQ8_VECTOR, PIC_SLAVE_IMR); + /* 8259A-2 is a slave on master's IR2 */ + outb_p(PIC_CASCADE_IR, PIC_SLAVE_IMR); + /* (slave's support for AEOI in flat mode is to be investigated) */ + outb_p(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); + + if (auto_eoi) + /* + * In AEOI mode we just have to mask the interrupt + * when acking. + */ + i8259A_chip.mask_ack = disable_8259A_irq; + else + i8259A_chip.mask_ack = mask_and_ack_8259A; + + udelay(100); /* wait for 8259A to initialize */ + + outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ + outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ + + spin_unlock_irqrestore(&i8259A_lock, flags); +} + + + + /* * IRQ2 is cascade interrupt to second interrupt controller */ diff -puN arch/x86/kernel/init_task.c~git-x86 arch/x86/kernel/init_task.c --- a/arch/x86/kernel/init_task.c~git-x86 +++ a/arch/x86/kernel/init_task.c @@ -15,7 +15,6 @@ static struct files_struct init_files = static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); -EXPORT_SYMBOL(init_mm); /* * Initial thread structure. diff -puN arch/x86/kernel/io_apic_32.c~git-x86 arch/x86/kernel/io_apic_32.c --- a/arch/x86/kernel/io_apic_32.c~git-x86 +++ a/arch/x86/kernel/io_apic_32.c @@ -48,8 +48,6 @@ #include #include -#include "io_ports.h" - int (*ioapic_renumber_irq)(int ioapic, int irq); atomic_t irq_mis_count; @@ -2166,6 +2164,10 @@ static inline void __init check_timer(vo { int apic1, pin1, apic2, pin2; int vector; + unsigned int ver; + + ver = apic_read(APIC_LVR); + ver = GET_APIC_VERSION(ver); /* * get/set the timer IRQ vector: @@ -2179,11 +2181,15 @@ static inline void __init check_timer(vo * mode for the 8259A whenever interrupts are routed * through I/O APICs. Also IRQ0 has to be enabled in * the 8259A which implies the virtual wire has to be - * disabled in the local APIC. + * disabled in the local APIC. Finally timer interrupts + * need to be acknowledged manually in the 8259A for + * timer_interrupt() and for the i82489DX when using + * the NMI watchdog. */ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); init_8259A(1); - timer_ack = 1; + timer_ack = !cpu_has_tsc; + timer_ack |= (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); if (timer_over_8254 > 0) enable_8259A_irq(0); diff -puN arch/x86/kernel/io_apic_64.c~git-x86 arch/x86/kernel/io_apic_64.c --- a/arch/x86/kernel/io_apic_64.c~git-x86 +++ a/arch/x86/kernel/io_apic_64.c @@ -35,6 +35,7 @@ #ifdef CONFIG_ACPI #include #endif +#include #include #include @@ -1069,7 +1070,7 @@ void __apicdebuginit print_local_APIC(vo v = apic_read(APIC_LVR); printk(KERN_INFO "... APIC VERSION: %08x\n", v); ver = GET_APIC_VERSION(v); - maxlvt = get_maxlvt(); + maxlvt = lapic_get_maxlvt(); v = apic_read(APIC_TASKPRI); printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); @@ -1171,7 +1172,7 @@ void __apicdebuginit print_PIC(void) #endif /* 0 */ -static void __init enable_IO_APIC(void) +void __init enable_IO_APIC(void) { union IO_APIC_reg_01 reg_01; int i8259_apic, i8259_pin; @@ -1435,7 +1436,7 @@ static void ack_apic_level(unsigned int int do_unmask_irq = 0; irq_complete_move(irq); -#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE) +#ifdef CONFIG_GENERIC_PENDING_IRQ /* If we are moving the irq we need to mask it */ if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) { do_unmask_irq = 1; @@ -1780,7 +1781,10 @@ __setup("no_timer_check", notimercheck); void __init setup_IO_APIC(void) { - enable_IO_APIC(); + + /* + * calling enable_IO_APIC() is moved to setup_local_APIC for BP + */ if (acpi_ioapic) io_apic_irqs = ~0; /* all IRQs go through IOAPIC */ diff -puN arch/x86/kernel/ioport_32.c~git-x86 arch/x86/kernel/ioport_32.c --- a/arch/x86/kernel/ioport_32.c~git-x86 +++ a/arch/x86/kernel/ioport_32.c @@ -16,49 +16,27 @@ #include /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ -static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value) +static void set_bitmap(unsigned long *bitmap, unsigned int base, + unsigned int extent, int new_value) { - unsigned long mask; - unsigned long *bitmap_base = bitmap + (base / BITS_PER_LONG); - unsigned int low_index = base & (BITS_PER_LONG-1); - int length = low_index + extent; - - if (low_index != 0) { - mask = (~0UL << low_index); - if (length < BITS_PER_LONG) - mask &= ~(~0UL << length); - if (new_value) - *bitmap_base++ |= mask; - else - *bitmap_base++ &= ~mask; - length -= BITS_PER_LONG; - } - - mask = (new_value ? ~0UL : 0UL); - while (length >= BITS_PER_LONG) { - *bitmap_base++ = mask; - length -= BITS_PER_LONG; - } + unsigned int i; - if (length > 0) { - mask = ~(~0UL << length); + for (i = base; i < base + extent; i++) { if (new_value) - *bitmap_base++ |= mask; + __set_bit(i, bitmap); else - *bitmap_base++ &= ~mask; + __clear_bit(i, bitmap); } } - /* * this changes the io permissions bitmap in the current task. */ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) { - unsigned long i, max_long, bytes, bytes_updated; struct thread_struct * t = ¤t->thread; struct tss_struct * tss; - unsigned long *bitmap; + unsigned long i, max_long; if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) return -EINVAL; @@ -71,7 +49,8 @@ asmlinkage long sys_ioperm(unsigned long * this is why we delay this operation until now: */ if (!t->io_bitmap_ptr) { - bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); + unsigned long *bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); + if (!bitmap) return -ENOMEM; @@ -100,10 +79,7 @@ asmlinkage long sys_ioperm(unsigned long if (t->io_bitmap_ptr[i] != ~0UL) max_long = i; - bytes = (max_long + 1) * sizeof(long); - bytes_updated = max(bytes, t->io_bitmap_max); - - t->io_bitmap_max = bytes; + t->io_bitmap_max = (max_long + 1) * sizeof(unsigned long); /* * Sets the lazy trigger so that the next I/O operation will @@ -130,9 +106,9 @@ asmlinkage long sys_ioperm(unsigned long * code. */ -asmlinkage long sys_iopl(unsigned long unused) +asmlinkage long sys_iopl(unsigned long regsp) { - volatile struct pt_regs * regs = (struct pt_regs *) &unused; + volatile struct pt_regs *regs = (struct pt_regs *)®sp; unsigned int level = regs->ebx; unsigned int old = (regs->eflags >> 12) & 3; struct thread_struct *t = ¤t->thread; @@ -144,8 +120,10 @@ asmlinkage long sys_iopl(unsigned long u if (!capable(CAP_SYS_RAWIO)) return -EPERM; } + t->iopl = level << 12; regs->eflags = (regs->eflags & ~X86_EFLAGS_IOPL) | t->iopl; set_iopl_mask(t->iopl); + return 0; } diff -puN arch/x86/kernel/irq_64.c~git-x86 arch/x86/kernel/irq_64.c --- a/arch/x86/kernel/irq_64.c~git-x86 +++ a/arch/x86/kernel/irq_64.c @@ -20,6 +20,26 @@ atomic_t irq_err_count; +/* + * 'what should we do if we get a hw irq event on an illegal vector'. + * each architecture has to answer this themselves. + */ +void ack_bad_irq(unsigned int irq) +{ + printk(KERN_WARNING "unexpected IRQ trap at vector %02x\n", irq); + /* + * Currently unexpected vectors happen only on SMP and APIC. + * We _must_ ack these because every local APIC has only N + * irq slots per priority level, and a 'hanging, unacked' IRQ + * holds up an irq slot - in excessive cases (when multiple + * unexpected vectors occur) that might lock up the APIC + * completely. + * But don't ack when the APIC is disabled. -AK + */ + if (!disable_apic) + ack_APIC_irq(); +} + #ifdef CONFIG_DEBUG_STACKOVERFLOW /* * Probabilistic stack overflow check: diff -puN /dev/null arch/x86/kernel/ldt.c --- /dev/null +++ a/arch/x86/kernel/ldt.c @@ -0,0 +1,264 @@ +/* + * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds + * Copyright (C) 1999 Ingo Molnar + * Copyright (C) 2002 Andi Kleen + * + * This handles calls from both 32bit and 64bit mode. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#ifdef CONFIG_SMP +static void flush_ldt(void *null) +{ + if (current->active_mm) + load_LDT(¤t->active_mm->context); +} +#endif + +static int alloc_ldt(mm_context_t *pc, int mincount, int reload) +{ + void *oldldt, *newldt; + int oldsize; + + if (mincount <= pc->size) + return 0; + oldsize = pc->size; + mincount = (mincount + 511) & (~511); + if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE) + newldt = vmalloc(mincount * LDT_ENTRY_SIZE); + else + newldt = kmalloc(mincount * LDT_ENTRY_SIZE, GFP_KERNEL); + + if (!newldt) + return -ENOMEM; + + if (oldsize) + memcpy(newldt, pc->ldt, oldsize * LDT_ENTRY_SIZE); + oldldt = pc->ldt; + memset(newldt + oldsize * LDT_ENTRY_SIZE, 0, + (mincount - oldsize) * LDT_ENTRY_SIZE); + +#ifdef CONFIG_X86_64 + /* CHECKME: Do we really need this ? */ + wmb(); +#endif + pc->ldt = newldt; + wmb(); + pc->size = mincount; + wmb(); + + if (reload) { +#ifdef CONFIG_SMP + cpumask_t mask; + + preempt_disable(); + load_LDT(pc); + mask = cpumask_of_cpu(smp_processor_id()); + if (!cpus_equal(current->mm->cpu_vm_mask, mask)) + smp_call_function(flush_ldt, NULL, 1, 1); + preempt_enable(); +#else + load_LDT(pc); +#endif + } + if (oldsize) { + if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE) + vfree(oldldt); + else + kfree(oldldt); + } + return 0; +} + +static inline int copy_ldt(mm_context_t *new, mm_context_t *old) +{ + int err = alloc_ldt(new, old->size, 0); + + if (err < 0) + return err; + memcpy(new->ldt, old->ldt, old->size * LDT_ENTRY_SIZE); + return 0; +} + +/* + * we do not have to muck with descriptors here, that is + * done in switch_mm() as needed. + */ +int init_new_context(struct task_struct *tsk, struct mm_struct *mm) +{ + struct mm_struct *old_mm; + int retval = 0; + + mutex_init(&mm->context.lock); + mm->context.size = 0; + old_mm = current->mm; + if (old_mm && old_mm->context.size > 0) { + mutex_lock(&old_mm->context.lock); + retval = copy_ldt(&mm->context, &old_mm->context); + mutex_unlock(&old_mm->context.lock); + } + return retval; +} + +/* + * No need to lock the MM as we are the last user + * + * 64bit: Don't touch the LDT register - we're already in the next thread. + */ +void destroy_context(struct mm_struct *mm) +{ + if (mm->context.size) { +#ifdef CONFIG_X86_32 + /* CHECKME: Can this ever happen ? */ + if (mm == current->active_mm) + clear_LDT(); +#endif + if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE) + vfree(mm->context.ldt); + else + kfree(mm->context.ldt); + mm->context.size = 0; + } +} + +static int read_ldt(void __user *ptr, unsigned long bytecount) +{ + int err; + unsigned long size; + struct mm_struct *mm = current->mm; + + if (!mm->context.size) + return 0; + if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES) + bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES; + + mutex_lock(&mm->context.lock); + size = mm->context.size * LDT_ENTRY_SIZE; + if (size > bytecount) + size = bytecount; + + err = 0; + if (copy_to_user(ptr, mm->context.ldt, size)) + err = -EFAULT; + mutex_unlock(&mm->context.lock); + if (err < 0) + goto error_return; + if (size != bytecount) { + /* zero-fill the rest */ + if (clear_user(ptr + size, bytecount - size) != 0) { + err = -EFAULT; + goto error_return; + } + } + return bytecount; +error_return: + return err; +} + +static int read_default_ldt(void __user *ptr, unsigned long bytecount) +{ + /* CHECKME: Can we use _one_ random number ? */ +#ifdef CONFIG_X86_32 + unsigned long size = 5 * sizeof(struct desc_struct); +#else + unsigned long size = 128; +#endif + if (bytecount > size) + bytecount = size; + if (clear_user(ptr, bytecount)) + return -EFAULT; + return bytecount; +} + +static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) +{ + struct mm_struct *mm = current->mm; + __u32 entry_1, entry_2; + int error; + struct user_desc ldt_info; + + error = -EINVAL; + if (bytecount != sizeof(ldt_info)) + goto out; + error = -EFAULT; + if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info))) + goto out; + + error = -EINVAL; + if (ldt_info.entry_number >= LDT_ENTRIES) + goto out; + if (ldt_info.contents == 3) { + if (oldmode) + goto out; + if (ldt_info.seg_not_present == 0) + goto out; + } + + mutex_lock(&mm->context.lock); + if (ldt_info.entry_number >= mm->context.size) { + error = alloc_ldt(¤t->mm->context, + ldt_info.entry_number + 1, 1); + if (error < 0) + goto out_unlock; + } + + /* Allow LDTs to be cleared by the user. */ + if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { + if (oldmode || LDT_empty(&ldt_info)) { + entry_1 = 0; + entry_2 = 0; + goto install; + } + } + + entry_1 = LDT_entry_a(&ldt_info); + entry_2 = LDT_entry_b(&ldt_info); + if (oldmode) + entry_2 &= ~(1 << 20); + + /* Install the new entry ... */ +install: + write_ldt_entry(mm->context.ldt, ldt_info.entry_number, entry_1, + entry_2); + error = 0; + +out_unlock: + mutex_unlock(&mm->context.lock); +out: + return error; +} + +asmlinkage int sys_modify_ldt(int func, void __user *ptr, + unsigned long bytecount) +{ + int ret = -ENOSYS; + + switch (func) { + case 0: + ret = read_ldt(ptr, bytecount); + break; + case 1: + ret = write_ldt(ptr, bytecount, 1); + break; + case 2: + ret = read_default_ldt(ptr, bytecount); + break; + case 0x11: + ret = write_ldt(ptr, bytecount, 0); + break; + } + return ret; +} diff -puN arch/x86/kernel/ldt_32.c~git-x86 /dev/null --- a/arch/x86/kernel/ldt_32.c +++ /dev/null @@ -1,248 +0,0 @@ -/* - * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds - * Copyright (C) 1999 Ingo Molnar - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#ifdef CONFIG_SMP /* avoids "defined but not used" warnig */ -static void flush_ldt(void *null) -{ - if (current->active_mm) - load_LDT(¤t->active_mm->context); -} -#endif - -static int alloc_ldt(mm_context_t *pc, int mincount, int reload) -{ - void *oldldt; - void *newldt; - int oldsize; - - if (mincount <= pc->size) - return 0; - oldsize = pc->size; - mincount = (mincount+511)&(~511); - if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE) - newldt = vmalloc(mincount*LDT_ENTRY_SIZE); - else - newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL); - - if (!newldt) - return -ENOMEM; - - if (oldsize) - memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE); - oldldt = pc->ldt; - memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE); - pc->ldt = newldt; - wmb(); - pc->size = mincount; - wmb(); - - if (reload) { -#ifdef CONFIG_SMP - cpumask_t mask; - preempt_disable(); - load_LDT(pc); - mask = cpumask_of_cpu(smp_processor_id()); - if (!cpus_equal(current->mm->cpu_vm_mask, mask)) - smp_call_function(flush_ldt, NULL, 1, 1); - preempt_enable(); -#else - load_LDT(pc); -#endif - } - if (oldsize) { - if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(oldldt); - else - kfree(oldldt); - } - return 0; -} - -static inline int copy_ldt(mm_context_t *new, mm_context_t *old) -{ - int err = alloc_ldt(new, old->size, 0); - if (err < 0) - return err; - memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE); - return 0; -} - -/* - * we do not have to muck with descriptors here, that is - * done in switch_mm() as needed. - */ -int init_new_context(struct task_struct *tsk, struct mm_struct *mm) -{ - struct mm_struct * old_mm; - int retval = 0; - - mutex_init(&mm->context.lock); - mm->context.size = 0; - old_mm = current->mm; - if (old_mm && old_mm->context.size > 0) { - mutex_lock(&old_mm->context.lock); - retval = copy_ldt(&mm->context, &old_mm->context); - mutex_unlock(&old_mm->context.lock); - } - return retval; -} - -/* - * No need to lock the MM as we are the last user - */ -void destroy_context(struct mm_struct *mm) -{ - if (mm->context.size) { - if (mm == current->active_mm) - clear_LDT(); - if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(mm->context.ldt); - else - kfree(mm->context.ldt); - mm->context.size = 0; - } -} - -static int read_ldt(void __user * ptr, unsigned long bytecount) -{ - int err; - unsigned long size; - struct mm_struct * mm = current->mm; - - if (!mm->context.size) - return 0; - if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES) - bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES; - - mutex_lock(&mm->context.lock); - size = mm->context.size*LDT_ENTRY_SIZE; - if (size > bytecount) - size = bytecount; - - err = 0; - if (copy_to_user(ptr, mm->context.ldt, size)) - err = -EFAULT; - mutex_unlock(&mm->context.lock); - if (err < 0) - goto error_return; - if (size != bytecount) { - /* zero-fill the rest */ - if (clear_user(ptr+size, bytecount-size) != 0) { - err = -EFAULT; - goto error_return; - } - } - return bytecount; -error_return: - return err; -} - -static int read_default_ldt(void __user * ptr, unsigned long bytecount) -{ - int err; - unsigned long size; - - err = 0; - size = 5*sizeof(struct desc_struct); - if (size > bytecount) - size = bytecount; - - err = size; - if (clear_user(ptr, size)) - err = -EFAULT; - - return err; -} - -static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) -{ - struct mm_struct * mm = current->mm; - __u32 entry_1, entry_2; - int error; - struct user_desc ldt_info; - - error = -EINVAL; - if (bytecount != sizeof(ldt_info)) - goto out; - error = -EFAULT; - if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info))) - goto out; - - error = -EINVAL; - if (ldt_info.entry_number >= LDT_ENTRIES) - goto out; - if (ldt_info.contents == 3) { - if (oldmode) - goto out; - if (ldt_info.seg_not_present == 0) - goto out; - } - - mutex_lock(&mm->context.lock); - if (ldt_info.entry_number >= mm->context.size) { - error = alloc_ldt(¤t->mm->context, ldt_info.entry_number+1, 1); - if (error < 0) - goto out_unlock; - } - - /* Allow LDTs to be cleared by the user. */ - if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { - if (oldmode || LDT_empty(&ldt_info)) { - entry_1 = 0; - entry_2 = 0; - goto install; - } - } - - entry_1 = LDT_entry_a(&ldt_info); - entry_2 = LDT_entry_b(&ldt_info); - if (oldmode) - entry_2 &= ~(1 << 20); - - /* Install the new entry ... */ -install: - write_ldt_entry(mm->context.ldt, ldt_info.entry_number, entry_1, entry_2); - error = 0; - -out_unlock: - mutex_unlock(&mm->context.lock); -out: - return error; -} - -asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount) -{ - int ret = -ENOSYS; - - switch (func) { - case 0: - ret = read_ldt(ptr, bytecount); - break; - case 1: - ret = write_ldt(ptr, bytecount, 1); - break; - case 2: - ret = read_default_ldt(ptr, bytecount); - break; - case 0x11: - ret = write_ldt(ptr, bytecount, 0); - break; - } - return ret; -} diff -puN arch/x86/kernel/ldt_64.c~git-x86 /dev/null --- a/arch/x86/kernel/ldt_64.c +++ /dev/null @@ -1,250 +0,0 @@ -/* - * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds - * Copyright (C) 1999 Ingo Molnar - * Copyright (C) 2002 Andi Kleen - * - * This handles calls from both 32bit and 64bit mode. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#ifdef CONFIG_SMP /* avoids "defined but not used" warnig */ -static void flush_ldt(void *null) -{ - if (current->active_mm) - load_LDT(¤t->active_mm->context); -} -#endif - -static int alloc_ldt(mm_context_t *pc, unsigned mincount, int reload) -{ - void *oldldt; - void *newldt; - unsigned oldsize; - - if (mincount <= (unsigned)pc->size) - return 0; - oldsize = pc->size; - mincount = (mincount+511)&(~511); - if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE) - newldt = vmalloc(mincount*LDT_ENTRY_SIZE); - else - newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL); - - if (!newldt) - return -ENOMEM; - - if (oldsize) - memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE); - oldldt = pc->ldt; - memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE); - wmb(); - pc->ldt = newldt; - wmb(); - pc->size = mincount; - wmb(); - if (reload) { -#ifdef CONFIG_SMP - cpumask_t mask; - - preempt_disable(); - mask = cpumask_of_cpu(smp_processor_id()); - load_LDT(pc); - if (!cpus_equal(current->mm->cpu_vm_mask, mask)) - smp_call_function(flush_ldt, NULL, 1, 1); - preempt_enable(); -#else - load_LDT(pc); -#endif - } - if (oldsize) { - if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(oldldt); - else - kfree(oldldt); - } - return 0; -} - -static inline int copy_ldt(mm_context_t *new, mm_context_t *old) -{ - int err = alloc_ldt(new, old->size, 0); - if (err < 0) - return err; - memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE); - return 0; -} - -/* - * we do not have to muck with descriptors here, that is - * done in switch_mm() as needed. - */ -int init_new_context(struct task_struct *tsk, struct mm_struct *mm) -{ - struct mm_struct * old_mm; - int retval = 0; - - mutex_init(&mm->context.lock); - mm->context.size = 0; - old_mm = current->mm; - if (old_mm && old_mm->context.size > 0) { - mutex_lock(&old_mm->context.lock); - retval = copy_ldt(&mm->context, &old_mm->context); - mutex_unlock(&old_mm->context.lock); - } - return retval; -} - -/* - * - * Don't touch the LDT register - we're already in the next thread. - */ -void destroy_context(struct mm_struct *mm) -{ - if (mm->context.size) { - if ((unsigned)mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(mm->context.ldt); - else - kfree(mm->context.ldt); - mm->context.size = 0; - } -} - -static int read_ldt(void __user * ptr, unsigned long bytecount) -{ - int err; - unsigned long size; - struct mm_struct * mm = current->mm; - - if (!mm->context.size) - return 0; - if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES) - bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES; - - mutex_lock(&mm->context.lock); - size = mm->context.size*LDT_ENTRY_SIZE; - if (size > bytecount) - size = bytecount; - - err = 0; - if (copy_to_user(ptr, mm->context.ldt, size)) - err = -EFAULT; - mutex_unlock(&mm->context.lock); - if (err < 0) - goto error_return; - if (size != bytecount) { - /* zero-fill the rest */ - if (clear_user(ptr+size, bytecount-size) != 0) { - err = -EFAULT; - goto error_return; - } - } - return bytecount; -error_return: - return err; -} - -static int read_default_ldt(void __user * ptr, unsigned long bytecount) -{ - /* Arbitrary number */ - /* x86-64 default LDT is all zeros */ - if (bytecount > 128) - bytecount = 128; - if (clear_user(ptr, bytecount)) - return -EFAULT; - return bytecount; -} - -static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) -{ - struct task_struct *me = current; - struct mm_struct * mm = me->mm; - __u32 entry_1, entry_2, *lp; - int error; - struct user_desc ldt_info; - - error = -EINVAL; - - if (bytecount != sizeof(ldt_info)) - goto out; - error = -EFAULT; - if (copy_from_user(&ldt_info, ptr, bytecount)) - goto out; - - error = -EINVAL; - if (ldt_info.entry_number >= LDT_ENTRIES) - goto out; - if (ldt_info.contents == 3) { - if (oldmode) - goto out; - if (ldt_info.seg_not_present == 0) - goto out; - } - - mutex_lock(&mm->context.lock); - if (ldt_info.entry_number >= (unsigned)mm->context.size) { - error = alloc_ldt(¤t->mm->context, ldt_info.entry_number+1, 1); - if (error < 0) - goto out_unlock; - } - - lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt); - - /* Allow LDTs to be cleared by the user. */ - if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { - if (oldmode || LDT_empty(&ldt_info)) { - entry_1 = 0; - entry_2 = 0; - goto install; - } - } - - entry_1 = LDT_entry_a(&ldt_info); - entry_2 = LDT_entry_b(&ldt_info); - if (oldmode) - entry_2 &= ~(1 << 20); - - /* Install the new entry ... */ -install: - *lp = entry_1; - *(lp+1) = entry_2; - error = 0; - -out_unlock: - mutex_unlock(&mm->context.lock); -out: - return error; -} - -asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount) -{ - int ret = -ENOSYS; - - switch (func) { - case 0: - ret = read_ldt(ptr, bytecount); - break; - case 1: - ret = write_ldt(ptr, bytecount, 1); - break; - case 2: - ret = read_default_ldt(ptr, bytecount); - break; - case 0x11: - ret = write_ldt(ptr, bytecount, 0); - break; - } - return ret; -} diff -puN arch/x86/kernel/mpparse_32.c~git-x86 arch/x86/kernel/mpparse_32.c --- a/arch/x86/kernel/mpparse_32.c~git-x86 +++ a/arch/x86/kernel/mpparse_32.c @@ -258,7 +258,7 @@ static void __init MP_ioapic_info (struc if (!(m->mpc_flags & MPC_APIC_USABLE)) return; - printk(KERN_INFO "I/O APIC #%d Version %d at 0x%lX.\n", + printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n", m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr); if (nr_ioapics >= MAX_IO_APICS) { printk(KERN_CRIT "Max # of I/O APICs (%d) exceeded (found %d).\n", @@ -405,9 +405,9 @@ static int __init smp_read_mpc(struct mp mps_oem_check(mpc, oem, str); - printk("APIC at: 0x%lX\n",mpc->mpc_lapic); + printk("APIC at: 0x%X\n", mpc->mpc_lapic); - /* + /* * Save the local APIC address (it might be non-default) -- but only * if we're not using ACPI. */ @@ -918,14 +918,14 @@ void __init mp_register_ioapic(u8 id, u3 */ mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid; mp_ioapic_routing[idx].gsi_base = gsi_base; - mp_ioapic_routing[idx].gsi_end = gsi_base + + mp_ioapic_routing[idx].gsi_end = gsi_base + io_apic_get_redir_entries(idx); - printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, " - "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, - mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, - mp_ioapic_routing[idx].gsi_base, - mp_ioapic_routing[idx].gsi_end); + printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " + "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, + mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, + mp_ioapic_routing[idx].gsi_base, + mp_ioapic_routing[idx].gsi_end); } void __init diff -puN arch/x86/kernel/nmi_32.c~git-x86 arch/x86/kernel/nmi_32.c --- a/arch/x86/kernel/nmi_32.c~git-x86 +++ a/arch/x86/kernel/nmi_32.c @@ -25,6 +25,7 @@ #include #include +#include #include "mach_traps.h" @@ -51,13 +52,13 @@ static int unknown_nmi_panic_callback(st static int endflag __initdata = 0; +#ifdef CONFIG_SMP /* The performance counters used by NMI_LOCAL_APIC don't trigger when * the CPU is idle. To make sure the NMI watchdog really ticks on all * CPUs during the test make them busy. */ static __init void nmi_cpu_busy(void *data) { -#ifdef CONFIG_SMP local_irq_enable_in_hardirq(); /* Intentionally don't use cpu_relax here. This is to make sure that the performance counter really ticks, @@ -67,8 +68,8 @@ static __init void nmi_cpu_busy(void *da care if they get somewhat less cycles. */ while (endflag == 0) mb(); -#endif } +#endif static int __init check_nmi_watchdog(void) { @@ -83,15 +84,17 @@ static int __init check_nmi_watchdog(voi prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); if (!prev_nmi_count) - return -1; + goto error; printk(KERN_INFO "Testing NMI watchdog ... "); +#ifdef CONFIG_SMP if (nmi_watchdog == NMI_LOCAL_APIC) smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0); +#endif for_each_possible_cpu(cpu) - prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count; + prev_nmi_count[cpu] = nmi_count(cpu); local_irq_enable(); mdelay((20*1000)/nmi_hz); // wait 20 ticks @@ -117,7 +120,7 @@ static int __init check_nmi_watchdog(voi if (!atomic_read(&nmi_active)) { kfree(prev_nmi_count); atomic_set(&nmi_active, -1); - return -1; + goto error; } printk("OK.\n"); @@ -128,6 +131,10 @@ static int __init check_nmi_watchdog(voi kfree(prev_nmi_count); return 0; +error: + timer_ack = !cpu_has_tsc; + + return -1; } /* This needs to happen later in boot so counters are working */ late_initcall(check_nmi_watchdog); @@ -173,7 +180,6 @@ static int lapic_nmi_resume(struct sys_d return 0; } - static struct sysdev_class nmi_sysclass = { set_kset_name("lapic_nmi"), .resume = lapic_nmi_resume, @@ -236,10 +242,10 @@ void acpi_nmi_disable(void) on_each_cpu(__acpi_nmi_disable, NULL, 0, 1); } -void setup_apic_nmi_watchdog (void *unused) +void setup_apic_nmi_watchdog(void *unused) { if (__get_cpu_var(wd_enabled)) - return; + return; /* cheap hack to support suspend/resume */ /* if cpu0 is not active neither should the other cpus */ @@ -328,7 +334,7 @@ __kprobes int nmi_watchdog_tick(struct p unsigned int sum; int touched = 0; int cpu = smp_processor_id(); - int rc=0; + int rc = 0; /* check for other users first */ if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) diff -puN arch/x86/kernel/nmi_64.c~git-x86 arch/x86/kernel/nmi_64.c --- a/arch/x86/kernel/nmi_64.c~git-x86 +++ a/arch/x86/kernel/nmi_64.c @@ -39,7 +39,7 @@ static cpumask_t backtrace_mask = CPU_MA * 0: the lapic NMI watchdog is disabled, but can be enabled */ atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ -int panic_on_timeout; +static int panic_on_timeout; unsigned int nmi_watchdog = NMI_DEFAULT; static unsigned int nmi_hz = HZ; @@ -78,22 +78,22 @@ static __init void nmi_cpu_busy(void *da } #endif -int __init check_nmi_watchdog (void) +int __init check_nmi_watchdog(void) { - int *counts; + int *prev_nmi_count; int cpu; - if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DISABLED)) + if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DISABLED)) return 0; if (!atomic_read(&nmi_active)) return 0; - counts = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); - if (!counts) + prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); + if (!prev_nmi_count) return -1; - printk(KERN_INFO "testing NMI watchdog ... "); + printk(KERN_INFO "Testing NMI watchdog ... "); #ifdef CONFIG_SMP if (nmi_watchdog == NMI_LOCAL_APIC) @@ -101,29 +101,28 @@ int __init check_nmi_watchdog (void) #endif for (cpu = 0; cpu < NR_CPUS; cpu++) - counts[cpu] = cpu_pda(cpu)->__nmi_count; + prev_nmi_count[cpu] = cpu_pda(cpu)->__nmi_count; local_irq_enable(); mdelay((20*1000)/nmi_hz); // wait 20 ticks for_each_online_cpu(cpu) { if (!per_cpu(wd_enabled, cpu)) continue; - if (cpu_pda(cpu)->__nmi_count - counts[cpu] <= 5) { + if (cpu_pda(cpu)->__nmi_count - prev_nmi_count[cpu] <= 5) { printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", - cpu, - counts[cpu], - cpu_pda(cpu)->__nmi_count); + cpu, + prev_nmi_count[cpu], + cpu_pda(cpu)->__nmi_count); per_cpu(wd_enabled, cpu) = 0; atomic_dec(&nmi_active); } } + endflag = 1; if (!atomic_read(&nmi_active)) { - kfree(counts); + kfree(prev_nmi_count); atomic_set(&nmi_active, -1); - endflag = 1; return -1; } - endflag = 1; printk("OK.\n"); /* now that we know it works we can reduce NMI frequency to @@ -131,11 +130,11 @@ int __init check_nmi_watchdog (void) if (nmi_watchdog == NMI_LOCAL_APIC) nmi_hz = lapic_adjust_nmi_hz(1); - kfree(counts); + kfree(prev_nmi_count); return 0; } -int __init setup_nmi_watchdog(char *str) +static int __init setup_nmi_watchdog(char *str) { int nmi; @@ -158,34 +157,6 @@ int __init setup_nmi_watchdog(char *str) __setup("nmi_watchdog=", setup_nmi_watchdog); - -static void __acpi_nmi_disable(void *__unused) -{ - apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); -} - -/* - * Disable timer based NMIs on all CPUs: - */ -void acpi_nmi_disable(void) -{ - if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) - on_each_cpu(__acpi_nmi_disable, NULL, 0, 1); -} - -static void __acpi_nmi_enable(void *__unused) -{ - apic_write(APIC_LVT0, APIC_DM_NMI); -} - -/* - * Enable timer based NMIs on all CPUs: - */ -void acpi_nmi_enable(void) -{ - if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) - on_each_cpu(__acpi_nmi_enable, NULL, 0, 1); -} #ifdef CONFIG_PM static int nmi_pm_active; /* nmi_active before suspend */ @@ -216,7 +187,7 @@ static struct sysdev_class nmi_sysclass }; static struct sys_device device_lapic_nmi = { - .id = 0, + .id = 0, .cls = &nmi_sysclass, }; @@ -230,7 +201,7 @@ static int __init init_lapic_nmi_sysfs(v if (nmi_watchdog != NMI_LOCAL_APIC) return 0; - if ( atomic_read(&nmi_active) < 0 ) + if (atomic_read(&nmi_active) < 0) return 0; error = sysdev_class_register(&nmi_sysclass); @@ -243,9 +214,37 @@ late_initcall(init_lapic_nmi_sysfs); #endif /* CONFIG_PM */ +static void __acpi_nmi_enable(void *__unused) +{ + apic_write(APIC_LVT0, APIC_DM_NMI); +} + +/* + * Enable timer based NMIs on all CPUs: + */ +void acpi_nmi_enable(void) +{ + if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) + on_each_cpu(__acpi_nmi_enable, NULL, 0, 1); +} + +static void __acpi_nmi_disable(void *__unused) +{ + apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); +} + +/* + * Disable timer based NMIs on all CPUs: + */ +void acpi_nmi_disable(void) +{ + if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) + on_each_cpu(__acpi_nmi_disable, NULL, 0, 1); +} + void setup_apic_nmi_watchdog(void *unused) { - if (__get_cpu_var(wd_enabled) == 1) + if (__get_cpu_var(wd_enabled)) return; /* cheap hack to support suspend/resume */ @@ -310,8 +309,9 @@ void touch_nmi_watchdog(void) } } - touch_softlockup_watchdog(); + touch_softlockup_watchdog(); } +EXPORT_SYMBOL(touch_nmi_watchdog); int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) { @@ -478,4 +478,3 @@ void __trigger_all_cpu_backtrace(void) EXPORT_SYMBOL(nmi_active); EXPORT_SYMBOL(nmi_watchdog); -EXPORT_SYMBOL(touch_nmi_watchdog); diff -puN arch/x86/kernel/paravirt_32.c~git-x86 arch/x86/kernel/paravirt_32.c --- a/arch/x86/kernel/paravirt_32.c~git-x86 +++ a/arch/x86/kernel/paravirt_32.c @@ -60,7 +60,7 @@ DEF_NATIVE(pv_irq_ops, irq_enable, "sti" DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf"); DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax"); DEF_NATIVE(pv_cpu_ops, iret, "iret"); -DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit"); +DEF_NATIVE(pv_cpu_ops, irq_enable_syscall_ret, "sti; sysexit"); DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax"); DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3"); DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax"); @@ -88,7 +88,7 @@ static unsigned native_patch(u8 type, u1 SITE(pv_irq_ops, restore_fl); SITE(pv_irq_ops, save_fl); SITE(pv_cpu_ops, iret); - SITE(pv_cpu_ops, irq_enable_sysexit); + SITE(pv_cpu_ops, irq_enable_syscall_ret); SITE(pv_mmu_ops, read_cr2); SITE(pv_mmu_ops, read_cr3); SITE(pv_mmu_ops, write_cr3); @@ -186,7 +186,7 @@ unsigned paravirt_patch_default(u8 type, /* If the operation is a nop, then nop the callsite */ ret = paravirt_patch_nop(); else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || - type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit)) + type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_syscall_ret)) /* If operation requires a jmp, then jmp */ ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len); else @@ -237,7 +237,7 @@ static void native_flush_tlb_single(unsi /* These are in entry.S */ extern void native_iret(void); -extern void native_irq_enable_sysexit(void); +extern void native_irq_enable_syscall_ret(void); static int __init print_banner(void) { @@ -384,7 +384,7 @@ struct pv_cpu_ops pv_cpu_ops = { .write_idt_entry = write_dt_entry, .load_esp0 = native_load_esp0, - .irq_enable_sysexit = native_irq_enable_sysexit, + .irq_enable_syscall_ret = native_irq_enable_syscall_ret, .iret = native_iret, .set_iopl_mask = native_set_iopl_mask, diff -puN arch/x86/kernel/pci-calgary_64.c~git-x86 arch/x86/kernel/pci-calgary_64.c --- a/arch/x86/kernel/pci-calgary_64.c~git-x86 +++ a/arch/x86/kernel/pci-calgary_64.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include @@ -183,7 +182,7 @@ static struct calgary_bus_info bus_info[ /* enable this to stress test the chip's TCE cache */ #ifdef CONFIG_IOMMU_DEBUG -int debugging __read_mostly = 1; +static int debugging = 1; static inline unsigned long verify_bit_range(unsigned long* bitmap, int expected, unsigned long start, unsigned long end) @@ -202,7 +201,7 @@ static inline unsigned long verify_bit_r return ~0UL; } #else /* debugging is disabled */ -int debugging __read_mostly = 0; +static int debugging = 0; static inline unsigned long verify_bit_range(unsigned long* bitmap, int expected, unsigned long start, unsigned long end) diff -puN arch/x86/kernel/pci-dma_64.c~git-x86 arch/x86/kernel/pci-dma_64.c --- a/arch/x86/kernel/pci-dma_64.c~git-x86 +++ a/arch/x86/kernel/pci-dma_64.c @@ -13,7 +13,6 @@ #include int iommu_merge __read_mostly = 1; -EXPORT_SYMBOL(iommu_merge); dma_addr_t bad_dma_address __read_mostly; EXPORT_SYMBOL(bad_dma_address); @@ -230,7 +229,7 @@ EXPORT_SYMBOL(dma_set_mask); * See for the iommu kernel parameter * documentation. */ -__init int iommu_setup(char *p) +static __init int iommu_setup(char *p) { iommu_merge = 1; diff -puN arch/x86/kernel/pci-gart_64.c~git-x86 arch/x86/kernel/pci-gart_64.c --- a/arch/x86/kernel/pci-gart_64.c~git-x86 +++ a/arch/x86/kernel/pci-gart_64.c @@ -1,12 +1,12 @@ /* * Dynamic DMA mapping support for AMD Hammer. - * + * * Use the integrated AGP GART in the Hammer northbridge as an IOMMU for PCI. * This allows to use PCI devices that only support 32bit addresses on systems - * with more than 4GB. + * with more than 4GB. * * See Documentation/DMA-mapping.txt for the interface specification. - * + * * Copyright 2002 Andi Kleen, SuSE Labs. * Subject to the GNU General Public License v2 only. */ @@ -37,23 +37,26 @@ #include static unsigned long iommu_bus_base; /* GART remapping area (physical) */ -static unsigned long iommu_size; /* size of remapping area bytes */ +static unsigned long iommu_size; /* size of remapping area bytes */ static unsigned long iommu_pages; /* .. and in pages */ -static u32 *iommu_gatt_base; /* Remapping table */ +static u32 *iommu_gatt_base; /* Remapping table */ -/* If this is disabled the IOMMU will use an optimized flushing strategy - of only flushing when an mapping is reused. With it true the GART is flushed - for every mapping. Problem is that doing the lazy flush seems to trigger - bugs with some popular PCI cards, in particular 3ware (but has been also - also seen with Qlogic at least). */ +/* + * If this is disabled the IOMMU will use an optimized flushing strategy + * of only flushing when an mapping is reused. With it true the GART is + * flushed for every mapping. Problem is that doing the lazy flush seems + * to trigger bugs with some popular PCI cards, in particular 3ware (but + * has been also also seen with Qlogic at least). + */ int iommu_fullflush = 1; -/* Allocation bitmap for the remapping area */ +/* Allocation bitmap for the remapping area: */ static DEFINE_SPINLOCK(iommu_bitmap_lock); -static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */ +/* Guarded by iommu_bitmap_lock: */ +static unsigned long *iommu_gart_bitmap; -static u32 gart_unmapped_entry; +static u32 gart_unmapped_entry; #define GPTE_VALID 1 #define GPTE_COHERENT 2 @@ -61,10 +64,10 @@ static u32 gart_unmapped_entry; (((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT) #define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28)) -#define to_pages(addr,size) \ +#define to_pages(addr, size) \ (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT) -#define EMERGENCY_PAGES 32 /* = 128KB */ +#define EMERGENCY_PAGES 32 /* = 128KB */ #ifdef CONFIG_AGP #define AGPEXTERN extern @@ -77,130 +80,152 @@ AGPEXTERN int agp_memory_reserved; AGPEXTERN __u32 *agp_gatt_table; static unsigned long next_bit; /* protected by iommu_bitmap_lock */ -static int need_flush; /* global flush state. set for each gart wrap */ +static int need_flush; /* global flush state. set for each gart wrap */ -static unsigned long alloc_iommu(int size) -{ +static unsigned long alloc_iommu(int size) +{ unsigned long offset, flags; - spin_lock_irqsave(&iommu_bitmap_lock, flags); - offset = find_next_zero_string(iommu_gart_bitmap,next_bit,iommu_pages,size); + spin_lock_irqsave(&iommu_bitmap_lock, flags); + offset = find_next_zero_string(iommu_gart_bitmap, next_bit, + iommu_pages, size); if (offset == -1) { need_flush = 1; - offset = find_next_zero_string(iommu_gart_bitmap,0,iommu_pages,size); + offset = find_next_zero_string(iommu_gart_bitmap, 0, + iommu_pages, size); } - if (offset != -1) { - set_bit_string(iommu_gart_bitmap, offset, size); - next_bit = offset+size; - if (next_bit >= iommu_pages) { + if (offset != -1) { + set_bit_string(iommu_gart_bitmap, offset, size); + next_bit = offset+size; + if (next_bit >= iommu_pages) { next_bit = 0; need_flush = 1; - } - } + } + } if (iommu_fullflush) need_flush = 1; - spin_unlock_irqrestore(&iommu_bitmap_lock, flags); + spin_unlock_irqrestore(&iommu_bitmap_lock, flags); + return offset; -} +} static void free_iommu(unsigned long offset, int size) -{ +{ unsigned long flags; + spin_lock_irqsave(&iommu_bitmap_lock, flags); __clear_bit_string(iommu_gart_bitmap, offset, size); spin_unlock_irqrestore(&iommu_bitmap_lock, flags); -} +} -/* +/* * Use global flush state to avoid races with multiple flushers. */ static void flush_gart(void) -{ +{ unsigned long flags; + spin_lock_irqsave(&iommu_bitmap_lock, flags); if (need_flush) { k8_flush_garts(); need_flush = 0; - } + } spin_unlock_irqrestore(&iommu_bitmap_lock, flags); -} +} #ifdef CONFIG_IOMMU_LEAK -#define SET_LEAK(x) if (iommu_leak_tab) \ - iommu_leak_tab[x] = __builtin_return_address(0); -#define CLEAR_LEAK(x) if (iommu_leak_tab) \ - iommu_leak_tab[x] = NULL; +#define SET_LEAK(x) \ + do { \ + if (iommu_leak_tab) \ + iommu_leak_tab[x] = __builtin_return_address(0);\ + } while (0) + +#define CLEAR_LEAK(x) \ + do { \ + if (iommu_leak_tab) \ + iommu_leak_tab[x] = NULL; \ + } while (0) /* Debugging aid for drivers that don't free their IOMMU tables */ -static void **iommu_leak_tab; +static void **iommu_leak_tab; static int leak_trace; static int iommu_leak_pages = 20; + static void dump_leak(void) { int i; - static int dump; - if (dump || !iommu_leak_tab) return; + static int dump; + + if (dump || !iommu_leak_tab) + return; dump = 1; - show_stack(NULL,NULL); - /* Very crude. dump some from the end of the table too */ - printk("Dumping %d pages from end of IOMMU:\n", iommu_leak_pages); - for (i = 0; i < iommu_leak_pages; i+=2) { - printk("%lu: ", iommu_pages-i); + show_stack(NULL, NULL); + + /* Very crude. dump some from the end of the table too */ + printk(KERN_DEBUG "Dumping %d pages from end of IOMMU:\n", + iommu_leak_pages); + for (i = 0; i < iommu_leak_pages; i += 2) { + printk(KERN_DEBUG "%lu: ", iommu_pages-i); printk_address((unsigned long) iommu_leak_tab[iommu_pages-i]); - printk("%c", (i+1)%2 == 0 ? '\n' : ' '); - } - printk("\n"); + printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' '); + } + printk(KERN_DEBUG "\n"); } #else -#define SET_LEAK(x) -#define CLEAR_LEAK(x) +# define SET_LEAK(x) +# define CLEAR_LEAK(x) #endif static void iommu_full(struct device *dev, size_t size, int dir) { - /* + /* * Ran out of IOMMU space for this operation. This is very bad. * Unfortunately the drivers cannot handle this operation properly. - * Return some non mapped prereserved space in the aperture and + * Return some non mapped prereserved space in the aperture and * let the Northbridge deal with it. This will result in garbage * in the IO operation. When the size exceeds the prereserved space - * memory corruption will occur or random memory will be DMAed + * memory corruption will occur or random memory will be DMAed * out. Hopefully no network devices use single mappings that big. - */ - - printk(KERN_ERR - "PCI-DMA: Out of IOMMU space for %lu bytes at device %s\n", - size, dev->bus_id); + */ + + printk(KERN_ERR + "PCI-DMA: Out of IOMMU space for %lu bytes at device %s\n", + size, dev->bus_id); if (size > PAGE_SIZE*EMERGENCY_PAGES) { if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL) panic("PCI-DMA: Memory would be corrupted\n"); - if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL) - panic(KERN_ERR "PCI-DMA: Random memory would be DMAed\n"); - } - + if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL) + panic(KERN_ERR + "PCI-DMA: Random memory would be DMAed\n"); + } #ifdef CONFIG_IOMMU_LEAK - dump_leak(); + dump_leak(); #endif -} +} -static inline int need_iommu(struct device *dev, unsigned long addr, size_t size) -{ +static inline int +need_iommu(struct device *dev, unsigned long addr, size_t size) +{ u64 mask = *dev->dma_mask; int high = addr + size > mask; int mmu = high; - if (force_iommu) - mmu = 1; - return mmu; + + if (force_iommu) + mmu = 1; + + return mmu; } -static inline int nonforced_iommu(struct device *dev, unsigned long addr, size_t size) -{ +static inline int +nonforced_iommu(struct device *dev, unsigned long addr, size_t size) +{ u64 mask = *dev->dma_mask; int high = addr + size > mask; int mmu = high; - return mmu; + + return mmu; } /* Map a single continuous physical area into the IOMMU. @@ -208,13 +233,14 @@ static inline int nonforced_iommu(struct */ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, size_t size, int dir) -{ +{ unsigned long npages = to_pages(phys_mem, size); unsigned long iommu_page = alloc_iommu(npages); int i; + if (iommu_page == -1) { if (!nonforced_iommu(dev, phys_mem, size)) - return phys_mem; + return phys_mem; if (panic_on_overflow) panic("dma_map_area overflow %lu bytes\n", size); iommu_full(dev, size, dir); @@ -229,35 +255,39 @@ static dma_addr_t dma_map_area(struct de return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK); } -static dma_addr_t gart_map_simple(struct device *dev, char *buf, - size_t size, int dir) +static dma_addr_t +gart_map_simple(struct device *dev, char *buf, size_t size, int dir) { dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir); + flush_gart(); + return map; } /* Map a single area into the IOMMU */ -static dma_addr_t gart_map_single(struct device *dev, void *addr, size_t size, int dir) +static dma_addr_t +gart_map_single(struct device *dev, void *addr, size_t size, int dir) { unsigned long phys_mem, bus; if (!dev) dev = &fallback_dev; - phys_mem = virt_to_phys(addr); + phys_mem = virt_to_phys(addr); if (!need_iommu(dev, phys_mem, size)) - return phys_mem; + return phys_mem; bus = gart_map_simple(dev, addr, size, dir); - return bus; + + return bus; } /* * Free a DMA mapping. */ static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr, - size_t size, int direction) + size_t size, int direction) { unsigned long iommu_page; int npages; @@ -266,6 +296,7 @@ static void gart_unmap_single(struct dev if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE || dma_addr >= iommu_bus_base + iommu_size) return; + iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT; npages = to_pages(dma_addr, size); for (i = 0; i < npages; i++) { @@ -278,7 +309,8 @@ static void gart_unmap_single(struct dev /* * Wrapper for pci_unmap_single working with scatterlists. */ -static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) +static void +gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) { struct scatterlist *s; int i; @@ -303,12 +335,13 @@ static int dma_map_sg_nonforce(struct de for_each_sg(sg, s, nents, i) { unsigned long addr = sg_phys(s); - if (nonforced_iommu(dev, addr, s->length)) { + + if (nonforced_iommu(dev, addr, s->length)) { addr = dma_map_area(dev, addr, s->length, dir); - if (addr == bad_dma_address) { - if (i > 0) + if (addr == bad_dma_address) { + if (i > 0) gart_unmap_sg(dev, sg, i, dir); - nents = 0; + nents = 0; sg[0].dma_length = 0; break; } @@ -317,15 +350,16 @@ static int dma_map_sg_nonforce(struct de s->dma_length = s->length; } flush_gart(); + return nents; } /* Map multiple scatterlist entries continuous into the first. */ static int __dma_map_cont(struct scatterlist *start, int nelems, - struct scatterlist *sout, unsigned long pages) + struct scatterlist *sout, unsigned long pages) { unsigned long iommu_start = alloc_iommu(pages); - unsigned long iommu_page = iommu_start; + unsigned long iommu_page = iommu_start; struct scatterlist *s; int i; @@ -335,32 +369,33 @@ static int __dma_map_cont(struct scatter for_each_sg(start, s, nelems, i) { unsigned long pages, addr; unsigned long phys_addr = s->dma_address; - + BUG_ON(s != start && s->offset); if (s == start) { sout->dma_address = iommu_bus_base; sout->dma_address += iommu_page*PAGE_SIZE + s->offset; sout->dma_length = s->length; - } else { - sout->dma_length += s->length; + } else { + sout->dma_length += s->length; } addr = phys_addr; - pages = to_pages(s->offset, s->length); - while (pages--) { - iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); + pages = to_pages(s->offset, s->length); + while (pages--) { + iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); SET_LEAK(iommu_page); addr += PAGE_SIZE; iommu_page++; } - } - BUG_ON(iommu_page - iommu_start != pages); + } + BUG_ON(iommu_page - iommu_start != pages); + return 0; } -static inline int dma_map_cont(struct scatterlist *start, int nelems, - struct scatterlist *sout, - unsigned long pages, int need) +static inline int +dma_map_cont(struct scatterlist *start, int nelems, struct scatterlist *sout, + unsigned long pages, int need) { if (!need) { BUG_ON(nelems != 1); @@ -401,15 +436,19 @@ static int gart_map_sg(struct device *de ps = NULL; /* shut up gcc */ for_each_sg(sg, s, nents, i) { dma_addr_t addr = sg_phys(s); + s->dma_address = addr; - BUG_ON(s->length == 0); + BUG_ON(s->length == 0); - nextneed = need_iommu(dev, addr, s->length); + nextneed = need_iommu(dev, addr, s->length); /* Handle the previous not yet processed entries */ if (i > start) { - /* Can only merge when the last chunk ends on a page - boundary and the new one doesn't have an offset. */ + /* + * Can only merge when the last chunk ends on a + * page boundary and the new one doesn't have an + * offset. + */ if (!iommu_merge || !nextneed || !need || s->offset || (s->length + seg_size > max_seg_size) || (ps->offset + ps->length) % PAGE_SIZE) { @@ -443,6 +482,7 @@ static int gart_map_sg(struct device *de error: flush_gart(); gart_unmap_sg(dev, sg, out, dir); + /* When it was forced or merged try again in a dumb way */ if (force_iommu || iommu_merge) { out = dma_map_sg_nonforce(dev, sg, nents, dir); @@ -451,64 +491,68 @@ error: } if (panic_on_overflow) panic("dma_map_sg: overflow on %lu pages\n", pages); + iommu_full(dev, pages << PAGE_SHIFT, dir); for_each_sg(sg, s, nents, i) s->dma_address = bad_dma_address; return 0; -} +} static int no_agp; static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) -{ - unsigned long a; - if (!iommu_size) { - iommu_size = aper_size; - if (!no_agp) - iommu_size /= 2; - } +{ + unsigned long a; + + if (!iommu_size) { + iommu_size = aper_size; + if (!no_agp) + iommu_size /= 2; + } - a = aper + iommu_size; + a = aper + iommu_size; iommu_size -= round_up(a, LARGE_PAGE_SIZE) - a; - if (iommu_size < 64*1024*1024) + if (iommu_size < 64*1024*1024) { printk(KERN_WARNING - "PCI-DMA: Warning: Small IOMMU %luMB. Consider increasing the AGP aperture in BIOS\n",iommu_size>>20); - + "PCI-DMA: Warning: Small IOMMU %luMB." + " Consider increasing the AGP aperture in BIOS\n", + iommu_size >> 20); + } + return iommu_size; -} +} -static __init unsigned read_aperture(struct pci_dev *dev, u32 *size) -{ - unsigned aper_size = 0, aper_base_32; +static __init unsigned read_aperture(struct pci_dev *dev, u32 *size) +{ + unsigned aper_size = 0, aper_base_32, aper_order; u64 aper_base; - unsigned aper_order; - pci_read_config_dword(dev, 0x94, &aper_base_32); + pci_read_config_dword(dev, 0x94, &aper_base_32); pci_read_config_dword(dev, 0x90, &aper_order); - aper_order = (aper_order >> 1) & 7; + aper_order = (aper_order >> 1) & 7; - aper_base = aper_base_32 & 0x7fff; + aper_base = aper_base_32 & 0x7fff; aper_base <<= 25; - aper_size = (32 * 1024 * 1024) << aper_order; - if (aper_base + aper_size > 0x100000000UL || !aper_size) + aper_size = (32 * 1024 * 1024) << aper_order; + if (aper_base + aper_size > 0x100000000UL || !aper_size) aper_base = 0; *size = aper_size; return aper_base; -} +} -/* +/* * Private Northbridge GATT initialization in case we cannot use the - * AGP driver for some reason. + * AGP driver for some reason. */ static __init int init_k8_gatt(struct agp_kern_info *info) -{ +{ + unsigned aper_size, gatt_size, new_aper_size; + unsigned aper_base, new_aper_base; struct pci_dev *dev; void *gatt; - unsigned aper_base, new_aper_base; - unsigned aper_size, gatt_size, new_aper_size; int i; printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); @@ -516,75 +560,77 @@ static __init int init_k8_gatt(struct ag dev = NULL; for (i = 0; i < num_k8_northbridges; i++) { dev = k8_northbridges[i]; - new_aper_base = read_aperture(dev, &new_aper_size); - if (!new_aper_base) - goto nommu; - - if (!aper_base) { + new_aper_base = read_aperture(dev, &new_aper_size); + if (!new_aper_base) + goto nommu; + + if (!aper_base) { aper_size = new_aper_size; aper_base = new_aper_base; - } - if (aper_size != new_aper_size || aper_base != new_aper_base) + } + if (aper_size != new_aper_size || aper_base != new_aper_base) goto nommu; } if (!aper_base) - goto nommu; + goto nommu; info->aper_base = aper_base; - info->aper_size = aper_size>>20; + info->aper_size = aper_size >> 20; - gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32); - gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size)); - if (!gatt) + gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32); + gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size)); + if (!gatt) panic("Cannot allocate GATT table"); - if (change_page_attr_addr((unsigned long)gatt, gatt_size >> PAGE_SHIFT, PAGE_KERNEL_NOCACHE)) + if (change_page_attr_addr((unsigned long)gatt, gatt_size >> PAGE_SHIFT, + PAGE_KERNEL_NOCACHE)) panic("Could not set GART PTEs to uncacheable pages"); global_flush_tlb(); - memset(gatt, 0, gatt_size); + memset(gatt, 0, gatt_size); agp_gatt_table = gatt; for (i = 0; i < num_k8_northbridges; i++) { - u32 ctl; - u32 gatt_reg; + u32 gatt_reg; + u32 ctl; dev = k8_northbridges[i]; - gatt_reg = __pa(gatt) >> 12; - gatt_reg <<= 4; + gatt_reg = __pa(gatt) >> 12; + gatt_reg <<= 4; pci_write_config_dword(dev, 0x98, gatt_reg); - pci_read_config_dword(dev, 0x90, &ctl); + pci_read_config_dword(dev, 0x90, &ctl); ctl |= 1; ctl &= ~((1<<4) | (1<<5)); - pci_write_config_dword(dev, 0x90, ctl); + pci_write_config_dword(dev, 0x90, ctl); } flush_gart(); - - printk("PCI-DMA: aperture base @ %x size %u KB\n",aper_base, aper_size>>10); + + printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", + aper_base, aper_size>>10); return 0; nommu: - /* Should not happen anymore */ + /* Should not happen anymore */ printk(KERN_ERR "PCI-DMA: More than 4GB of RAM and no IOMMU\n" KERN_ERR "PCI-DMA: 32bit PCI IO may malfunction.\n"); - return -1; -} + return -1; +} extern int agp_amd64_init(void); static const struct dma_mapping_ops gart_dma_ops = { - .mapping_error = NULL, - .map_single = gart_map_single, - .map_simple = gart_map_simple, - .unmap_single = gart_unmap_single, - .sync_single_for_cpu = NULL, - .sync_single_for_device = NULL, - .sync_single_range_for_cpu = NULL, - .sync_single_range_for_device = NULL, - .sync_sg_for_cpu = NULL, - .sync_sg_for_device = NULL, - .map_sg = gart_map_sg, - .unmap_sg = gart_unmap_sg, + .mapping_error = NULL, + .map_single = gart_map_single, + .map_simple = gart_map_simple, + .unmap_single = gart_unmap_single, + .sync_single_for_cpu = NULL, + .sync_single_for_device = NULL, + .sync_single_range_for_cpu = NULL, + .sync_single_range_for_device = NULL, + .sync_sg_for_cpu = NULL, + .sync_sg_for_device = NULL, + .map_sg = gart_map_sg, + .unmap_sg = gart_unmap_sg, }; void gart_iommu_shutdown(void) @@ -595,23 +641,23 @@ void gart_iommu_shutdown(void) if (no_agp && (dma_ops != &gart_dma_ops)) return; - for (i = 0; i < num_k8_northbridges; i++) { - u32 ctl; + for (i = 0; i < num_k8_northbridges; i++) { + u32 ctl; - dev = k8_northbridges[i]; - pci_read_config_dword(dev, 0x90, &ctl); + dev = k8_northbridges[i]; + pci_read_config_dword(dev, 0x90, &ctl); - ctl &= ~1; + ctl &= ~1; - pci_write_config_dword(dev, 0x90, ctl); - } + pci_write_config_dword(dev, 0x90, ctl); + } } void __init gart_iommu_init(void) -{ +{ struct agp_kern_info info; - unsigned long aper_size; unsigned long iommu_start; + unsigned long aper_size; unsigned long scratch; long i; @@ -621,14 +667,14 @@ void __init gart_iommu_init(void) } #ifndef CONFIG_AGP_AMD64 - no_agp = 1; + no_agp = 1; #else /* Makefile puts PCI initialization via subsys_initcall first. */ /* Add other K8 AGP bridge drivers here */ - no_agp = no_agp || - (agp_amd64_init() < 0) || + no_agp = no_agp || + (agp_amd64_init() < 0) || (agp_copy_info(agp_bridge, &info) < 0); -#endif +#endif if (swiotlb) return; @@ -650,77 +696,78 @@ void __init gart_iommu_init(void) } printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); - aper_size = info.aper_size * 1024 * 1024; - iommu_size = check_iommu_size(info.aper_base, aper_size); - iommu_pages = iommu_size >> PAGE_SHIFT; - - iommu_gart_bitmap = (void*)__get_free_pages(GFP_KERNEL, - get_order(iommu_pages/8)); - if (!iommu_gart_bitmap) - panic("Cannot allocate iommu bitmap\n"); + aper_size = info.aper_size * 1024 * 1024; + iommu_size = check_iommu_size(info.aper_base, aper_size); + iommu_pages = iommu_size >> PAGE_SHIFT; + + iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL, + get_order(iommu_pages/8)); + if (!iommu_gart_bitmap) + panic("Cannot allocate iommu bitmap\n"); memset(iommu_gart_bitmap, 0, iommu_pages/8); #ifdef CONFIG_IOMMU_LEAK - if (leak_trace) { - iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL, + if (leak_trace) { + iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL, get_order(iommu_pages*sizeof(void *))); - if (iommu_leak_tab) - memset(iommu_leak_tab, 0, iommu_pages * 8); + if (iommu_leak_tab) + memset(iommu_leak_tab, 0, iommu_pages * 8); else - printk("PCI-DMA: Cannot allocate leak trace area\n"); - } + printk(KERN_DEBUG + "PCI-DMA: Cannot allocate leak trace area\n"); + } #endif - /* + /* * Out of IOMMU space handling. - * Reserve some invalid pages at the beginning of the GART. - */ - set_bit_string(iommu_gart_bitmap, 0, EMERGENCY_PAGES); + * Reserve some invalid pages at the beginning of the GART. + */ + set_bit_string(iommu_gart_bitmap, 0, EMERGENCY_PAGES); - agp_memory_reserved = iommu_size; + agp_memory_reserved = iommu_size; printk(KERN_INFO "PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n", - iommu_size>>20); + iommu_size >> 20); - iommu_start = aper_size - iommu_size; - iommu_bus_base = info.aper_base + iommu_start; + iommu_start = aper_size - iommu_size; + iommu_bus_base = info.aper_base + iommu_start; bad_dma_address = iommu_bus_base; iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT); - /* + /* * Unmap the IOMMU part of the GART. The alias of the page is * always mapped with cache enabled and there is no full cache * coherency across the GART remapping. The unmapping avoids * automatic prefetches from the CPU allocating cache lines in * there. All CPU accesses are done via the direct mapping to * the backing memory. The GART address is only used by PCI - * devices. + * devices. */ clear_kernel_mapping((unsigned long)__va(iommu_bus_base), iommu_size); - /* - * Try to workaround a bug (thanks to BenH) - * Set unmapped entries to a scratch page instead of 0. + /* + * Try to workaround a bug (thanks to BenH) + * Set unmapped entries to a scratch page instead of 0. * Any prefetches that hit unmapped entries won't get an bus abort * then. */ - scratch = get_zeroed_page(GFP_KERNEL); - if (!scratch) + scratch = get_zeroed_page(GFP_KERNEL); + if (!scratch) panic("Cannot allocate iommu scratch page"); gart_unmapped_entry = GPTE_ENCODE(__pa(scratch)); - for (i = EMERGENCY_PAGES; i < iommu_pages; i++) + for (i = EMERGENCY_PAGES; i < iommu_pages; i++) iommu_gatt_base[i] = gart_unmapped_entry; flush_gart(); dma_ops = &gart_dma_ops; -} +} void __init gart_parse_options(char *p) { int arg; #ifdef CONFIG_IOMMU_LEAK - if (!strncmp(p,"leak",4)) { + if (!strncmp(p, "leak", 4)) { leak_trace = 1; p += 4; if (*p == '=') ++p; @@ -730,18 +777,18 @@ void __init gart_parse_options(char *p) #endif if (isdigit(*p) && get_option(&p, &arg)) iommu_size = arg; - if (!strncmp(p, "fullflush",8)) + if (!strncmp(p, "fullflush", 8)) iommu_fullflush = 1; - if (!strncmp(p, "nofullflush",11)) + if (!strncmp(p, "nofullflush", 11)) iommu_fullflush = 0; - if (!strncmp(p,"noagp",5)) + if (!strncmp(p, "noagp", 5)) no_agp = 1; - if (!strncmp(p, "noaperture",10)) + if (!strncmp(p, "noaperture", 10)) fix_aperture = 0; /* duplicated from pci-dma.c */ - if (!strncmp(p,"force",5)) + if (!strncmp(p, "force", 5)) gart_iommu_aperture_allowed = 1; - if (!strncmp(p,"allowed",7)) + if (!strncmp(p, "allowed", 7)) gart_iommu_aperture_allowed = 1; if (!strncmp(p, "memaper", 7)) { fallback_aper_force = 1; diff -puN arch/x86/kernel/pci-swiotlb_64.c~git-x86 arch/x86/kernel/pci-swiotlb_64.c --- a/arch/x86/kernel/pci-swiotlb_64.c~git-x86 +++ a/arch/x86/kernel/pci-swiotlb_64.c @@ -10,7 +10,6 @@ #include int swiotlb __read_mostly; -EXPORT_SYMBOL(swiotlb); const struct dma_mapping_ops swiotlb_dma_ops = { .mapping_error = swiotlb_dma_mapping_error, diff -puN arch/x86/kernel/pmtimer_64.c~git-x86 arch/x86/kernel/pmtimer_64.c --- a/arch/x86/kernel/pmtimer_64.c~git-x86 +++ a/arch/x86/kernel/pmtimer_64.c @@ -19,13 +19,13 @@ #include #include #include +#include + #include #include #include #include -#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */ - static inline u32 cyc2us(u32 cycles) { /* The Power Management Timer ticks at 3.579545 ticks per microsecond. diff -puN arch/x86/kernel/process_32.c~git-x86 arch/x86/kernel/process_32.c --- a/arch/x86/kernel/process_32.c~git-x86 +++ a/arch/x86/kernel/process_32.c @@ -55,6 +55,7 @@ #include #include +#include asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); @@ -971,3 +972,10 @@ unsigned long arch_align_stack(unsigned sp -= get_random_int() % 8192; return sp & ~0xf; } + +unsigned long arch_randomize_brk(struct mm_struct *mm) +{ + unsigned long range_end = mm->brk + 0x02000000; + return randomize_range(mm->brk, range_end, 0) ? : mm->brk; +} + diff -puN arch/x86/kernel/process_64.c~git-x86 arch/x86/kernel/process_64.c --- a/arch/x86/kernel/process_64.c~git-x86 +++ a/arch/x86/kernel/process_64.c @@ -72,13 +72,6 @@ void idle_notifier_register(struct notif { atomic_notifier_chain_register(&idle_notifier, n); } -EXPORT_SYMBOL_GPL(idle_notifier_register); - -void idle_notifier_unregister(struct notifier_block *n) -{ - atomic_notifier_chain_unregister(&idle_notifier, n); -} -EXPORT_SYMBOL(idle_notifier_unregister); void enter_idle(void) { @@ -903,3 +896,10 @@ unsigned long arch_align_stack(unsigned sp -= get_random_int() % 8192; return sp & ~0xf; } + +unsigned long arch_randomize_brk(struct mm_struct *mm) +{ + unsigned long range_end = mm->brk + 0x02000000; + return randomize_range(mm->brk, range_end, 0) ? : mm->brk; +} + diff -puN arch/x86/kernel/reboot_64.c~git-x86 arch/x86/kernel/reboot_64.c --- a/arch/x86/kernel/reboot_64.c~git-x86 +++ a/arch/x86/kernel/reboot_64.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -28,7 +29,8 @@ EXPORT_SYMBOL(pm_power_off); static long no_idt[3]; static enum { BOOT_TRIPLE = 't', - BOOT_KBD = 'k' + BOOT_KBD = 'k', + BOOT_ACPI = 'a' } reboot_type = BOOT_KBD; static int reboot_mode = 0; int reboot_force; @@ -38,6 +40,7 @@ int reboot_force; cold Set the cold reboot flag triple Force a triple fault (init) kbd Use the keyboard controller. cold reset (default) + acpi Use the RESET_REG in the FADT force Avoid anything that could hang. */ static int __init reboot_setup(char *str) @@ -53,6 +56,7 @@ static int __init reboot_setup(char *str break; case 't': + case 'a': case 'b': case 'k': reboot_type = *str; @@ -142,6 +146,11 @@ void machine_emergency_restart(void) reboot_type = BOOT_KBD; break; + + case BOOT_ACPI: + acpi_reboot(); + reboot_type = BOOT_KBD; + break; } } } diff -puN arch/x86/kernel/reboot_fixups_32.c~git-x86 arch/x86/kernel/reboot_fixups_32.c --- a/arch/x86/kernel/reboot_fixups_32.c~git-x86 +++ a/arch/x86/kernel/reboot_fixups_32.c @@ -30,6 +30,19 @@ static void cs5536_warm_reset(struct pci udelay(50); /* shouldn't get here but be safe and spin a while */ } +static void rdc321x_reset(struct pci_dev *dev) +{ + unsigned i; + /* Voluntary reset the watchdog timer */ + outl(0x80003840, 0xCF8); + /* Generate a CPU reset on next tick */ + i = inl(0xCFC); + /* Use the minimum timer resolution */ + i |= 0x1600; + outl(i, 0xCFC); + outb(1, 0x92); +} + struct device_fixup { unsigned int vendor; unsigned int device; @@ -40,6 +53,7 @@ static struct device_fixup fixups_table[ { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset }, { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, cs5536_warm_reset }, { PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE, cs5530a_warm_reset }, +{ PCI_VENDOR_ID_RDC, PCI_DEVICE_ID_RDC_R6030, rdc321x_reset }, }; /* diff -puN /dev/null arch/x86/kernel/rtc.c --- /dev/null +++ a/arch/x86/kernel/rtc.c @@ -0,0 +1,196 @@ +/* + * RTC related functions + */ +#include +#include +#include + +#include + +#ifdef CONFIG_X86_32 +# define CMOS_YEARS_OFFS 1900 +/* + * This is a special lock that is owned by the CPU and holds the index + * register we are working with. It is required for NMI access to the + * CMOS/RTC registers. See include/asm-i386/mc146818rtc.h for details. + */ +volatile unsigned long cmos_lock = 0; +EXPORT_SYMBOL(cmos_lock); +#else +/* + * x86-64 systems only exists since 2002. + * This will work up to Dec 31, 2100 + */ +# define CMOS_YEARS_OFFS 2000 +#endif + +DEFINE_SPINLOCK(rtc_lock); +EXPORT_SYMBOL(rtc_lock); + +/* + * In order to set the CMOS clock precisely, set_rtc_mmss has to be + * called 500 ms after the second nowtime has started, because when + * nowtime is written into the registers of the CMOS clock, it will + * jump to the next second precisely 500 ms later. Check the Motorola + * MC146818A or Dallas DS12887 data sheet for details. + * + * BUG: This routine does not handle hour overflow properly; it just + * sets the minutes. Usually you'll only notice that after reboot! + */ +int mach_set_rtc_mmss(unsigned long nowtime) +{ + int retval = 0; + int real_seconds, real_minutes, cmos_minutes; + unsigned char save_control, save_freq_select; + + /* tell the clock it's being set */ + save_control = CMOS_READ(RTC_CONTROL); + CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); + + /* stop and reset prescaler */ + save_freq_select = CMOS_READ(RTC_FREQ_SELECT); + CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); + + cmos_minutes = CMOS_READ(RTC_MINUTES); + if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) + BCD_TO_BIN(cmos_minutes); + + /* + * since we're only adjusting minutes and seconds, + * don't interfere with hour overflow. This avoids + * messing with unknown time zones but requires your + * RTC not to be off by more than 15 minutes + */ + real_seconds = nowtime % 60; + real_minutes = nowtime / 60; + /* correct for half hour time zone */ + if (((abs(real_minutes - cmos_minutes) + 15)/30) & 1) + real_minutes += 30; + real_minutes %= 60; + + if (abs(real_minutes - cmos_minutes) < 30) { + if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { + BIN_TO_BCD(real_seconds); + BIN_TO_BCD(real_minutes); + } + CMOS_WRITE(real_seconds,RTC_SECONDS); + CMOS_WRITE(real_minutes,RTC_MINUTES); + } else { + printk(KERN_WARNING + "set_rtc_mmss: can't update from %d to %d\n", + cmos_minutes, real_minutes); + retval = -1; + } + + /* The following flags have to be released exactly in this order, + * otherwise the DS12887 (popular MC146818A clone with integrated + * battery and quartz) will not reset the oscillator and will not + * update precisely 500 ms later. You won't find this mentioned in + * the Dallas Semiconductor data sheets, but who believes data + * sheets anyway ... -- Markus Kuhn + */ + CMOS_WRITE(save_control, RTC_CONTROL); + CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); + + return retval; +} + +unsigned long mach_get_cmos_time(void) +{ + unsigned int year, mon, day, hour, min, sec, century = 0; + + /* + * If UIP is clear, then we have >= 244 microseconds before + * RTC registers will be updated. Spec sheet says that this + * is the reliable way to read RTC - registers. If UIP is set + * then the register access might be invalid. + */ + while ((CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP)) + cpu_relax(); + + sec = CMOS_READ(RTC_SECONDS); + min = CMOS_READ(RTC_MINUTES); + hour = CMOS_READ(RTC_HOURS); + day = CMOS_READ(RTC_DAY_OF_MONTH); + mon = CMOS_READ(RTC_MONTH); + year = CMOS_READ(RTC_YEAR); + +#if defined(CONFIG_ACPI) && defined(CONFIG_X86_64) + /* CHECKME: Is this really 64bit only ??? */ + if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID && + acpi_gbl_FADT.century) + century = CMOS_READ(acpi_gbl_FADT.century); +#endif + + if (RTC_ALWAYS_BCD || !(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY)) { + BCD_TO_BIN(sec); + BCD_TO_BIN(min); + BCD_TO_BIN(hour); + BCD_TO_BIN(day); + BCD_TO_BIN(mon); + BCD_TO_BIN(year); + } + + if (century) { + BCD_TO_BIN(century); + year += century * 100; + printk(KERN_INFO "Extended CMOS year: %d\n", century * 100); + } else { + year += CMOS_YEARS_OFFS; + if (year < 1970) + year += 100; + } + + return mktime(year, mon, day, hour, min, sec); +} + +/* Routines for accessing the CMOS RAM/RTC. */ +unsigned char rtc_cmos_read(unsigned char addr) +{ + unsigned char val; + + lock_cmos_prefix(addr); + outb_p(addr, RTC_PORT(0)); + val = inb_p(RTC_PORT(1)); + lock_cmos_suffix(addr); + return val; +} +EXPORT_SYMBOL(rtc_cmos_read); + +void rtc_cmos_write(unsigned char val, unsigned char addr) +{ + lock_cmos_prefix(addr); + outb_p(addr, RTC_PORT(0)); + outb_p(val, RTC_PORT(1)); + lock_cmos_suffix(addr); +} +EXPORT_SYMBOL(rtc_cmos_write); + +static int set_rtc_mmss(unsigned long nowtime) +{ + int retval; + unsigned long flags; + + spin_lock_irqsave(&rtc_lock, flags); + retval = set_wallclock(nowtime); + spin_unlock_irqrestore(&rtc_lock, flags); + + return retval; +} + +/* not static: needed by APM */ +unsigned long read_persistent_clock(void) +{ + unsigned long retval, flags; + + spin_lock_irqsave(&rtc_lock, flags); + retval = get_wallclock(); + spin_unlock_irqrestore(&rtc_lock, flags); + + return retval; +} + +int update_persistent_clock(struct timespec now) +{ + return set_rtc_mmss(now.tv_sec); +} diff -puN arch/x86/kernel/setup_32.c~git-x86 arch/x86/kernel/setup_32.c --- a/arch/x86/kernel/setup_32.c~git-x86 +++ a/arch/x86/kernel/setup_32.c @@ -44,6 +44,7 @@ #include #include #include +#include #include