GIT 91002b6698367299b0cd0958a1c9ceadf36f0e16 git+ssh://master.kernel.org/pub/scm/linux/kernel/git/aegl/linux-2.6.git#test

commit 7c6c66362941df847957766ad133ff5fde67579c
Author: Robin Holt <holt@sgi.com>
Date:   Thu Feb 2 12:30:21 2006 -0600

    [IA64-SGI] Fix XPC code which sleeps with spin_lock_irqsave().
    
    During some testing, we got a warning about trying to allocate
    memory while holding a lock.  This fixes that problem.
    
    Signed-off-by: Robin Holt <holt@sgi.com>
    Acked-by: Dean Nelson <dcn@sgi.com>
    Signed-off-by: Tony Luck <tony.luck@intel.com>

commit 9a52bbed905fa41ca10f4db2e845b588f0fdfbef
Author: Jes Sorensen <jes@sgi.com>
Date:   Thu Feb 2 05:50:05 2006 -0500

    [IA64-SGI] include/asm-ia64/sn/intr.h more sn2 housekeeping
    
    House keeping - eliminate unneeded parenthesis in macro defines.
    
    Signed-off-by: Jes Sorensen <jes@sgi.com>
    Signed-off-by: Tony Luck <tony.luck@intel.com>

commit 2fcc3db0ccee9b47df7a4f732e6624f4f643c035
Author: Jes Sorensen <jes@sgi.com>
Date:   Thu Feb 2 05:15:51 2006 -0500

    [IA64-SGI] sn2 housekeeping
    
    Maintenance patch:
     - Add missing __init calls
     - Do not zero initialize global variables
     - No need to typecast function call returns to void
     - Some formatting
    
    Signed-off-by: Jes Sorensen <jes@sgi.com>
    Signed-off-by: Tony Luck <tony.luck@intel.com>

commit a58786917ce23c2a26c3e099c3cdba32a35eeceb
Author: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date:   Mon Jan 30 16:32:31 2006 -0700

    [IA64] avoid broken SAL_CACHE_FLUSH implementations
    
    If SAL_CACHE_FLUSH drops interrupts, complain about it and fall back to
    using PAL_CACHE_FLUSH instead.
    
    This is to work around a defect in HP rx5670 firmware: when an interrupt
    occurs during SAL_CACHE_FLUSH, SAL drops the interrupt but leaves it marked
    "in-service", which leaves the interrupt (and others of equal or lower
    priority) masked.
    
    Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
    Signed-off-by: Tony Luck <tony.luck@intel.com>

commit af14aca90e0cdfccd71f9947c45b6ea2cf321dcb
Author: Kyle McMartin <kyle@mcmartin.ca>
Date:   Sat Jan 28 00:02:52 2006 -0500

    [IA64] Remove stale comment from ia64/Kconfig
    
    Somehow I doubt this comment is meant to be here anymore... It's
    been floating after the L1_CACHE_SHIFT entry since before Linux
    moved to bitkeeper.
    
    Signed-off-by: Kyle McMartin <kyle@parisc-linux.org>
    Signed-off-by: Tony Luck <tony.luck@intel.com>

commit a80dcc0b9660b01a2cc1c0faa8f6095970f38730
Author: Mark Maule <maule@sgi.com>
Date:   Fri Jan 27 13:59:07 2006 -0600

    [IA64-SGI] disable msi for all altix pci devices
    
    Temporary patch to make pci_enable_msi() fail gracefully on altix.  Will be
    removed after 2.6.16 releases and the msi abstraction patches start flowing.
    
    Signed-off-by: Mark Maule <maule@sgi.com>
    Signed-off-by: Tony Luck <tony.luck@intel.com>

commit 689388bbf8c5c1966b6a67fa427299f90cf83b99
Author: Mark Maule <maule@sgi.com>
Date:   Fri Jan 27 11:55:34 2006 -0600

    [IA64-SGI] fix smp_affinity redirection when using CONFIG_PCI_MSI
    
    Redirecting interrupts using smp_affinity on altix does not work on kernels
    built with CONFIG_PCI_MSI.  The problem is that move_irq() turns into a noop
    if MSI is built in.  This patch calls move_native_irq() instead of move_irq()
    to get around that.
    
    Signed-off-by: Mark Maule <maule@sgi.com>
    Signed-off-by: Tony Luck <tony.luck@intel.com>

commit f8efa27662532ad5adb2790bfc3f4c78e019cfad
Author: Chen, Kenneth W <kenneth.w.chen@intel.com>
Date:   Thu Jan 26 18:24:59 2006 -0800

    [IA64] remove staled comments in asm/system.h
    
    With the recent optimization made to wrap_mmu_context function,
    we don't hold tasklist_lock anymore when wrapping context id.
    The comments in asm/system.h must fall through the crack earlier.
    Remove staled comments.
    
    I believe it is still beneficial to unlock the runqueue lock
    across context switch. So leave __ARCH_WANT_UNLOCKED_CTXSW on.
    
    Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
    Signed-off-by: Tony Luck <tony.luck@intel.com>

commit e08e6c521355cd33e647b2f739885bc3050eead6
Author: Brent Casavant <bcasavan@sgi.com>
Date:   Thu Jan 26 15:55:52 2006 -0800

    [IA64] hooks to wait for mmio writes to drain when migrating processes
    
    On SN2, MMIO writes which are issued from separate processors are not
    guaranteed to arrive in any particular order at the IO hardware.  When
    performing such writes from the kernel this is not a problem, as a
    kernel thread will not migrate to another CPU during execution, and
    mmiowb() calls can guarantee write ordering when control of the IO
    resource is allowed to move between threads.
    
    However, when MMIO writes can be performed from user space (e.g. DRM)
    there are no such guarantees and mechanisms, as the process may
    context-switch at any time, and may migrate to a different CPU as part
    of the switch.  For such programs/hardware to operate correctly, it is
    required that the MMIO writes from the old CPU be accepted by the IO
    hardware before subsequent writes from the new CPU can be issued.
    
    The following patch implements this behavior on SN2 by waiting for a
    Shub register to indicate that these writes have been accepted.  This
    is placed in the context switch-in path, and only performs the wait
    when the newly scheduled task changes CPUs.
    
    Signed-off-by: Prarit Bhargava <prarit@sgi.com>
    Signed-off-by: Brent Casavant <bcasavan@sgi.com>

commit 61a34a024fcd61ef7207405b2e4cef2c073b220c
Author: Jack Steiner <steiner@sgi.com>
Date:   Thu Jan 26 15:03:41 2006 -0800

    [IA64-SGI] Update TLB flushing code for SN platform
    
    This patch finishes support for SHUB2 (the new chipset). Most of the
    changes are performance related. A few changes are workarounds for
    "interesting" chipset features.
    
    Some temporary debugging code has also been deleted.
    
    Signed-off-by: Jack Steiner <steiner@sgi.com>
    Signed-off-by: Tony Luck <tony.luck@intel.com>

commit 13938ca7a1ad9a4788cf73309f187d99c97ddfde
Author: Mark Maule <maule@sgi.com>
Date:   Thu Jan 26 14:46:39 2006 -0600

    [IA64-SGI] driver bugfixes and hardware workarounds for CE1.0 asic
    
    Various bugfixes and hardware bug workarounds necessary for the rev 1.0 version
    of the altix TIO CE asic.
    
    Signed-off-by: Mark Maule <maule@sgi.com>
    Signed-off-by: Tony Luck <tony.luck@intel.com>

commit 28ff6b9b2fc01d2c2746c72ce8af1729344fae27
Author: Aaron Young <ayoung@google.engr.sgi.com>
Date:   Mon Jan 23 09:00:51 2006 -0800

    [IA64-SGI] Handle SC env. powerdown events
    
    Handle system controller power down pending events
    on SN systems. This allows the system to gracefully shutdown
    before the system controller removes power due to
    an adverse environmental condition.
    
    Signed-off-by: Aaron Young <ayoung@sgi.com>
    Signed-off-by: Tony Luck <tony.luck@intel.com>

commit b0a06623dc4caf6dfb6a84419507643471676d20
Author: Keith Owens <kaos@sgi.com>
Date:   Sun Jan 22 10:55:25 2006 +1100

    [IA64] Delete MCA/INIT sigdelayed code
    
    The only user of the MCA/INIT sigdelayed code (SGI's I/O probing) has
    moved from the kernel into SAL.  Delete the MCA/INIT sigdelayed code.
    
    Signed-off-by: Keith Owens <kaos@sgi.com>
    Signed-off-by: Tony Luck <tony.luck@intel.com>

commit 466575f4e975db1207c5e1a7be34aeaec6ddba1e
Author: Jes Sorensen <jes@sgi.com>
Date:   Thu Jan 19 04:54:00 2006 -0500

    [PATCH] drivers/sn/ must be entered for CONFIG_SGI_IOC3
    
    Actually I think this is more appropriate so we don't end up with 17
    cases that add drivers/sn to the build lib.
    Include drivers/sn when CONFIG_IA64_SGI_SN2 or CONFIG_IA64_GENERIC
    is enabled.
    
    Acked-by: Dave Jones <davej@redhat.com>
    Signed-off-by: Jes Sorensen <jes@sgi.com>
    Signed-off-by: Tony Luck <tony.luck@intel.com>

commit 103ec0910d6b9401b7f72ba3ac71fed88306d2d0
Author: Keith Owens <kaos@sgi.com>
Date:   Wed Jan 18 15:38:14 2006 +1100

    [IA64-SGI] Recursive flags do not work for selective builds
    
    arch/ia64/sn/Makefile sets CPPFLAGS, expecting that setting to
    propogate to all the subdirectories.  For a normal build with its
    recursive descent it does work, but doing a selective build like
    'make arch/ia64/sn/kernel/io_init.i' does not do a recursive descent,
    it goes directly to arch/ia64/sn/kernel/Makefile so the flags do not
    get set.
    
    To support selective builds, set the flags in all the subordinate Makefiles.
    
    Signed-off-by: Keith Owens <kaos@sgi.com>
    Signed-off-by: Tony Luck <tony.luck@intel.com>

commit 6e586f32931d6c98431d54cd0430d4366195b0ba
Author: Jes Sorensen <jes@sgi.com>
Date:   Tue Jan 17 12:24:39 2006 -0500

    [IA64-SGI] sem2mutex ioc4.c
    
    Convert to use a single mutex instead of two rwsems as this isn't
    performance critical.
    
    Signed-off-by: Jes Sorensen <jes@sgi.com>
    Signed-off-by: Brent Casavant <bcasavan@sgi.com>
    Signed-off-by: Tony Luck <tony.luck@intel.com>

commit a454c2f3d1fd1cab7073b53c6c14d6d4b61f4e09
Author: Chen, Kenneth W <kenneth.w.chen@intel.com>
Date:   Wed Jan 11 17:11:09 2006 -0800

    [IA64] implement ia64 specific mutex primitives
    
    Implement ia64 optimized mutex primitives.  It properly uses
    acquire/release memory ordering semantics in lock/unlock path.
    2nd version making them all static inline functions.
    
    Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
    Signed-off-by: Tony Luck <tony.luck@intel.com>

commit b88e926584bf100bc23f5e76b7b674d4257edcb0
Author: Ashok Raj <ashok.raj@intel.com>
Date:   Thu Jan 19 16:18:47 2006 -0800

    [IA64] Fix UP build with BSP removal support.
    
    Causes undefined force_cpei_retarget defined in arch/ia64/kernel/smpboot.c
    Push the unneeded code inside #ifdef CONFIG_HOTPLUG_CPU.
    
    Signed-off-by: Ashok Raj <ashok.raj@intel.com>
    Signed-off-by: Tony Luck <tony.luck@intel.com>

commit ff741906ad3cf4b8ca1a958acb013a97a6381ca2
Author: Ashok Raj <ashok.raj@intel.com>
Date:   Fri Nov 11 14:32:40 2005 -0800

    [IA64] support for cpu0 removal
    
    here is the BSP removal support for IA64. Its pretty much the same thing that
    was released a while back, but has your feedback incorporated.
    
    - Removed CONFIG_BSP_REMOVE_WORKAROUND and associated cmdline param
    - Fixed compile issue with sn2/zx1 due to a undefined fix_b0_for_bsp
    - some formatting nits (whitespace etc)
    
    This has been tested on tiger and long back by alex on hp systems as well.
    
    Signed-off-by: Ashok Raj <ashok.raj@intel.com>
    Signed-off-by: Tony Luck <tony.luck@intel.com>


---
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 199eeaf..df2ffd2 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -194,7 +194,6 @@ config IA64_L1_CACHE_SHIFT
 	default "7" if MCKINLEY
 	default "6" if ITANIUM
 
-# align cache-sensitive data to 64 bytes
 config IA64_CYCLONE
 	bool "Cyclone (EXA) Time Source support"
 	help
@@ -272,6 +271,25 @@ config SCHED_SMT
 	  Intel IA64 chips with MultiThreading at a cost of slightly increased
 	  overhead in some places. If unsure say N here.
 
+config PERMIT_BSP_REMOVE
+	bool "Support removal of Bootstrap Processor"
+	depends on HOTPLUG_CPU
+	default n
+	---help---
+	Say Y here if your platform SAL will support removal of BSP with HOTPLUG_CPU
+	support. 
+
+config FORCE_CPEI_RETARGET
+	bool "Force assumption that CPEI can be re-targetted"
+	depends on PERMIT_BSP_REMOVE
+	default n
+	---help---
+	Say Y if you need to force the assumption that CPEI can be re-targetted to
+	any cpu in the system. This hint is available via ACPI 3.0 specifications.
+	Tiger4 systems are capable of re-directing CPEI to any CPU other than BSP.
+	This option it useful to enable this feature on older BIOS's as well.
+	You can also enable this by using boot command line option force_cpei=1.
+
 config PREEMPT
 	bool "Preemptible Kernel"
         help
@@ -374,6 +392,9 @@ config IA64_PALINFO
 	  To use this option, you have to ensure that the "/proc file system
 	  support" (CONFIG_PROC_FS) is enabled, too.
 
+config SGI_SN
+	def_bool y if (IA64_SGI_SN2 || IA64_GENERIC)
+
 source "drivers/firmware/Kconfig"
 
 source "fs/Kconfig.binfmt"
diff --git a/arch/ia64/configs/tiger_defconfig b/arch/ia64/configs/tiger_defconfig
index 6859119..65d6b2e 100644
--- a/arch/ia64/configs/tiger_defconfig
+++ b/arch/ia64/configs/tiger_defconfig
@@ -114,6 +114,8 @@ CONFIG_FORCE_MAX_ZONEORDER=17
 CONFIG_SMP=y
 CONFIG_NR_CPUS=4
 CONFIG_HOTPLUG_CPU=y
+CONFIG_PERMIT_BSP_REMOVE=y
+CONFIG_FORCE_CPEI_RETARGET=y
 # CONFIG_SCHED_SMT is not set
 # CONFIG_PREEMPT is not set
 CONFIG_SELECT_MEMORY_MODEL=y
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index d2702c4..339fbce 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -284,19 +284,24 @@ acpi_parse_plat_int_src(acpi_table_entry
 	return 0;
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
 unsigned int can_cpei_retarget(void)
 {
 	extern int cpe_vector;
+	extern unsigned int force_cpei_retarget;
 
 	/*
 	 * Only if CPEI is supported and the override flag
 	 * is present, otherwise return that its re-targettable
 	 * if we are in polling mode.
 	 */
-	if (cpe_vector > 0 && !acpi_cpei_override)
-		return 0;
-	else
-		return 1;
+	if (cpe_vector > 0) {
+		if (acpi_cpei_override || force_cpei_retarget)
+			return 1;
+		else
+			return 0;
+	}
+	return 1;
 }
 
 unsigned int is_cpu_cpei_target(unsigned int cpu)
@@ -315,6 +320,7 @@ void set_cpei_target_cpu(unsigned int cp
 {
 	acpi_cpei_phys_cpuid = cpu_physical_id(cpu);
 }
+#endif
 
 unsigned int get_cpei_target_cpu(void)
 {
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 7a6ffd6..9dda7a3 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1100,9 +1100,6 @@ skip_rbs_switch:
 	st8 [r2]=r8
 	st8 [r3]=r10
 .work_pending:
-	tbit.nz p6,p0=r31,TIF_SIGDELAYED		// signal delayed from  MCA/INIT/NMI/PMI context?
-(p6)	br.cond.sptk.few .sigdelayed
-	;;
 	tbit.z p6,p0=r31,TIF_NEED_RESCHED		// current_thread_info()->need_resched==0?
 (p6)	br.cond.sptk.few .notify
 #ifdef CONFIG_PREEMPT
@@ -1129,17 +1126,6 @@ skip_rbs_switch:
 (pLvSys)br.cond.sptk.few  .work_pending_syscall_end
 	br.cond.sptk.many .work_processed_kernel	// don't re-check
 
-// There is a delayed signal that was detected in MCA/INIT/NMI/PMI context where
-// it could not be delivered.  Deliver it now.  The signal might be for us and
-// may set TIF_SIGPENDING, so redrive ia64_leave_* after processing the delayed
-// signal.
-
-.sigdelayed:
-	br.call.sptk.many rp=do_sigdelayed
-	cmp.eq p6,p0=r0,r0				// p6 <- 1, always re-check
-(pLvSys)br.cond.sptk.few  .work_pending_syscall_end
-	br.cond.sptk.many .work_processed_kernel	// re-check
-
 .work_pending_syscall_end:
 	adds r2=PT(R8)+16,r12
 	adds r3=PT(R10)+16,r12
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index 574084f..8832c55 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -631,6 +631,7 @@ get_target_cpu (unsigned int gsi, int ve
 {
 #ifdef CONFIG_SMP
 	static int cpu = -1;
+	extern int cpe_vector;
 
 	/*
 	 * In case of vector shared by multiple RTEs, all RTEs that
@@ -653,6 +654,11 @@ get_target_cpu (unsigned int gsi, int ve
 	if (!cpu_online(smp_processor_id()))
 		return cpu_physical_id(smp_processor_id());
 
+#ifdef CONFIG_ACPI
+	if (cpe_vector > 0 && vector == IA64_CPEP_VECTOR)
+		return get_cpei_target_cpu();
+#endif
+
 #ifdef CONFIG_NUMA
 	{
 		int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0;
diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c
index d33244c..5ce908e 100644
--- a/arch/ia64/kernel/irq.c
+++ b/arch/ia64/kernel/irq.c
@@ -163,8 +163,19 @@ void fixup_irqs(void)
 {
 	unsigned int irq;
 	extern void ia64_process_pending_intr(void);
+	extern void ia64_disable_timer(void);
+	extern volatile int time_keeper_id;
+
+	ia64_disable_timer();
+
+	/*
+	 * Find a new timesync master
+	 */
+	if (smp_processor_id() == time_keeper_id) {
+		time_keeper_id = first_cpu(cpu_online_map);
+		printk ("CPU %d is now promoted to time-keeper master\n", time_keeper_id);
+	}
 
-	ia64_set_itv(1<<16);
 	/*
 	 * Phase 1: Locate irq's bound to this cpu and
 	 * relocate them for cpu removal.
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index ee7eec9..87fb7ce 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -289,6 +289,7 @@ ia64_mca_log_sal_error_record(int sal_in
 #ifdef CONFIG_ACPI
 
 int cpe_vector = -1;
+int ia64_cpe_irq = -1;
 
 static irqreturn_t
 ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs)
@@ -1444,11 +1445,13 @@ void __devinit
 ia64_mca_cpu_init(void *cpu_data)
 {
 	void *pal_vaddr;
+	static int first_time = 1;
 
-	if (smp_processor_id() == 0) {
+	if (first_time) {
 		void *mca_data;
 		int cpu;
 
+		first_time = 0;
 		mca_data = alloc_bootmem(sizeof(struct ia64_mca_cpu)
 					 * NR_CPUS + KERNEL_STACK_SIZE);
 		mca_data = (void *)(((unsigned long)mca_data +
@@ -1704,6 +1707,7 @@ ia64_mca_late_init(void)
 					desc = irq_descp(irq);
 					desc->status |= IRQ_PER_CPU;
 					setup_irq(irq, &mca_cpe_irqaction);
+					ia64_cpe_irq = irq;
 				}
 			ia64_mca_register_cpev(cpe_vector);
 			IA64_MCA_DEBUG("%s: CPEI/P setup and enabled.\n", __FUNCTION__);
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 9c5194b..077f212 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -6722,6 +6722,7 @@ __initcall(pfm_init);
 void
 pfm_init_percpu (void)
 {
+	static int first_time=1;
 	/*
 	 * make sure no measurement is active
 	 * (may inherit programmed PMCs from EFI).
@@ -6734,8 +6735,10 @@ pfm_init_percpu (void)
 	 */
 	pfm_unfreeze_pmu();
 
-	if (smp_processor_id() == 0)
+	if (first_time) {
 		register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction);
+		first_time=0;
+	}
 
 	ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR);
 	ia64_srlz_d();
diff --git a/arch/ia64/kernel/sal.c b/arch/ia64/kernel/sal.c
index acc0f13..056f7a6 100644
--- a/arch/ia64/kernel/sal.c
+++ b/arch/ia64/kernel/sal.c
@@ -14,6 +14,7 @@
 #include <linux/spinlock.h>
 #include <linux/string.h>
 
+#include <asm/delay.h>
 #include <asm/page.h>
 #include <asm/sal.h>
 #include <asm/pal.h>
@@ -214,6 +215,78 @@ chk_nointroute_opt(void)
 static void __init sal_desc_ap_wakeup(void *p) { }
 #endif
 
+/*
+ * HP rx5670 firmware polls for interrupts during SAL_CACHE_FLUSH by reading
+ * cr.ivr, but it never writes cr.eoi.  This leaves any interrupt marked as
+ * "in-service" and masks other interrupts of equal or lower priority.
+ *
+ * HP internal defect reports: F1859, F2775, F3031.
+ */
+static int sal_cache_flush_drops_interrupts;
+
+static void __init
+check_sal_cache_flush (void)
+{
+	unsigned long flags, itv;
+	int cpu;
+	u64 vector;
+
+	cpu = get_cpu();
+	local_irq_save(flags);
+
+	/*
+	 * Schedule a timer interrupt, wait until it's reported, and see if
+	 * SAL_CACHE_FLUSH drops it.
+	 */
+	itv = ia64_get_itv();
+	BUG_ON((itv & (1 << 16)) == 0);
+
+	ia64_set_itv(IA64_TIMER_VECTOR);
+	ia64_set_itm(ia64_get_itc() + 1000);
+
+	while (!ia64_get_irr(IA64_TIMER_VECTOR))
+		cpu_relax();
+
+	ia64_sal_cache_flush(3);
+
+	if (ia64_get_irr(IA64_TIMER_VECTOR)) {
+		vector = ia64_get_ivr();
+		ia64_eoi();
+		WARN_ON(vector != IA64_TIMER_VECTOR);
+	} else {
+		sal_cache_flush_drops_interrupts = 1;
+		printk(KERN_ERR "SAL: SAL_CACHE_FLUSH drops interrupts; "
+			"PAL_CACHE_FLUSH will be used instead\n");
+		ia64_eoi();
+	}
+
+	ia64_set_itv(itv);
+	local_irq_restore(flags);
+	put_cpu();
+}
+
+s64
+ia64_sal_cache_flush (u64 cache_type)
+{
+	struct ia64_sal_retval isrv;
+
+	if (sal_cache_flush_drops_interrupts) {
+		unsigned long flags;
+		u64 progress;
+		s64 rc;
+
+		progress = 0;
+		local_irq_save(flags);
+		rc = ia64_pal_cache_flush(cache_type,
+			PAL_CACHE_FLUSH_INVALIDATE, &progress, NULL);
+		local_irq_restore(flags);
+		return rc;
+	}
+
+	SAL_CALL(isrv, SAL_CACHE_FLUSH, cache_type, 0, 0, 0, 0, 0, 0);
+	return isrv.status;
+}
+
 void __init
 ia64_sal_init (struct ia64_sal_systab *systab)
 {
@@ -262,6 +335,8 @@ ia64_sal_init (struct ia64_sal_systab *s
 		}
 		p += SAL_DESC_SIZE(*p);
 	}
+
+	check_sal_cache_flush();
 }
 
 int
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
index 463f6bb..1d7903e 100644
--- a/arch/ia64/kernel/signal.c
+++ b/arch/ia64/kernel/signal.c
@@ -588,104 +588,3 @@ ia64_do_signal (sigset_t *oldset, struct
 	}
 	return 0;
 }
-
-/* Set a delayed signal that was detected in MCA/INIT/NMI/PMI context where it
- * could not be delivered.  It is important that the target process is not
- * allowed to do any more work in user space.  Possible cases for the target
- * process:
- *
- * - It is sleeping and will wake up soon.  Store the data in the current task,
- *   the signal will be sent when the current task returns from the next
- *   interrupt.
- *
- * - It is running in user context.  Store the data in the current task, the
- *   signal will be sent when the current task returns from the next interrupt.
- *
- * - It is running in kernel context on this or another cpu and will return to
- *   user context.  Store the data in the target task, the signal will be sent
- *   to itself when the target task returns to user space.
- *
- * - It is running in kernel context on this cpu and will sleep before
- *   returning to user context.  Because this is also the current task, the
- *   signal will not get delivered and the task could sleep indefinitely.
- *   Store the data in the idle task for this cpu, the signal will be sent
- *   after the idle task processes its next interrupt.
- *
- * To cover all cases, store the data in the target task, the current task and
- * the idle task on this cpu.  Whatever happens, the signal will be delivered
- * to the target task before it can do any useful user space work.  Multiple
- * deliveries have no unwanted side effects.
- *
- * Note: This code is executed in MCA/INIT/NMI/PMI context, with interrupts
- * disabled.  It must not take any locks nor use kernel structures or services
- * that require locks.
- */
-
-/* To ensure that we get the right pid, check its start time.  To avoid extra
- * include files in thread_info.h, convert the task start_time to unsigned long,
- * giving us a cycle time of > 580 years.
- */
-static inline unsigned long
-start_time_ul(const struct task_struct *t)
-{
-	return t->start_time.tv_sec * NSEC_PER_SEC + t->start_time.tv_nsec;
-}
-
-void
-set_sigdelayed(pid_t pid, int signo, int code, void __user *addr)
-{
-	struct task_struct *t;
-	unsigned long start_time =  0;
-	int i;
-
-	for (i = 1; i <= 3; ++i) {
-		switch (i) {
-		case 1:
-			t = find_task_by_pid(pid);
-			if (t)
-				start_time = start_time_ul(t);
-			break;
-		case 2:
-			t = current;
-			break;
-		default:
-			t = idle_task(smp_processor_id());
-			break;
-		}
-
-		if (!t)
-			return;
-		task_thread_info(t)->sigdelayed.signo = signo;
-		task_thread_info(t)->sigdelayed.code = code;
-		task_thread_info(t)->sigdelayed.addr = addr;
-		task_thread_info(t)->sigdelayed.start_time = start_time;
-		task_thread_info(t)->sigdelayed.pid = pid;
-		wmb();
-		set_tsk_thread_flag(t, TIF_SIGDELAYED);
-	}
-}
-
-/* Called from entry.S when it detects TIF_SIGDELAYED, a delayed signal that
- * was detected in MCA/INIT/NMI/PMI context where it could not be delivered.
- */
-
-void
-do_sigdelayed(void)
-{
-	struct siginfo siginfo;
-	pid_t pid;
-	struct task_struct *t;
-
-	clear_thread_flag(TIF_SIGDELAYED);
-	memset(&siginfo, 0, sizeof(siginfo));
-	siginfo.si_signo = current_thread_info()->sigdelayed.signo;
-	siginfo.si_code = current_thread_info()->sigdelayed.code;
-	siginfo.si_addr = current_thread_info()->sigdelayed.addr;
-	pid = current_thread_info()->sigdelayed.pid;
-	t = find_task_by_pid(pid);
-	if (!t)
-		return;
-	if (current_thread_info()->sigdelayed.start_time != start_time_ul(t))
-		return;
-	force_sig_info(siginfo.si_signo, &siginfo, t);
-}
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 8f44e7d..e9d37bf 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -70,6 +70,12 @@
 #endif
 
 #ifdef CONFIG_HOTPLUG_CPU
+#ifdef CONFIG_PERMIT_BSP_REMOVE
+#define bsp_remove_ok	1
+#else
+#define bsp_remove_ok	0
+#endif
+
 /*
  * Store all idle threads, this can be reused instead of creating
  * a new thread. Also avoids complicated thread destroy functionality
@@ -104,7 +110,7 @@ struct sal_to_os_boot *sal_state_for_boo
 /*
  * ITC synchronization related stuff:
  */
-#define MASTER	0
+#define MASTER	(0)
 #define SLAVE	(SMP_CACHE_BYTES/8)
 
 #define NUM_ROUNDS	64	/* magic value */
@@ -151,6 +157,27 @@ char __initdata no_int_routing;
 
 unsigned char smp_int_redirect; /* are INT and IPI redirectable by the chipset? */
 
+#ifdef CONFIG_FORCE_CPEI_RETARGET
+#define CPEI_OVERRIDE_DEFAULT	(1)
+#else
+#define CPEI_OVERRIDE_DEFAULT	(0)
+#endif
+
+unsigned int force_cpei_retarget = CPEI_OVERRIDE_DEFAULT;
+
+static int __init
+cmdl_force_cpei(char *str)
+{
+	int value=0;
+
+	get_option (&str, &value);
+	force_cpei_retarget = value;
+
+	return 1;
+}
+
+__setup("force_cpei=", cmdl_force_cpei);
+
 static int __init
 nointroute (char *str)
 {
@@ -161,6 +188,27 @@ nointroute (char *str)
 
 __setup("nointroute", nointroute);
 
+static void fix_b0_for_bsp(void)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+	int cpuid;
+	static int fix_bsp_b0 = 1;
+
+	cpuid = smp_processor_id();
+
+	/*
+	 * Cache the b0 value on the first AP that comes up
+	 */
+	if (!(fix_bsp_b0 && cpuid))
+		return;
+
+	sal_boot_rendez_state[0].br[0] = sal_boot_rendez_state[cpuid].br[0];
+	printk ("Fixed BSP b0 value from CPU %d\n", cpuid);
+
+	fix_bsp_b0 = 0;
+#endif
+}
+
 void
 sync_master (void *arg)
 {
@@ -327,8 +375,9 @@ smp_setup_percpu_timer (void)
 static void __devinit
 smp_callin (void)
 {
-	int cpuid, phys_id;
+	int cpuid, phys_id, itc_master;
 	extern void ia64_init_itm(void);
+	extern volatile int time_keeper_id;
 
 #ifdef CONFIG_PERFMON
 	extern void pfm_init_percpu(void);
@@ -336,6 +385,7 @@ smp_callin (void)
 
 	cpuid = smp_processor_id();
 	phys_id = hard_smp_processor_id();
+	itc_master = time_keeper_id;
 
 	if (cpu_online(cpuid)) {
 		printk(KERN_ERR "huh, phys CPU#0x%x, CPU#0x%x already present??\n",
@@ -343,6 +393,8 @@ smp_callin (void)
 		BUG();
 	}
 
+	fix_b0_for_bsp();
+
 	lock_ipi_calllock();
 	cpu_set(cpuid, cpu_online_map);
 	unlock_ipi_calllock();
@@ -365,8 +417,8 @@ smp_callin (void)
 		 * calls spin_unlock_bh(), which calls spin_unlock_bh(), which calls
 		 * local_bh_enable(), which bugs out if irqs are not enabled...
 		 */
-		Dprintk("Going to syncup ITC with BP.\n");
-		ia64_sync_itc(0);
+		Dprintk("Going to syncup ITC with ITC Master.\n");
+		ia64_sync_itc(itc_master);
 	}
 
 	/*
@@ -638,6 +690,47 @@ remove_siblinginfo(int cpu)
 }
 
 extern void fixup_irqs(void);
+
+int migrate_platform_irqs(unsigned int cpu)
+{
+	int new_cpei_cpu;
+	irq_desc_t *desc = NULL;
+	cpumask_t 	mask;
+	int 		retval = 0;
+
+	/*
+	 * dont permit CPEI target to removed.
+	 */
+	if (cpe_vector > 0 && is_cpu_cpei_target(cpu)) {
+		printk ("CPU (%d) is CPEI Target\n", cpu);
+		if (can_cpei_retarget()) {
+			/*
+			 * Now re-target the CPEI to a different processor
+			 */
+			new_cpei_cpu = any_online_cpu(cpu_online_map);
+			mask = cpumask_of_cpu(new_cpei_cpu);
+			set_cpei_target_cpu(new_cpei_cpu);
+			desc = irq_descp(ia64_cpe_irq);
+			/*
+			 * Switch for now, immediatly, we need to do fake intr
+			 * as other interrupts, but need to study CPEI behaviour with
+			 * polling before making changes.
+			 */
+			if (desc) {
+				desc->handler->disable(ia64_cpe_irq);
+				desc->handler->set_affinity(ia64_cpe_irq, mask);
+				desc->handler->enable(ia64_cpe_irq);
+				printk ("Re-targetting CPEI to cpu %d\n", new_cpei_cpu);
+			}
+		}
+		if (!desc) {
+			printk ("Unable to retarget CPEI, offline cpu [%d] failed\n", cpu);
+			retval = -EBUSY;
+		}
+	}
+	return retval;
+}
+
 /* must be called with cpucontrol mutex held */
 int __cpu_disable(void)
 {
@@ -646,8 +739,17 @@ int __cpu_disable(void)
 	/*
 	 * dont permit boot processor for now
 	 */
-	if (cpu == 0)
-		return -EBUSY;
+	if (cpu == 0 && !bsp_remove_ok) {
+		printk ("Your platform does not support removal of BSP\n");
+		return (-EBUSY);
+	}
+
+	cpu_clear(cpu, cpu_online_map);
+
+	if (migrate_platform_irqs(cpu)) {
+		cpu_set(cpu, cpu_online_map);
+		return (-EBUSY);
+	}
 
 	remove_siblinginfo(cpu);
 	cpu_clear(cpu, cpu_online_map);
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 028a2b9..1ca130a 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -32,7 +32,7 @@
 
 extern unsigned long wall_jiffies;
 
-#define TIME_KEEPER_ID	0	/* smp_processor_id() of time-keeper */
+volatile int time_keeper_id = 0; /* smp_processor_id() of time-keeper */
 
 #ifdef CONFIG_IA64_DEBUG_IRQ
 
@@ -71,7 +71,7 @@ timer_interrupt (int irq, void *dev_id, 
 
 		new_itm += local_cpu_data->itm_delta;
 
-		if (smp_processor_id() == TIME_KEEPER_ID) {
+		if (smp_processor_id() == time_keeper_id) {
 			/*
 			 * Here we are in the timer irq handler. We have irqs locally
 			 * disabled, but we don't know if the timer_bh is running on
@@ -236,6 +236,11 @@ static struct irqaction timer_irqaction 
 	.name =		"timer"
 };
 
+void __devinit ia64_disable_timer(void)
+{
+	ia64_set_itv(1 << 16);
+}
+
 void __init
 time_init (void)
 {
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index 706b773..c9562d9 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -36,7 +36,7 @@ int arch_register_cpu(int num)
 	parent = &sysfs_nodes[cpu_to_node(num)];
 #endif /* CONFIG_NUMA */
 
-#ifdef CONFIG_ACPI
+#if defined (CONFIG_ACPI) && defined (CONFIG_HOTPLUG_CPU)
 	/*
 	 * If CPEI cannot be re-targetted, and this is
 	 * CPEI target, then dont create the control file
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index acaaec4..9855ba3 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -181,13 +181,15 @@ per_cpu_init (void)
 {
 	void *cpu_data;
 	int cpu;
+	static int first_time=1;
 
 	/*
 	 * get_free_pages() cannot be used before cpu_init() done.  BSP
 	 * allocates "NR_CPUS" pages for all CPUs to avoid that AP calls
 	 * get_zeroed_page().
 	 */
-	if (smp_processor_id() == 0) {
+	if (first_time) {
+		first_time=0;
 		cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * NR_CPUS,
 					   PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
 		for (cpu = 0; cpu < NR_CPUS; cpu++) {
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index c87d6d1..573d5cc 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -528,12 +528,17 @@ void __init find_memory(void)
 void *per_cpu_init(void)
 {
 	int cpu;
+	static int first_time = 1;
+
 
 	if (smp_processor_id() != 0)
 		return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
 
-	for (cpu = 0; cpu < NR_CPUS; cpu++)
-		per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
+	if (first_time) {
+		first_time = 0;
+		for (cpu = 0; cpu < NR_CPUS; cpu++)
+			per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
+	}
 
 	return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
 }
diff --git a/arch/ia64/sn/Makefile b/arch/ia64/sn/Makefile
index a269f6d..79a7df0 100644
--- a/arch/ia64/sn/Makefile
+++ b/arch/ia64/sn/Makefile
@@ -9,6 +9,4 @@
 # Makefile for the sn ia64 subplatform
 #
 
-CPPFLAGS += -I$(srctree)/arch/ia64/sn/include
-
 obj-y += kernel/ pci/
diff --git a/arch/ia64/sn/kernel/Makefile b/arch/ia64/sn/kernel/Makefile
index 4351c4f..3e9b4ee 100644
--- a/arch/ia64/sn/kernel/Makefile
+++ b/arch/ia64/sn/kernel/Makefile
@@ -7,6 +7,8 @@
 # Copyright (C) 1999,2001-2005 Silicon Graphics, Inc.  All Rights Reserved.
 #
 
+CPPFLAGS += -I$(srctree)/arch/ia64/sn/include
+
 obj-y				+= setup.o bte.o bte_error.o irq.o mca.o idle.o \
 				   huberror.o io_init.o iomv.o klconflib.o sn2/
 obj-$(CONFIG_IA64_GENERIC)      += machvec.o
diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c
index a4c7815..d7e4d79 100644
--- a/arch/ia64/sn/kernel/io_init.c
+++ b/arch/ia64/sn/kernel/io_init.c
@@ -208,7 +208,7 @@ static s64 sn_device_fixup_war(u64 nasid
  * sn_fixup_ionodes() - This routine initializes the HUB data strcuture for
  *	each node in the system.
  */
-static void sn_fixup_ionodes(void)
+static void __init sn_fixup_ionodes(void)
 {
 	struct sn_flush_device_kernel *sn_flush_device_kernel;
 	struct sn_flush_device_kernel *dev_entry;
@@ -467,6 +467,13 @@ void sn_pci_fixup_slot(struct pci_dev *d
 		pcidev_info->pdi_sn_irq_info = NULL;
 		kfree(sn_irq_info);
 	}
+
+	/*
+	 * MSI currently not supported on altix.  Remove this when
+	 * the MSI abstraction patches are integrated into the kernel
+	 * (sometime after 2.6.16 releases)
+	 */
+	dev->no_msi = 1;
 }
 
 /*
diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c
index ec37084..74d87d9 100644
--- a/arch/ia64/sn/kernel/irq.c
+++ b/arch/ia64/sn/kernel/irq.c
@@ -5,11 +5,12 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.  All Rights Reserved.
  */
 
 #include <linux/irq.h>
 #include <linux/spinlock.h>
+#include <linux/init.h>
 #include <asm/sn/addrs.h>
 #include <asm/sn/arch.h>
 #include <asm/sn/intr.h>
@@ -76,17 +77,15 @@ static void sn_enable_irq(unsigned int i
 
 static void sn_ack_irq(unsigned int irq)
 {
-	u64 event_occurred, mask = 0;
+	u64 event_occurred, mask;
 
 	irq = irq & 0xff;
-	event_occurred =
-	    HUB_L((u64*)LOCAL_MMR_ADDR(SH_EVENT_OCCURRED));
+	event_occurred = HUB_L((u64*)LOCAL_MMR_ADDR(SH_EVENT_OCCURRED));
 	mask = event_occurred & SH_ALL_INT_MASK;
-	HUB_S((u64*)LOCAL_MMR_ADDR(SH_EVENT_OCCURRED_ALIAS),
-	      mask);
+	HUB_S((u64*)LOCAL_MMR_ADDR(SH_EVENT_OCCURRED_ALIAS), mask);
 	__set_bit(irq, (volatile void *)pda->sn_in_service_ivecs);
 
-	move_irq(irq);
+	move_native_irq(irq);
 }
 
 static void sn_end_irq(unsigned int irq)
@@ -219,9 +218,8 @@ static void register_intr_pda(struct sn_
 		pdacpu(cpu)->sn_last_irq = irq;
 	}
 
-	if (pdacpu(cpu)->sn_first_irq == 0 || pdacpu(cpu)->sn_first_irq > irq) {
+	if (pdacpu(cpu)->sn_first_irq == 0 || pdacpu(cpu)->sn_first_irq > irq)
 		pdacpu(cpu)->sn_first_irq = irq;
-	}
 }
 
 static void unregister_intr_pda(struct sn_irq_info *sn_irq_info)
@@ -289,7 +287,7 @@ void sn_irq_fixup(struct pci_dev *pci_de
 	list_add_rcu(&sn_irq_info->list, sn_irq_lh[sn_irq_info->irq_irq]);
 	spin_unlock(&sn_irq_info_lock);
 
-	(void)register_intr_pda(sn_irq_info);
+	register_intr_pda(sn_irq_info);
 }
 
 void sn_irq_unfixup(struct pci_dev *pci_dev)
@@ -419,7 +417,7 @@ void sn_lb_int_war_check(void)
 	rcu_read_unlock();
 }
 
-void sn_irq_lh_init(void)
+void __init sn_irq_lh_init(void)
 {
 	int i;
 
@@ -434,5 +432,4 @@ void sn_irq_lh_init(void)
 
 		INIT_LIST_HEAD(sn_irq_lh[i]);
 	}
-
 }
diff --git a/arch/ia64/sn/kernel/klconflib.c b/arch/ia64/sn/kernel/klconflib.c
index 0f11a32..87682b4 100644
--- a/arch/ia64/sn/kernel/klconflib.c
+++ b/arch/ia64/sn/kernel/klconflib.c
@@ -78,31 +78,30 @@ format_module_id(char *buffer, moduleid_
 	position = MODULE_GET_BPOS(m);
 
 	if ((fmt == MODULE_FORMAT_BRIEF) || (fmt == MODULE_FORMAT_LCD)) {
-	    /* Brief module number format, eg. 002c15 */
+		/* Brief module number format, eg. 002c15 */
 
-	    /* Decompress the rack number */
-	    *buffer++ = '0' + RACK_GET_CLASS(rack);
-	    *buffer++ = '0' + RACK_GET_GROUP(rack);
-	    *buffer++ = '0' + RACK_GET_NUM(rack);
+		/* Decompress the rack number */
+		*buffer++ = '0' + RACK_GET_CLASS(rack);
+		*buffer++ = '0' + RACK_GET_GROUP(rack);
+		*buffer++ = '0' + RACK_GET_NUM(rack);
 
-	    /* Add the brick type */
-	    *buffer++ = brickchar;
+		/* Add the brick type */
+		*buffer++ = brickchar;
 	}
 	else if (fmt == MODULE_FORMAT_LONG) {
-	    /* Fuller hwgraph format, eg. rack/002/bay/15 */
+		/* Fuller hwgraph format, eg. rack/002/bay/15 */
 
-	    strcpy(buffer, "rack" "/");  buffer += strlen(buffer);
+		strcpy(buffer, "rack" "/");  buffer += strlen(buffer);
 
-	    *buffer++ = '0' + RACK_GET_CLASS(rack);
-	    *buffer++ = '0' + RACK_GET_GROUP(rack);
-	    *buffer++ = '0' + RACK_GET_NUM(rack);
+		*buffer++ = '0' + RACK_GET_CLASS(rack);
+		*buffer++ = '0' + RACK_GET_GROUP(rack);
+		*buffer++ = '0' + RACK_GET_NUM(rack);
 
-	    strcpy(buffer, "/" "bay" "/");  buffer += strlen(buffer);
+		strcpy(buffer, "/" "bay" "/");  buffer += strlen(buffer);
 	}
 
 	/* Add the bay position, using at least two digits */
 	if (position < 10)
-	    *buffer++ = '0';
+		*buffer++ = '0';
 	sprintf(buffer, "%d", position);
-
 }
diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c
index e510dce..46d2904 100644
--- a/arch/ia64/sn/kernel/setup.c
+++ b/arch/ia64/sn/kernel/setup.c
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 1999,2001-2005 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 1999,2001-2006 Silicon Graphics, Inc. All rights reserved.
  */
 
 #include <linux/config.h>
@@ -209,7 +209,7 @@ void __init early_sn_setup(void)
 }
 
 extern int platform_intr_list[];
-static int __initdata shub_1_1_found = 0;
+static int __initdata shub_1_1_found;
 
 /*
  * sn_check_for_wars
@@ -496,6 +496,7 @@ void __init sn_setup(char **cmdline_p)
 	 * for sn.
 	 */
 	pm_power_off = ia64_sn_power_down;
+	current->thread.flags |= IA64_THREAD_MIGRATION;
 }
 
 /**
@@ -578,13 +579,17 @@ void __init sn_cpu_init(void)
 			sn_prom_type = 2;
 		else
 			sn_prom_type = 1;
-		printk("Running on medusa with %s PROM\n", (sn_prom_type == 1) ? "real" : "fake");
+		printk(KERN_INFO "Running on medusa with %s PROM\n",
+		       (sn_prom_type == 1) ? "real" : "fake");
 	}
 
 	memset(pda, 0, sizeof(pda));
-	if (ia64_sn_get_sn_info(0, &sn_hub_info->shub2, &sn_hub_info->nasid_bitmask, &sn_hub_info->nasid_shift,
-				&sn_system_size, &sn_sharing_domain_size, &sn_partition_id,
-				&sn_coherency_id, &sn_region_size))
+	if (ia64_sn_get_sn_info(0, &sn_hub_info->shub2,
+				&sn_hub_info->nasid_bitmask,
+				&sn_hub_info->nasid_shift,
+				&sn_system_size, &sn_sharing_domain_size,
+				&sn_partition_id, &sn_coherency_id,
+				&sn_region_size))
 		BUG();
 	sn_hub_info->as_shift = sn_hub_info->nasid_shift - 2;
 
@@ -654,7 +659,8 @@ void __init sn_cpu_init(void)
 			SH2_PIO_WRITE_STATUS_1, SH2_PIO_WRITE_STATUS_3};
 		u64 *pio;
 		pio = is_shub1() ? pio1 : pio2;
-		pda->pio_write_status_addr = (volatile unsigned long *) LOCAL_MMR_ADDR(pio[slice]);
+		pda->pio_write_status_addr =
+		   (volatile unsigned long *)GLOBAL_MMR_ADDR(nasid, pio[slice]);
 		pda->pio_write_status_val = is_shub1() ? SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK : 0;
 	}
 
@@ -716,7 +722,8 @@ void __init build_cnode_tables(void)
 	for_each_online_node(node) {
 		kl_config_hdr_t *klgraph_header;
 		nasid = cnodeid_to_nasid(node);
-		if ((klgraph_header = ia64_sn_get_klconfig_addr(nasid)) == NULL)
+		klgraph_header = ia64_sn_get_klconfig_addr(nasid);
+		if (klgraph_header == NULL)
 			BUG();
 		brd = NODE_OFFSET_TO_LBOARD(nasid, klgraph_header->ch_board_info);
 		while (brd) {
@@ -734,7 +741,7 @@ nasid_slice_to_cpuid(int nasid, int slic
 {
 	long cpu;
 
-	for (cpu=0; cpu < NR_CPUS; cpu++)
+	for (cpu = 0; cpu < NR_CPUS; cpu++)
 		if (cpuid_to_nasid(cpu) == nasid &&
 					cpuid_to_slice(cpu) == slice)
 			return cpu;
diff --git a/arch/ia64/sn/kernel/sn2/Makefile b/arch/ia64/sn/kernel/sn2/Makefile
index 170bde4..99e1776 100644
--- a/arch/ia64/sn/kernel/sn2/Makefile
+++ b/arch/ia64/sn/kernel/sn2/Makefile
@@ -9,5 +9,7 @@
 # sn2 specific kernel files
 #
 
+CPPFLAGS += -I$(srctree)/arch/ia64/sn/include
+
 obj-y += cache.o io.o ptc_deadlock.o sn2_smp.o sn_proc_fs.o \
 	 prominfo_proc.o timer.o timer_interrupt.o sn_hwperf.o
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c
index 471bbaa..77e8425 100644
--- a/arch/ia64/sn/kernel/sn2/sn2_smp.c
+++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c
@@ -5,7 +5,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2000-2006 Silicon Graphics, Inc. All rights reserved.
  */
 
 #include <linux/init.h>
@@ -46,104 +46,28 @@ DECLARE_PER_CPU(struct ptc_stats, ptcsta
 
 static  __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock);
 
-void sn2_ptc_deadlock_recovery(short *, short, int, volatile unsigned long *, unsigned long data0,
-	volatile unsigned long *, unsigned long data1);
+void sn2_ptc_deadlock_recovery(short *, short, short, int, volatile unsigned long *, unsigned long,
+	volatile unsigned long *, unsigned long);
 
-#ifdef DEBUG_PTC
 /*
- * ptctest:
- *
- * 	xyz - 3 digit hex number:
- * 		x - Force PTC purges to use shub:
- * 			0 - no force
- * 			1 - force
- * 		y - interupt enable
- * 			0 - disable interrupts
- * 			1 - leave interuupts enabled
- * 		z - type of lock:
- * 			0 - global lock
- * 			1 - node local lock
- * 			2 - no lock
- *
- *   	Note: on shub1, only ptctest == 0 is supported. Don't try other values!
+ * Note: some is the following is captured here to make degugging easier
+ * (the macros make more sense if you see the debug patch - not posted)
  */
-
-static unsigned int sn2_ptctest = 0;
-
-static int __init ptc_test(char *str)
-{
-	get_option(&str, &sn2_ptctest);
-	return 1;
-}
-__setup("ptctest=", ptc_test);
-
-static inline int ptc_lock(unsigned long *flagp)
-{
-	unsigned long opt = sn2_ptctest & 255;
-
-	switch (opt) {
-	case 0x00:
-		spin_lock_irqsave(&sn2_global_ptc_lock, *flagp);
-		break;
-	case 0x01:
-		spin_lock_irqsave(&sn_nodepda->ptc_lock, *flagp);
-		break;
-	case 0x02:
-		local_irq_save(*flagp);
-		break;
-	case 0x10:
-		spin_lock(&sn2_global_ptc_lock);
-		break;
-	case 0x11:
-		spin_lock(&sn_nodepda->ptc_lock);
-		break;
-	case 0x12:
-		break;
-	default:
-		BUG();
-	}
-	return opt;
-}
-
-static inline void ptc_unlock(unsigned long flags, int opt)
-{
-	switch (opt) {
-	case 0x00:
-		spin_unlock_irqrestore(&sn2_global_ptc_lock, flags);
-		break;
-	case 0x01:
-		spin_unlock_irqrestore(&sn_nodepda->ptc_lock, flags);
-		break;
-	case 0x02:
-		local_irq_restore(flags);
-		break;
-	case 0x10:
-		spin_unlock(&sn2_global_ptc_lock);
-		break;
-	case 0x11:
-		spin_unlock(&sn_nodepda->ptc_lock);
-		break;
-	case 0x12:
-		break;
-	default:
-		BUG();
-	}
-}
-#else
-
 #define sn2_ptctest	0
+#define local_node_uses_ptc_ga(sh1)	((sh1) ? 1 : 0)
+#define max_active_pio(sh1)		((sh1) ? 32 : 7)
+#define reset_max_active_on_deadlock()	1
+#define PTC_LOCK(sh1)			((sh1) ? &sn2_global_ptc_lock : &sn_nodepda->ptc_lock)
 
-static inline int ptc_lock(unsigned long *flagp)
+static inline void ptc_lock(int sh1, unsigned long *flagp)
 {
-	spin_lock_irqsave(&sn2_global_ptc_lock, *flagp);
-	return 0;
+	spin_lock_irqsave(PTC_LOCK(sh1), *flagp);
 }
 
-static inline void ptc_unlock(unsigned long flags, int opt)
+static inline void ptc_unlock(int sh1, unsigned long flags)
 {
-	spin_unlock_irqrestore(&sn2_global_ptc_lock, flags);
+	spin_unlock_irqrestore(PTC_LOCK(sh1), flags);
 }
-#endif
 
 struct ptc_stats {
 	unsigned long ptc_l;
@@ -151,27 +75,51 @@ struct ptc_stats {
 	unsigned long shub_ptc_flushes;
 	unsigned long nodes_flushed;
 	unsigned long deadlocks;
+	unsigned long deadlocks2;
 	unsigned long lock_itc_clocks;
 	unsigned long shub_itc_clocks;
 	unsigned long shub_itc_clocks_max;
+	unsigned long shub_ptc_flushes_not_my_mm;
 };
 
 static inline unsigned long wait_piowc(void)
 {
-	volatile unsigned long *piows, zeroval;
-	unsigned long ws;
+	volatile unsigned long *piows;
+	unsigned long zeroval, ws;
 
 	piows = pda->pio_write_status_addr;
 	zeroval = pda->pio_write_status_val;
 	do {
 		cpu_relax();
 	} while (((ws = *piows) & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) != zeroval);
-	return ws;
+	return (ws & SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK) != 0;
+}
+
+/**
+ * sn_migrate - SN-specific task migration actions
+ * @task: Task being migrated to new CPU
+ *
+ * SN2 PIO writes from separate CPUs are not guaranteed to arrive in order.
+ * Context switching user threads which have memory-mapped MMIO may cause
+ * PIOs to issue from seperate CPUs, thus the PIO writes must be drained
+ * from the previous CPU's Shub before execution resumes on the new CPU.
+ */
+void sn_migrate(struct task_struct *task)
+{
+	pda_t *last_pda = pdacpu(task_thread_info(task)->last_cpu);
+	volatile unsigned long *adr = last_pda->pio_write_status_addr;
+	unsigned long val = last_pda->pio_write_status_val;
+
+	/* Drain PIO writes from old CPU's Shub */
+	while (unlikely((*adr & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK)
+			!= val))
+		cpu_relax();
 }
 
 void sn_tlb_migrate_finish(struct mm_struct *mm)
 {
-	if (mm == current->mm)
+	/* flush_tlb_mm is inefficient if more than 1 users of mm */
+	if (mm == current->mm && mm && atomic_read(&mm->mm_users) == 1)
 		flush_tlb_mm(mm);
 }
 
@@ -201,12 +149,14 @@ void
 sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
 		     unsigned long end, unsigned long nbits)
 {
-	int i, opt, shub1, cnode, mynasid, cpu, lcpu = 0, nasid, flushed = 0;
-	int mymm = (mm == current->active_mm && current->mm);
+	int i, ibegin, shub1, cnode, mynasid, cpu, lcpu = 0, nasid;
+	int mymm = (mm == current->active_mm && mm == current->mm);
+	int use_cpu_ptcga;
 	volatile unsigned long *ptc0, *ptc1;
-	unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value;
+	unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value, old_rr = 0;
 	short nasids[MAX_NUMNODES], nix;
 	nodemask_t nodes_flushed;
+	int active, max_active, deadlock;
 
 	nodes_clear(nodes_flushed);
 	i = 0;
@@ -267,41 +217,56 @@ sn2_global_tlb_purge(struct mm_struct *m
 	
 
 	mynasid = get_nasid();
+	use_cpu_ptcga = local_node_uses_ptc_ga(shub1);
+	max_active = max_active_pio(shub1);
 
 	itc = ia64_get_itc();
-	opt = ptc_lock(&flags);
+	ptc_lock(shub1, &flags);
 	itc2 = ia64_get_itc();
+
 	__get_cpu_var(ptcstats).lock_itc_clocks += itc2 - itc;
 	__get_cpu_var(ptcstats).shub_ptc_flushes++;
 	__get_cpu_var(ptcstats).nodes_flushed += nix;
+	if (!mymm)
+		 __get_cpu_var(ptcstats).shub_ptc_flushes_not_my_mm++;
+
+	if (use_cpu_ptcga && !mymm) {
+		old_rr = ia64_get_rr(start);
+		ia64_set_rr(start, (old_rr & 0xff) | (rr_value << 8));
+		ia64_srlz_d();
+	}
 
+	wait_piowc();
 	do {
 		if (shub1)
 			data1 = start | (1UL << SH1_PTC_1_START_SHFT);
 		else
 			data0 = (data0 & ~SH2_PTC_ADDR_MASK) | (start & SH2_PTC_ADDR_MASK);
-		for (i = 0; i < nix; i++) {
+		deadlock = 0;
+		active = 0;
+		for (ibegin = 0, i = 0; i < nix; i++) {
 			nasid = nasids[i];
-			if ((!(sn2_ptctest & 3)) && unlikely(nasid == mynasid && mymm)) {
+			if (use_cpu_ptcga && unlikely(nasid == mynasid)) {
 				ia64_ptcga(start, nbits << 2);
 				ia64_srlz_i();
 			} else {
 				ptc0 = CHANGE_NASID(nasid, ptc0);
 				if (ptc1)
 					ptc1 = CHANGE_NASID(nasid, ptc1);
-				pio_atomic_phys_write_mmrs(ptc0, data0, ptc1,
-							   data1);
-				flushed = 1;
+				pio_atomic_phys_write_mmrs(ptc0, data0, ptc1, data1);
+				active++;
+			}
+			if (active >= max_active || i == (nix - 1)) {
+				if ((deadlock = wait_piowc())) {
+					sn2_ptc_deadlock_recovery(nasids, ibegin, i, mynasid, ptc0, data0, ptc1, data1);
+					if (reset_max_active_on_deadlock())
+						max_active = 1;
+				}
+				active = 0;
+				ibegin = i + 1;
 			}
 		}
-		if (flushed
-		    && (wait_piowc() &
-				(SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK))) {
-			sn2_ptc_deadlock_recovery(nasids, nix, mynasid, ptc0, data0, ptc1, data1);
-		}
-
 		start += (1UL << nbits);
-
 	} while (start < end);
 
 	itc2 = ia64_get_itc() - itc2;
@@ -309,7 +274,12 @@ sn2_global_tlb_purge(struct mm_struct *m
 	if (itc2 > __get_cpu_var(ptcstats).shub_itc_clocks_max)
 		__get_cpu_var(ptcstats).shub_itc_clocks_max = itc2;
 
-	ptc_unlock(flags, opt);
+	if (old_rr) {
+		ia64_set_rr(start, old_rr);
+		ia64_srlz_d();
+	}
+
+	ptc_unlock(shub1, flags);
 
 	preempt_enable();
 }
@@ -321,27 +291,30 @@ sn2_global_tlb_purge(struct mm_struct *m
  * TLB flush transaction.  The recovery sequence is somewhat tricky & is
  * coded in assembly language.
  */
-void sn2_ptc_deadlock_recovery(short *nasids, short nix, int mynasid, volatile unsigned long *ptc0, unsigned long data0,
+void sn2_ptc_deadlock_recovery(short *nasids, short ib, short ie, int mynasid, volatile unsigned long *ptc0, unsigned long data0,
 	volatile unsigned long *ptc1, unsigned long data1)
 {
-	extern void sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long,
+	extern unsigned long sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long,
 	        volatile unsigned long *, unsigned long, volatile unsigned long *, unsigned long);
 	short nasid, i;
-	unsigned long *piows, zeroval;
+	unsigned long *piows, zeroval, n;
 
 	__get_cpu_var(ptcstats).deadlocks++;
 
 	piows = (unsigned long *) pda->pio_write_status_addr;
 	zeroval = pda->pio_write_status_val;
 
-	for (i=0; i < nix; i++) {
+
+	for (i=ib; i <= ie; i++) {
 		nasid = nasids[i];
-		if (!(sn2_ptctest & 3) && nasid == mynasid)
+		if (local_node_uses_ptc_ga(is_shub1()) && nasid == mynasid)
 			continue;
 		ptc0 = CHANGE_NASID(nasid, ptc0);
 		if (ptc1)
 			ptc1 = CHANGE_NASID(nasid, ptc1);
-		sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1, piows, zeroval);
+
+		n = sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1, piows, zeroval);
+		__get_cpu_var(ptcstats).deadlocks2 += n;
 	}
 
 }
@@ -452,20 +425,22 @@ static int sn2_ptc_seq_show(struct seq_f
 	cpu = *(loff_t *) data;
 
 	if (!cpu) {
-		seq_printf(file, "# ptc_l change_rid shub_ptc_flushes shub_nodes_flushed deadlocks lock_nsec shub_nsec shub_nsec_max\n");
+		seq_printf(file,
+			   "# cpu ptc_l newrid ptc_flushes nodes_flushed deadlocks lock_nsec shub_nsec shub_nsec_max not_my_mm deadlock2\n");
 		seq_printf(file, "# ptctest %d\n", sn2_ptctest);
 	}
 
 	if (cpu < NR_CPUS && cpu_online(cpu)) {
 		stat = &per_cpu(ptcstats, cpu);
-		seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l,
+		seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l,
 				stat->change_rid, stat->shub_ptc_flushes, stat->nodes_flushed,
 				stat->deadlocks,
 				1000 * stat->lock_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec,
 				1000 * stat->shub_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec,
-				1000 * stat->shub_itc_clocks_max / per_cpu(cpu_info, cpu).cyc_per_usec);
+				1000 * stat->shub_itc_clocks_max / per_cpu(cpu_info, cpu).cyc_per_usec,
+				stat->shub_ptc_flushes_not_my_mm,
+				stat->deadlocks2);
 	}
-
 	return 0;
 }
 
@@ -476,7 +451,7 @@ static struct seq_operations sn2_ptc_seq
 	.show = sn2_ptc_seq_show
 };
 
-int sn2_ptc_proc_open(struct inode *inode, struct file *file)
+static int sn2_ptc_proc_open(struct inode *inode, struct file *file)
 {
 	return seq_open(file, &sn2_ptc_seq_ops);
 }
diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c
index c75f8ae..9cd460d 100644
--- a/arch/ia64/sn/kernel/xpc_main.c
+++ b/arch/ia64/sn/kernel/xpc_main.c
@@ -575,18 +575,21 @@ xpc_activate_partition(struct xpc_partit
 
 	spin_lock_irqsave(&part->act_lock, irq_flags);
 
-	pid = kernel_thread(xpc_activating, (void *) ((u64) partid), 0);
-
 	DBUG_ON(part->act_state != XPC_P_INACTIVE);
 
-	if (pid > 0) {
-		part->act_state = XPC_P_ACTIVATION_REQ;
-		XPC_SET_REASON(part, xpcCloneKThread, __LINE__);
-	} else {
-		XPC_SET_REASON(part, xpcCloneKThreadFailed, __LINE__);
-	}
+	part->act_state = XPC_P_ACTIVATION_REQ;
+	XPC_SET_REASON(part, xpcCloneKThread, __LINE__);
 
 	spin_unlock_irqrestore(&part->act_lock, irq_flags);
+
+	pid = kernel_thread(xpc_activating, (void *) ((u64) partid), 0);
+
+	if (unlikely(pid <= 0)) {
+		spin_lock_irqsave(&part->act_lock, irq_flags);
+		part->act_state = XPC_P_INACTIVE;
+		XPC_SET_REASON(part, xpcCloneKThreadFailed, __LINE__);
+		spin_unlock_irqrestore(&part->act_lock, irq_flags);
+	}
 }
 
 
diff --git a/arch/ia64/sn/pci/Makefile b/arch/ia64/sn/pci/Makefile
index 321576b..c694678 100644
--- a/arch/ia64/sn/pci/Makefile
+++ b/arch/ia64/sn/pci/Makefile
@@ -7,4 +7,6 @@
 #
 # Makefile for the sn pci general routines.
 
+CPPFLAGS += -I$(srctree)/arch/ia64/sn/include
+
 obj-y := pci_dma.o tioca_provider.o tioce_provider.o pcibr/
diff --git a/arch/ia64/sn/pci/pcibr/Makefile b/arch/ia64/sn/pci/pcibr/Makefile
index 1850c4a..3b403ea 100644
--- a/arch/ia64/sn/pci/pcibr/Makefile
+++ b/arch/ia64/sn/pci/pcibr/Makefile
@@ -7,5 +7,7 @@
 #
 # Makefile for the sn2 io routines.
 
+CPPFLAGS += -I$(srctree)/arch/ia64/sn/include
+
 obj-y				+=  pcibr_dma.o pcibr_reg.o \
 				    pcibr_ate.o pcibr_provider.o
diff --git a/arch/ia64/sn/pci/tioce_provider.c b/arch/ia64/sn/pci/tioce_provider.c
index e52831e..fa073cc 100644
--- a/arch/ia64/sn/pci/tioce_provider.c
+++ b/arch/ia64/sn/pci/tioce_provider.c
@@ -15,6 +15,124 @@
 #include <asm/sn/pcidev.h>
 #include <asm/sn/pcibus_provider_defs.h>
 #include <asm/sn/tioce_provider.h>
+#include <asm/sn/sn2/sn_hwperf.h>
+
+/*
+ * 1/26/2006
+ *
+ * WAR for SGI PV 944642.  For revA TIOCE, need to use the following recipe
+ * (taken from the above PV) before and after accessing tioce internal MMR's
+ * to avoid tioce lockups.
+ *
+ * The recipe as taken from the PV:
+ *
+ *	if(mmr address < 0x45000) {
+ *		if(mmr address == 0 or 0x80)
+ *			mmr wrt or read address 0xc0
+ *		else if(mmr address == 0x148 or 0x200)
+ *			mmr wrt or read address 0x28
+ *		else
+ *			mmr wrt or read address 0x158
+ *
+ *		do desired mmr access (rd or wrt)
+ *
+ *		if(mmr address == 0x100)
+ *			mmr wrt or read address 0x38
+ *		mmr wrt or read address 0xb050
+ *	} else
+ *		do desired mmr access
+ *
+ * According to hw, we can use reads instead of writes to the above addres
+ *
+ * Note this WAR can only to be used for accessing internal MMR's in the
+ * TIOCE Coretalk Address Range 0x0 - 0x07ff_ffff.  This includes the
+ * "Local CE Registers and Memories" and "PCI Compatible Config Space" address
+ * spaces from table 2-1 of the "CE Programmer's Reference Overview" document.
+ *
+ * All registers defined in struct tioce will meet that criteria.
+ */
+
+static void inline
+tioce_mmr_war_pre(struct tioce_kernel *kern, void *mmr_addr)
+{
+	u64 mmr_base;
+	u64 mmr_offset;
+
+	if (kern->ce_common->ce_rev != TIOCE_REV_A)
+		return;
+
+	mmr_base = kern->ce_common->ce_pcibus.bs_base;
+	mmr_offset = (u64)mmr_addr - mmr_base;
+
+	if (mmr_offset < 0x45000) {
+		u64 mmr_war_offset;
+
+		if (mmr_offset == 0 || mmr_offset == 0x80)
+			mmr_war_offset = 0xc0;
+		else if (mmr_offset == 0x148 || mmr_offset == 0x200)
+			mmr_war_offset = 0x28;
+		else
+			mmr_war_offset = 0x158;
+
+		readq_relaxed((void *)(mmr_base + mmr_war_offset));
+	}
+}
+
+static void inline
+tioce_mmr_war_post(struct tioce_kernel *kern, void *mmr_addr)
+{
+	u64 mmr_base;
+	u64 mmr_offset;
+
+	if (kern->ce_common->ce_rev != TIOCE_REV_A)
+		return;
+
+	mmr_base = kern->ce_common->ce_pcibus.bs_base;
+	mmr_offset = (u64)mmr_addr - mmr_base;
+
+	if (mmr_offset < 0x45000) {
+		if (mmr_offset == 0x100)
+			readq_relaxed((void *)(mmr_base + 0x38));
+		readq_relaxed((void *)(mmr_base + 0xb050));
+	}
+}
+
+/* load mmr contents into a variable */
+#define tioce_mmr_load(kern, mmrp, varp) do {\
+	tioce_mmr_war_pre(kern, mmrp); \
+	*(varp) = readq_relaxed(mmrp); \
+	tioce_mmr_war_post(kern, mmrp); \
+} while (0)
+
+/* store variable contents into mmr */
+#define tioce_mmr_store(kern, mmrp, varp) do {\
+	tioce_mmr_war_pre(kern, mmrp); \
+	writeq(*varp, mmrp); \
+	tioce_mmr_war_post(kern, mmrp); \
+} while (0)
+
+/* store immediate value into mmr */
+#define tioce_mmr_storei(kern, mmrp, val) do {\
+	tioce_mmr_war_pre(kern, mmrp); \
+	writeq(val, mmrp); \
+	tioce_mmr_war_post(kern, mmrp); \
+} while (0)
+
+/* set bits (immediate value) into mmr */
+#define tioce_mmr_seti(kern, mmrp, bits) do {\
+	u64 tmp; \
+	tioce_mmr_load(kern, mmrp, &tmp); \
+	tmp |= (bits); \
+	tioce_mmr_store(kern, mmrp, &tmp); \
+} while (0)
+
+/* clear bits (immediate value) into mmr */
+#define tioce_mmr_clri(kern, mmrp, bits) do { \
+	u64 tmp; \
+	tioce_mmr_load(kern, mmrp, &tmp); \
+	tmp &= ~(bits); \
+	tioce_mmr_store(kern, mmrp, &tmp); \
+} while (0)
 
 /**
  * Bus address ranges for the 5 flavors of TIOCE DMA
@@ -62,9 +180,9 @@
 #define TIOCE_ATE_M40	2
 #define TIOCE_ATE_M40S	3
 
-#define KB(x)	((x) << 10)
-#define MB(x)	((x) << 20)
-#define GB(x)	((x) << 30)
+#define KB(x)	((u64)(x) << 10)
+#define MB(x)	((u64)(x) << 20)
+#define GB(x)	((u64)(x) << 30)
 
 /**
  * tioce_dma_d64 - create a DMA mapping using 64-bit direct mode
@@ -151,7 +269,7 @@ tioce_alloc_map(struct tioce_kernel *ce_
 	int last;
 	int entries;
 	int nates;
-	int pagesize;
+	u64 pagesize;
 	u64 *ate_shadow;
 	u64 *ate_reg;
 	u64 addr;
@@ -228,7 +346,7 @@ tioce_alloc_map(struct tioce_kernel *ce_
 
 		ate = ATE_MAKE(addr, pagesize);
 		ate_shadow[i + j] = ate;
-		writeq(ate, &ate_reg[i + j]);
+		tioce_mmr_storei(ce_kern, &ate_reg[i + j], ate);
 		addr += pagesize;
 	}
 
@@ -272,7 +390,8 @@ tioce_dma_d32(struct pci_dev *pdev, u64 
 		u64 tmp;
 
 		ce_kern->ce_port[port].dirmap_shadow = ct_upper;
-		writeq(ct_upper, &ce_mmr->ce_ure_dir_map[port]);
+		tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_dir_map[port],
+				 ct_upper);
 		tmp = ce_mmr->ce_ure_dir_map[port];
 		dma_ok = 1;
 	} else
@@ -344,7 +463,8 @@ tioce_dma_unmap(struct pci_dev *pdev, dm
 	if (TIOCE_D32_ADDR(bus_addr)) {
 		if (--ce_kern->ce_port[port].dirmap_refcnt == 0) {
 			ce_kern->ce_port[port].dirmap_shadow = 0;
-			writeq(0, &ce_mmr->ce_ure_dir_map[port]);
+			tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_dir_map[port],
+					 0);
 		}
 	} else {
 		struct tioce_dmamap *map;
@@ -365,7 +485,7 @@ tioce_dma_unmap(struct pci_dev *pdev, dm
 		} else if (--map->refcnt == 0) {
 			for (i = 0; i < map->ate_count; i++) {
 				map->ate_shadow[i] = 0;
-				map->ate_hw[i] = 0;
+				tioce_mmr_storei(ce_kern, &map->ate_hw[i], 0);
 			}
 
 			list_del(&map->ce_dmamap_list);
@@ -486,7 +606,7 @@ tioce_do_dma_map(struct pci_dev *pdev, u
 	spin_unlock_irqrestore(&ce_kern->ce_lock, flags);
 
 dma_map_done:
-	if (mapaddr & barrier)
+	if (mapaddr && barrier)
 		mapaddr = tioce_dma_barrier(mapaddr, 1);
 
 	return mapaddr;
@@ -541,17 +661,61 @@ tioce_error_intr_handler(int irq, void *
 			soft->ce_pcibus.bs_persist_segment,
 			soft->ce_pcibus.bs_persist_busnum, 0, 0, 0, 0, 0);
 
+	if (ret_stuff.v0)
+		panic("tioce_error_intr_handler:  Fatal TIOCE error");
+
 	return IRQ_HANDLED;
 }
 
 /**
+ * tioce_reserve_m32 - reserve M32 ate's for the indicated address range
+ * @tioce_kernel: TIOCE context to reserve ate's for
+ * @base: starting bus address to reserve
+ * @limit: last bus address to reserve
+ *
+ * If base/limit falls within the range of bus space mapped through the
+ * M32 space, reserve the resources corresponding to the range.
+ */
+static void
+tioce_reserve_m32(struct tioce_kernel *ce_kern, u64 base, u64 limit)
+{
+	int ate_index, last_ate, ps;
+	struct tioce *ce_mmr;
+
+	if (!TIOCE_M32_ADDR(base))
+		return;
+
+	ce_mmr = (struct tioce *)ce_kern->ce_common->ce_pcibus.bs_base;
+	ps = ce_kern->ce_ate3240_pagesize;
+	ate_index = ATE_PAGE(base, ps);
+	last_ate = ate_index + ATE_NPAGES(base, limit-base+1, ps) - 1;
+
+	if (ate_index < 64)
+		ate_index = 64;
+
+	while (ate_index <= last_ate) {
+		u64 ate;
+
+		ate = ATE_MAKE(0xdeadbeef, ps);
+		ce_kern->ce_ate3240_shadow[ate_index] = ate;
+		tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_ate3240[ate_index],
+				 ate);
+		ate_index++;
+	}
+}
+
+/**
  * tioce_kern_init - init kernel structures related to a given TIOCE
  * @tioce_common: ptr to a cached tioce_common struct that originated in prom
- */ static struct tioce_kernel *
+ */
+static struct tioce_kernel *
 tioce_kern_init(struct tioce_common *tioce_common)
 {
 	int i;
+	int ps;
+	int dev;
 	u32 tmp;
+	unsigned int seg, bus;
 	struct tioce *tioce_mmr;
 	struct tioce_kernel *tioce_kern;
 
@@ -572,9 +736,10 @@ tioce_kern_init(struct tioce_common *tio
 	 * here to use pci_read_config_xxx() so use the raw_pci_ops vector.
 	 */
 
-	raw_pci_ops->read(tioce_common->ce_pcibus.bs_persist_segment,
-			  tioce_common->ce_pcibus.bs_persist_busnum,
-			  PCI_DEVFN(2, 0), PCI_SECONDARY_BUS, 1, &tmp);
+	seg = tioce_common->ce_pcibus.bs_persist_segment;
+	bus = tioce_common->ce_pcibus.bs_persist_busnum;
+
+	raw_pci_ops->read(seg, bus, PCI_DEVFN(2, 0), PCI_SECONDARY_BUS, 1,&tmp);
 	tioce_kern->ce_port1_secondary = (u8) tmp;
 
 	/*
@@ -583,18 +748,76 @@ tioce_kern_init(struct tioce_common *tio
 	 */
 
 	tioce_mmr = (struct tioce *)tioce_common->ce_pcibus.bs_base;
-	__sn_clrq_relaxed(&tioce_mmr->ce_ure_page_map, CE_URE_PAGESIZE_MASK);
-	__sn_setq_relaxed(&tioce_mmr->ce_ure_page_map, CE_URE_256K_PAGESIZE);
-	tioce_kern->ce_ate3240_pagesize = KB(256);
+	tioce_mmr_clri(tioce_kern, &tioce_mmr->ce_ure_page_map,
+		       CE_URE_PAGESIZE_MASK);
+	tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_ure_page_map,
+		       CE_URE_256K_PAGESIZE);
+	ps = tioce_kern->ce_ate3240_pagesize = KB(256);
 
 	for (i = 0; i < TIOCE_NUM_M40_ATES; i++) {
 		tioce_kern->ce_ate40_shadow[i] = 0;
-		writeq(0, &tioce_mmr->ce_ure_ate40[i]);
+		tioce_mmr_storei(tioce_kern, &tioce_mmr->ce_ure_ate40[i], 0);
 	}
 
 	for (i = 0; i < TIOCE_NUM_M3240_ATES; i++) {
 		tioce_kern->ce_ate3240_shadow[i] = 0;
-		writeq(0, &tioce_mmr->ce_ure_ate3240[i]);
+		tioce_mmr_storei(tioce_kern, &tioce_mmr->ce_ure_ate3240[i], 0);
+	}
+
+	/*
+	 * Reserve ATE's corresponding to reserved address ranges.  These
+	 * include:
+	 *
+	 *	Memory space covered by each PPB mem base/limit register
+	 * 	Memory space covered by each PPB prefetch base/limit register
+	 *
+	 * These bus ranges are for pio (downstream) traffic only, and so
+	 * cannot be used for DMA.
+	 */
+
+	for (dev = 1; dev <= 2; dev++) {
+		u64 base, limit;
+
+		/* mem base/limit */
+
+		raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+				  PCI_MEMORY_BASE, 2, &tmp);
+		base = (u64)tmp << 16;
+
+		raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+				  PCI_MEMORY_LIMIT, 2, &tmp);
+		limit = (u64)tmp << 16;
+		limit |= 0xfffffUL;
+
+		if (base < limit)
+			tioce_reserve_m32(tioce_kern, base, limit);
+
+		/*
+		 * prefetch mem base/limit.  The tioce ppb's have 64-bit
+		 * decoders, so read the upper portions w/o checking the
+		 * attributes.
+		 */
+
+		raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+				  PCI_PREF_MEMORY_BASE, 2, &tmp);
+		base = ((u64)tmp & PCI_PREF_RANGE_MASK) << 16;
+
+		raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+				  PCI_PREF_BASE_UPPER32, 4, &tmp);
+		base |= (u64)tmp << 32;
+
+		raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+				  PCI_PREF_MEMORY_LIMIT, 2, &tmp);
+
+		limit = ((u64)tmp & PCI_PREF_RANGE_MASK) << 16;
+		limit |= 0xfffffUL;
+
+		raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+				  PCI_PREF_LIMIT_UPPER32, 4, &tmp);
+		limit |= (u64)tmp << 32;
+
+		if ((base < limit) && TIOCE_M32_ADDR(base))
+			tioce_reserve_m32(tioce_kern, base, limit);
 	}
 
 	return tioce_kern;
@@ -614,6 +837,7 @@ tioce_force_interrupt(struct sn_irq_info
 {
 	struct pcidev_info *pcidev_info;
 	struct tioce_common *ce_common;
+	struct tioce_kernel *ce_kern;
 	struct tioce *ce_mmr;
 	u64 force_int_val;
 
@@ -629,6 +853,29 @@ tioce_force_interrupt(struct sn_irq_info
 
 	ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info;
 	ce_mmr = (struct tioce *)ce_common->ce_pcibus.bs_base;
+	ce_kern = (struct tioce_kernel *)ce_common->ce_kernel_private;
+
+	/*
+	 * TIOCE Rev A workaround (PV 945826), force an interrupt by writing
+	 * the TIO_INTx register directly (1/26/2006)
+	 */
+	if (ce_common->ce_rev == TIOCE_REV_A) {
+		u64 int_bit_mask = (1ULL << sn_irq_info->irq_int_bit);
+		u64 status;
+
+		tioce_mmr_load(ce_kern, &ce_mmr->ce_adm_int_status, &status);
+		if (status & int_bit_mask) {
+			u64 force_irq = (1 << 8) | sn_irq_info->irq_irq;
+			u64 ctalk = sn_irq_info->irq_xtalkaddr;
+			u64 nasid, offset;
+
+			nasid = (ctalk & CTALK_NASID_MASK) >> CTALK_NASID_SHFT;
+			offset = (ctalk & CTALK_NODE_OFFSET);
+			HUB_S(TIO_IOSPACE_ADDR(nasid, offset), force_irq);
+		}
+
+		return;
+	}
 
 	/*
 	 * irq_int_bit is originally set up by prom, and holds the interrupt
@@ -666,7 +913,7 @@ tioce_force_interrupt(struct sn_irq_info
 	default:
 		return;
 	}
-	writeq(force_int_val, &ce_mmr->ce_adm_force_int);
+	tioce_mmr_storei(ce_kern, &ce_mmr->ce_adm_force_int, force_int_val);
 }
 
 /**
@@ -685,6 +932,7 @@ tioce_target_interrupt(struct sn_irq_inf
 {
 	struct pcidev_info *pcidev_info;
 	struct tioce_common *ce_common;
+	struct tioce_kernel *ce_kern;
 	struct tioce *ce_mmr;
 	int bit;
 	u64 vector;
@@ -695,14 +943,15 @@ tioce_target_interrupt(struct sn_irq_inf
 
 	ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info;
 	ce_mmr = (struct tioce *)ce_common->ce_pcibus.bs_base;
+	ce_kern = (struct tioce_kernel *)ce_common->ce_kernel_private;
 
 	bit = sn_irq_info->irq_int_bit;
 
-	__sn_setq_relaxed(&ce_mmr->ce_adm_int_mask, (1UL << bit));
+	tioce_mmr_seti(ce_kern, &ce_mmr->ce_adm_int_mask, (1UL << bit));
 	vector = (u64)sn_irq_info->irq_irq << INTR_VECTOR_SHFT;
 	vector |= sn_irq_info->irq_xtalkaddr;
-	writeq(vector, &ce_mmr->ce_adm_int_dest[bit]);
-	__sn_clrq_relaxed(&ce_mmr->ce_adm_int_mask, (1UL << bit));
+	tioce_mmr_storei(ce_kern, &ce_mmr->ce_adm_int_dest[bit], vector);
+	tioce_mmr_clri(ce_kern, &ce_mmr->ce_adm_int_mask, (1UL << bit));
 
 	tioce_force_interrupt(sn_irq_info);
 }
@@ -721,7 +970,11 @@ tioce_target_interrupt(struct sn_irq_inf
 static void *
 tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *controller)
 {
+	int my_nasid;
+	cnodeid_t my_cnode, mem_cnode;
 	struct tioce_common *tioce_common;
+	struct tioce_kernel *tioce_kern;
+	struct tioce *tioce_mmr;
 
 	/*
 	 * Allocate kernel bus soft and copy from prom.
@@ -734,11 +987,23 @@ tioce_bus_fixup(struct pcibus_bussoft *p
 	memcpy(tioce_common, prom_bussoft, sizeof(struct tioce_common));
 	tioce_common->ce_pcibus.bs_base |= __IA64_UNCACHED_OFFSET;
 
-	if (tioce_kern_init(tioce_common) == NULL) {
+	tioce_kern = tioce_kern_init(tioce_common);
+	if (tioce_kern == NULL) {
 		kfree(tioce_common);
 		return NULL;
 	}
 
+	/*
+	 * Clear out any transient errors before registering the error
+	 * interrupt handler.
+	 */
+
+	tioce_mmr = (struct tioce *)tioce_common->ce_pcibus.bs_base;
+	tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_adm_int_status_alias, ~0ULL);
+	tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_adm_error_summary_alias,
+		       ~0ULL);
+	tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_dre_comp_err_addr, ~0ULL);
+
 	if (request_irq(SGI_PCIASIC_ERROR,
 			tioce_error_intr_handler,
 			SA_SHIRQ, "TIOCE error", (void *)tioce_common))
@@ -750,6 +1015,21 @@ tioce_bus_fixup(struct pcibus_bussoft *p
 		       tioce_common->ce_pcibus.bs_persist_segment,
 		       tioce_common->ce_pcibus.bs_persist_busnum);
 
+	/*
+	 * identify closest nasid for memory allocations
+	 */
+
+	my_nasid = NASID_GET(tioce_common->ce_pcibus.bs_base);
+	my_cnode = nasid_to_cnodeid(my_nasid);
+
+	if (sn_hwperf_get_nearest_node(my_cnode, &mem_cnode, NULL) < 0) {
+		printk(KERN_WARNING "tioce_bus_fixup: failed to find "
+		       "closest node with MEM to TIO node %d\n", my_cnode);
+		mem_cnode = (cnodeid_t)-1; /* use any node */
+	}
+
+	controller->node = mem_cnode;
+
 	return tioce_common;
 }
 
diff --git a/drivers/Makefile b/drivers/Makefile
index 619dd96..5c69b86 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -69,7 +69,7 @@ obj-$(CONFIG_EISA)		+= eisa/
 obj-$(CONFIG_CPU_FREQ)		+= cpufreq/
 obj-$(CONFIG_MMC)		+= mmc/
 obj-$(CONFIG_INFINIBAND)	+= infiniband/
-obj-$(CONFIG_SGI_IOC4)		+= sn/
+obj-$(CONFIG_SGI_SN)		+= sn/
 obj-y				+= firmware/
 obj-$(CONFIG_CRYPTO)		+= crypto/
 obj-$(CONFIG_SUPERH)		+= sh/
diff --git a/drivers/char/snsc.h b/drivers/char/snsc.h
index a9efc13..8a98169 100644
--- a/drivers/char/snsc.h
+++ b/drivers/char/snsc.h
@@ -5,7 +5,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 2004 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2004-2006 Silicon Graphics, Inc. All rights reserved.
  */
 
 /*
@@ -70,6 +70,9 @@ struct sysctl_data_s {
 #define EV_CLASS_TEST_WARNING	0x6000ul
 #define EV_CLASS_PWRD_NOTIFY	0x8000ul
 
+/* ENV class codes */
+#define ENV_PWRDN_PEND		0x4101ul
+
 #define EV_SEVERITY_POWER_STABLE	0x0000ul
 #define EV_SEVERITY_POWER_LOW_WARNING	0x0100ul
 #define EV_SEVERITY_POWER_HIGH_WARNING	0x0200ul
diff --git a/drivers/char/snsc_event.c b/drivers/char/snsc_event.c
index baaa365..a4fa507 100644
--- a/drivers/char/snsc_event.c
+++ b/drivers/char/snsc_event.c
@@ -5,7 +5,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 2004 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2004-2006 Silicon Graphics, Inc. All rights reserved.
  */
 
 /*
@@ -187,7 +187,8 @@ scdrv_event_severity(int code)
 static void
 scdrv_dispatch_event(char *event, int len)
 {
-	int code, esp_code, src;
+	static int snsc_shutting_down = 0;
+	int code, esp_code, src, class;
 	char desc[CHUNKSIZE];
 	char *severity;
 
@@ -199,9 +200,25 @@ scdrv_dispatch_event(char *event, int le
 	/* how urgent is the message? */
 	severity = scdrv_event_severity(code);
 
-	if ((code & EV_CLASS_MASK) == EV_CLASS_PWRD_NOTIFY) {
+	class = (code & EV_CLASS_MASK);
+
+	if (class == EV_CLASS_PWRD_NOTIFY || code == ENV_PWRDN_PEND) {
 		struct task_struct *p;
 
+		if (snsc_shutting_down)
+			return;
+
+		snsc_shutting_down = 1;
+
+		/* give a message for each type of event */
+		if (class == EV_CLASS_PWRD_NOTIFY)
+			printk(KERN_NOTICE "Power off indication received."
+			       " Sending SIGPWR to init...\n");
+		else if (code == ENV_PWRDN_PEND)
+			printk(KERN_CRIT "WARNING: Shutting down the system"
+			       " due to a critical environmental condition."
+			       " Sending SIGPWR to init...\n");
+
 		/* give a SIGPWR signal to init proc */
 
 		/* first find init's task */
@@ -210,12 +227,11 @@ scdrv_dispatch_event(char *event, int le
 			if (p->pid == 1)
 				break;
 		}
-		if (p) { /* we found init's task */
-			printk(KERN_EMERG "Power off indication received. Initiating power fail sequence...\n");
+		if (p) {
 			force_sig(SIGPWR, p);
-		} else { /* failed to find init's task - just give message(s) */
-			printk(KERN_WARNING "Failed to find init proc to handle power off!\n");
-			printk("%s|$(0x%x)%s\n", severity, esp_code, desc);
+		} else {
+			printk(KERN_ERR "Failed to signal init!\n");
+			snsc_shutting_down = 0; /* so can try again (?) */
 		}
 		read_unlock(&tasklist_lock);
 	} else {
diff --git a/drivers/sn/ioc4.c b/drivers/sn/ioc4.c
index ea75b3d..67140a5 100644
--- a/drivers/sn/ioc4.c
+++ b/drivers/sn/ioc4.c
@@ -31,7 +31,7 @@
 #include <linux/ioc4.h>
 #include <linux/mmtimer.h>
 #include <linux/rtc.h>
-#include <linux/rwsem.h>
+#include <linux/mutex.h>
 #include <asm/sn/addrs.h>
 #include <asm/sn/clksupport.h>
 #include <asm/sn/shub_mmr.h>
@@ -54,11 +54,10 @@
  * Submodule management *
  ************************/
 
-static LIST_HEAD(ioc4_devices);
-static DECLARE_RWSEM(ioc4_devices_rwsem);
+static DEFINE_MUTEX(ioc4_mutex);
 
+static LIST_HEAD(ioc4_devices);
 static LIST_HEAD(ioc4_submodules);
-static DECLARE_RWSEM(ioc4_submodules_rwsem);
 
 /* Register an IOC4 submodule */
 int
@@ -66,15 +65,13 @@ ioc4_register_submodule(struct ioc4_subm
 {
 	struct ioc4_driver_data *idd;
 
-	down_write(&ioc4_submodules_rwsem);
+	mutex_lock(&ioc4_mutex);
 	list_add(&is->is_list, &ioc4_submodules);
-	up_write(&ioc4_submodules_rwsem);
 
 	/* Initialize submodule for each IOC4 */
 	if (!is->is_probe)
-		return 0;
+		goto out;
 
-	down_read(&ioc4_devices_rwsem);
 	list_for_each_entry(idd, &ioc4_devices, idd_list) {
 		if (is->is_probe(idd)) {
 			printk(KERN_WARNING
@@ -84,8 +81,8 @@ ioc4_register_submodule(struct ioc4_subm
 			       pci_name(idd->idd_pdev));
 		}
 	}
-	up_read(&ioc4_devices_rwsem);
-
+ out:
+	mutex_unlock(&ioc4_mutex);
 	return 0;
 }
 
@@ -95,15 +92,13 @@ ioc4_unregister_submodule(struct ioc4_su
 {
 	struct ioc4_driver_data *idd;
 
-	down_write(&ioc4_submodules_rwsem);
+	mutex_lock(&ioc4_mutex);
 	list_del(&is->is_list);
-	up_write(&ioc4_submodules_rwsem);
 
 	/* Remove submodule for each IOC4 */
 	if (!is->is_remove)
-		return;
+		goto out;
 
-	down_read(&ioc4_devices_rwsem);
 	list_for_each_entry(idd, &ioc4_devices, idd_list) {
 		if (is->is_remove(idd)) {
 			printk(KERN_WARNING
@@ -113,7 +108,8 @@ ioc4_unregister_submodule(struct ioc4_su
 			       pci_name(idd->idd_pdev));
 		}
 	}
-	up_read(&ioc4_devices_rwsem);
+ out:
+	mutex_unlock(&ioc4_mutex);
 }
 
 /*********************
@@ -312,12 +308,11 @@ ioc4_probe(struct pci_dev *pdev, const s
 	/* Track PCI-device specific data */
 	idd->idd_serial_data = NULL;
 	pci_set_drvdata(idd->idd_pdev, idd);
-	down_write(&ioc4_devices_rwsem);
+
+	mutex_lock(&ioc4_mutex);
 	list_add(&idd->idd_list, &ioc4_devices);
-	up_write(&ioc4_devices_rwsem);
 
 	/* Add this IOC4 to all submodules */
-	down_read(&ioc4_submodules_rwsem);
 	list_for_each_entry(is, &ioc4_submodules, is_list) {
 		if (is->is_probe && is->is_probe(idd)) {
 			printk(KERN_WARNING
@@ -327,7 +322,7 @@ ioc4_probe(struct pci_dev *pdev, const s
 			       pci_name(idd->idd_pdev));
 		}
 	}
-	up_read(&ioc4_submodules_rwsem);
+	mutex_unlock(&ioc4_mutex);
 
 	return 0;
 
@@ -351,7 +346,7 @@ ioc4_remove(struct pci_dev *pdev)
 	idd = pci_get_drvdata(pdev);
 
 	/* Remove this IOC4 from all submodules */
-	down_read(&ioc4_submodules_rwsem);
+	mutex_lock(&ioc4_mutex);
 	list_for_each_entry(is, &ioc4_submodules, is_list) {
 		if (is->is_remove && is->is_remove(idd)) {
 			printk(KERN_WARNING
@@ -361,7 +356,7 @@ ioc4_remove(struct pci_dev *pdev)
 			       pci_name(idd->idd_pdev));
 		}
 	}
-	up_read(&ioc4_submodules_rwsem);
+	mutex_unlock(&ioc4_mutex);
 
 	/* Release resources */
 	iounmap(idd->idd_misc_regs);
@@ -377,9 +372,9 @@ ioc4_remove(struct pci_dev *pdev)
 	pci_disable_device(pdev);
 
 	/* Remove and free driver data */
-	down_write(&ioc4_devices_rwsem);
+	mutex_lock(&ioc4_mutex);
 	list_del(&idd->idd_list);
-	up_write(&ioc4_devices_rwsem);
+	mutex_unlock(&ioc4_mutex);
 	kfree(idd);
 }
 
diff --git a/include/asm-ia64/machvec.h b/include/asm-ia64/machvec.h
index ca5ea99..c3e4ed8 100644
--- a/include/asm-ia64/machvec.h
+++ b/include/asm-ia64/machvec.h
@@ -20,6 +20,7 @@ struct scatterlist;
 struct page;
 struct mm_struct;
 struct pci_bus;
+struct task_struct;
 
 typedef void ia64_mv_setup_t (char **);
 typedef void ia64_mv_cpu_init_t (void);
@@ -34,6 +35,7 @@ typedef int ia64_mv_pci_legacy_read_t (s
 				       u8 size);
 typedef int ia64_mv_pci_legacy_write_t (struct pci_bus *, u16 port, u32 val,
 					u8 size);
+typedef void ia64_mv_migrate_t(struct task_struct * task);
 
 /* DMA-mapping interface: */
 typedef void ia64_mv_dma_init (void);
@@ -85,6 +87,11 @@ machvec_noop_mm (struct mm_struct *mm)
 {
 }
 
+static inline void
+machvec_noop_task (struct task_struct *task)
+{
+}
+
 extern void machvec_setup (char **);
 extern void machvec_timer_interrupt (int, void *, struct pt_regs *);
 extern void machvec_dma_sync_single (struct device *, dma_addr_t, size_t, int);
@@ -146,6 +153,7 @@ extern void machvec_tlb_migrate_finish (
 #  define platform_readw_relaxed        ia64_mv.readw_relaxed
 #  define platform_readl_relaxed        ia64_mv.readl_relaxed
 #  define platform_readq_relaxed        ia64_mv.readq_relaxed
+#  define platform_migrate		ia64_mv.migrate
 # endif
 
 /* __attribute__((__aligned__(16))) is required to make size of the
@@ -194,6 +202,7 @@ struct ia64_machine_vector {
 	ia64_mv_readw_relaxed_t *readw_relaxed;
 	ia64_mv_readl_relaxed_t *readl_relaxed;
 	ia64_mv_readq_relaxed_t *readq_relaxed;
+	ia64_mv_migrate_t *migrate;
 } __attribute__((__aligned__(16))); /* align attrib? see above comment */
 
 #define MACHVEC_INIT(name)			\
@@ -238,6 +247,7 @@ struct ia64_machine_vector {
 	platform_readw_relaxed,			\
 	platform_readl_relaxed,			\
 	platform_readq_relaxed,			\
+	platform_migrate,			\
 }
 
 extern struct ia64_machine_vector ia64_mv;
@@ -386,5 +396,8 @@ extern ia64_mv_dma_supported		swiotlb_dm
 #ifndef platform_readq_relaxed
 # define platform_readq_relaxed	__ia64_readq_relaxed
 #endif
+#ifndef platform_migrate
+# define platform_migrate machvec_noop_task
+#endif
 
 #endif /* _ASM_IA64_MACHVEC_H */
diff --git a/include/asm-ia64/machvec_sn2.h b/include/asm-ia64/machvec_sn2.h
index e1b6cd6..6f0021b 100644
--- a/include/asm-ia64/machvec_sn2.h
+++ b/include/asm-ia64/machvec_sn2.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2002-2003,2006 Silicon Graphics, Inc.  All Rights Reserved.
  * 
  * This program is free software; you can redistribute it and/or modify it 
  * under the terms of version 2 of the GNU General Public License 
@@ -71,6 +71,7 @@ extern ia64_mv_dma_sync_single_for_devic
 extern ia64_mv_dma_sync_sg_for_device	sn_dma_sync_sg_for_device;
 extern ia64_mv_dma_mapping_error	sn_dma_mapping_error;
 extern ia64_mv_dma_supported		sn_dma_supported;
+extern ia64_mv_migrate_t		sn_migrate;
 
 /*
  * This stuff has dual use!
@@ -120,6 +121,7 @@ extern ia64_mv_dma_supported		sn_dma_sup
 #define platform_dma_sync_sg_for_device	sn_dma_sync_sg_for_device
 #define platform_dma_mapping_error		sn_dma_mapping_error
 #define platform_dma_supported		sn_dma_supported
+#define platform_migrate		sn_migrate
 
 #include <asm/sn/io.h>
 
diff --git a/include/asm-ia64/mca.h b/include/asm-ia64/mca.h
index c7d9c9e..bfbbb8d 100644
--- a/include/asm-ia64/mca.h
+++ b/include/asm-ia64/mca.h
@@ -131,6 +131,8 @@ struct ia64_mca_cpu {
 /* Array of physical addresses of each CPU's MCA area.  */
 extern unsigned long __per_cpu_mca[NR_CPUS];
 
+extern int cpe_vector;
+extern int ia64_cpe_irq;
 extern void ia64_mca_init(void);
 extern void ia64_mca_cpu_init(void *);
 extern void ia64_os_mca_dispatch(void);
diff --git a/include/asm-ia64/mutex.h b/include/asm-ia64/mutex.h
index 458c1f7..5a3224f 100644
--- a/include/asm-ia64/mutex.h
+++ b/include/asm-ia64/mutex.h
@@ -1,9 +1,92 @@
 /*
- * Pull in the generic implementation for the mutex fastpath.
+ * ia64 implementation of the mutex fastpath.
  *
- * TODO: implement optimized primitives instead, or leave the generic
- * implementation in place, or pick the atomic_xchg() based generic
- * implementation. (see asm-generic/mutex-xchg.h for details)
+ * Copyright (C) 2006 Ken Chen <kenneth.w.chen@intel.com>
+ *
+ */
+
+#ifndef _ASM_MUTEX_H
+#define _ASM_MUTEX_H
+
+/**
+ *  __mutex_fastpath_lock - try to take the lock by moving the count
+ *                          from 1 to a 0 value
+ *  @count: pointer of type atomic_t
+ *  @fail_fn: function to call if the original value was not 1
+ *
+ * Change the count from 1 to a value lower than 1, and call <fail_fn> if
+ * it wasn't 1 originally. This function MUST leave the value lower than
+ * 1 even when the "1" assertion wasn't true.
+ */
+static inline void
+__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
+{
+	if (unlikely(ia64_fetchadd4_acq(count, -1) != 1))
+		fail_fn(count);
+}
+
+/**
+ *  __mutex_fastpath_lock_retval - try to take the lock by moving the count
+ *                                 from 1 to a 0 value
+ *  @count: pointer of type atomic_t
+ *  @fail_fn: function to call if the original value was not 1
+ *
+ * Change the count from 1 to a value lower than 1, and call <fail_fn> if
+ * it wasn't 1 originally. This function returns 0 if the fastpath succeeds,
+ * or anything the slow path function returns.
+ */
+static inline int
+__mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
+{
+	if (unlikely(ia64_fetchadd4_acq(count, -1) != 1))
+		return fail_fn(count);
+	return 0;
+}
+
+/**
+ *  __mutex_fastpath_unlock - try to promote the count from 0 to 1
+ *  @count: pointer of type atomic_t
+ *  @fail_fn: function to call if the original value was not 0
+ *
+ * Try to promote the count from 0 to 1. If it wasn't 0, call <fail_fn>.
+ * In the failure case, this function is allowed to either set the value to
+ * 1, or to set it to a value lower than 1.
+ *
+ * If the implementation sets it to a value of lower than 1, then the
+ * __mutex_slowpath_needs_to_unlock() macro needs to return 1, it needs
+ * to return 0 otherwise.
+ */
+static inline void
+__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
+{
+	int ret = ia64_fetchadd4_rel(count, 1);
+	if (unlikely(ret < 0))
+		fail_fn(count);
+}
+
+#define __mutex_slowpath_needs_to_unlock()		1
+
+/**
+ * __mutex_fastpath_trylock - try to acquire the mutex, without waiting
+ *
+ *  @count: pointer of type atomic_t
+ *  @fail_fn: fallback function
+ *
+ * Change the count from 1 to a value lower than 1, and return 0 (failure)
+ * if it wasn't 1 originally, or return 1 (success) otherwise. This function
+ * MUST leave the value lower than 1 even when the "1" assertion wasn't true.
+ * Additionally, if the value was < 0 originally, this function must not leave
+ * it to 0 on failure.
+ *
+ * If the architecture has no effective trylock variant, it should call the
+ * <fail_fn> spinlock-based trylock variant unconditionally.
  */
+static inline int
+__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
+{
+	if (likely(cmpxchg_acq(count, 1, 0)) == 1)
+		return 1;
+	return 0;
+}
 
-#include <asm-generic/mutex-dec.h>
+#endif
diff --git a/include/asm-ia64/processor.h b/include/asm-ia64/processor.h
index 09b9902..128fefd 100644
--- a/include/asm-ia64/processor.h
+++ b/include/asm-ia64/processor.h
@@ -50,7 +50,8 @@
 #define IA64_THREAD_PM_VALID	(__IA64_UL(1) << 2)	/* performance registers valid? */
 #define IA64_THREAD_UAC_NOPRINT	(__IA64_UL(1) << 3)	/* don't log unaligned accesses */
 #define IA64_THREAD_UAC_SIGBUS	(__IA64_UL(1) << 4)	/* generate SIGBUS on unaligned acc. */
-							/* bit 5 is currently unused */
+#define IA64_THREAD_MIGRATION	(__IA64_UL(1) << 5)	/* require migration
+							   sync at ctx sw */
 #define IA64_THREAD_FPEMU_NOPRINT (__IA64_UL(1) << 6)	/* don't log any fpswa faults */
 #define IA64_THREAD_FPEMU_SIGFPE  (__IA64_UL(1) << 7)	/* send a SIGFPE for fpswa faults */
 
@@ -559,6 +560,23 @@ ia64_eoi (void)
 
 #define cpu_relax()	ia64_hint(ia64_hint_pause)
 
+static inline int
+ia64_get_irr(unsigned int vector)
+{
+	unsigned int reg = vector / 64;
+	unsigned int bit = vector % 64;
+	u64 irr;
+
+	switch (reg) {
+	case 0: irr = ia64_getreg(_IA64_REG_CR_IRR0); break;
+	case 1: irr = ia64_getreg(_IA64_REG_CR_IRR1); break;
+	case 2: irr = ia64_getreg(_IA64_REG_CR_IRR2); break;
+	case 3: irr = ia64_getreg(_IA64_REG_CR_IRR3); break;
+	}
+
+	return test_bit(bit, &irr);
+}
+
 static inline void
 ia64_set_lrr0 (unsigned long val)
 {
diff --git a/include/asm-ia64/sal.h b/include/asm-ia64/sal.h
index 313cad0..0b210ab 100644
--- a/include/asm-ia64/sal.h
+++ b/include/asm-ia64/sal.h
@@ -658,15 +658,7 @@ ia64_sal_freq_base (unsigned long which,
 	return isrv.status;
 }
 
-/* Flush all the processor and platform level instruction and/or data caches */
-static inline s64
-ia64_sal_cache_flush (u64 cache_type)
-{
-	struct ia64_sal_retval isrv;
-	SAL_CALL(isrv, SAL_CACHE_FLUSH, cache_type, 0, 0, 0, 0, 0, 0);
-	return isrv.status;
-}
-
+extern s64 ia64_sal_cache_flush (u64 cache_type);
 
 /* Initialize all the processor and platform level instruction and data caches */
 static inline s64
diff --git a/include/asm-ia64/signal.h b/include/asm-ia64/signal.h
index 608168d..5e328ed 100644
--- a/include/asm-ia64/signal.h
+++ b/include/asm-ia64/signal.h
@@ -158,8 +158,6 @@ struct k_sigaction {
 
 #define ptrace_signal_deliver(regs, cookie) do { } while (0)
 
-void set_sigdelayed(pid_t pid, int signo, int code, void __user *addr);
-
 #endif /* __KERNEL__ */
 
 # endif /* !__ASSEMBLY__ */
diff --git a/include/asm-ia64/sn/addrs.h b/include/asm-ia64/sn/addrs.h
index 2c32e4b..1d9efe5 100644
--- a/include/asm-ia64/sn/addrs.h
+++ b/include/asm-ia64/sn/addrs.h
@@ -283,5 +283,13 @@
 #define REMOTE_HUB_L(n, a)		HUB_L(REMOTE_HUB_ADDR((n), (a)))
 #define REMOTE_HUB_S(n, a, d)		HUB_S(REMOTE_HUB_ADDR((n), (a)), (d))
 
+/*
+ * Coretalk address breakdown
+ */
+#define CTALK_NASID_SHFT		40
+#define CTALK_NASID_MASK		(0x3FFFULL << CTALK_NASID_SHFT)
+#define CTALK_CID_SHFT			38
+#define CTALK_CID_MASK			(0x3ULL << CTALK_CID_SHFT)
+#define CTALK_NODE_OFFSET		0x3FFFFFFFFF
 
 #endif /* _ASM_IA64_SN_ADDRS_H */
diff --git a/include/asm-ia64/sn/intr.h b/include/asm-ia64/sn/intr.h
index a343137..60a51a4 100644
--- a/include/asm-ia64/sn/intr.h
+++ b/include/asm-ia64/sn/intr.h
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 1992 - 1997, 2000-2004 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 1992 - 1997, 2000-2006 Silicon Graphics, Inc. All rights reserved.
  */
 
 #ifndef _ASM_IA64_SN_INTR_H
@@ -11,26 +11,26 @@
 
 #include <linux/rcupdate.h>
 
-#define SGI_UART_VECTOR		(0xe9)
+#define SGI_UART_VECTOR		0xe9
 
 /* Reserved IRQs : Note, not to exceed IA64_SN2_FIRST_DEVICE_VECTOR */
-#define SGI_XPC_ACTIVATE                (0x30)
-#define SGI_II_ERROR                    (0x31)
-#define SGI_XBOW_ERROR                  (0x32)
-#define SGI_PCIASIC_ERROR               (0x33)
-#define SGI_ACPI_SCI_INT                (0x34)
-#define SGI_TIOCA_ERROR                 (0x35)
-#define SGI_TIO_ERROR                   (0x36)
-#define SGI_TIOCX_ERROR                 (0x37)
-#define SGI_MMTIMER_VECTOR              (0x38)
-#define SGI_XPC_NOTIFY                  (0xe7)
-
-#define IA64_SN2_FIRST_DEVICE_VECTOR    (0x3c)
-#define IA64_SN2_LAST_DEVICE_VECTOR     (0xe6)
-
-#define SN2_IRQ_RESERVED        (0x1)
-#define SN2_IRQ_CONNECTED       (0x2)
-#define SN2_IRQ_SHARED          (0x4)
+#define SGI_XPC_ACTIVATE	0x30
+#define SGI_II_ERROR		0x31
+#define SGI_XBOW_ERROR		0x32
+#define SGI_PCIASIC_ERROR	0x33
+#define SGI_ACPI_SCI_INT	0x34
+#define SGI_TIOCA_ERROR		0x35
+#define SGI_TIO_ERROR		0x36
+#define SGI_TIOCX_ERROR		0x37
+#define SGI_MMTIMER_VECTOR	0x38
+#define SGI_XPC_NOTIFY		0xe7
+
+#define IA64_SN2_FIRST_DEVICE_VECTOR	0x3c
+#define IA64_SN2_LAST_DEVICE_VECTOR	0xe6
+
+#define SN2_IRQ_RESERVED	0x1
+#define SN2_IRQ_CONNECTED	0x2
+#define SN2_IRQ_SHARED		0x4
 
 // The SN PROM irq struct
 struct sn_irq_info {
diff --git a/include/asm-ia64/sn/tioce.h b/include/asm-ia64/sn/tioce.h
index d4c9907..893468e 100644
--- a/include/asm-ia64/sn/tioce.h
+++ b/include/asm-ia64/sn/tioce.h
@@ -11,7 +11,7 @@
 
 /* CE ASIC part & mfgr information  */
 #define TIOCE_PART_NUM			0xCE00
-#define TIOCE_MFGR_NUM			0x36
+#define TIOCE_SRC_ID			0x01
 #define TIOCE_REV_A			0x1
 
 /* CE Virtual PPB Vendor/Device IDs */
@@ -20,7 +20,7 @@
 
 /* CE Host Bridge Vendor/Device IDs */
 #define CE_HOST_BRIDGE_VENDOR_ID	0x10a9
-#define CE_HOST_BRIDGE_DEVICE_ID	0x4003
+#define CE_HOST_BRIDGE_DEVICE_ID	0x4001
 
 
 #define TIOCE_NUM_M40_ATES		4096
@@ -463,6 +463,25 @@ typedef volatile struct tioce {
 	u64	ce_end_of_struct;			/* 0x044400 */
 } tioce_t;
 
+/* ce_lsiX_gb_cfg1 register bit masks & shifts */
+#define CE_LSI_GB_CFG1_RXL0S_THS_SHFT	0
+#define CE_LSI_GB_CFG1_RXL0S_THS_MASK	(0xffULL << 0)
+#define CE_LSI_GB_CFG1_RXL0S_SMP_SHFT	8
+#define CE_LSI_GB_CFG1_RXL0S_SMP_MASK	(0xfULL << 8);
+#define CE_LSI_GB_CFG1_RXL0S_ADJ_SHFT	12
+#define CE_LSI_GB_CFG1_RXL0S_ADJ_MASK	(0x7ULL << 12)
+#define CE_LSI_GB_CFG1_RXL0S_FLT_SHFT	15
+#define CE_LSI_GB_CFG1_RXL0S_FLT_MASK	(0x1ULL << 15)
+#define CE_LSI_GB_CFG1_LPBK_SEL_SHFT	16
+#define CE_LSI_GB_CFG1_LPBK_SEL_MASK	(0x3ULL << 16)
+#define CE_LSI_GB_CFG1_LPBK_EN_SHFT	18
+#define CE_LSI_GB_CFG1_LPBK_EN_MASK	(0x1ULL << 18)
+#define CE_LSI_GB_CFG1_RVRS_LB_SHFT	19
+#define CE_LSI_GB_CFG1_RVRS_LB_MASK	(0x1ULL << 19)
+#define CE_LSI_GB_CFG1_RVRS_CLK_SHFT	20
+#define CE_LSI_GB_CFG1_RVRS_CLK_MASK	(0x3ULL << 20)
+#define CE_LSI_GB_CFG1_SLF_TS_SHFT	24
+#define CE_LSI_GB_CFG1_SLF_TS_MASK	(0xfULL << 24)
 
 /* ce_adm_int_mask/ce_adm_int_status register bit defines */
 #define CE_ADM_INT_CE_ERROR_SHFT		0
@@ -592,6 +611,11 @@ typedef volatile struct tioce {
 #define CE_URE_RD_MRG_ENABLE		(0x1ULL << 0)
 #define CE_URE_WRT_MRG_ENABLE1		(0x1ULL << 4)
 #define CE_URE_WRT_MRG_ENABLE2		(0x1ULL << 5)
+#define CE_URE_WRT_MRG_TIMER_SHFT	12
+#define CE_URE_WRT_MRG_TIMER_MASK	(0x7FFULL << CE_URE_WRT_MRG_TIMER_SHFT)
+#define CE_URE_WRT_MRG_TIMER(x)		(((u64)(x) << \
+					  CE_URE_WRT_MRG_TIMER_SHFT) & \
+					 CE_URE_WRT_MRG_TIMER_MASK)
 #define CE_URE_RSPQ_BYPASS_DISABLE	(0x1ULL << 24)
 #define CE_URE_UPS_DAT1_PAR_DISABLE	(0x1ULL << 32)
 #define CE_URE_UPS_HDR1_PAR_DISABLE	(0x1ULL << 33)
@@ -653,8 +677,12 @@ typedef volatile struct tioce {
 #define CE_URE_SI			(0x1ULL << 0)
 #define CE_URE_ELAL_SHFT		4
 #define CE_URE_ELAL_MASK		(0x7ULL << CE_URE_ELAL_SHFT)
+#define CE_URE_ELAL_SET(n)		(((u64)(n) << CE_URE_ELAL_SHFT) & \
+					 CE_URE_ELAL_MASK)
 #define CE_URE_ELAL1_SHFT		8
 #define CE_URE_ELAL1_MASK		(0x7ULL << CE_URE_ELAL1_SHFT)
+#define CE_URE_ELAL1_SET(n)		(((u64)(n) << CE_URE_ELAL1_SHFT) & \
+					 CE_URE_ELAL1_MASK)
 #define CE_URE_SCC			(0x1ULL << 12)
 #define CE_URE_PN1_SHFT			16
 #define CE_URE_PN1_MASK			(0xFFULL << CE_URE_PN1_SHFT)
@@ -675,8 +703,12 @@ typedef volatile struct tioce {
 #define CE_URE_HPC			(0x1ULL << 6)
 #define CE_URE_SPLV_SHFT		7
 #define CE_URE_SPLV_MASK		(0xFFULL << CE_URE_SPLV_SHFT)
+#define CE_URE_SPLV_SET(n)		(((u64)(n) << CE_URE_SPLV_SHFT) & \
+					 CE_URE_SPLV_MASK)
 #define CE_URE_SPLS_SHFT		15
 #define CE_URE_SPLS_MASK		(0x3ULL << CE_URE_SPLS_SHFT)
+#define CE_URE_SPLS_SET(n)		(((u64)(n) << CE_URE_SPLS_SHFT) & \
+					 CE_URE_SPLS_MASK)
 #define CE_URE_PSN1_SHFT		19
 #define CE_URE_PSN1_MASK		(0x1FFFULL << CE_URE_PSN1_SHFT)
 #define CE_URE_PSN2_SHFT		32
diff --git a/include/asm-ia64/system.h b/include/asm-ia64/system.h
index 80c5a23..cd4233d 100644
--- a/include/asm-ia64/system.h
+++ b/include/asm-ia64/system.h
@@ -244,37 +244,19 @@ extern void ia64_load_extra (struct task
 		__ia64_save_fpu((prev)->thread.fph);				\
 	}									\
 	__switch_to(prev, next, last);						\
+	/* "next" in old context is "current" in new context */			\
+	if (unlikely((current->thread.flags & IA64_THREAD_MIGRATION) &&	       \
+		     (task_cpu(current) !=				       \
+		      		      task_thread_info(current)->last_cpu))) { \
+		platform_migrate(current);				       \
+		task_thread_info(current)->last_cpu = task_cpu(current);       \
+	}								       \
 } while (0)
 #else
 # define switch_to(prev,next,last)	__switch_to(prev, next, last)
 #endif
 
-/*
- * On IA-64, we don't want to hold the runqueue's lock during the low-level context-switch,
- * because that could cause a deadlock.  Here is an example by Erich Focht:
- *
- * Example:
- * CPU#0:
- * schedule()
- *    -> spin_lock_irq(&rq->lock)
- *    -> context_switch()
- *       -> wrap_mmu_context()
- *          -> read_lock(&tasklist_lock)
- *
- * CPU#1:
- * sys_wait4() or release_task() or forget_original_parent()
- *    -> write_lock(&tasklist_lock)
- *    -> do_notify_parent()
- *       -> wake_up_parent()
- *          -> try_to_wake_up()
- *             -> spin_lock_irq(&parent_rq->lock)
- *
- * If the parent's rq happens to be on CPU#0, we'll wait for the rq->lock
- * of that CPU which will not be released, because there we wait for the
- * tasklist_lock to become available.
- */
 #define __ARCH_WANT_UNLOCKED_CTXSW
-
 #define ARCH_HAS_PREFETCH_SWITCH_STACK
 #define ia64_platform_is(x) (strcmp(x, platform_name) == 0)
 
diff --git a/include/asm-ia64/thread_info.h b/include/asm-ia64/thread_info.h
index 1d6518f..56394a2 100644
--- a/include/asm-ia64/thread_info.h
+++ b/include/asm-ia64/thread_info.h
@@ -26,16 +26,10 @@ struct thread_info {
 	struct exec_domain *exec_domain;/* execution domain */
 	__u32 flags;			/* thread_info flags (see TIF_*) */
 	__u32 cpu;			/* current CPU */
+	__u32 last_cpu;			/* Last CPU thread ran on */
 	mm_segment_t addr_limit;	/* user-level address space limit */
 	int preempt_count;		/* 0=premptable, <0=BUG; will also serve as bh-counter */
 	struct restart_block restart_block;
-	struct {
-		int signo;
-		int code;
-		void __user *addr;
-		unsigned long start_time;
-		pid_t pid;
-	} sigdelayed;			/* Saved information for TIF_SIGDELAYED */
 };
 
 #define THREAD_SIZE			KERNEL_STACK_SIZE
@@ -89,7 +83,6 @@ struct thread_info {
 #define TIF_NEED_RESCHED	2	/* rescheduling necessary */
 #define TIF_SYSCALL_TRACE	3	/* syscall trace active */
 #define TIF_SYSCALL_AUDIT	4	/* syscall auditing active */
-#define TIF_SIGDELAYED		5	/* signal delayed from MCA/INIT/NMI/PMI context */
 #define TIF_POLLING_NRFLAG	16	/* true if poll_idle() is polling TIF_NEED_RESCHED */
 #define TIF_MEMDIE		17
 #define TIF_MCA_INIT		18	/* this task is processing MCA or INIT */
@@ -101,13 +94,12 @@ struct thread_info {
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
-#define _TIF_SIGDELAYED		(1 << TIF_SIGDELAYED)
 #define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
 #define _TIF_MCA_INIT		(1 << TIF_MCA_INIT)
 #define _TIF_DB_DISABLED	(1 << TIF_DB_DISABLED)
 
 /* "work to do on user-return" bits */
-#define TIF_ALLWORK_MASK	(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SIGDELAYED)
+#define TIF_ALLWORK_MASK	(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT)
 /* like TIF_ALLWORK_BITS but sans TIF_SYSCALL_TRACE or TIF_SYSCALL_AUDIT */
 #define TIF_WORK_MASK		(TIF_ALLWORK_MASK&~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT))