--- linux.base/include/asm-ia64/mmu_context.h Fri Jan 9 00:59:09 2004 +++ linux/include/asm-ia64/mmu_context.h Tue Jan 27 12:56:13 2004 @@ -21,6 +21,7 @@ # ifndef __ASSEMBLY__ +#include #include #include #include @@ -106,6 +107,9 @@ /* re-check, now that we've got the lock: */ context = mm->context; if (context == 0) { +#ifdef CONFIG_NUMA + cpus_clear(mm->cpu_vm_mask); +#endif if (ia64_ctx.next >= ia64_ctx.limit) wrap_mmu_context(mm); mm->context = context = ia64_ctx.next++; @@ -170,6 +174,10 @@ do { context = get_mmu_context(mm); MMU_TRACE('A', smp_processor_id(), mm, context); +#ifdef CONFIG_NUMA + if (!cpu_isset(smp_processor_id(), mm->cpu_vm_mask)) + cpu_set(smp_processor_id(), mm->cpu_vm_mask); +#endif reload_context(context); MMU_TRACE('a', smp_processor_id(), mm, context); /* in the unlikely event of a TLB-flush by another thread, redo the load: */ --- linux.base/arch/ia64/sn/kernel/sn2/sn2_smp.c Mon Jan 26 17:06:03 2004 +++ linux/arch/ia64/sn/kernel/sn2/sn2_smp.c Tue Jan 27 10:28:30 2004 @@ -4,7 +4,7 @@ * This file is subject to the terms and conditions of the GNU General Public * License. See the file "COPYING" in the main directory of this archive * for more details. - * + * * Copyright (C) 2000-2003 Silicon Graphics, Inc. All rights reserved. */ @@ -26,6 +26,8 @@ #include #include #include +#include +#include #include #include #include @@ -34,6 +36,13 @@ #include #include +/* When nodemask_t is available, delete the following definitions */ +#define NODEMASK_WORDCOUNT ((NR_NODES+(BITS_PER_LONG-1))/BITS_PER_LONG) +#define NODE_MASK_ALL { [0 ... ((NR_NODES+BITS_PER_LONG-1)/BITS_PER_LONG)-1] = ~0UL } +#define NODE_MASK_NONE { [0 ... ((NR_NODES+BITS_PER_LONG-1)/BITS_PER_LONG)-1] = 0 } +typedef unsigned long nodemask_t[NODEMASK_WORDCOUNT]; + + void sn2_ptc_deadlock_recovery(unsigned long data0, unsigned long data1); @@ -66,14 +75,52 @@ * * Purges the translation caches of all processors of the given virtual address * range. + * + * Note: + * - cpu_vm_mask is a bit mask that indicates which cpus have loaded the context. + * - cpu_vm_mask is converted into a nodemask of the nodes containing the + * cpus in cpu_vm_mask. + * - if only one bit is set in cpu_vm_mask & it is the current cpu, + * then only the local TLB needs to be flushed. This flushing can be done + * using ptc.l. This is the common case & avoids the global spinlock. + * - if multiple cpus have loaded the context, then flushing has to be + * done with ptc.g/MMRs under protection of the global ptc_lock. */ void sn2_global_tlb_purge (unsigned long start, unsigned long end, unsigned long nbits) { - int cnode, mycnode, nasid, flushed=0; + int i, cnode, mynasid, cpu, lcpu=0, nasid, flushed=0; volatile unsigned long *ptc0, *ptc1; unsigned long flags=0, data0, data1; + struct mm_struct *mm=current->active_mm; + nodemask_t nodes_flushed=NODE_MASK_NONE; + short nasids[NR_NODES], nix; + + for (i=0, cpu=find_first_bit(&mm->cpu_vm_mask, NR_CPUS); cpu < NR_CPUS; + i++, cpu=find_next_bit(&mm->cpu_vm_mask, NR_CPUS, ++cpu)) { + cnode = cpu_to_node(cpu); + __set_bit(cnode, nodes_flushed); + lcpu = cpu; + } + + preempt_disable(); + + if (likely(i == 1 && lcpu == smp_processor_id())) { + do { + asm volatile ("ptc.l %0,%1" :: "r"(start), "r"(nbits<<2) : "memory"); + start += (1UL << nbits); + } while (start < end); + ia64_srlz_i(); + preempt_enable(); + return; + } + + nix = 0; + for (cnode=find_first_bit(&nodes_flushed, NR_NODES); cnode < NR_NODES; + cnode=find_next_bit(&nodes_flushed, NR_NODES, ++cnode)) + nasids[nix++] = cnodeid_to_nasid(cnode); + data0 = (1UL<