This patch increases the maximum SSI size to 1024 nodes. Long term, we expect to use ACPI3.0 to support >256 nodes. This patch is an interim solution that enables >256 node support with ACPI2.0. Support for ACPI3.0 is at least a couple of years away. In the interim, this patch can be used for testing large SSI systems in the lab. The changes are not particularly invasive, and, if necessary, could be pushed to the community. However, this is not planned at this point. The PROM has also been modified to support >256 nodes with ACPI2.0. By early spring, the default PROM should support up to 1024 nodes. arch/ia64/Kconfig | 9 +++++++++ arch/ia64/kernel/acpi.c | 32 ++++++++++++++++++++++++++++---- arch/ia64/kernel/numa.c | 2 +- include/asm-ia64/acpi.h | 4 ++++ include/asm-ia64/numa.h | 2 +- include/asm-ia64/numnodes.h | 13 +++++++++---- include/asm-ia64/sn/arch.h | 3 ++- include/linux/kmalloc_sizes.h | 4 +++- mm/slab.c | 12 +++++++----- Index: linux-2.6.16-rc6-mm1/arch/ia64/Kconfig =================================================================== --- linux-2.6.16-rc6-mm1.orig/arch/ia64/Kconfig 2006-03-13 09:46:11.000000000 -0800 +++ linux-2.6.16-rc6-mm1/arch/ia64/Kconfig 2006-03-14 16:37:54.000000000 -0800 @@ -261,6 +261,15 @@ config NR_CPUS than 64 will cause the use of a CPU mask array, causing a small performance hit. +config IA64_NR_NODES + int "Maximum number of NODEs (256-1024)" if (IA64_SGI_SN2 || IA64_GENERIC) + range 256 1024 + depends on IA64_SGI_SN2 || IA64_GENERIC + default "256" + help + This option specifies the maximum number of nodes in your SSI system. + If in doubt, use the default. 
+ config HOTPLUG_CPU bool "Support for hot-pluggable CPUs (EXPERIMENTAL)" depends on SMP && EXPERIMENTAL Index: linux-2.6.16-rc6-mm1/arch/ia64/kernel/acpi.c =================================================================== --- linux-2.6.16-rc6-mm1.orig/arch/ia64/kernel/acpi.c 2006-03-13 09:45:42.000000000 -0800 +++ linux-2.6.16-rc6-mm1/arch/ia64/kernel/acpi.c 2006-03-14 16:37:54.000000000 -0800 @@ -417,6 +417,26 @@ static u32 __devinitdata pxm_flag[PXM_FL #define pxm_bit_test(bit) (test_bit(bit,(void *)pxm_flag)) static struct acpi_table_slit __initdata *slit_table; +static int get_processor_proximity_domain(struct acpi_table_processor_affinity *pa) +{ + int pxm; + + pxm = pa->proximity_domain; + if (ia64_platform_is("sn2")) + pxm += pa->reserved[0] << 8; + return pxm; +} + +static int get_memory_proximity_domain(struct acpi_table_memory_affinity *ma) +{ + int pxm; + + pxm = ma->proximity_domain; + if (ia64_platform_is("sn2")) + pxm += ma->reserved1[0] << 8; + return pxm; +} + /* * ACPI 2.0 SLIT (System Locality Information Table) * http://devresource.hp.com/devresource/Docs/TechPapers/IA64/slit.pdf @@ -440,13 +460,17 @@ void __init acpi_numa_slit_init(struct a void __init acpi_numa_processor_affinity_init(struct acpi_table_processor_affinity *pa) { + int pxm; + + pxm = get_processor_proximity_domain(pa); + /* record this node in proximity bitmap */ - pxm_bit_set(pa->proximity_domain); + pxm_bit_set(pxm); node_cpuid[srat_num_cpus].phys_id = (pa->apic_id << 8) | (pa->lsapic_eid); /* nid should be overridden as logical node id later */ - node_cpuid[srat_num_cpus].nid = pa->proximity_domain; + node_cpuid[srat_num_cpus].nid = pxm; srat_num_cpus++; } @@ -454,10 +478,10 @@ void __init acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma) { unsigned long paddr, size; - u8 pxm; + int pxm; struct node_memblk_s *p, *q, *pend; - pxm = ma->proximity_domain; + pxm = get_memory_proximity_domain(ma); /* fill node memory chunk structure */ paddr = 
ma->base_addr_hi; Index: linux-2.6.16-rc6-mm1/include/asm-ia64/numnodes.h =================================================================== --- linux-2.6.16-rc6-mm1.orig/include/asm-ia64/numnodes.h 2006-03-11 14:12:55.000000000 -0800 +++ linux-2.6.16-rc6-mm1/include/asm-ia64/numnodes.h 2006-03-14 16:37:54.000000000 -0800 @@ -3,13 +3,18 @@ #ifdef CONFIG_IA64_DIG /* Max 8 Nodes */ -#define NODES_SHIFT 3 +# define NODES_SHIFT 3 #elif defined(CONFIG_IA64_HP_ZX1) || defined(CONFIG_IA64_HP_ZX1_SWIOTLB) /* Max 32 Nodes */ -#define NODES_SHIFT 5 +# define NODES_SHIFT 5 #elif defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC) -/* Max 256 Nodes */ -#define NODES_SHIFT 8 +# if CONFIG_IA64_NR_NODES == 256 +# define NODES_SHIFT 8 +# elif CONFIG_IA64_NR_NODES <= 512 +# define NODES_SHIFT 9 +# elif CONFIG_IA64_NR_NODES <= 1024 +# define NODES_SHIFT 10 +# endif #endif #endif /* _ASM_MAX_NUMNODES_H */ Index: linux-2.6.16-rc6-mm1/mm/slab.c =================================================================== --- linux-2.6.16-rc6-mm1.orig/mm/slab.c 2006-03-13 09:46:28.000000000 -0800 +++ linux-2.6.16-rc6-mm1/mm/slab.c 2006-03-14 16:37:54.000000000 -0800 @@ -1424,6 +1424,7 @@ static void *kmem_getpages(struct kmem_c struct page *page; void *addr; int i; + int order; flags |= cachep->gfpflags; page = alloc_pages_node(nodeid, flags, cachep->gfporder); Index: linux-2.6.16-rc6-mm1/arch/ia64/kernel/numa.c =================================================================== --- linux-2.6.16-rc6-mm1.orig/arch/ia64/kernel/numa.c 2006-03-11 14:12:55.000000000 -0800 +++ linux-2.6.16-rc6-mm1/arch/ia64/kernel/numa.c 2006-03-14 16:37:54.000000000 -0800 @@ -25,7 +25,7 @@ #include #include -u8 cpu_to_node_map[NR_CPUS] __cacheline_aligned; +u16 cpu_to_node_map[NR_CPUS] __cacheline_aligned; EXPORT_SYMBOL(cpu_to_node_map); cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned; Index: linux-2.6.16-rc6-mm1/include/asm-ia64/numa.h 
=================================================================== --- linux-2.6.16-rc6-mm1.orig/include/asm-ia64/numa.h 2006-03-11 14:12:55.000000000 -0800 +++ linux-2.6.16-rc6-mm1/include/asm-ia64/numa.h 2006-03-14 16:37:54.000000000 -0800 @@ -23,7 +23,7 @@ #include -extern u8 cpu_to_node_map[NR_CPUS] __cacheline_aligned; +extern u16 cpu_to_node_map[NR_CPUS] __cacheline_aligned; extern cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned; /* Stuff below this line could be architecture independent */