---
 include/linux/percpu.h |   10 +---
 mm/allocpercpu.c       |  116 ++++++++++++++++++++++++++++++++++++++-----------
 2 files changed, 96 insertions(+), 30 deletions(-)

Index: linux-2.6/mm/allocpercpu.c
===================================================================
--- linux-2.6.orig/mm/allocpercpu.c	2007-10-30 16:35:41.000000000 -0700
+++ linux-2.6/mm/allocpercpu.c	2007-10-30 16:38:15.000000000 -0700
@@ -2,10 +2,84 @@
  * linux/mm/allocpercpu.c
  *
  * Separated from slab.c August 11, 2006 Christoph Lameter
+ *
+ * (C) 2007 SGI, Christoph Lameter
+ * Basic implementation with allocation and free from a dedicated per cpu area.
  */
 #include <linux/mm.h>
 #include <linux/module.h>
 
+#define MAXIMUM_UNITS_PER_CPU 16384
+
+#define FREE 0
+#define USED 255
+
+static DEFINE_SPINLOCK(cpu_area_lock);
+static u8 cpu_alloc_map[MAXIMUM_UNITS_PER_CPU];
+DEFINE_PER_CPU(per_cpu_unit, cpu_area)[MAXIMUM_UNITS_PER_CPU];
+
+static inline int size_to_units(unsigned long size)
+{
+	return (size + sizeof(per_cpu_unit) - 1) / sizeof(per_cpu_unit);
+}
+
+static inline void set_map(int start, int length)
+{
+	cpu_alloc_map[start] = length;
+	if (length > 1)
+		memset(cpu_alloc_map + start + 1, USED, length - 1);
+}
+
+static inline void clear_map(int start, int length)
+{
+	memset(cpu_alloc_map + start, FREE, length);
+}
+
+static inline void *cpu_alloc(unsigned long size)
+{
+	unsigned long start = 0;
+	int units = size_to_units(size);
+	unsigned end;
+
+	BUG_ON(size > 254 * sizeof(per_cpu_unit));
+	spin_lock(&cpu_area_lock);
+	do {
+		while (start < MAXIMUM_UNITS_PER_CPU &&
+				cpu_alloc_map[start] != FREE)
+			start++;
+		if (start == MAXIMUM_UNITS_PER_CPU) {
+			spin_unlock(&cpu_area_lock);
+			return NULL;
+		}
+
+		end = start + 1;
+		while (end < MAXIMUM_UNITS_PER_CPU && end - start < units &&
+				cpu_alloc_map[end] == FREE)
+			end++;
+		if (end - start == units)
+			break;
+		start = end;
+	} while (1);
+
+	set_map(start, units);
+	spin_unlock(&cpu_area_lock);
+	return (void *)start;
+}
+
+static inline void cpu_free(void *pcpu)
+{
+	unsigned long start = (unsigned long)pcpu;
+	int units;
+
+	units = cpu_alloc_map[start];
+	BUG_ON(units == FREE || units == USED ||
+			start >= MAXIMUM_UNITS_PER_CPU);
+
+	spin_lock(&cpu_area_lock);
+	clear_map(start, units);
+	spin_unlock(&cpu_area_lock);
+}
+
 /**
  * percpu_depopulate - depopulate per-cpu data for given cpu
  * @__pdata: per-cpu data to depopulate
@@ -16,10 +90,10 @@
  */
 void percpu_depopulate(void *__pdata, int cpu)
 {
-	struct percpu_data *pdata = __percpu_disguise(__pdata);
-
-	kfree(pdata->ptrs[cpu]);
-	pdata->ptrs[cpu] = NULL;
+	/*
+	 * Nothing to do here. Removal can only be effected for all
+	 * per cpu areas of a cpu at once.
+	 */
 }
 EXPORT_SYMBOL_GPL(percpu_depopulate);
 
@@ -47,17 +121,16 @@ EXPORT_SYMBOL_GPL(__percpu_depopulate_ma
  * use case. You need to register a cpu hotplug handler for that purpose.
  * Per-cpu object is populated with zeroed buffer.
  */
-void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu)
+void *percpu_populate(void *pdata, size_t size, gfp_t gfp, int cpu)
 {
-	struct percpu_data *pdata = __percpu_disguise(__pdata);
-	int node = cpu_to_node(cpu);
-
-	BUG_ON(pdata->ptrs[cpu]);
-	if (node_online(node))
-		pdata->ptrs[cpu] = kmalloc_node(size, gfp|__GFP_ZERO, node);
-	else
-		pdata->ptrs[cpu] = kzalloc(size, gfp);
-	return pdata->ptrs[cpu];
+	/*
+	 * Nothing to do here. We can only populate the complete
+	 * per cpu area at once.
+	 *
+	 * But we can return the address of the object in order to be
+	 * backward compatible.
+	 */
+	return percpu_ptr(pdata, cpu);
 }
 EXPORT_SYMBOL_GPL(percpu_populate);
 
@@ -73,6 +146,7 @@ EXPORT_SYMBOL_GPL(percpu_populate);
 int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
 			   cpumask_t *mask)
 {
+	cpumask_t populated = CPU_MASK_NONE;
 	int cpu;
 
@@ -98,15 +172,12 @@ EXPORT_SYMBOL_GPL(__percpu_populate_mask
  */
 void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask)
 {
-	void *pdata = kzalloc(sizeof(struct percpu_data), gfp);
-	void *__pdata = __percpu_disguise(pdata);
+	void *pdata = cpu_alloc(size);
 
 	if (unlikely(!pdata))
 		return NULL;
-	if (likely(!__percpu_populate_mask(__pdata, size, gfp, mask)))
-		return __pdata;
-	kfree(pdata);
-	return NULL;
+	BUG_ON(!cpus_subset(*mask, cpu_online_map));
+	return pdata;
 }
 EXPORT_SYMBOL_GPL(__percpu_alloc_mask);
 
@@ -117,11 +188,10 @@ EXPORT_SYMBOL_GPL(__percpu_alloc_mask);
  * We simply clean up any per-cpu object left. No need for the client to
  * track and specify through a bis mask which per-cpu objects are to free.
  */
-void percpu_free(void *__pdata)
+void percpu_free(void *pdata)
 {
-	if (unlikely(!__pdata))
+	if (unlikely(!pdata))
 		return;
-	__percpu_depopulate_mask(__pdata, &cpu_possible_map);
-	kfree(__percpu_disguise(__pdata));
+	cpu_free(pdata);
 }
 EXPORT_SYMBOL_GPL(percpu_free);

Index: linux-2.6/include/linux/percpu.h
===================================================================
--- linux-2.6.orig/include/linux/percpu.h	2007-10-30 16:35:41.000000000 -0700
+++ linux-2.6/include/linux/percpu.h	2007-10-30 16:38:38.000000000 -0700
@@ -33,11 +33,10 @@
 
 #ifdef CONFIG_SMP
 
-struct percpu_data {
-	void *ptrs[NR_CPUS];
-};
+typedef unsigned long long per_cpu_unit;
+
+DECLARE_PER_CPU(per_cpu_unit, cpu_area)[];
 
-#define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata)
 /*
  * Use this to get to a cpu's version of the per-cpu object dynamically
  * allocated. Non-atomic access to the current CPU's version should
@@ -45,8 +44,7 @@ struct percpu_data {
  */
 #define percpu_ptr(ptr, cpu)                              \
 ({                                                        \
-	struct percpu_data *__p = __percpu_disguise(ptr); \
-	(__typeof__(ptr))__p->ptrs[(cpu)];                \
+	(__typeof__(ptr))((per_cpu(cpu_area, cpu) + (unsigned long)ptr)); \
 })
 
 extern void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu);
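
For readers of the patch: the allocator above is a first-fit search over a byte map. cpu_alloc_map[start] records the length of an allocated run in units, the remaining bytes of the run are marked USED, and cpu_free() reads that length back to clear the run. The stand-alone user-space sketch below (not part of the patch; the names map_alloc, map_free, UNITS and UNIT_SIZE are made up for illustration) mirrors that logic in plain C so it can be compiled and experimented with outside the kernel:

/*
 * Illustrative user-space sketch only -- not part of the patch.  It mimics
 * the first-fit search over the u8 cpu_alloc_map that cpu_alloc()/cpu_free()
 * perform, using hypothetical names (map_alloc, map_free, UNITS, UNIT_SIZE)
 * and an ordinary static array instead of the real per cpu area.
 */
#include <assert.h>
#include <stdio.h>
#include <string.h>

#define UNITS		64	/* stands in for MAXIMUM_UNITS_PER_CPU */
#define UNIT_SIZE	8	/* stands in for sizeof(per_cpu_unit) */
#define FREE		0
#define USED		255

static unsigned char map[UNITS];	/* map[start] = run length, rest USED */

/* Round a byte count up to allocation units, like size_to_units(). */
static int size_to_units(unsigned long size)
{
	return (size + UNIT_SIZE - 1) / UNIT_SIZE;
}

/* First-fit search for a run of FREE units; returns a unit offset or -1. */
static int map_alloc(unsigned long size)
{
	int units = size_to_units(size);
	int start = 0, end;

	assert(units >= 1 && units <= 254);	/* length must fit in a u8 below USED */
	for (;;) {
		while (start < UNITS && map[start] != FREE)
			start++;
		if (start == UNITS)
			return -1;
		end = start + 1;
		while (end < UNITS && end - start < units && map[end] == FREE)
			end++;
		if (end - start == units)
			break;
		start = end;
	}
	/* Record the run: its length at the first unit, USED markers after. */
	map[start] = units;
	memset(map + start + 1, USED, units - 1);
	return start;
}

/* Release a run previously handed out by map_alloc(). */
static void map_free(int start)
{
	int units = map[start];

	assert(units != FREE && units != USED);
	memset(map + start, FREE, units);
}

int main(void)
{
	int a = map_alloc(20);	/* 20 bytes -> 3 units at offset 0 */
	int b = map_alloc(8);	/*  8 bytes -> 1 unit  at offset 3 */

	printf("a at unit %d, b at unit %d\n", a, b);
	map_free(a);
	/* The freed run is found again by the next fitting allocation. */
	printf("c at unit %d\n", map_alloc(16));
	return 0;
}

Keeping the run length in the first map byte is what lets the free path recover the allocation size without a per-object header; the trade-off, visible in the BUG_ON() in cpu_alloc(), is that a single allocation is limited to 254 units.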