alloc_pages_range: Allocate memory from a specified range of addresses

The current ZONE_DMA scheme is limited to a single boundary: one can only
allocate memory below 16MB or without restriction. alloc_pages_range()
allows the caller to specify exactly which memory range is acceptable.

alloc_pages_range() checks the system for suitable zones and then performs
the fastest allocation possible. If there is no suitable zone, it performs
a search through the possible zones for pages that fit the allocation
criteria. This search is not fast, but it should be sufficient for
supporting legacy devices and devices with addressing quirks.

This is worthwhile because the DMA subsystem already has the ability to
communicate which addresses are allowable; it is only the page allocator
that cannot satisfy a request for memory from a specific address range.
With this patch, the arch-specific dma_alloc_coherent() can be modified to
call alloc_pages_range() (a sketch of such a conversion follows at the end
of this mail), and the DMA subsystem will then be able to exploit all
available memory in that range.

Once this mechanism is in place, and once all relevant GFP_DMA references
for an arch have been dealt with (all current uses must be changed to call
alloc_pages_range()!), one can disable ZONE_DMA and enjoy the benefits of
a single zone while still being able to use the old floppy driver should
the need arise (a usage sketch follows the gfp.h changes below).

- Only i386 is supported.
- Reclaim, when not falling back to regular allocations, may not be that
  efficient.
- It boots on my system.

Signed-off-by: Christoph Lameter

Index: linux-2.6.20/include/linux/gfp.h
===================================================================
--- linux-2.6.20.orig/include/linux/gfp.h	2007-02-04 10:44:54.000000000 -0800
+++ linux-2.6.20/include/linux/gfp.h	2007-02-12 21:45:42.000000000 -0800
@@ -123,6 +123,9 @@
 extern struct page *
 FASTCALL(__alloc_pages(gfp_t, unsigned int, struct zonelist *));
 
+extern struct page *__alloc_pages_range(unsigned long low, unsigned long high,
+		gfp_t gfp_mask, unsigned int order, struct zonelist *zl);
+
 static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
 						unsigned int order)
 {
@@ -137,8 +140,25 @@
 			NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_mask));
 }
 
+static inline struct page *alloc_pages_range_node(int nid,
+		unsigned long low, unsigned long high,
+		gfp_t gfp_mask, unsigned int order)
+{
+	if (unlikely(order >= MAX_ORDER))
+		return NULL;
+
+	/* Unknown node is current node */
+	if (nid < 0)
+		nid = numa_node_id();
+
+	return __alloc_pages_range(low, high, gfp_mask, order,
+		NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_mask));
+}
+
 #ifdef CONFIG_NUMA
 extern struct page *alloc_pages_current(gfp_t gfp_mask, unsigned order);
+extern struct page *alloc_pages_range_current(unsigned long low,
+		unsigned long high, gfp_t gfp_mask, unsigned order);
 
 static inline struct page *
 alloc_pages(gfp_t gfp_mask, unsigned int order)
@@ -148,12 +168,25 @@
 	return alloc_pages_current(gfp_mask, order);
 }
+
+static inline struct page *
+alloc_pages_range(unsigned long low, unsigned long high, gfp_t gfp_mask,
+						unsigned int order)
+{
+	if (unlikely(order >= MAX_ORDER))
+		return NULL;
+
+	return alloc_pages_range_current(low, high, gfp_mask, order);
+}
+
 extern struct page *alloc_page_vma(gfp_t gfp_mask,
 			struct vm_area_struct *vma, unsigned long addr);
 #else
 #define alloc_pages(gfp_mask, order) \
 		alloc_pages_node(numa_node_id(), gfp_mask, order)
 #define alloc_page_vma(gfp_mask, vma, addr) alloc_pages(gfp_mask, 0)
+#define alloc_pages_range(low, high, gfp_mask, order) \
+		alloc_pages_range_node(numa_node_id(), low, high, gfp_mask, order)
 #endif
 
 #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
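As a usage illustration (an editor's sketch, not part of the patch): a
legacy driver that needs memory reachable by a 24-bit ISA DMA engine, such
as the old floppy driver mentioned above, could call the new interface as
follows. low and high are kernel virtual addresses, matching what
__alloc_pages_range() below expects, with low == 0 meaning "no lower
bound"; the function name is hypothetical.

#include <linux/gfp.h>
#include <linux/mm.h>

/*
 * Editor's sketch: allocate 2^order pages that are physically below
 * the classic 16MB ISA limit, without specifying GFP_DMA.
 */
static void *isa_dma_buffer(unsigned int order)
{
	struct page *page;

	/* [0, __va(16MB)) covers physical memory below 16MB */
	page = alloc_pages_range(0, (unsigned long)__va(16UL << 20),
					GFP_KERNEL, order);

	return page ? page_address(page) : NULL;
}

On a configuration with ZONE_DMA this hits the GFP_DMA shortcut in
__alloc_pages_range(); with ZONE_DMA disabled it would have to rely on the
slower range search described above.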
Index: linux-2.6.20/mm/page_alloc.c
===================================================================
--- linux-2.6.20.orig/mm/page_alloc.c	2007-02-04 10:44:54.000000000 -0800
+++ linux-2.6.20/mm/page_alloc.c	2007-02-12 22:07:30.000000000 -0800
@@ -1374,10 +1374,47 @@
 got_pg:
 	return page;
 }
-
 EXPORT_SYMBOL(__alloc_pages);
 
 /*
+ * Special allocation function to get memory within a specified address
+ * range, as needed for allocations on behalf of DMA devices that are
+ * unable to address all of memory.
+ */
+struct page *__alloc_pages_range(unsigned long low, unsigned long high,
+		gfp_t gfp_flags, unsigned int order,
+		struct zonelist *zl)
+{
+	BUG_ON(gfp_flags & (__GFP_HIGHMEM|__GFP_DMA32|__GFP_DMA));
+
+	/*
+	 * If the specified address range includes all of available RAM,
+	 * then we can fall back to __alloc_pages() without any issues.
+	 * This may occur e.g. if we only have 2GB RAM on x86_64: 32-bit
+	 * DMA will then work on all of memory, and even controllers that
+	 * only support 31 bits will be fine.
+	 */
+	if ((void *)high >= pfn_to_kaddr(max_low_pfn) &&
+			(void *)low <= pfn_to_kaddr(NODE_DATA(0)->node_start_pfn))
+		return alloc_pages(gfp_flags, order);
+
+	/*
+	 * If we have any DMA zones, then use them to optimize allocations.
+	 */
+#ifdef CONFIG_ZONE_DMA
+	if (high <= MAX_DMA_ADDRESS && !low)
+		return __alloc_pages(gfp_flags | GFP_DMA, order, zl);
+#endif
+
+#ifdef CONFIG_ZONE_DMA32
+	if (high <= (unsigned long)__va(MAX_DMA32_PFN << PAGE_SHIFT) && !low)
+		return __alloc_pages(gfp_flags | GFP_DMA32, order, zl);
+#endif
+	/* We have no means of satisfying the allocation constraints */
+	return NULL;
+}
+
+/*
  * Common helper functions.
  */
 fastcall unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order)

Index: linux-2.6.20/mm/mempolicy.c
===================================================================
--- linux-2.6.20.orig/mm/mempolicy.c	2007-02-12 21:19:55.000000000 -0800
+++ linux-2.6.20/mm/mempolicy.c	2007-02-12 22:09:28.000000000 -0800
@@ -1306,6 +1306,35 @@
 }
 EXPORT_SYMBOL(alloc_pages_current);
 
+/**
+ * alloc_pages_range_current - allocate pages in [low, high) obeying policy
+ */
+struct page *alloc_pages_range_current(unsigned long low, unsigned long high,
+		gfp_t gfp, unsigned order)
+{
+	struct mempolicy *pol = current->mempolicy;
+
+	if ((gfp & __GFP_WAIT) && !in_interrupt())
+		cpuset_update_task_memory_state();
+	if (!pol || in_interrupt() || (gfp & __GFP_THISNODE))
+		pol = &default_policy;
+	if (pol->policy == MPOL_INTERLEAVE) {
+		int nid = interleave_nodes(pol);
+		struct zonelist *zl;
+		struct page *page;
+
+		zl = NODE_DATA(nid)->node_zonelists + gfp_zone(gfp);
+		page = __alloc_pages_range(low, high, gfp, order, zl);
+		if (page && page_zone(page) == zl->zones[0])
+			inc_zone_page_state(page, NUMA_INTERLEAVE_HIT);
+		return page;
+	} else {
+		return __alloc_pages_range(low, high, gfp, order,
+				zonelist_policy(gfp, pol));
+	}
+}
+EXPORT_SYMBOL(alloc_pages_range_current);
+
 /*
  * If mpol_copy() sees current->cpuset == cpuset_being_rebound, then it
  * rebinds the mempolicy its copying by calling mpol_rebind_policy()
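To make the dma_alloc_coherent() conversion mentioned at the top concrete,
here is a rough sketch. This is an editor's illustration only, not part of
the patch: the function name, the mask-to-range conversion, and the
clamping against max_low_pfn are assumptions. The device's DMA mask becomes
an exclusive upper physical bound, translated with __va() into the kernel
virtual addresses that __alloc_pages_range() works with; a mask covering
all of low memory makes it take the fast __alloc_pages() fallback.

#include <linux/mm.h>
#include <linux/string.h>
#include <linux/bootmem.h>	/* max_low_pfn */
#include <linux/dma-mapping.h>
#include <asm/io.h>		/* page_to_phys() */

/* Editor's sketch of an arch dma_alloc_coherent() built on the new API */
static void *dma_alloc_coherent_sketch(struct device *dev, size_t size,
				dma_addr_t *handle, gfp_t gfp)
{
	u64 mask = (dev && dev->dma_mask) ? *dev->dma_mask : DMA_32BIT_MASK;
	u64 limit = (u64)max_low_pfn << PAGE_SHIFT;	/* top of lowmem */
	struct page *page;

	/* The explicit address range replaces the zone modifiers */
	gfp &= ~(__GFP_DMA | __GFP_DMA32);

	/*
	 * Clamp the exclusive upper bound to the top of the direct
	 * mapping so that __va() stays valid and devices that can reach
	 * all of RAM hit the plain __alloc_pages() fallback.
	 */
	if (mask < limit - 1)
		limit = mask + 1;

	page = alloc_pages_range(0, (unsigned long)__va(limit),
					gfp, get_order(size));
	if (!page)
		return NULL;

	memset(page_address(page), 0, size);
	*handle = page_to_phys(page);
	return page_address(page);
}

A real conversion would also have to honor callers that still pass GFP_DMA
and any per-arch coherency requirements; on i386 the direct mapping is
already coherent, so nothing further is needed for this sketch.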