Define functions for page cache handling

We use the macros PAGE_CACHE_SIZE, PAGE_CACHE_SHIFT, PAGE_CACHE_MASK
and PAGE_CACHE_ALIGN in various places in the kernel. These are useful
if one only wants to support a single page size in the page cache.
This patch provides a set of functions so that additional page sizes
can be defined in the future. All functions take an address_space
pointer.

Add a set of extended functions that will be used to consolidate the
hand crafted shifts and adds in use right now.

New function				Related base page constant
------------------------------------------------------------------
page_cache_shift(a)			PAGE_CACHE_SHIFT
page_cache_size(a)			PAGE_CACHE_SIZE
page_cache_mask(a)			PAGE_CACHE_MASK
page_cache_index(a, pos)		Calculate page number from position
page_cache_next(addr, pos)		Page number of next page
page_cache_offset(a, pos)		Calculate offset into a page
page_cache_pos(a, index, offset)	Form position based on page number
					and an offset

The behaviour of these functions depends on CONFIG_LARGE_BLOCKSIZE.
If it is set, the sizes are calculated dynamically per mapping.
Otherwise the functions return constant results.
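As an illustration only (not part of the patch), a minimal sketch of how
a caller is expected to combine the new helpers; the function name and
the BUG_ON() consistency check are made up for this example:

/*
 * Illustrative sketch: split a file position into a page cache index
 * and an offset, and compute how many bytes remain in that page cache
 * page. The helper name is hypothetical.
 */
static size_t example_bytes_left_in_page(struct address_space *mapping,
		loff_t pos)
{
	pgoff_t index = page_cache_index(mapping, pos);	/* page cache page number */
	unsigned int offset = page_cache_offset(mapping, pos);	/* offset within that page */

	/* page_cache_pos() recombines index and offset into the original position */
	BUG_ON(page_cache_pos(mapping, index, offset) != pos);

	return page_cache_size(mapping) - offset;
}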
Signed-off-by: Christoph Lameter

---
 block/Kconfig           |   17 +++++
 include/linux/fs.h      |    5 +
 include/linux/pagemap.h |  142 ++++++++++++++++++++++++++++++++++++++++++++++--
 mm/filemap.c            |   12 ++--
 4 files changed, 166 insertions(+), 10 deletions(-)

Index: linux-2.6.21-rc7-mm1/include/linux/pagemap.h
===================================================================
--- linux-2.6.21-rc7-mm1.orig/include/linux/pagemap.h	2007-04-25 08:45:25.000000000 -0700
+++ linux-2.6.21-rc7-mm1/include/linux/pagemap.h	2007-04-25 08:53:48.000000000 -0700
@@ -43,6 +43,10 @@ static inline void mapping_set_gfp_mask(
 {
 	m->flags = (m->flags & ~(__force unsigned long)__GFP_BITS_MASK) |
 				(__force unsigned long)mask;
+#ifdef CONFIG_LARGE_BLOCKSIZE
+	if (m->order)
+		m->flags |= __GFP_COMP;
+#endif
 }

 /*
@@ -52,33 +56,161 @@ static inline void mapping_set_gfp_mask(
  * space in smaller chunks for same flexibility).
  *
  * Or rather, it _will_ be done in larger chunks.
+ *
+ * The following constants can be used if a filesystem only supports a single
+ * page size.
  */
 #define PAGE_CACHE_SHIFT	PAGE_SHIFT
 #define PAGE_CACHE_SIZE		PAGE_SIZE
 #define PAGE_CACHE_MASK		PAGE_MASK
 #define PAGE_CACHE_ALIGN(addr)	(((addr)+PAGE_CACHE_SIZE-1)&PAGE_CACHE_MASK)

+/*
+ * The next set of functions allow writing code that is capable of dealing
+ * with multiple page sizes.
+ */
+#ifdef CONFIG_LARGE_BLOCKSIZE
+/*
+ * We support compound pages in the page cache. This means that a page
+ * pointer points to the head page of the compound page. Tail pages
+ * follow the head page. Thus page++ will *not* necessarily give you
+ * the next page.
+ */
+static inline void set_mapping_order(struct address_space *a, int order)
+{
+	a->order = order;
+	a->shift = order + PAGE_SHIFT;
+	a->offset_mask = (1UL << a->shift) - 1;
+	if (order)
+		a->flags |= __GFP_COMP;
+}
+
+/*
+ * Determine the order of a mapping. A higher order mapping contains
+ * one head page and (1 << mapping_order) - 1 tail pages.
+ */
+static inline int mapping_order(struct address_space *a)
+{
+	return a->order;
+}
+
+static inline int page_cache_shift(struct address_space *a)
+{
+	return a->shift;
+}
+
+/*
+ * Size of a page cache page. When compound pages are used we can
+ * end up with an arbitrarily large page size.
+ */
+static inline unsigned int page_cache_size(struct address_space *a)
+{
+	return a->offset_mask + 1;
+}
+
+static inline loff_t page_cache_mask(struct address_space *a)
+{
+	return ~a->offset_mask;
+}
+
+/*
+ * Offset into the (potentially higher order) page. The offset may
+ * be > PAGE_SIZE if we use compound pages. It will be less than
+ * page_cache_size(mapping) though.
+ */
+static inline unsigned int page_cache_offset(struct address_space *a,
+		loff_t pos)
+{
+	return pos & a->offset_mask;
+}
+#else
+/*
+ * Kernel configured for a fixed PAGE_SIZEd page cache
+ */
+static inline void set_mapping_order(struct address_space *a, int order)
+{
+	BUG_ON(order);
+}
+
+static inline int mapping_order(struct address_space *a)
+{
+	return 0;
+}
+
+static inline int page_cache_shift(struct address_space *a)
+{
+	return PAGE_SHIFT;
+}
+
+static inline unsigned int page_cache_size(struct address_space *a)
+{
+	return PAGE_SIZE;
+}
+
+static inline loff_t page_cache_mask(struct address_space *a)
+{
+	return (loff_t)PAGE_MASK;
+}
+
+static inline unsigned int page_cache_offset(struct address_space *a,
+		loff_t pos)
+{
+	return pos & ~PAGE_MASK;
+}
+#endif
+
+/*
+ * The index in the mapping. In case of a higher order mapping the index
+ * is the number of the higher order page, not the number of a PAGE_SIZEd
+ * base page.
+ */
+static inline pgoff_t page_cache_index(struct address_space *a,
+		loff_t pos)
+{
+	return pos >> page_cache_shift(a);
+}
+
+/*
+ * Index of the page starting on or after the given position.
+ *
+ * In the case of a higher order mapping the index is the
+ * number of the compound page, not an index to a PAGE_SIZEd page.
+ */
+static inline pgoff_t page_cache_next(struct address_space *a,
+		loff_t pos)
+{
+	return page_cache_index(a, pos + page_cache_size(a) - 1);
+}
+
+static inline loff_t page_cache_pos(struct address_space *a,
+		pgoff_t index, unsigned long offset)
+{
+	return ((loff_t)index << page_cache_shift(a)) + offset;
+}
+
 #define page_cache_get(page)		get_page(page)
 #define page_cache_release(page)	put_page(page)
 void release_pages(struct page **pages, int nr, int cold);

 #ifdef CONFIG_NUMA
-extern struct page *__page_cache_alloc(gfp_t gfp);
+extern struct page *__page_cache_alloc(gfp_t gfp, int order);
 #else
-static inline struct page *__page_cache_alloc(gfp_t gfp)
+static inline struct page *__page_cache_alloc(gfp_t gfp, int order)
 {
-	return alloc_pages(gfp, 0);
+	return alloc_pages(gfp, order);
 }
 #endif

 static inline struct page *page_cache_alloc(struct address_space *x)
 {
-	return __page_cache_alloc(mapping_gfp_mask(x));
+	return __page_cache_alloc(mapping_gfp_mask(x),
+			mapping_order(x));
 }

 static inline struct page *page_cache_alloc_cold(struct address_space *x)
 {
-	return __page_cache_alloc(mapping_gfp_mask(x)|__GFP_COLD);
+	return __page_cache_alloc(mapping_gfp_mask(x)|__GFP_COLD,
+			mapping_order(x));
 }

 typedef int filler_t(void *, struct page *);

Index: linux-2.6.21-rc7-mm1/include/linux/fs.h
===================================================================
--- linux-2.6.21-rc7-mm1.orig/include/linux/fs.h	2007-04-25 08:45:25.000000000 -0700
+++ linux-2.6.21-rc7-mm1/include/linux/fs.h	2007-04-25 08:45:43.000000000 -0700
@@ -442,6 +442,11 @@ struct address_space {
 	struct inode		*host;		/* owner: inode, block_device */
 	struct radix_tree_root	page_tree;	/* radix tree of all pages */
 	rwlock_t		tree_lock;	/* and rwlock protecting it */
+#ifdef CONFIG_LARGE_BLOCKSIZE
+	unsigned int		order;		/* Page order of the pages in here */
+	unsigned int		shift;		/* Shift of index */
+	loff_t			offset_mask;	/* Mask to get to offset bits */
+#endif
 	unsigned int		i_mmap_writable;/* count VM_SHARED mappings */
 	struct prio_tree_root	i_mmap;		/* tree of private and shared mappings */
 	struct list_head	i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
Index: linux-2.6.21-rc7-mm1/mm/filemap.c
===================================================================
--- linux-2.6.21-rc7-mm1.orig/mm/filemap.c	2007-04-25 08:45:25.000000000 -0700
+++ linux-2.6.21-rc7-mm1/mm/filemap.c	2007-04-25 08:45:43.000000000 -0700
@@ -469,13 +469,13 @@ int add_to_page_cache_lru(struct page *p
 }

 #ifdef CONFIG_NUMA
-struct page *__page_cache_alloc(gfp_t gfp)
+struct page *__page_cache_alloc(gfp_t gfp, int order)
 {
 	if (cpuset_do_page_mem_spread()) {
 		int n = cpuset_mem_spread_node();
-		return alloc_pages_node(n, gfp, 0);
+		return alloc_pages_node(n, gfp, order);
 	}
-	return alloc_pages(gfp, 0);
+	return alloc_pages(gfp, order);
 }
 EXPORT_SYMBOL(__page_cache_alloc);
 #endif
@@ -697,7 +697,8 @@ repeat:
 	if (!page) {
 		if (!cached_page) {
 			cached_page =
-				__page_cache_alloc(gfp_mask);
+				__page_cache_alloc(gfp_mask,
+					mapping_order(mapping));
 			if (!cached_page)
 				return NULL;
 		}
@@ -832,7 +833,8 @@ grab_cache_page_nowait(struct address_sp
 		page_cache_release(page);
 		return NULL;
 	}
-	page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~__GFP_FS);
+	page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~__GFP_FS,
+			mapping_order(mapping));
 	if (page && add_to_page_cache_lru(page, mapping, index, GFP_KERNEL)) {
 		page_cache_release(page);
 		page = NULL;

Index: linux-2.6.21-rc7-mm1/block/Kconfig
===================================================================
--- linux-2.6.21-rc7-mm1.orig/block/Kconfig	2007-04-25 08:45:25.000000000 -0700
+++ linux-2.6.21-rc7-mm1/block/Kconfig	2007-04-25 08:45:43.000000000 -0700
@@ -49,6 +49,23 @@ config LSF

 	  If unsure, say Y.

+#
+# We do not support HIGHMEM because kmap does not support higher order pages.
+# We do not support 32 bit because smaller machines are limited in memory
+# and fragmentation could easily occur. Also 32 bit machines typically
+# have restricted DMA areas which require page bouncing.
+#
+config LARGE_BLOCKSIZE
+	bool "Support blocksizes larger than page size"
+	default n
+	depends on EXPERIMENTAL && !HIGHMEM && 64BIT
+	help
+	  Allows the page cache to support higher orders of pages. Higher
+	  order page cache pages may be useful to support special devices
+	  like CDs, DVDs and flash, and to increase I/O performance.
+	  However, be aware that higher order pages may cause fragmentation
+	  which in turn may lead to OOM conditions.
+
 endif # BLOCK

 source block/Kconfig.iosched
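As a closing illustration (not part of the patch), a filesystem wanting
larger page cache pages would set the mapping order when it initializes
an inode's mapping. The call site, the helper name and the order value
below are assumptions; a non-zero order requires CONFIG_LARGE_BLOCKSIZE,
otherwise set_mapping_order() only accepts order 0:

/*
 * Illustrative sketch: request order-2 page cache pages (four base
 * pages, e.g. 16k with a 4k PAGE_SIZE) for an inode's mapping.
 */
static void example_setup_large_mapping(struct inode *inode)
{
	set_mapping_order(inode->i_mapping, 2);
}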