[PATCH] Free up 6 page flags and reserve them. The freed flags are only available when classic sparsemem is not configured (sparsemem_vmemmap is okay), so any kernel feature that wants to use them must declare "depends on SPARSEMEM_VMEMMAP || !SPARSEMEM"; this patch adds CONFIG_EXTENDED_PAGE_FLAGS with exactly that dependency. I would like to reserve these page flags: A. PageTail: tail page of a compound page (currently encoded by overloading PG_compound together with PG_reclaim). B. PageVcompound: virtualized compound page (currently overloaded with PG_dirty). C. PageMlock: an mlocked page. D. PagePin: a pinned page (distinct from the existing Xen PagePinned alias of PG_owner_priv_1). E. PageExport: page is externally referenced. F. PageFileBacked: page is backed by a real file. --- include/linux/mm.h | 10 - include/linux/mmzone.h | 19 -- include/linux/page-flags.h | 334 +++++++++++++++++++++++---------------------- mm/Kconfig | 7 mm/page_alloc.c | 3 mm/sparse.c | 3 6 files changed, 194 insertions(+), 182 deletions(-) Index: linux-2.6/mm/sparse.c =================================================================== --- linux-2.6.orig/mm/sparse.c 2008-02-17 17:06:58.000000000 -0800 +++ linux-2.6/mm/sparse.c 2008-02-17 17:07:58.000000000 -0800 @@ -27,6 +27,9 @@ struct mem_section mem_section[NR_SECTIO EXPORT_SYMBOL(mem_section); #ifdef NODE_NOT_IN_PAGE_FLAGS +#ifdef CONFIG_SPARSEMEM_VMEMMAP +#error SPARSEMEM_VMEMMAP must have the node in page flags +#endif /* * If we did not store the node number in the page then we have to * do a lookup in the section_to_node_table in order to find which Index: linux-2.6/include/linux/mm.h =================================================================== --- linux-2.6.orig/include/linux/mm.h 2008-02-17 16:59:24.000000000 -0800 +++ linux-2.6/include/linux/mm.h 2008-02-17 19:00:40.000000000 -0800 @@ -388,11 +388,11 @@ static inline void set_compound_order(st * we have run out of space and have to fall back to an * alternate (slower) way of determining the node. * - * No sparsemem: | NODE | ZONE | ... | FLAGS | + * No sparsemem or vmemmap: | NODE | ZONE | ... | FLAGS | * with space for node: | SECTION | NODE | ZONE | ... 
| FLAGS | * no space for node: | SECTION | ZONE | ... | FLAGS | */ -#ifdef CONFIG_SPARSEMEM +#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) #define SECTIONS_WIDTH SECTIONS_SHIFT #else #define SECTIONS_WIDTH 0 @@ -400,7 +400,7 @@ static inline void set_compound_order(st #define ZONES_WIDTH ZONES_SHIFT -#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT <= FLAGS_RESERVED +#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS #define NODES_WIDTH NODES_SHIFT #else #define NODES_WIDTH 0 @@ -445,8 +445,8 @@ static inline void set_compound_order(st #define ZONEID_PGSHIFT (ZONEID_PGOFF * (ZONEID_SHIFT != 0)) -#if SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > FLAGS_RESERVED -#error SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > FLAGS_RESERVED +#if SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > BITS_PER_LONG - NR_PAGEFLAGS +#error SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > BITS_PER_LONG - NR_PAGEFLAGS #endif #define ZONES_MASK ((1UL << ZONES_WIDTH) - 1) Index: linux-2.6/include/linux/page-flags.h =================================================================== --- linux-2.6.orig/include/linux/page-flags.h 2008-02-17 16:59:24.000000000 -0800 +++ linux-2.6/include/linux/page-flags.h 2008-02-17 21:00:39.000000000 -0800 @@ -67,69 +67,148 @@ * FLAGS_RESERVED which defines the width of the fields section * (see linux/mmzone.h). New flags must _not_ overlap with this area. */ -#define PG_locked 0 /* Page is locked. Don't touch. */ -#define PG_error 1 -#define PG_referenced 2 -#define PG_uptodate 3 - -#define PG_dirty 4 -#define PG_lru 5 -#define PG_active 6 -#define PG_slab 7 /* slab debug (Suparna wants this) */ - -#define PG_owner_priv_1 8 /* Owner use. 
If pagecache, fs may use*/ -#define PG_arch_1 9 -#define PG_reserved 10 -#define PG_private 11 /* If pagecache, has fs-private data */ - -#define PG_writeback 12 /* Page is under writeback */ -#define PG_compound 14 /* Part of a compound page */ -#define PG_swapcache 15 /* Swap page: swp_entry_t in private */ - -#define PG_mappedtodisk 16 /* Has blocks allocated on-disk */ -#define PG_reclaim 17 /* To be reclaimed asap */ -#define PG_buddy 19 /* Page is free, on buddy lists */ +enum pageflags { + PG_locked, /* Page is locked. Don't touch. */ + PG_error, + PG_referenced, + PG_uptodate, + PG_dirty, + PG_lru, + PG_active, + PG_slab, + PG_owner_priv_1, /* Owner use. If pagecache, fs may use*/ + PG_arch_1, + PG_reserved, + PG_private, /* If pagecache, has fs-private data */ + PG_writeback, /* Page is under writeback */ + PG_head, /* Head of a compound page */ + PG_swapcache, /* Swap page: swp_entry_t in private */ + PG_mappedtodisk, /* Has blocks allocated on-disk */ + PG_reclaim, /* To be reclaimed asap */ + PG_buddy, /* Page is free, on buddy lists */ -/* PG_readahead is only used for file reads; PG_reclaim is only for writes */ -#define PG_readahead PG_reclaim /* Reminder to do async read-ahead */ - -/* PG_owner_priv_1 users should have descriptive aliases */ -#define PG_checked PG_owner_priv_1 /* Used by some filesystems */ -#define PG_pinned PG_owner_priv_1 /* Xen pinned pagetable */ - -#if (BITS_PER_LONG > 32) +#ifdef CONFIG_EXTENDED_PAGE_FLAGS /* - * 64-bit-only flags build down from bit 31 - * - * 32 bit -------------------------------| FIELDS | FLAGS | - * 64 bit | FIELDS | ?????? 
FLAGS | - * 63 32 0 + * Page flags that are only available without sparsemem + * (sparsemem vmemmap is ok) */ -#define PG_uncached 31 /* Page has been mapped as uncached */ + + PG_mlock, /* Page cannot be swapped out */ + PG_pin, /* Page cannot be moved in memory */ + PG_tail, /* Tail of a compound page */ + PG_export, /* Page is referenced directly from a driver */ + PG_vcompound, /* Compound page is virtually mapped */ + PG_filebacked, /* Page is backed by an actual disk (not RAM) */ +#endif + +#ifdef CONFIG_IA64_UNCACHED_ALLOCATOR + PG_uncached, /* Page has been mapped as uncached */ +#endif + __NR_PAGEFLAGS /* For verification purposes */ +}; + +#ifdef CONFIG_IA64_UNCACHED_ALLOCATOR +#define FLAGS_IA64 1 +#else +#define FLAGS_IA64 0 #endif +#ifdef CONFIG_EXTENDED_PAGE_FLAGS +#define FLAGS_EXTENDED 6 +#else +#define FLAGS_EXTENDED 0 +#endif + +#define FLAGS_NORMAL 18 +#define NR_PAGEFLAGS (FLAGS_NORMAL + FLAGS_EXTENDED + FLAGS_IA64) + +struct page; /* forward declaration */ + /* * Manipulation of page state flags */ -#define PageLocked(page) \ - test_bit(PG_locked, &(page)->flags) -#define SetPageLocked(page) \ - set_bit(PG_locked, &(page)->flags) -#define TestSetPageLocked(page) \ - test_and_set_bit(PG_locked, &(page)->flags) -#define ClearPageLocked(page) \ - clear_bit(PG_locked, &(page)->flags) -#define TestClearPageLocked(page) \ - test_and_clear_bit(PG_locked, &(page)->flags) - -#define PageError(page) test_bit(PG_error, &(page)->flags) -#define SetPageError(page) set_bit(PG_error, &(page)->flags) -#define ClearPageError(page) clear_bit(PG_error, &(page)->flags) - -#define PageReferenced(page) test_bit(PG_referenced, &(page)->flags) -#define SetPageReferenced(page) set_bit(PG_referenced, &(page)->flags) -#define ClearPageReferenced(page) clear_bit(PG_referenced, &(page)->flags) -#define TestClearPageReferenced(page) test_and_clear_bit(PG_referenced, &(page)->flags) + +#define TESTPAGEFLAG(uname, lname) \ +static inline int Page##uname(struct page *page) \ + 
{ return test_bit(PG_##lname, &page->flags); } + +#define SETPAGEFLAG(uname, lname) \ +static inline void SetPage##uname(struct page *page) \ + { set_bit(PG_##lname, &page->flags); } + +#define CLEARPAGEFLAG(uname, lname) \ +static inline void ClearPage##uname(struct page *page) \ + { clear_bit(PG_##lname, &page->flags); } + +#define __SETPAGEFLAG(uname, lname) \ +static inline void __SetPage##uname(struct page *page) \ + { __set_bit(PG_##lname, &page->flags); } + +#define __CLEARPAGEFLAG(uname, lname) \ +static inline void __ClearPage##uname(struct page *page) \ + { __clear_bit(PG_##lname, &page->flags); } + +#define TESTSETFLAG(uname, lname) \ +static inline int TestSetPage##uname(struct page *page) \ + { return test_and_set_bit(PG_##lname, &page->flags); } + +#define TESTCLEARFLAG(uname, lname) \ +static inline int TestClearPage##uname(struct page *page) \ + { return test_and_clear_bit(PG_##lname, &page->flags); } + + +#define PAGEFLAG(uname, lname) TESTPAGEFLAG(uname, lname) \ + SETPAGEFLAG(uname, lname) CLEARPAGEFLAG(uname, lname) + +#define __PAGEFLAG(uname, lname) TESTPAGEFLAG(uname, lname) \ + __SETPAGEFLAG(uname, lname) __CLEARPAGEFLAG(uname, lname) + +#define TESTSCFLAG(uname, lname) \ + TESTSETFLAG(uname, lname) TESTCLEARFLAG(uname, lname) + +PAGEFLAG(Locked, locked) TESTSCFLAG(Locked, locked) +PAGEFLAG(Error, error) +PAGEFLAG(Referenced, referenced) TESTCLEARFLAG(Referenced, referenced) +PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty) +PAGEFLAG(LRU, lru) __CLEARPAGEFLAG(LRU, lru) +PAGEFLAG(Active, active) __CLEARPAGEFLAG(Active, active) +__PAGEFLAG(Slab, slab) +PAGEFLAG(Checked, owner_priv_1) /* Used by some filesystems */ +PAGEFLAG(Pinned, owner_priv_1) /* Xen pinned pagetable */ +PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved) +PAGEFLAG(Private, private) __CLEARPAGEFLAG(Private, private) + __SETPAGEFLAG(Private, private) + +/* + * Only test-and-set exist for PG_writeback. The unconditional operators are + * risky: they bypass page accounting. 
+ */ +TESTPAGEFLAG(Writeback, writeback) TESTSCFLAG(Writeback, writeback) +__PAGEFLAG(Buddy, buddy) +PAGEFLAG(MappedToDisk, mappedtodisk) + +/* PG_readahead is only used for file reads; PG_reclaim is only for writes */ +PAGEFLAG(Reclaim, reclaim) TESTCLEARFLAG(Reclaim, reclaim) +PAGEFLAG(Readahead, reclaim) /* Reminder to do async read-ahead */ + +#ifdef CONFIG_HIGHMEM +#define PageHighMem(page) is_highmem(page_zone(page)) +#else +#define PageHighMem(page) 0 /* needed to optimize away at compile time */ +#endif + +#ifdef CONFIG_SWAP +PAGEFLAG(SwapCache, swapcache) +#else +static inline int PageSwapCache(struct page *page) +{ + return 0; +} +#endif + +#ifdef CONFIG_IA64_UNCACHED_ALLOCATOR +PAGEFLAG(Uncached, uncached) +#endif static inline int PageUptodate(struct page *page) { @@ -177,97 +256,58 @@ static inline void SetPageUptodate(struc #endif } -#define ClearPageUptodate(page) clear_bit(PG_uptodate, &(page)->flags) +CLEARPAGEFLAG(Uptodate, uptodate) -#define PageDirty(page) test_bit(PG_dirty, &(page)->flags) -#define SetPageDirty(page) set_bit(PG_dirty, &(page)->flags) -#define TestSetPageDirty(page) test_and_set_bit(PG_dirty, &(page)->flags) -#define ClearPageDirty(page) clear_bit(PG_dirty, &(page)->flags) -#define __ClearPageDirty(page) __clear_bit(PG_dirty, &(page)->flags) -#define TestClearPageDirty(page) test_and_clear_bit(PG_dirty, &(page)->flags) - -#define PageLRU(page) test_bit(PG_lru, &(page)->flags) -#define SetPageLRU(page) set_bit(PG_lru, &(page)->flags) -#define ClearPageLRU(page) clear_bit(PG_lru, &(page)->flags) -#define __ClearPageLRU(page) __clear_bit(PG_lru, &(page)->flags) - -#define PageActive(page) test_bit(PG_active, &(page)->flags) -#define SetPageActive(page) set_bit(PG_active, &(page)->flags) -#define ClearPageActive(page) clear_bit(PG_active, &(page)->flags) -#define __ClearPageActive(page) __clear_bit(PG_active, &(page)->flags) - -#define PageSlab(page) test_bit(PG_slab, &(page)->flags) -#define __SetPageSlab(page) __set_bit(PG_slab, 
&(page)->flags) -#define __ClearPageSlab(page) __clear_bit(PG_slab, &(page)->flags) +extern void cancel_dirty_page(struct page *page, unsigned int account_size); -#ifdef CONFIG_HIGHMEM -#define PageHighMem(page) is_highmem(page_zone(page)) -#else -#define PageHighMem(page) 0 /* needed to optimize away at compile time */ -#endif +int test_clear_page_writeback(struct page *page); +int test_set_page_writeback(struct page *page); + +static inline void set_page_writeback(struct page *page) +{ + test_set_page_writeback(page); +} + +#ifdef CONFIG_EXTENDED_PAGE_FLAGS + +__PAGEFLAG(Head, head) +__PAGEFLAG(Tail, tail) +__PAGEFLAG(Vcompound, vcompound) +__PAGEFLAG(Mlock, mlock) +__PAGEFLAG(Pin, pin) +__PAGEFLAG(FileBacked, filebacked) +__PAGEFLAG(Export, export) -#define PageChecked(page) test_bit(PG_checked, &(page)->flags) -#define SetPageChecked(page) set_bit(PG_checked, &(page)->flags) -#define ClearPageChecked(page) clear_bit(PG_checked, &(page)->flags) - -#define PagePinned(page) test_bit(PG_pinned, &(page)->flags) -#define SetPagePinned(page) set_bit(PG_pinned, &(page)->flags) -#define ClearPagePinned(page) clear_bit(PG_pinned, &(page)->flags) - -#define PageReserved(page) test_bit(PG_reserved, &(page)->flags) -#define SetPageReserved(page) set_bit(PG_reserved, &(page)->flags) -#define ClearPageReserved(page) clear_bit(PG_reserved, &(page)->flags) -#define __ClearPageReserved(page) __clear_bit(PG_reserved, &(page)->flags) - -#define SetPagePrivate(page) set_bit(PG_private, &(page)->flags) -#define ClearPagePrivate(page) clear_bit(PG_private, &(page)->flags) -#define PagePrivate(page) test_bit(PG_private, &(page)->flags) -#define __SetPagePrivate(page) __set_bit(PG_private, &(page)->flags) -#define __ClearPagePrivate(page) __clear_bit(PG_private, &(page)->flags) +static inline int PageCompound(struct page *page) +{ + return (page->flags & ((1 << PG_tail) | (1 << PG_head))) != 0; +} +#else /* - * Only test-and-set exist for PG_writeback. 
The unconditional operators are - * risky: they bypass page accounting. + * Fallback for sparsemem (not vmemmap) configurations that require + * the use of a lot of page flags. Could be removed if those + * sparsemem configurations are no longer used. */ -#define PageWriteback(page) test_bit(PG_writeback, &(page)->flags) -#define TestSetPageWriteback(page) test_and_set_bit(PG_writeback, \ - &(page)->flags) -#define TestClearPageWriteback(page) test_and_clear_bit(PG_writeback, \ - &(page)->flags) - -#define PageBuddy(page) test_bit(PG_buddy, &(page)->flags) -#define __SetPageBuddy(page) __set_bit(PG_buddy, &(page)->flags) -#define __ClearPageBuddy(page) __clear_bit(PG_buddy, &(page)->flags) - -#define PageMappedToDisk(page) test_bit(PG_mappedtodisk, &(page)->flags) -#define SetPageMappedToDisk(page) set_bit(PG_mappedtodisk, &(page)->flags) -#define ClearPageMappedToDisk(page) clear_bit(PG_mappedtodisk, &(page)->flags) - -#define PageReadahead(page) test_bit(PG_readahead, &(page)->flags) -#define SetPageReadahead(page) set_bit(PG_readahead, &(page)->flags) -#define ClearPageReadahead(page) clear_bit(PG_readahead, &(page)->flags) - -#define PageReclaim(page) test_bit(PG_reclaim, &(page)->flags) -#define SetPageReclaim(page) set_bit(PG_reclaim, &(page)->flags) -#define ClearPageReclaim(page) clear_bit(PG_reclaim, &(page)->flags) -#define TestClearPageReclaim(page) test_and_clear_bit(PG_reclaim, &(page)->flags) - -#define PageCompound(page) test_bit(PG_compound, &(page)->flags) -#define __SetPageCompound(page) __set_bit(PG_compound, &(page)->flags) -#define __ClearPageCompound(page) __clear_bit(PG_compound, &(page)->flags) +#define PG_compound PG_head + +__PAGEFLAG(Compound, head) +__PAGEFLAG(Head, head) /* * PG_reclaim is used in combination with PG_compound to mark the - * head and tail of a compound page + * head and tail of a compound page. This saves one page flag + * but makes it impossible to use compound pages for the page cache. 
* * PG_compound & PG_reclaim => Tail page * PG_compound & ~PG_reclaim => Head page */ - #define PG_head_tail_mask ((1L << PG_compound) | (1L << PG_reclaim)) -#define PageTail(page) ((page->flags & PG_head_tail_mask) \ - == PG_head_tail_mask) +static inline int PageTail(struct page *page) +{ + return ((page->flags & PG_head_tail_mask) == PG_head_tail_mask); +} static inline void __SetPageTail(struct page *page) { @@ -279,33 +319,13 @@ static inline void __ClearPageTail(struc page->flags &= ~PG_head_tail_mask; } -#define PageHead(page) ((page->flags & PG_head_tail_mask) \ - == (1L << PG_compound)) -#define __SetPageHead(page) __SetPageCompound(page) -#define __ClearPageHead(page) __ClearPageCompound(page) +/* + * Compound pages cannot be used for the page cache if we do not + * have enough flags. So we can use the dirty flag for virtually + * mapped compound pages. + */ +__PAGEFLAG(Vcompound, dirty) -#ifdef CONFIG_SWAP -#define PageSwapCache(page) test_bit(PG_swapcache, &(page)->flags) -#define SetPageSwapCache(page) set_bit(PG_swapcache, &(page)->flags) -#define ClearPageSwapCache(page) clear_bit(PG_swapcache, &(page)->flags) -#else -#define PageSwapCache(page) 0 #endif -#define PageUncached(page) test_bit(PG_uncached, &(page)->flags) -#define SetPageUncached(page) set_bit(PG_uncached, &(page)->flags) -#define ClearPageUncached(page) clear_bit(PG_uncached, &(page)->flags) - -struct page; /* forward declaration */ - -extern void cancel_dirty_page(struct page *page, unsigned int account_size); - -int test_clear_page_writeback(struct page *page); -int test_set_page_writeback(struct page *page); - -static inline void set_page_writeback(struct page *page) -{ - test_set_page_writeback(page); -} - #endif /* PAGE_FLAGS_H */ Index: linux-2.6/mm/Kconfig =================================================================== --- linux-2.6.orig/mm/Kconfig 2008-02-17 16:59:24.000000000 -0800 +++ linux-2.6/mm/Kconfig 2008-02-17 17:06:14.000000000 -0800 @@ -190,6 +190,13 @@ config 
NR_QUICK default "2" if SUPERH default "1" +# +# Regular sparsemem consumes a lot of page flags +# +config EXTENDED_PAGE_FLAGS + bool + depends on SPARSEMEM_VMEMMAP || !SPARSEMEM + config VIRT_TO_BUS def_bool y depends on !ARCH_NO_VIRT_TO_BUS Index: linux-2.6/include/linux/mmzone.h =================================================================== --- linux-2.6.orig/include/linux/mmzone.h 2008-02-17 17:12:44.000000000 -0800 +++ linux-2.6/include/linux/mmzone.h 2008-02-17 19:00:47.000000000 -0800 @@ -735,25 +735,6 @@ extern struct zone *next_zone(struct zon #include #endif -#if BITS_PER_LONG == 32 -/* - * with 32 bit page->flags field, we reserve 9 bits for node/zone info. - * there are 4 zones (3 bits) and this leaves 9-3=6 bits for nodes. - */ -#define FLAGS_RESERVED 9 - -#elif BITS_PER_LONG == 64 -/* - * with 64 bit flags field, there's plenty of room. - */ -#define FLAGS_RESERVED 32 - -#else - -#error BITS_PER_LONG not defined - -#endif - #if !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) && \ !defined(CONFIG_ARCH_POPULATES_NODE_MAP) #define early_pfn_to_nid(nid) (0UL) Index: linux-2.6/mm/page_alloc.c =================================================================== --- linux-2.6.orig/mm/page_alloc.c 2008-02-17 17:27:13.000000000 -0800 +++ linux-2.6/mm/page_alloc.c 2008-02-17 20:59:38.000000000 -0800 @@ -623,7 +623,7 @@ static int prep_new_page(struct page *pa if (PageReserved(page)) return 1; - page->flags &= ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_readahead | + page->flags &= ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_reclaim | 1 << PG_referenced | 1 << PG_arch_1 | 1 << PG_owner_priv_1 | 1 << PG_mappedtodisk); set_page_private(page, 0); @@ -3904,6 +3904,7 @@ static int __init cmdline_parse_core(cha if (!p) return -EINVAL; + BUILD_BUG_ON(NR_PAGEFLAGS != __NR_PAGEFLAGS); coremem = memparse(p, &p); *core = coremem >> PAGE_SHIFT;