Add a dirty map to the address space structure In a NUMA system it is helpful to know where the dirty pages of a mapping are located. That way we will be able to implement writeout for applications that are constrained to a portion of the memory of the system as required by cpusets. Two functions are introduced to manage the dirty node map: cpuset_clear_dirty_nodes() and cpuset_update_dirty_nodes(). Both are defined using macros since the definition of an address space may not be available in cpuset.h. The synchronization of the dirty_map updates is a bit problematic since the clearing of the inode does not involve taking the tree_lock. The only potential harm that could occur is if dirty bits are lost. That is rare and will be impossibly rare if multiple pages are involved. There is therefore a slight chance that we have missed a dirty node if it just contains a single page. That is likely tolerable. Signed-off-by: Christoph Lameter Index: linux-2.6.20-rc4/fs/fs-writeback.c =================================================================== --- linux-2.6.20-rc4.orig/fs/fs-writeback.c 2007-01-06 23:45:51.000000000 -0600 +++ linux-2.6.20-rc4/fs/fs-writeback.c 2007-01-08 12:40:21.822743331 -0600 @@ -22,6 +22,7 @@ #include #include #include +#include #include "internal.h" /** @@ -223,11 +224,13 @@ __sync_single_inode(struct inode *inode, /* * The inode is clean, inuse */ + cpuset_clear_dirty_nodes(inode->i_mapping); list_move(&inode->i_list, &inode_in_use); } else { /* * The inode is clean, unused */ + cpuset_clear_dirty_nodes(inode->i_mapping); list_move(&inode->i_list, &inode_unused); } } Index: linux-2.6.20-rc4/fs/inode.c =================================================================== --- linux-2.6.20-rc4.orig/fs/inode.c 2007-01-06 23:45:51.000000000 -0600 +++ linux-2.6.20-rc4/fs/inode.c 2007-01-08 12:40:21.841299647 -0600 @@ -22,6 +22,7 @@ #include #include #include +#include /* * This is needed for the following functions: @@ -147,6 +148,7 @@ static struct inode
*alloc_inode(struct mapping->flags = 0; mapping_set_gfp_mask(mapping, GFP_HIGHUSER); mapping->assoc_mapping = NULL; + cpuset_clear_dirty_nodes(mapping); mapping->backing_dev_info = &default_backing_dev_info; /* Index: linux-2.6.20-rc4/include/linux/fs.h =================================================================== --- linux-2.6.20-rc4.orig/include/linux/fs.h 2007-01-06 23:45:51.000000000 -0600 +++ linux-2.6.20-rc4/include/linux/fs.h 2007-01-08 12:40:21.874505687 -0600 @@ -446,6 +446,9 @@ struct address_space { spinlock_t private_lock; /* for use by the address_space */ struct list_head private_list; /* ditto */ struct address_space *assoc_mapping; /* ditto */ +#ifdef CONFIG_CPUSETS + nodemask_t dirty_nodes; /* Map of nodes with dirty pages */ +#endif } __attribute__((aligned(sizeof(long)))); /* * On most architectures that alignment is already the case; but Index: linux-2.6.20-rc4/mm/page-writeback.c =================================================================== --- linux-2.6.20-rc4.orig/mm/page-writeback.c 2007-01-06 23:45:51.000000000 -0600 +++ linux-2.6.20-rc4/mm/page-writeback.c 2007-01-08 12:43:06.118415712 -0600 @@ -33,6 +33,7 @@ #include #include #include +#include /* * The maximum number of pages to writeout in a single bdflush/kupdate @@ -776,6 +777,7 @@ int __set_page_dirty_nobuffers(struct pa radix_tree_tag_set(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); } + cpuset_update_dirty_nodes(mapping, page); write_unlock_irq(&mapping->tree_lock); if (mapping->host) { /* !PageAnon && !swapper_space */ Index: linux-2.6.20-rc4/fs/buffer.c =================================================================== --- linux-2.6.20-rc4.orig/fs/buffer.c 2007-01-06 23:45:51.000000000 -0600 +++ linux-2.6.20-rc4/fs/buffer.c 2007-01-08 12:40:21.958497436 -0600 @@ -42,6 +42,7 @@ #include #include #include +#include static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); static void invalidate_bh_lrus(void); @@ -736,6 +737,7 @@ int 
__set_page_dirty_buffers(struct page } radix_tree_tag_set(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); + cpuset_update_dirty_nodes(mapping, page); } write_unlock_irq(&mapping->tree_lock); __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); Index: linux-2.6.20-rc4/include/linux/cpuset.h =================================================================== --- linux-2.6.20-rc4.orig/include/linux/cpuset.h 2007-01-06 23:45:51.000000000 -0600 +++ linux-2.6.20-rc4/include/linux/cpuset.h 2007-01-08 12:40:21.971193863 -0600 @@ -75,6 +75,15 @@ static inline int cpuset_do_slab_mem_spr extern void cpuset_track_online_nodes(void); +/* + * We need macros since struct address_space is not defined yet + */ +#define cpuset_update_dirty_nodes(__mapping, __page) \ + node_set(page_to_nid(__page), (__mapping)->dirty_nodes) + +#define cpuset_clear_dirty_nodes(__mapping) \ + (__mapping)->dirty_nodes = NODE_MASK_NONE + #else /* !CONFIG_CPUSETS */ static inline int cpuset_init_early(void) { return 0; } @@ -146,6 +155,11 @@ static inline int cpuset_do_slab_mem_spr static inline void cpuset_track_online_nodes(void) {} +static inline void cpuset_update_dirty_nodes(struct address_space *mapping, + struct page *page) {} + +static inline void cpuset_clear_dirty_nodes(struct address_space *mapping) {} + #endif /* !CONFIG_CPUSETS */ #endif /* _LINUX_CPUSET_H */