From b7ca15f9d0d0508df05630b215d5f9e47ed3b8a7 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 25 Jul 2007 20:35:28 -0700 Subject: [PATCH] Readahead changes to support large blocksize. Fix up readhead for large I/O operations. Only calculate the readahead until the 2M boundary then fall back to one page. Signed-off-by: Fengguang Wu Signed-off-by: Christoph Lameter =================================================================== --- include/linux/mm.h | 2 +- mm/fadvise.c | 4 ++-- mm/filemap.c | 5 ++--- mm/madvise.c | 2 +- mm/readahead.c | 22 ++++++++++++++-------- 5 files changed, 20 insertions(+), 15 deletions(-) Index: linux-2.6/include/linux/mm.h =================================================================== --- linux-2.6.orig/include/linux/mm.h 2008-02-16 20:35:11.000000000 -0800 +++ linux-2.6/include/linux/mm.h 2008-02-16 21:01:44.000000000 -0800 @@ -1130,7 +1130,7 @@ void page_cache_async_readahead(struct a pgoff_t offset, unsigned long size); -unsigned long max_sane_readahead(unsigned long nr); +unsigned long max_sane_readahead(unsigned long nr, int order); /* Do stack extension */ extern int expand_stack(struct vm_area_struct *vma, unsigned long address); Index: linux-2.6/mm/fadvise.c =================================================================== --- linux-2.6.orig/mm/fadvise.c 2008-02-16 20:28:44.000000000 -0800 +++ linux-2.6/mm/fadvise.c 2008-02-16 21:01:44.000000000 -0800 @@ -98,10 +98,10 @@ asmlinkage long sys_fadvise64_64(int fd, nrpages = end_index - start_index + 1; if (!nrpages) nrpages = ~0UL; - + ret = force_page_cache_readahead(mapping, file, start_index, - max_sane_readahead(nrpages)); + nrpages); if (ret > 0) ret = 0; break; Index: linux-2.6/mm/filemap.c =================================================================== --- linux-2.6.orig/mm/filemap.c 2008-02-16 20:55:14.000000000 -0800 +++ linux-2.6/mm/filemap.c 2008-02-16 21:01:44.000000000 -0800 @@ -1253,8 +1253,7 @@ do_readahead(struct address_space *mappi if (!mapping || !mapping->a_ops || !mapping->a_ops->readpage) return -EINVAL; - force_page_cache_readahead(mapping, filp, index, - max_sane_readahead(nr)); + force_page_cache_readahead(mapping, filp, index, nr); return 0; } @@ -1388,7 +1387,7 @@ retry_find: count_vm_event(PGMAJFAULT); } did_readaround = 1; - ra_pages = max_sane_readahead(file->f_ra.ra_pages); + ra_pages = file->f_ra.ra_pages; if (ra_pages) { pgoff_t start = 0; Index: linux-2.6/mm/madvise.c =================================================================== --- linux-2.6.orig/mm/madvise.c 2008-02-16 20:28:45.000000000 -0800 +++ linux-2.6/mm/madvise.c 2008-02-16 21:01:44.000000000 -0800 @@ -124,7 +124,7 @@ static long madvise_willneed(struct vm_a end = ((end - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; force_page_cache_readahead(file->f_mapping, - file, start, max_sane_readahead(end - start)); + file, start, end - start); return 0; } Index: linux-2.6/mm/readahead.c =================================================================== --- linux-2.6.orig/mm/readahead.c 2008-02-16 20:28:45.000000000 -0800 +++ linux-2.6/mm/readahead.c 2008-02-16 21:03:04.000000000 -0800 @@ -37,7 +37,8 @@ EXPORT_SYMBOL_GPL(default_backing_dev_in void file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping) { - ra->ra_pages = mapping->backing_dev_info->ra_pages; + ra->ra_pages = DIV_ROUND_UP(mapping->backing_dev_info->ra_pages, + page_cache_size(mapping)); ra->prev_pos = -1; } EXPORT_SYMBOL_GPL(file_ra_state_init); @@ -75,7 +76,7 @@ int read_cache_pages(struct address_spac put_pages_list(pages); break; } - task_io_account_read(PAGE_CACHE_SIZE); + task_io_account_read(page_cache_size(mapping)); } return ret; } @@ -136,7 +137,7 @@ __do_page_cache_readahead(struct address if (isize == 0) goto out; - end_index = ((isize - 1) >> PAGE_CACHE_SHIFT); + end_index = page_cache_index(mapping, isize - 1); /* * Preallocate as many pages as we will need. @@ -187,10 +188,12 @@ int force_page_cache_readahead(struct ad if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages)) return -EINVAL; + nr_to_read = max_sane_readahead(nr_to_read, mapping_order(mapping)); while (nr_to_read) { int err; - unsigned long this_chunk = (2 * 1024 * 1024) / PAGE_CACHE_SIZE; + unsigned long this_chunk = DIV_ROUND_UP(2 * 1024 * 1024, + page_cache_size(mapping)); if (this_chunk > nr_to_read) this_chunk = nr_to_read; @@ -220,17 +223,20 @@ int do_page_cache_readahead(struct addre if (bdi_read_congested(mapping->backing_dev_info)) return -1; + nr_to_read = max_sane_readahead(nr_to_read, mapping_order(mapping)); return __do_page_cache_readahead(mapping, filp, offset, nr_to_read, 0); } /* - * Given a desired number of PAGE_CACHE_SIZE readahead pages, return a + * Given a desired number of page order readahead pages, return a * sensible upper limit. */ -unsigned long max_sane_readahead(unsigned long nr) +unsigned long max_sane_readahead(unsigned long nr, int order) { - return min(nr, (node_page_state(numa_node_id(), NR_INACTIVE) - + node_page_state(numa_node_id(), NR_FREE_PAGES)) / 2); + unsigned long base_pages = node_page_state(numa_node_id(), NR_INACTIVE) + + node_page_state(numa_node_id(), NR_FREE_PAGES); + + return min(nr, (base_pages / 2) >> order); } static int __init readahead_init(void)