---
 include/linux/mm.h |    1 
 mm/filemap.c       |   23 ++++++++++++----------
 mm/memory.c        |   35 ++++++++++++++++++++++++----------
 mm/rmap.c          |   54 +++++++++++++++++++++++++++++++++++++++++++++--------
 4 files changed, 85 insertions(+), 28 deletions(-)

Index: linux-2.6/mm/filemap.c
===================================================================
--- linux-2.6.orig/mm/filemap.c	2007-09-07 17:37:55.000000000 -0700
+++ linux-2.6/mm/filemap.c	2007-09-07 17:37:55.000000000 -0700
@@ -1320,9 +1320,12 @@
 	unsigned long size;
 	int did_readaround = 0;
 	int ret = 0;
+	pgoff_t pgoff = vmf->pgoff >> mapping_order(mapping);
+	vmf->base_page_index =
+		vmf->pgoff & ((1 << mapping_order(mapping)) - 1);
 
 	size = page_cache_next(mapping, i_size_read(inode));
-	if (vmf->pgoff >= size)
+	if (pgoff >= size)
 		goto outside_data_content;
 
 	/* If we don't want any read-ahead, don't bother */
@@ -1333,21 +1336,21 @@
 	 * Do we have something in the page cache already?
 	 */
 retry_find:
-	page = find_lock_page(mapping, vmf->pgoff);
+	page = find_lock_page(mapping, pgoff);
 	/*
 	 * For sequential accesses, we use the generic readahead logic.
 	 */
 	if (VM_SequentialReadHint(vma)) {
 		if (!page) {
 			page_cache_sync_readahead(mapping, ra, file,
-							   vmf->pgoff, 1);
-			page = find_lock_page(mapping, vmf->pgoff);
+							   pgoff, 1);
+			page = find_lock_page(mapping, pgoff);
 			if (!page)
 				goto no_cached_page;
 		}
 		if (PageReadahead(page)) {
 			page_cache_async_readahead(mapping, ra, file, page,
-							   vmf->pgoff, 1);
+							   pgoff, 1);
 		}
 	}
 
@@ -1377,10 +1380,10 @@
 			pgoff_t start = 0;
 
-			if (vmf->pgoff > ra_pages / 2)
-				start = vmf->pgoff - ra_pages / 2;
+			if (pgoff > ra_pages / 2)
+				start = pgoff - ra_pages / 2;
 			do_page_cache_readahead(mapping, file, start, ra_pages);
 		}
-		page = find_lock_page(mapping, vmf->pgoff);
+		page = find_lock_page(mapping, pgoff);
 		if (!page)
 			goto no_cached_page;
 	}
@@ -1397,7 +1400,7 @@
 
 	/* Must recheck i_size under page lock */
 	size = page_cache_next(mapping, i_size_read(inode));
-	if (unlikely(vmf->pgoff >= size)) {
+	if (unlikely(pgoff >= size)) {
 		unlock_page(page);
 		goto outside_data_content;
 	}
@@ -1424,7 +1427,7 @@
 	 * We're only likely to ever get here if MADV_RANDOM is in
 	 * effect.
 	 */
-	error = page_cache_read(file, vmf->pgoff);
+	error = page_cache_read(file, pgoff);
 
 	/*
 	 * The page we want has now been added to the page cache.
@@ -1482,7 +1485,7 @@
 	/*
 	 * Forbid mmap access to higher order mappings.
 	 */
-	if (mapping_order(mapping))
+	if (mapping_order(mapping) && mapping_writably_mapped(mapping))
 		return -ENOSYS;
 
 	if (!mapping->a_ops->readpage)
Index: linux-2.6/mm/memory.c
===================================================================
--- linux-2.6.orig/mm/memory.c	2007-09-07 17:37:24.000000000 -0700
+++ linux-2.6/mm/memory.c	2007-09-07 17:49:52.000000000 -0700
@@ -382,6 +382,11 @@
  * and if that isn't true, the page has been COW'ed (in which case it
  * _does_ have a "struct page" associated with it even if it is in a
  * VM_PFNMAP range).
+ *
+ * vm_normal_page may return a tail page of a compound page. The tail
+ * page pointer allows the determination of the PAGE_SIZE slice
+ * intended to be operated on. The head page can be determined
+ * from the tail page.
  */
 struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
 {
@@ -478,9 +483,11 @@
 
 	page = vm_normal_page(vma, addr, pte);
 	if (page) {
-		get_page(page);
-		page_dup_rmap(page, vma, addr);
-		rss[!!PageAnon(page)]++;
+		struct page *head = page_cache_head(page);
+
+		get_page(head);
+		page_dup_rmap(head, vma, addr);
+		rss[!!PageAnon(head)]++;
 	}
 
 out_set_pte:
@@ -639,9 +646,15 @@
 		(*zap_work) -= PAGE_SIZE;
 
 		if (pte_present(ptent)) {
-			struct page *page;
-
-			page = vm_normal_page(vma, addr, ptent);
+			struct page *spage;
+			struct page *page = NULL;
+			int page_index = 0;
+
+			spage = vm_normal_page(vma, addr, ptent);
+			if (spage) {
+				page = page_cache_head(spage);
+				page_index = spage - page;
+			}
 			if (unlikely(details) && page) {
 				/*
 				 * unmap_shared_mapping_pages() wants to
@@ -669,7 +682,7 @@
 				     && linear_page_index(details->nonlinear_vma,
 						addr) != page->index)
 					set_pte_at(mm, addr, pte,
-						   pgoff_to_pte(page->index));
+						   pgoff_to_pte(page->index + page_index));
 			if (PageAnon(page))
 				anon_rss--;
 			else {
@@ -680,7 +693,7 @@
 				file_rss--;
 			}
 			page_remove_rmap(page, vma);
-			tlb_remove_page(tlb, page);
+			tlb_remove_page(tlb, spage);
 			continue;
 		}
 		/*
@@ -897,6 +910,8 @@
 
 /*
  * Do a quick page-table lookup for a single page.
+ *
+ * This may return a tail page.
  */
 struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
 			unsigned int flags)
@@ -906,7 +921,7 @@
 	pmd_t *pmd;
 	pte_t *ptep, pte;
 	spinlock_t *ptl;
-	struct page *page;
+	struct page *page, *head;
 	struct mm_struct *mm = vma->vm_mm;
 
 	page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
@@ -947,13 +962,14 @@
 	if (unlikely(!page))
 		goto unlock;
 
+	head = page_cache_head(page);
 	if (flags & FOLL_GET)
-		get_page(page);
+		get_page(head);
 	if (flags & FOLL_TOUCH) {
 		if ((flags & FOLL_WRITE) &&
-		    !pte_dirty(pte) && !PageDirty(page))
-			set_page_dirty(page);
-		mark_page_accessed(page);
+		    !pte_dirty(pte) && !PageDirty(head))
+			set_page_dirty(head);
+		mark_page_accessed(head);
 	}
 unlock:
 	pte_unmap_unlock(ptep, ptl);
@@ -1022,7 +1038,7 @@
 			struct page *page = vm_normal_page(gate_vma, start, *pte);
 			pages[i] = page;
 			if (page)
-				get_page(page);
+				get_page(page_cache_head(page));
 		}
 		pte_unmap(pte);
 		if (vmas)
@@ -1638,13 +1654,20 @@
 {
 	struct page *old_page, *new_page;
 	pte_t entry;
-	int reuse = 0, ret = 0;
+	int reuse = 0, ret = 0, subpage_index = 0;
 	struct page *dirty_page = NULL;
 
 	old_page = vm_normal_page(vma, address, orig_pte);
 	if (!old_page)
 		goto gotten;
 
+	if (PageTail(old_page)) {
+		struct page *head = page_cache_head(old_page);
+
+		subpage_index = old_page - head;
+		old_page = head;
+	}
+
 	/*
 	 * Take out anonymous pages first, anonymous shared vmas are
 	 * not dirty accountable.
@@ -1722,7 +1745,8 @@
 		new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
 		if (!new_page)
 			goto oom;
-		cow_user_page(new_page, old_page, address, vma);
+		cow_user_page(new_page, old_page + subpage_index,
+						address, vma);
 	}
 
 	/*
@@ -2326,6 +2350,7 @@
 	vmf.pgoff = pgoff;
 	vmf.flags = flags;
 	vmf.page = NULL;
+	vmf.base_page_index = 0;
 
 	pte_unmap(page_table);
 	BUG_ON(vma->vm_flags & VM_PFNMAP);
@@ -2371,7 +2396,8 @@
 			ret = VM_FAULT_OOM;
 			goto out;
 		}
-		copy_user_highpage(page, vmf.page, address, vma);
+		copy_user_highpage(page,
+			vmf.page + vmf.base_page_index, address, vma);
 	} else {
 		/*
 		 * If the page will be shareable, see if the backing
@@ -2417,8 +2443,8 @@
 	 */
 	/* Only go through if we didn't race with anybody else... */
 	if (likely(pte_same(*page_table, orig_pte))) {
-		flush_icache_page(vma, page);
-		entry = mk_pte(page, vma->vm_page_prot);
+		flush_icache_page(vma, page + vmf.base_page_index);
+		entry = mk_pte(page + vmf.base_page_index, vma->vm_page_prot);
 		if (flags & FAULT_FLAG_WRITE)
 			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 		set_pte_at(mm, address, page_table, entry);
Index: linux-2.6/include/linux/mm.h
===================================================================
--- linux-2.6.orig/include/linux/mm.h	2007-09-07 17:37:55.000000000 -0700
+++ linux-2.6/include/linux/mm.h	2007-09-07 17:37:55.000000000 -0700
@@ -216,6 +216,7 @@
 					 * is set (which is also implied by
 					 * VM_FAULT_ERROR).
 					 */
+	int base_page_index;
 };
 
 /*
Index: linux-2.6/mm/rmap.c
===================================================================
--- linux-2.6.orig/mm/rmap.c	2007-09-07 17:37:55.000000000 -0700
+++ linux-2.6/mm/rmap.c	2007-09-07 17:37:55.000000000 -0700
@@ -271,7 +271,7 @@
  * Subfunctions of page_referenced: page_referenced_one called
  * repeatedly from either page_referenced_anon or page_referenced_file.
  */
-static int page_referenced_one(struct page *page,
+static int __page_referenced_one(struct page *page,
 	struct vm_area_struct *vma, unsigned int *mapcount)
 {
 	struct mm_struct *mm = vma->vm_mm;
@@ -303,6 +303,18 @@
 	return referenced;
 }
 
+static int page_referenced_one(struct page *page,
+	struct vm_area_struct *vma, unsigned int *mapcount)
+{
+	int i;
+	int referenced = 0;
+
+	for (i = 0; i < page_cache_pages(page); i++)
+		referenced += __page_referenced_one(page++, vma, mapcount);
+
+	return referenced;
+}
+
 static int page_referenced_anon(struct page *page)
 {
 	unsigned int mapcount;
@@ -417,7 +429,7 @@
 	return referenced;
 }
 
-static int page_mkclean_one(struct page *page, struct vm_area_struct *vma)
+static int __page_mkclean_one(struct page *page, struct vm_area_struct *vma)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long address;
@@ -450,6 +462,17 @@
 	return ret;
 }
 
+static int page_mkclean_one(struct page *page, struct vm_area_struct *vma)
+{
+	int i;
+	int ret = 0;
+
+	for (i = 0; i < page_cache_pages(page); i++)
+		ret += __page_mkclean_one(page + i, vma);
+
+	return ret;
+}
+
 static int page_mkclean_file(struct address_space *mapping, struct page *page)
 {
 	pgoff_t pgoff = page->index << (page_cache_shift(mapping) - PAGE_SHIFT);
@@ -657,8 +680,8 @@
  * Subfunctions of try_to_unmap: try_to_unmap_one called
  * repeatedly from either try_to_unmap_anon or try_to_unmap_file.
  */
-static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
-				int migration)
+static int __try_to_unmap_one(struct page *page, int base_page_index,
+				struct vm_area_struct *vma, int migration)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long address;
@@ -667,11 +690,11 @@
 	spinlock_t *ptl;
 	int ret = SWAP_AGAIN;
 
-	address = vma_address(page, vma);
+	address = vma_address(page + base_page_index, vma);
 	if (address == -EFAULT)
 		goto out;
 
-	pte = page_check_address(page, mm, address, &ptl);
+	pte = page_check_address(page + base_page_index, mm, address, &ptl);
 	if (!pte)
 		goto out;
 
@@ -687,7 +710,7 @@
 	}
 
 	/* Nuke the page table entry. */
-	flush_cache_page(vma, address, page_to_pfn(page));
+	flush_cache_page(vma, address, page_to_pfn(page + base_page_index));
 	pteval = ptep_clear_flush(vma, address, pte);
 
 	/* Move the dirty bit to the physical page now the pte is gone. */
@@ -731,7 +754,8 @@
 	if (migration) {
 		/* Establish migration entry for a file page */
 		swp_entry_t entry;
-		entry = make_migration_entry(page, pte_write(pteval));
+		entry = make_migration_entry(page + base_page_index,
+						pte_write(pteval));
 		set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
 	} else
 #endif
@@ -747,6 +771,20 @@
 	return ret;
 }
 
+static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
+				int migration)
+{
+	int ret = SWAP_AGAIN;
+	int i;
+
+	for (i = 0; i < page_cache_pages(page); i++) {
+		ret = __try_to_unmap_one(page, i, vma, migration);
+		if (ret == SWAP_FAIL || !page_mapped(page))
+			return ret;
+	}
+	return ret;
+}
+
 /*
  * objrmap doesn't work for nonlinear VMAs because the assumption that
  * offset-into-file correlates with offset-into-virtual-addresses does not hold.
@@ -779,7 +817,7 @@
 	pte_t *pte;
 	pte_t pteval;
 	spinlock_t *ptl;
-	struct page *page;
+	struct page *page, *head;
 	unsigned long address;
 	unsigned long end;
 
@@ -816,6 +854,7 @@
 		if (ptep_clear_flush_young(vma, address, pte))
 			continue;
 
+		head = page_cache_head(page);
 		/* Nuke the page table entry. */
 		flush_cache_page(vma, address, pte_pfn(*pte));
 		pteval = ptep_clear_flush(vma, address, pte);
@@ -826,10 +865,10 @@
 
 		/* Move the dirty bit to the physical page now the pte is gone. */
 		if (pte_dirty(pteval))
-			set_page_dirty(page);
+			set_page_dirty(head);
 
-		page_remove_rmap(page, vma);
-		page_cache_release(page);
+		page_remove_rmap(head, vma);
+		page_cache_release(head);
 		dec_mm_counter(mm, file_rss);
 		(*mapcount)--;
 	}
Index: linux-2.6/mm/fremap.c
===================================================================
--- linux-2.6.orig/mm/fremap.c	2007-09-07 17:37:24.000000000 -0700
+++ linux-2.6/mm/fremap.c	2007-09-07 18:02:37.000000000 -0700
@@ -32,10 +32,12 @@
 		pte = ptep_clear_flush(vma, addr, ptep);
 		page = vm_normal_page(vma, addr, pte);
 		if (page) {
+			struct page *head = page_cache_head(page);
+
 			if (pte_dirty(pte))
-				set_page_dirty(page);
-			page_remove_rmap(page, vma);
-			page_cache_release(page);
+				set_page_dirty(head);
+			page_remove_rmap(head, vma);
+			page_cache_release(head);
 			update_hiwater_rss(mm);
 			dec_mm_counter(mm, file_rss);
 		}
Index: linux-2.6/mm/mempolicy.c
===================================================================
--- linux-2.6.orig/mm/mempolicy.c	2007-09-07 17:38:06.000000000 -0700
+++ linux-2.6/mm/mempolicy.c	2007-09-07 17:38:08.000000000 -0700
@@ -228,7 +228,7 @@
 		struct page *page;
 		int nid;
 
-		if (!pte_present(*pte))
+		if (!pte_present(*pte) || PageTail(page))
 			continue;
 		page = vm_normal_page(vma, addr, *pte);
 		if (!page)
@@ -256,7 +256,9 @@
 			migrate_page_add(page, private, flags);
 		else
 			break;
-	} while (pte++, addr += PAGE_SIZE, addr != end);
+	} while (pte += page_cache_pages(page),
+			addr += page_cache_size(page),
+			addr != end);
 	pte_unmap_unlock(orig_pte, ptl);
 	return addr != end;
 }
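
A note on the index arithmetic (not part of the patch): filemap_fault now
splits the small-page offset in two, and the rest of the series relies on
that split. The fragment below is only a userspace sketch of the same
arithmetic, assuming PAGE_SHIFT of 12 and a made-up MAPPING_ORDER standing
in for mapping_order(mapping); none of it is kernel code.

#include <stdio.h>

#define PAGE_SHIFT	12
#define MAPPING_ORDER	2	/* hypothetical order-2 (16k) page cache */

int main(void)
{
	unsigned long address = 0x9000;	/* byte offset of the fault */

	/* what vmf->pgoff carries on entry: index in PAGE_SIZE units */
	unsigned long small_pgoff = address >> PAGE_SHIFT;

	/* compound-page index, used for find_lock_page() and friends */
	unsigned long pgoff = small_pgoff >> MAPPING_ORDER;

	/* PAGE_SIZE slice within the compound page (vmf->base_page_index) */
	unsigned long base = small_pgoff & ((1UL << MAPPING_ORDER) - 1);

	/* prints: small pgoff 9 -> compound pgoff 2, base_page_index 1 */
	printf("small pgoff %lu -> compound pgoff %lu, base_page_index %lu\n",
	       small_pgoff, pgoff, base);
	return 0;
}

The pte side then works with (head page + base_page_index), which is what
the mk_pte()/flush_icache_page() changes in __do_fault() above express.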