Improve /proc/<pid>/numa_maps

This changes the format of numa_maps to be more compact and to contain
additional information that is useful for managing and troubleshooting
memory on a NUMA system.

Fixes:

1. More compact format. Fields are only displayed if they contain
   additional information.

2. Always display information for all vmas. The old numa_maps did not
   display vmas with no mapped entries. This was confusing because page
   migration removes the ptes of file-backed vmas, so after page
   migration a part of the vmas vanished from numa_maps.

3. Rename maxref to maxshare. This is the maximum mapcount of all the
   pages in a vma and may be used as an indicator of how many processes
   are sharing a page.

New items shown:

dirty
	Number of pages in the vma that have the dirty bit set either in
	the page struct or in the pte.

pages_dirty
	Number of page structs that have the dirty bit set. Only
	displayed if it differs from the number of dirty pages. If
	displayed, it means that some pages have the dirty bit set in
	the pte but not in the page struct.

ptes_dirty
	Number of ptes that have the dirty bit set. Only displayed if it
	differs from the number of dirty pages. If displayed, it means
	that some dirty pages have ptes without the dirty bit set. A
	fault may be needed to sync the pte with the page struct.

file=
	The file backing the memory area.

heap
	Heap area.

stack
	Stack area.

huge
	Huge page area. The range is scanned at huge page granularity.

swapcache
	Number of pages with swap references. Only displayed if >0.

active
	Number of active pages. Only displayed if different from the
	number of pages mapped.

locked
	Number of pages locked. Only displayed if >0.

Signed-off-by: Christoph Lameter <clameter@sgi.com>

Index: linux-2.6.16-rc5/mm/mempolicy.c
===================================================================
--- linux-2.6.16-rc5.orig/mm/mempolicy.c	2006-02-26 21:09:35.000000000 -0800
+++ linux-2.6.16-rc5/mm/mempolicy.c	2006-03-02 23:26:27.000000000 -0800
@@ -197,7 +197,7 @@ static struct mempolicy *mpol_new(int mo
 	return policy;
 }
 
-static void gather_stats(struct page *, void *);
+static void gather_stats(struct page *, void *, int pte_dirty);
 static void migrate_page_add(struct page *page, struct list_head *pagelist,
 				unsigned long flags);
 
@@ -239,7 +239,7 @@ static int check_pte_range(struct vm_are
 			continue;
 
 		if (flags & MPOL_MF_STATS)
-			gather_stats(page, private);
+			gather_stats(page, private, pte_dirty(*pte));
 		else if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
 			migrate_page_add(page, private, flags);
 		else
@@ -1752,66 +1752,148 @@ static inline int mpol_to_str(char *buff
 struct numa_maps {
 	unsigned long pages;
 	unsigned long anon;
-	unsigned long mapped;
+	unsigned long active;
+	unsigned long locked;
 	unsigned long mapcount_max;
+	unsigned long dirty;
+	unsigned long pages_dirty;
+	unsigned long ptes_dirty;
+	unsigned long swapcache;
 	unsigned long node[MAX_NUMNODES];
 };
 
-static void gather_stats(struct page *page, void *private)
+static void gather_stats(struct page *page, void *private, int pte_dirty)
 {
 	struct numa_maps *md = private;
 	int count = page_mapcount(page);
 
-	if (count)
-		md->mapped++;
+	md->pages++;
 
-	if (count > md->mapcount_max)
-		md->mapcount_max = count;
+	if (pte_dirty || PageDirty(page))
+		md->dirty++;
 
-	md->pages++;
+	if (PageSwapCache(page))
+		md->swapcache++;
+
+	if (PageActive(page))
+		md->active++;
+
+	if (PageLocked(page))
+		md->locked++;
 
 	if (PageAnon(page))
 		md->anon++;
 
+	if (count > md->mapcount_max)
+		md->mapcount_max = count;
+
 	md->node[page_to_nid(page)]++;
 	cond_resched();
 }
 
+static void check_huge_range(struct vm_area_struct *vma,
+		unsigned long start, unsigned long end,
+		struct numa_maps *md)
+{
+	unsigned long addr;
+	struct page *page;
+
+	for (addr = start; addr < end; addr += HPAGE_SIZE) {
+		pte_t *ptep = huge_pte_offset(vma->vm_mm, addr & HPAGE_MASK);
+		pte_t pte;
+
+		if (!ptep)
+			continue;
+
+		pte = *ptep;
+		if (pte_none(pte))
+			continue;
+
+		page = pte_page(pte);
+		if (!page)
+			continue;
+
+		gather_stats(page, md, pte_dirty(pte));
+	}
+}
+
 int show_numa_map(struct seq_file *m, void *v)
 {
 	struct task_struct *task = m->private;
 	struct vm_area_struct *vma = v;
 	struct numa_maps *md;
+	struct file *file = vma->vm_file;
+	struct mm_struct *mm = vma->vm_mm;
 	int n;
 	char buffer[50];
 
-	if (!vma->vm_mm)
+	if (!mm)
 		return 0;
 
 	md = kzalloc(sizeof(struct numa_maps), GFP_KERNEL);
 	if (!md)
 		return 0;
 
-	check_pgd_range(vma, vma->vm_start, vma->vm_end,
-		&node_online_map, MPOL_MF_STATS, md);
-
-	if (md->pages) {
-		mpol_to_str(buffer, sizeof(buffer),
-			get_vma_policy(task, vma, vma->vm_start));
-
-		seq_printf(m, "%08lx %s pages=%lu mapped=%lu maxref=%lu",
-			vma->vm_start, buffer, md->pages,
-			md->mapped, md->mapcount_max);
-
-		if (md->anon)
-			seq_printf(m," anon=%lu",md->anon);
-
-		for_each_online_node(n)
-			if (md->node[n])
-				seq_printf(m, " N%d=%lu", n, md->node[n]);
-
-		seq_putc(m, '\n');
-	}
+	mpol_to_str(buffer, sizeof(buffer),
+			get_vma_policy(task, vma, vma->vm_start));
+
+	seq_printf(m, "%08lx %s", vma->vm_start, buffer);
+
+	if (file) {
+		seq_printf(m, " file=");
+		seq_path(m, file->f_vfsmnt, file->f_dentry, "\n\t");
+	} else if (vma->vm_start <= mm->brk &&
+			vma->vm_end >= mm->start_brk) {
+		seq_printf(m, " heap");
+	} else if (vma->vm_start <= mm->start_stack &&
+			vma->vm_end >= mm->start_stack) {
+		seq_printf(m, " stack");
+	}
+
+	if (is_vm_hugetlb_page(vma)) {
+		check_huge_range(vma, vma->vm_start, vma->vm_end, md);
+		seq_printf(m, " huge");
+	} else {
+		check_pgd_range(vma, vma->vm_start, vma->vm_end,
+			&node_online_map, MPOL_MF_STATS, md);
+	}
+
+	if (!md->pages)
+		goto out;
+
+	if (md->anon)
+		seq_printf(m, " anon=%lu", md->anon);
+
+	if (md->dirty)
+		seq_printf(m, " dirty=%lu", md->dirty);
+
+	if (md->pages != md->anon && md->pages != md->dirty)
+		seq_printf(m, " mapped=%lu", md->pages);
+
+	if (md->mapcount_max > 1)
+		seq_printf(m, " maxshare=%lu", md->mapcount_max);
+
+	if (md->swapcache)
+		seq_printf(m, " swapcache=%lu", md->swapcache);
+
+	if (md->active < md->pages)
+		seq_printf(m, " active=%lu", md->active);
+
+	if (md->locked)
+		seq_printf(m, " locked=%lu", md->locked);
+
+	for_each_online_node(n)
+		if (md->node[n])
+			seq_printf(m, " N%d=%lu", n, md->node[n]);
+out:
+	seq_putc(m, '\n');
 	kfree(md);
 
 	if (m->count < m->size)
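
For illustration, output in the new format could look like the lines
below. These are hypothetical examples (addresses, policies, file names,
and counts are invented here, following the seq_printf calls above), not
output from an actual run:

	2000000000000000 default file=/lib/tls/libc.so.6.1 mapped=52 maxshare=31 N0=28 N1=24
	6000000000014000 default heap anon=12 dirty=12 active=10 N0=12
	60000fff7fffc000 default stack anon=2 dirty=2 N1=2

Each line begins with the vma start address and the memory policy; every
other field appears only when it carries information, which is what keeps
the format compact.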
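
Because the fields are now conditional, consumers have to scan tokens
rather than rely on fixed columns. As a rough userspace sketch (not part
of this patch; the file name and all details are made up), a tool could
sum the per-node counts of a process like this:

/* numa_maps_sum.c - sum per-node page counts from /proc/<pid>/numa_maps.
 *
 * Illustration only. Assumes the conditional "N<node>=<pages>" fields
 * emitted by the new show_numa_map() above.
 *
 * Build: cc -o numa_maps_sum numa_maps_sum.c
 * Usage: ./numa_maps_sum <pid>
 */
#include <stdio.h>
#include <string.h>

#define MAX_NODES 1024

int main(int argc, char **argv)
{
	char path[64], line[4096];
	unsigned long node_pages[MAX_NODES] = { 0 };
	FILE *f;
	int n;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <pid>\n", argv[0]);
		return 1;
	}

	snprintf(path, sizeof(path), "/proc/%s/numa_maps", argv[1]);
	f = fopen(path, "r");
	if (!f) {
		perror(path);
		return 1;
	}

	while (fgets(line, sizeof(line), f)) {
		char *tok;

		/* Fields are space separated; only some are present. */
		for (tok = strtok(line, " \n"); tok;
		     tok = strtok(NULL, " \n")) {
			int node;
			unsigned long pages;

			if (sscanf(tok, "N%d=%lu", &node, &pages) == 2 &&
			    node >= 0 && node < MAX_NODES)
				node_pages[node] += pages;
		}
	}
	fclose(f);

	for (n = 0; n < MAX_NODES; n++)
		if (node_pages[n])
			printf("node %d: %lu pages\n", n, node_pages[n]);
	return 0;
}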