Improve /proc/<pid>/numa_maps

This changes the format of numa_maps to be more compact and to contain
additional information that is useful for managing and troubleshooting
memory on a NUMA system.

Fixes:

1. More compact format. Fields are only displayed if they contain
   additional information.

2. Always display information for all vmas. The old numa_maps did not
   display vmas with no mapped entries. This was confusing because page
   migration removes the ptes of file-backed vmas, so after page
   migration a part of the vmas vanished from numa_maps.

3. Rename maxref to maxshare. This is the maximum mapcount of all the
   pages in a vma and may be used as an indicator of how many processes
   are sharing a page.

New items shown:

dirty
	Number of pages in the vma that have the dirty bit set either in
	the page struct or in the pte.

pages_dirty
	Number of page structs that have the dirty bit set. Only
	displayed if it differs from the number of dirty pages. If
	displayed, it means that some pages have the dirty bit set in
	the pte but not in the page struct.

ptes_dirty
	Number of ptes that have the dirty bit set. Only displayed if it
	differs from the number of dirty pages. If displayed, it means
	that some dirty pages have ptes without the dirty bit set. A
	fault may be needed to sync the pte with the page struct.

file=
	The file backing the memory area.

heap
	Heap area.

stack
	Stack area.

huge
	Huge page area. The range is scanned at huge page granularity.

swapcache
	Number of pages with swap references. Only displayed if >0.

active
	Number of active pages. Only displayed if different from the
	number of pages mapped.

locked
	Number of pages locked. Only displayed if >0.

Signed-off-by: Christoph Lameter <clameter@sgi.com>

Index: linux-2.6.16-rc5/mm/mempolicy.c
===================================================================
--- linux-2.6.16-rc5.orig/mm/mempolicy.c	2006-02-26 21:09:35.000000000 -0800
+++ linux-2.6.16-rc5/mm/mempolicy.c	2006-03-02 23:26:27.000000000 -0800
@@ -197,7 +197,7 @@ static struct mempolicy *mpol_new(int mo
 	return policy;
 }
 
-static void gather_stats(struct page *, void *);
+static void gather_stats(struct page *, void *, int pte_dirty);
 static void migrate_page_add(struct page *page, struct list_head *pagelist,
 				unsigned long flags);
 
@@ -239,7 +239,7 @@ static int check_pte_range(struct vm_are
 			continue;
 
 		if (flags & MPOL_MF_STATS)
-			gather_stats(page, private);
+			gather_stats(page, private, pte_dirty(*pte));
 		else if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
 			migrate_page_add(page, private, flags);
 		else
@@ -1752,66 +1752,148 @@ static inline int mpol_to_str(char *buff
 struct numa_maps {
 	unsigned long pages;
 	unsigned long anon;
-	unsigned long mapped;
+	unsigned long active;
+	unsigned long locked;
 	unsigned long mapcount_max;
+	unsigned long dirty;
+	unsigned long pages_dirty;
+	unsigned long ptes_dirty;
+	unsigned long swapcache;
 	unsigned long node[MAX_NUMNODES];
 };
 
-static void gather_stats(struct page *page, void *private)
+static void gather_stats(struct page *page, void *private, int pte_dirty)
 {
 	struct numa_maps *md = private;
 	int count = page_mapcount(page);
 
-	if (count)
-		md->mapped++;
+	md->pages++;
 
-	if (count > md->mapcount_max)
-		md->mapcount_max = count;
+	if (pte_dirty || PageDirty(page))
+		md->dirty++;
 
-	md->pages++;
+	if (PageSwapCache(page))
+		md->swapcache++;
+
+	if (PageActive(page))
+		md->active++;
+
+	if (PageLocked(page))
+		md->locked++;
 
 	if (PageAnon(page))
 		md->anon++;
 
+	if (count > md->mapcount_max)
+		md->mapcount_max = count;
+
 	md->node[page_to_nid(page)]++;
 	cond_resched();
 }
 
+static void check_huge_range(struct vm_area_struct *vma,
+		unsigned long start, unsigned long end,
+		struct numa_maps *md)
+{
+	unsigned long addr;
+	struct page *page;
+
+	for (addr = start; addr < end; addr += HPAGE_SIZE) {
+		pte_t *ptep = huge_pte_offset(vma->vm_mm, addr & HPAGE_MASK);
+		pte_t pte;
+
+		if (!ptep)
+			continue;
+
+		pte = *ptep;
+		if (pte_none(pte))
+			continue;
+
+		page = pte_page(pte);
+		if (!page)
+			continue;
+
+		gather_stats(page, md, pte_dirty(pte));
+	}
+}
+
 int show_numa_map(struct seq_file *m, void *v)
 {
 	struct task_struct *task = m->private;
 	struct vm_area_struct *vma = v;
 	struct numa_maps *md;
+	struct file *file = vma->vm_file;
+	struct mm_struct *mm = vma->vm_mm;
 	int n;
 	char buffer[50];
 
-	if (!vma->vm_mm)
+	if (!mm)
 		return 0;
 
 	md = kzalloc(sizeof(struct numa_maps), GFP_KERNEL);
 	if (!md)
 		return 0;
 
-	check_pgd_range(vma, vma->vm_start, vma->vm_end,
-		&node_online_map, MPOL_MF_STATS, md);
-
-	if (md->pages) {
-		mpol_to_str(buffer, sizeof(buffer),
-			get_vma_policy(task, vma, vma->vm_start));
-
-		seq_printf(m, "%08lx %s pages=%lu mapped=%lu maxref=%lu",
-			vma->vm_start, buffer, md->pages,
-			md->mapped, md->mapcount_max);
-
-		if (md->anon)
-			seq_printf(m," anon=%lu",md->anon);
-
-		for_each_online_node(n)
-			if (md->node[n])
-				seq_printf(m, " N%d=%lu", n, md->node[n]);
-
-		seq_putc(m, '\n');
-	}
+	mpol_to_str(buffer, sizeof(buffer),
+			get_vma_policy(task, vma, vma->vm_start));
+
+	seq_printf(m, "%08lx %s", vma->vm_start, buffer);
+
+	if (file) {
+		seq_printf(m, " file=");
+		seq_path(m, file->f_vfsmnt, file->f_dentry, "\n\t");
+	} else if (vma->vm_start <= mm->brk &&
+			vma->vm_end >= mm->start_brk) {
+		seq_printf(m, " heap");
+	} else if (vma->vm_start <= mm->start_stack &&
+			vma->vm_end >= mm->start_stack) {
+		seq_printf(m, " stack");
+	}
+
+	if (is_vm_hugetlb_page(vma)) {
+		check_huge_range(vma, vma->vm_start, vma->vm_end, md);
+		seq_printf(m, " huge");
+	} else {
+		check_pgd_range(vma, vma->vm_start, vma->vm_end,
+			&node_online_map, MPOL_MF_STATS, md);
+	}
+
+	if (!md->pages)
+		goto out;
+
+	if (md->anon)
+		seq_printf(m, " anon=%lu", md->anon);
+
+	if (md->dirty)
+		seq_printf(m, " dirty=%lu", md->dirty);
+
+	if (md->pages != md->anon && md->pages != md->dirty)
+		seq_printf(m, " mapped=%lu", md->pages);
+
+	if (md->mapcount_max > 1)
+		seq_printf(m, " maxshare=%lu", md->mapcount_max);
+
+	if (md->swapcache)
+		seq_printf(m, " swapcache=%lu", md->swapcache);
+
+	if (md->active < md->pages)
+		seq_printf(m, " active=%lu", md->active);
+
+	if (md->locked)
+		seq_printf(m, " locked=%lu", md->locked);
+
+	for_each_online_node(n)
+		if (md->node[n])
+			seq_printf(m, " N%d=%lu", n, md->node[n]);
+out:
+	seq_putc(m, '\n');
 	kfree(md);
 
 	if (m->count < m->size)
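
For illustration, output in the new format could look like the lines
below. These are hypothetical examples (addresses, policies, file names,
and counts are invented here, following the seq_printf calls above), not
output from an actual run:

	2000000000000000 default file=/lib/tls/libc.so.6.1 mapped=52 maxshare=31 N0=28 N1=24
	6000000000014000 default heap anon=12 dirty=12 active=10 N0=12
	60000fff7fffc000 default stack anon=2 dirty=2 N1=2

Each line begins with the vma start address and the memory policy; every
other field appears only when it carries information, which is what keeps
the format compact.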
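
Because the fields are now conditional, consumers have to scan tokens
rather than rely on fixed columns. As a rough userspace sketch (not part
of this patch; the file name and all details are made up), a tool could
sum the per-node counts of a process like this:

/* numa_maps_sum.c - sum per-node page counts from /proc/<pid>/numa_maps.
 *
 * Illustration only. Assumes the conditional "N<node>=<pages>" fields
 * emitted by the new show_numa_map() above.
 *
 * Build: cc -o numa_maps_sum numa_maps_sum.c
 * Usage: ./numa_maps_sum <pid>
 */
#include <stdio.h>
#include <string.h>

#define MAX_NODES 1024

int main(int argc, char **argv)
{
	char path[64], line[4096];
	unsigned long node_pages[MAX_NODES] = { 0 };
	FILE *f;
	int n;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <pid>\n", argv[0]);
		return 1;
	}

	snprintf(path, sizeof(path), "/proc/%s/numa_maps", argv[1]);
	f = fopen(path, "r");
	if (!f) {
		perror(path);
		return 1;
	}

	while (fgets(line, sizeof(line), f)) {
		char *tok;

		/* Fields are space separated; only some are present. */
		for (tok = strtok(line, " \n"); tok;
		     tok = strtok(NULL, " \n")) {
			int node;
			unsigned long pages;

			if (sscanf(tok, "N%d=%lu", &node, &pages) == 2 &&
			    node >= 0 && node < MAX_NODES)
				node_pages[node] += pages;
		}
	}
	fclose(f);

	for (n = 0; n < MAX_NODES; n++)
		if (node_pages[n])
			printf("node %d: %lu pages\n", n, node_pages[n]);
	return 0;
}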