From: Matt Mackall

This makes physical page flags and counts available to userspace.
Together with /proc/pid/pagemap and /proc/pid/clear_refs, this can be
used to measure memory usage on a per-page basis.

[bunk@stusta.de: make struct proc_kpagemap static]
Signed-off-by: Matt Mackall
Cc: Jeremy Fitzhardinge
Cc: David Rientjes
Signed-off-by: Adrian Bunk
Signed-off-by: Andrew Morton
---

 fs/proc/proc_misc.c |   92 ++++++++++++++++++++++++++++++++++++++++++
 init/Kconfig        |    9 ++++
 2 files changed, 101 insertions(+)

diff -puN fs/proc/proc_misc.c~maps2-add-proc-kpagemap-interface fs/proc/proc_misc.c
--- a/fs/proc/proc_misc.c~maps2-add-proc-kpagemap-interface
+++ a/fs/proc/proc_misc.c
@@ -46,6 +46,8 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include
 #include
@@ -645,6 +647,91 @@ static const struct file_operations proc
 };
 #endif
 
+#ifdef CONFIG_PROC_KPAGEMAP
+#define KPMSIZE (sizeof(unsigned long) * 2)
+#define KPMMASK (KPMSIZE - 1)
+/* /proc/kpagemap - an array exposing page flags and counts
+ *
+ * Each entry is a pair of unsigned longs representing the
+ * corresponding physical page, the first containing the page flags
+ * and the second containing the page use count.
+ *
+ * The first 4 bytes of this file form a simple header:
+ *
+ * first byte: 0 for big endian, 1 for little
+ * second byte: page shift (eg 12 for 4096 byte pages)
+ * third byte: entry size in bytes (currently either 4 or 8)
+ * fourth byte: header size
+ */
+static ssize_t kpagemap_read(struct file *file, char __user *buf,
+			     size_t count, loff_t *ppos)
+{
+	unsigned long *page;
+	struct page *ppage;
+	unsigned long src = *ppos;
+	unsigned long pfn;
+	ssize_t ret = 0;
+	int chunk, i;
+
+	pfn = src / KPMSIZE - 1;
+	count = min_t(size_t, count, ((max_pfn + 1) * KPMSIZE) - src);
+	if (src & KPMMASK || count & KPMMASK)
+		return -EIO;
+
+	page = (unsigned long *)__get_free_page(GFP_USER);
+	if (!page)
+		return -ENOMEM;
+
+	while (count > 0) {
+		chunk = min_t(size_t, count, PAGE_SIZE);
+		i = 0;
+
+		if (pfn == -1) {
+			page[0] = 0;
+			page[1] = 0;
+			((char *)page)[0] = (ntohl(1) != 1);
+			((char *)page)[1] = PAGE_SHIFT;
+			((char *)page)[2] = sizeof(unsigned long);
+			((char *)page)[3] = KPMSIZE;
+			i = 2;
+			pfn++;
+		}
+
+		for (; i < 2 * chunk / KPMSIZE; i += 2, pfn++) {
+			ppage = pfn_to_page(pfn);
+			if (!ppage) {
+				page[i] = 0;
+				page[i + 1] = 0;
+			} else {
+				page[i] = ppage->flags;
+				page[i + 1] = atomic_read(&ppage->_count);
+			}
+		}
+		chunk = (i / 2) * KPMSIZE;
+
+		if (copy_to_user(buf, page, chunk)) {
+			ret = -EFAULT;
+			break;
+		}
+		ret += chunk;
+		src += chunk;
+		buf += chunk;
+		count -= chunk;
+		cond_resched();
+	}
+	*ppos = src;
+
+	free_page((unsigned long)page);
+	return ret;
+}
+
+static struct proc_dir_entry *proc_kpagemap;
+static struct file_operations proc_kpagemap_operations = {
+	.llseek = mem_lseek,
+	.read = kpagemap_read,
+};
+#endif
+
 struct proc_dir_entry *proc_root_kcore;
 
 void create_seq_entry(char *name, mode_t mode, const struct file_operations *f)
@@ -724,6 +811,11 @@ void __init proc_misc_init(void)
 			(size_t)high_memory - PAGE_OFFSET + PAGE_SIZE;
 	}
 #endif
+#ifdef CONFIG_PROC_KPAGEMAP
+	proc_kpagemap = create_proc_entry("kpagemap", S_IRUSR, NULL);
+	if (proc_kpagemap)
+		proc_kpagemap->proc_fops = &proc_kpagemap_operations;
+#endif
 #ifdef CONFIG_PROC_VMCORE
 	proc_vmcore = create_proc_entry("vmcore", S_IRUSR, NULL);
 	if (proc_vmcore)
diff -puN init/Kconfig~maps2-add-proc-kpagemap-interface init/Kconfig
--- a/init/Kconfig~maps2-add-proc-kpagemap-interface
+++ a/init/Kconfig
@@ -614,6 +614,15 @@ config PROC_PAGEMAP
 	  with other processes.  Disabling this interface will reduce the
 	  size of the kernel for small machines.
 
+config PROC_KPAGEMAP
+	default y
+	bool "Enable /proc/kpagemap support" if EMBEDDED && PROC_FS
+	help
+	  The /proc/kpagemap interface allows reading the
+	  kernel's per-page flag and usage counts to gather precise
+	  information on page-level memory usage.  Disabling this interface
+	  will reduce the size of the kernel for small machines.
+
 endmenu			# General setup
 
 config RT_MUTEXES
_
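For reference, a minimal userspace sketch of a reader for the file layout
described in the header comment above.  It is not part of the patch; it
assumes it runs as root (the file is created 0400), that userspace and the
kernel share the same sizeof(unsigned long), and that reads are issued at
entry-aligned offsets in entry-sized multiples as the interface requires.

/* Hypothetical example: dump the /proc/kpagemap header and the
 * flags/count pairs for the first few physical pages. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	unsigned long buf[16];		/* eight (flags, count) pairs */
	unsigned char hdr[4];
	ssize_t n;
	long i;
	int fd = open("/proc/kpagemap", O_RDONLY);

	if (fd < 0) {
		perror("open /proc/kpagemap");
		return 1;
	}

	/* The header shares the first entry slot, so read one whole
	 * entry and pick the four header bytes out of it. */
	n = pread(fd, buf, 2 * sizeof(unsigned long), 0);
	if (n != (ssize_t)(2 * sizeof(unsigned long))) {
		perror("pread header");
		return 1;
	}
	memcpy(hdr, buf, 4);
	printf("endian: %s, page shift: %u, entry size: %u, header size: %u\n",
	       hdr[0] ? "little" : "big", hdr[1], hdr[2], hdr[3]);

	/* Page data starts right after the header-sized prefix;
	 * entry N then describes pfn N. */
	n = pread(fd, buf, sizeof(buf), hdr[3]);
	for (i = 0; i + 1 < n / (long)sizeof(unsigned long); i += 2)
		printf("pfn %ld: flags %#lx, count %lu\n",
		       i / 2, buf[i], buf[i + 1]);

	close(fd);
	return 0;
}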