---
 include/linux/perf.h |   60 ++++++++++
 init/Kconfig         |    9 +
 kernel/perf.c        |  333 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 402 insertions(+)

Index: linux-2.6.21-rc6/include/linux/perf.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.21-rc6/include/linux/perf.h	2007-04-13 23:11:57.000000000 -0700
@@ -0,0 +1,60 @@
+#ifndef __LINUX_PERF_H
+#define __LINUX_PERF_H
+
+/* NOTE(review): the include target was lost in mail transit; get_cycles()
+ * and smp_processor_id() need these two -- confirm against original. */
+#include <asm/timex.h>
+#include <linux/smp.h>
+/*
+ * Time Stamp Performance Counters
+ *
+ * (C) 2007 sgi
+ * Christoph Lameter <clameter@sgi.com>, April 2007
+ *
+ * Counters are calculated using the cycle counter. If a process
+ * is migrated to another cpu during the measurement then the measurement
+ * is invalid.
+ */
+
+enum pc_item {
+	PC_UPDATE_PROCESS_TIMES,
+	PC_ALLOC_PAGES,
+	PC_DIRECT_RECLAIM_SCANNED,
+	PC_DIRECT_RECLAIM_RECLAIMED,
+	PC_RECLAIM_SCANNED,
+	PC_RECLAIM_RECLAIMED,
+	NR_PC_ITEMS
+};
+
+/*
+ * Information about the start of the measurement
+ */
+struct pc {
+	unsigned long time;	/* cycle counter at pc_start() */
+	int processor;		/* cpu the measurement started on */
+	enum pc_item item;	/* which counter to credit */
+};
+
+/* Finish a measurement that transfers no bytes. */
+#define pc_stop(__pc) pc_bytes(__pc, 0)
+
+#ifdef CONFIG_PERFCOUNT
+
+#define INIT_PC(__var, __item) struct pc __var = \
+	{ get_cycles(), smp_processor_id(), __item }
+
+static inline void pc_start(struct pc *pc, enum pc_item nr)
+{
+	pc->item = nr;
+	pc->processor = smp_processor_id();
+	pc->time = get_cycles();
+}
+
+void pc_bytes(struct pc *pc, unsigned long bytes);
+
+#else
+
+/* Still declare the variable: callers do pc_stop(&__var) afterwards,
+ * which would not compile with the original do { } while(0) stub. */
+#define INIT_PC(__var, __item) struct pc __var
+static inline void pc_start(struct pc *pc, enum pc_item nr) {}
+static inline void pc_bytes(struct pc *pc, unsigned long bytes) {}
+
+#endif
+
+#endif
Index: linux-2.6.21-rc6/init/Kconfig
===================================================================
--- linux-2.6.21-rc6.orig/init/Kconfig	2007-04-13 23:11:03.000000000 -0700
+++ linux-2.6.21-rc6/init/Kconfig	2007-04-13 23:11:57.000000000 -0700
@@ -216,6 +216,15 @@ config TASK_IO_ACCOUNTING
 	  Say N if unsure.
 
+config PERFCOUNT
+	bool "Time Stamp Counter based performance measurements"
+	help
+	  Enables performance counters based on the time stamp counters.
+	  These can be used to measure code paths in the kernel and also
+	  gauge their effectiveness in transferring bytes. The performance
+	  counters must be added by modifying code. The counters will then
+	  be visible via files in /proc/perf/*.
+
 config UTS_NS
 	bool "UTS Namespaces"
 	default n
Index: linux-2.6.21-rc6/kernel/perf.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.21-rc6/kernel/perf.c	2007-04-13 23:11:57.000000000 -0700
@@ -0,0 +1,333 @@
+/*
+ * Simple Performance Counter subsystem
+ *
+ * Per-cpu cycle-counter based latency/throughput counters, exported
+ * through /proc/perf/<cpu> and /proc/perf/all, reset via /proc/perf/reset.
+ *
+ * (C) 2007 sgi.
+ *
+ * April 2007, Christoph Lameter <clameter@sgi.com>
+ */
+
+/* NOTE(review): the include targets were lost in mail transit; this is
+ * a plausible reconstruction -- confirm against the original patch. */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/percpu.h>
+#include <linux/smp.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/perf.h>
+#include <asm/timex.h>
+
+#ifdef CONFIG_NUMA
+/* TSCs of different nodes may be unsynchronized; possibly cleared at boot. */
+static int unsynced_get_cycles = 1;
+#else
+/* Original read "#define unsynced_get_cycles = 0" -- the stray '='
+ * breaks every use of the symbol on !NUMA builds. */
+#define unsynced_get_cycles 0
+#endif
+
+/* Counter names shown in the /proc/perf output, indexed by enum pc_item. */
+static const char *var_id[NR_PC_ITEMS] = {
+	"update_process_times",
+	"alloc_pages",
+	"direct_reclaim_scanned",
+	"direct_reclaim_reclaimed",
+	"reclaim_scanned",
+	"reclaim_reclaimed"
+};
+
+struct perf_counter {
+	u32 events;	/* number of completed measurements */
+	u32 mintime;	/* fastest event, ns (0 == none yet) */
+	u32 maxtime;	/* slowest event, ns */
+	u32 minbytes;	/* smallest transfer (0 == none yet) */
+	u32 maxbytes;	/* largest transfer */
+	u32 skipped;	/* measurements dropped due to cpu migration */
+	u64 time;	/* total ns over all events */
+	u64 bytes;	/* total bytes over all events */
+};
+
+static DEFINE_PER_CPU(struct perf_counter, perf_counters)[NR_PC_ITEMS];
+
+/*
+ * Close a measurement opened with pc_start()/INIT_PC() and fold the
+ * elapsed time -- and, if non-zero, a byte count -- into this cpu's
+ * counter for pc->item.
+ */
+void pc_bytes(struct pc *pc, unsigned long bytes)
+{
+	unsigned long time = get_cycles();
+	unsigned long ns;
+	struct perf_counter *p;
+
+	/* Validate the item *before* it is used as an array index;
+	 * the original computed the (possibly out-of-range) element
+	 * address first. */
+	if (unlikely(pc->item >= NR_PC_ITEMS)) {
+		printk(KERN_CRIT "pc_bytes: item number "
+				"(%d) out of range\n", pc->item);
+		dump_stack();
+		return;
+	}
+
+	p = &get_cpu_var(perf_counters)[pc->item];
+
+	if (unlikely(unsynced_get_cycles &&
+			pc->processor != smp_processor_id())) {
+		/* On different processor. TSC measurement not possible. */
+		p->skipped++;
+		goto out;
+	}
+
+	/* NOTE(review): assumes a cycles_to_ns() helper is in scope
+	 * (cycles_2_ns() is static in arch code on 2.6.21) -- confirm. */
+	ns = cycles_to_ns((unsigned long long)(time - pc->time));
+	p->time += ns;
+	p->events++;
+
+	if (ns > p->maxtime)
+		p->maxtime = ns;
+
+	if (p->mintime == 0 || ns < p->mintime)
+		p->mintime = ns;
+
+	if (bytes) {
+		p->bytes += bytes;
+		if (bytes > p->maxbytes)
+			p->maxbytes = bytes;
+		if (p->minbytes == 0 || bytes < p->minbytes)
+			p->minbytes = bytes;
+	}
+out:
+	/* put_cpu_var() takes the variable; the original passed nothing
+	 * and compiled only by accident of an empty macro argument. */
+	put_cpu_var(perf_counters);
+}
+EXPORT_SYMBOL(pc_bytes);
+
+static void reset_perfcount_item(struct perf_counter *c)
+{
+	memset(c, 0, sizeof(struct perf_counter));
+}
+
+/* Zero every counter on every online cpu. */
+static void perfcount_reset(void)
+{
+	int cpu;
+	enum pc_item i;
+
+	for_each_online_cpu(cpu)
+		for (i = 0; i < NR_PC_ITEMS; i++)
+			reset_perfcount_item(
+				&per_cpu(perf_counters, cpu)[i]);
+}
+
+struct unit {
+	unsigned int n;		/* how many of the previous unit make one */
+	const char *s;		/* suffix printed after the number */
+};
+
+static const struct unit event_units[] = {
+	{ 1000, "" },
+	{ 1000, "K" },
+	{ 1000, "M" },
+	{ 1000, "G" },
+	{ 1000, "T" },
+	{ 1000, "P" },
+	{ 1000, "Q" },
+};
+
+static const struct unit time_units[] = {
+	{ 1000, "ns" },
+	{ 1000, "us" },
+	{ 1000, "ms" },
+	{ 60, "s" },
+	{ 60, "m" },
+	{ 24, "h" },
+	{ 365, "d" },
+	{ 1000, "y" },
+};
+
+static const struct unit byte_units[] = {
+	{ 1000, "b" },
+	{ 1000, "kb" },
+	{ 1000, "mb" },
+	{ 1000, "gb" },
+	{ 1000, "tb" },
+	{ 1000, "pb" },
+	{ 1000, "qb" }
+};
+
+/* Print a value using the given array of units and scale it properly */
+static void pval(struct seq_file *s, unsigned long x, const struct unit *u)
+{
+	unsigned n = 0;
+	unsigned rem = 0;
+	unsigned last_divisor = 0;
+
+	/* Scale x down until it fits under the next unit boundary,
+	 * remembering the remainder for one decimal digit. */
+	while (x >= u[n].n) {
+		last_divisor = u[n].n;
+		rem = x % last_divisor;
+		x = x / last_divisor;
+		n++;
+	}
+
+	if (last_divisor)
+		rem = (rem * 10 + last_divisor / 2) / last_divisor;
+	else
+		rem = 0;
+
+	/*
+	 * Rounding may have resulted in the need to go
+	 * to the next number
+	 */
+	if (rem == 10) {
+		x++;
+		rem = 0;
+	}
+
+	seq_printf(s, "%lu", x);
+	if (rem) {
+		seq_putc(s, '.');
+		seq_putc(s, '0' + rem);
+	}
+	seq_puts(s, u[n].s);
+}
+
+/* Print a set of statistical values in the form sum(min/avg/max) */
+static void pc_print(struct seq_file *s, const struct unit *u,
+		unsigned long count, unsigned long sum,
+		unsigned long min, unsigned long max)
+{
+	pval(s, sum, u);
+	seq_putc(s, '(');
+	pval(s, min, u);
+	seq_putc(s, '/');
+	if (count)
+		pval(s, (sum + count / 2) / count, u);
+	else
+		pval(s, 0, u);
+	seq_putc(s, '/');
+	pval(s, max, u);
+	seq_putc(s, ')');
+}
+
+/* Emit one counter line; cpu == -1 in ->private means "sum all cpus". */
+static int perf_show(struct seq_file *s, void *v)
+{
+	int cpu = (unsigned long)s->private;
+	enum pc_item counter = (unsigned long)v - 1;
+	struct perf_counter summary, *x;
+
+	if (cpu >= 0)
+		x = &per_cpu(perf_counters, cpu)[counter];
+	else {
+		/*
+		 * Accumulate from zero. The original seeded the summary
+		 * with cpu 0's counters and then indexed per_cpu(..., 0)
+		 * inside the loop, counting cpu 0 once per online cpu
+		 * plus once, and every other cpu never.
+		 */
+		memset(&summary, 0, sizeof(summary));
+		for_each_online_cpu(cpu) {
+			struct perf_counter *c =
+				&per_cpu(perf_counters, cpu)[counter];
+
+			summary.events += c->events;
+			summary.skipped += c->skipped;
+			summary.time += c->time;
+			summary.bytes += c->bytes;
+
+			if (summary.maxtime < c->maxtime)
+				summary.maxtime = c->maxtime;
+
+			if (summary.mintime == 0 ||
+				(c->mintime != 0 &&
+					summary.mintime > c->mintime))
+				summary.mintime = c->mintime;
+
+			if (summary.maxbytes < c->maxbytes)
+				summary.maxbytes = c->maxbytes;
+
+			if (summary.minbytes == 0 ||
+				(c->minbytes != 0 &&
+					summary.minbytes > c->minbytes))
+				summary.minbytes = c->minbytes;
+		}
+		x = &summary;
+	}
+
+	seq_printf(s, "%-30s %10u ", var_id[counter], x->events);
+	if (x->skipped)
+		seq_printf(s, "(+%3u) ", x->skipped);
+	pc_print(s, time_units, x->events, x->time, x->mintime, x->maxtime);
+	if (x->bytes) {
+		seq_putc(s, ' ');
+		pc_print(s, byte_units, x->events, x->bytes,
+				x->minbytes, x->maxbytes);
+	}
+	seq_putc(s, '\n');
+	return 0;
+}
+
+/* Iterator: positions 1..NR_PC_ITEMS (0 would read as NULL/EOF). */
+static void *perf_start(struct seq_file *m, loff_t *pos)
+{
+	return (*pos < NR_PC_ITEMS) ?
+		(void *)(unsigned long)(*pos + 1) : NULL;
+}
+
+static void *perf_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	++*pos;
+	return perf_start(m, pos);
+}
+
+static void perf_stop(struct seq_file *m, void *v)
+{
+}
+
+static struct seq_operations perf_data_ops = {
+	.start = perf_start,
+	.next = perf_next,
+	.stop = perf_stop,
+	.show = perf_show,
+};
+
+static int perf_data_open(struct inode *inode, struct file *file)
+{
+	int res;
+
+	res = seq_open(file, &perf_data_ops);
+	if (!res)
+		((struct seq_file *)file->private_data)->private =
+							PDE(inode)->data;
+	return res;
+}
+
+static struct file_operations perf_data_fops = {
+	.open = perf_data_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+/* Writing anything to /proc/perf/reset zeroes all counters. */
+static int perf_reset_write(struct file *file, const char __user *buffer,
+		unsigned long count, void *data)
+{
+	perfcount_reset();
+	return count;
+}
+
+static __init int init_perfcounter(void)
+{
+	int cpu;
+	struct proc_dir_entry *proc_perf, *perf_reset, *perf_all;
+
+	proc_perf = proc_mkdir("perf", NULL);
+	if (!proc_perf)
+		return -ENOMEM;
+
+	/* NOTE(review): S_IWUGO lets any user reset the counters --
+	 * confirm that is intended. create_proc_entry() may also return
+	 * NULL; the original dereferenced it unchecked. */
+	perf_reset = create_proc_entry("reset", S_IWUGO, proc_perf);
+	if (perf_reset)
+		perf_reset->write_proc = perf_reset_write;
+
+	perf_all = create_proc_entry("all", S_IRUGO, proc_perf);
+	if (perf_all) {
+		perf_all->proc_fops = &perf_data_fops;
+		perf_all->data = (void *)-1;	/* -1: summarize all cpus */
+	}
+
+	for_each_possible_cpu(cpu) {
+		char name[20];
+		struct proc_dir_entry *p;
+
+		sprintf(name, "%d", cpu);
+		p = create_proc_entry(name, S_IRUGO, proc_perf);
+		if (!p)
+			continue;
+
+		p->proc_fops = &perf_data_fops;
+		p->data = (void *)(unsigned long)cpu;
+	}
+
+	perfcount_reset();
+
+#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+	/* TSC deltas remain usable across cpus when they are synchronized. */
+	if (!unsynchronized_tsc())
+		unsynced_get_cycles = 0;
+#endif
+	return 0;
+}
+
+__initcall(init_perfcounter);