Subject: make oprofile on cell take more samples
From: David Erb

Oprofile does not take enough samples - dozens instead of thousands.

Writing to a performance counter only writes to a latch; the new value
is not propagated to the actual counter until the performance monitor
is enabled.  Keep a shadow copy of every counter value that is written,
together with a bitmask (one bit per physical counter) recording which
counters currently have their newest value in the latch.  Reads return
the shadow copy while that bit is set, and enabling the performance
monitor clears the whole mask.

cbe_write_ctr() now performs its read-modify-write of a 16-bit counter
half before the physical counter is written, instead of after.

Signed-off-by: Arnd Bergmann

Index: linus-2.6/arch/powerpc/platforms/cell/perfmon.c
===================================================================
--- linus-2.6.orig/arch/powerpc/platforms/cell/perfmon.c
+++ linus-2.6/arch/powerpc/platforms/cell/perfmon.c
@@ -41,8 +41,10 @@ static struct cbe_shadow_regs {
 	struct pm_status pm_status;
 	struct pm_control pm_control;
 	struct pm pm_interval;
+	struct pm pm_ctr[NR_PHYS_CTRS];
 	struct pm pm_start_stop;
 	struct pm07_control pm07_control[NR_PHYS_CTRS*2];
+	int counter_value_in_latch;	/* bit n set: pm_ctr[n] is newest */
 } cbe_shadow_regs[MAX_CBE];
 
 static struct cbe_shadow_regs *shadow[NR_CPUS] = {
@@ -50,6 +52,7 @@ static struct cbe_shadow_regs *shadow[NR
 	&cbe_shadow_regs[1], &cbe_shadow_regs[1]
 };
 
+/* When writing to write-only mmio addresses, save a shadow copy */
 #define write_wo_mmio(register,x) \
 { \
 	shadow[cpu]->register.val = x; \
@@ -88,19 +91,30 @@ u32 get_cbe_ctr_size(u32 cpu, u32 phys_c
 	return 32;
 }
 
+
 /* Read a physical counter (either 1 32-bit or 2 16-bit) */
 u32 cbe_read_phys_ctr(u32 cpu, u32 phys_ctr)
 {
 	struct cbe_pmd_regs __iomem *pmd_regs;
+	u32 val;
 
 	if (phys_ctr >= NR_PHYS_CTRS)
 		return 0;
 
-	pmd_regs = cbe_get_cpu_pmd_regs(cpu);
+	/*
+	 * Read the latch or the actual counter, whichever is newer
+	 */
+	if (shadow[cpu]->counter_value_in_latch & (1 << phys_ctr)) {
+		val = shadow[cpu]->pm_ctr[phys_ctr].val;
+	} else {
+		pmd_regs = cbe_get_cpu_pmd_regs(cpu);
+		val = in_be32(&pmd_regs->pm_ctr[phys_ctr].val);
+	}
 
-	return in_be32(&pmd_regs->pm_ctr[phys_ctr].val);
+	return val;
 }
 
+
 /* Read a counter (either 32-bit or 16-bit) */
 u32 cbe_read_ctr(u32 cpu, u32 ctr)
 {
@@ -128,6 +142,14 @@ void cbe_write_phys_ctr(u32 cpu, u32 phy
 
 	pmd_regs = cbe_get_cpu_pmd_regs(cpu);
 	out_be32(&(pmd_regs->pm_ctr[phys_ctr].val), val);
+
+	/*
+	 * Writing to a counter only writes to a latch.  The new value is not
+	 * propagated to the actual counter until the performance monitor is
+	 * enabled.
+	 */
+	shadow[cpu]->counter_value_in_latch |= (1 << phys_ctr);
+	write_wo_mmio(pm_ctr[phys_ctr], val);
 }
 
 
@@ -139,8 +161,6 @@ void cbe_write_ctr(u32 cpu, u32 ctr, u32
 
 	phys_ctr = ctr & (NR_PHYS_CTRS - 1);
 
-	cbe_write_phys_ctr(cpu, phys_ctr, val);
-
 	if (get_cbe_ctr_size(cpu, phys_ctr) == 16) {
 		phys_val = cbe_read_phys_ctr(cpu, phys_ctr);
 
@@ -149,6 +169,8 @@ void cbe_write_ctr(u32 cpu, u32 ctr, u32
 		else
 			val = (val & 0xffff) | (phys_val & 0xffff0000);
 	}
+
+	cbe_write_phys_ctr(cpu, phys_ctr, val);
 }
 
 u32 cbe_clear_pm_interrupts(u32 cpu)
@@ -219,6 +241,7 @@ void cbe_enable_pm(u32 cpu)
 
 	pmd_regs = cbe_get_cpu_pmd_regs(cpu);
 	shadow[cpu]->pm_control.ena_perf_mon = 1;
+	shadow[cpu]->counter_value_in_latch = 0;
 	out_be32 (&pmd_regs->pm_control.val, shadow[cpu]->pm_control.val);
 }
 