commit 7daa7abd91b7cd283b3138919cb1d2e1be6b7970 Author: Frederic Weisbecker Date: Mon Sep 7 00:38:14 2009 +0200 o diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 23439fb..1062e4a 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -75,13 +75,6 @@ */ #ifdef __KERNEL__ -/* For process management */ -extern void flush_thread_hw_breakpoint(struct task_struct *tsk); -extern int copy_thread_hw_breakpoint(struct task_struct *tsk, - struct task_struct *child, unsigned long clone_flags); - -/* For CPU management */ -extern void load_debug_registers(void); static inline void hw_breakpoint_disable(void) { /* Zero the control register for HW Breakpoint */ diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index 1acb4d4..d077d3d 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -13,6 +13,8 @@ struct arch_hw_breakpoint { #include #include +#include +#include /* Available HW breakpoint length encodings */ #define HW_BREAKPOINT_LEN_1 0x40 @@ -36,20 +38,28 @@ struct arch_hw_breakpoint { /* Total number of available HW breakpoint registers */ #define HBP_NUM 4 -extern struct hw_breakpoint *hbp_kernel[HBP_NUM]; -DECLARE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HBP_NUM]); -extern unsigned int hbp_user_refcount[HBP_NUM]; - -extern void arch_install_thread_hw_breakpoint(struct task_struct *tsk); -extern void arch_uninstall_thread_hw_breakpoint(void); extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len); extern int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp, - struct task_struct *tsk); -extern void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk); -extern void arch_flush_thread_hw_breakpoint(struct task_struct *tsk); -extern void arch_update_kernel_hw_breakpoint(void *); + struct task_struct *tsk); +extern unsigned long arch_hw_bp_addr(struct hw_breakpoint *hw); extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, unsigned long val, void *data); +int arch_install_hw_breakpoint(struct perf_counter *counter); +void arch_uninstall_hw_breakpoint(struct perf_counter *counter); +void hw_breakpoint_pmu_read(struct perf_counter *counter); +void hw_breakpoint_pmu_unthrottle(struct perf_counter *counter); + +struct perf_counter; + +extern void +arch_fill_perf_breakpoint(struct perf_counter *counter); + +void arch_fill_breakpoint(struct hw_breakpoint *bp, int nr); +unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type); +int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type); + +void flush_thread_hw_breakpoint(struct task_struct *tsk); + #endif /* __KERNEL__ */ #endif /* _I386_HW_BREAKPOINT_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 5b0c491..26eda0b 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -443,12 +443,10 @@ struct thread_struct { unsigned long fs; #endif unsigned long gs; - /* Hardware debugging registers: */ - unsigned long debugreg[HBP_NUM]; - unsigned long debugreg6; - unsigned long debugreg7; - /* Hardware breakpoint info */ - struct hw_breakpoint *hbp[HBP_NUM]; + /* Save middle states of ptrace breakpoints */ + struct hw_breakpoint *ptrace_bps[HBP_NUM]; + /* Debug status used for traps, single steps, etc... 
*/ + unsigned long debugreg6; /* Fault info: */ unsigned long cr2; unsigned long trap_no; diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 9316a9d..e2f4a43 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -15,6 +15,7 @@ * * Copyright (C) 2007 Alan Stern * Copyright (C) 2009 IBM Corporation + * Copyright (C) 2009 Frederic Weisbecker */ /* @@ -22,6 +23,7 @@ * using the CPU's debug registers. */ +#include #include #include #include @@ -39,25 +41,28 @@ #include /* Unmasked kernel DR7 value */ -static unsigned long kdr7; +static DEFINE_PER_CPU(unsigned long, dr7); -/* - * Masks for the bits corresponding to registers DR0 - DR3 in DR7 register. - * Used to clear and verify the status of bits corresponding to DR0 - DR3 - */ -static const unsigned long dr7_masks[HBP_NUM] = { - 0x000f0003, /* LEN0, R/W0, G0, L0 */ - 0x00f0000c, /* LEN1, R/W1, G1, L1 */ - 0x0f000030, /* LEN2, R/W2, G2, L2 */ - 0xf00000c0 /* LEN3, R/W3, G3, L3 */ -}; +static DEFINE_PER_CPU(struct hw_breakpoint *, bp_per_reg[HBP_NUM]); + +static inline +struct arch_hw_breakpoint *counter_arch_bp(struct perf_counter *counter) +{ + return &counter->hw.bp->info; +} + +/* Temporary, we should really move the breakpoint addr to the core struct */ +unsigned long arch_hw_bp_addr(struct hw_breakpoint *hw) +{ + return hw->info.address; +} /* * Encode the length, type, Exact, and Enable bits for a particular breakpoint * as stored in debug register 7. */ -static unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type) +unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type) { unsigned long bp_info; @@ -68,64 +73,79 @@ static unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type) return bp_info; } -void arch_update_kernel_hw_breakpoint(void *unused) +/* + * Decode the length and type bits for a particular breakpoint as + * stored in debug register 7. Return the "enabled" status. + */ +int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type) { - struct hw_breakpoint *bp; - int i, cpu = get_cpu(); - unsigned long temp_kdr7 = 0; + int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE); - /* Don't allow debug exceptions while we update the registers */ - set_debugreg(0UL, 7); - - for (i = hbp_kernel_pos; i < HBP_NUM; i++) { - per_cpu(this_hbp_kernel[i], cpu) = bp = hbp_kernel[i]; - if (bp) { - temp_kdr7 |= encode_dr7(i, bp->info.len, bp->info.type); - set_debugreg(bp->info.address, i); - } - } + *len = (bp_info & 0xc) | 0x40; + *type = (bp_info & 0x3) | 0x80; - /* No need to set DR6. Update the debug registers with kernel-space - * breakpoint values from kdr7 and user-space requests from the - * current process - */ - kdr7 = temp_kdr7; - set_debugreg(kdr7 | current->thread.debugreg7, 7); - put_cpu(); + return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3; } /* * Install the thread breakpoints in their debug registers. 
+ *
+ * Atomic: we hold the counter->ctx->lock and we only handle per cpu things
  */
-void arch_install_thread_hw_breakpoint(struct task_struct *tsk)
+int arch_install_hw_breakpoint(struct perf_counter *counter)
 {
-	struct thread_struct *thread = &(tsk->thread);
-
-	switch (hbp_kernel_pos) {
-	case 4:
-		set_debugreg(thread->debugreg[3], 3);
-	case 3:
-		set_debugreg(thread->debugreg[2], 2);
-	case 2:
-		set_debugreg(thread->debugreg[1], 1);
-	case 1:
-		set_debugreg(thread->debugreg[0], 0);
-	default:
-		break;
+	struct arch_hw_breakpoint *bp = counter_arch_bp(counter);
+	unsigned long *dr7;
+	int i;
+
+	for (i = 0; i < HBP_NUM; i++) {
+		struct hw_breakpoint **slot = &__get_cpu_var(bp_per_reg[i]);
+
+		if (!*slot) {
+			*slot = counter->hw.bp;
+			break;
+		}
 	}
+	if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
+		return -EBUSY;
+
+	set_debugreg(bp->address, i);
+
+	dr7 = &__get_cpu_var(dr7);
+	*dr7 |= encode_dr7(i, bp->len, bp->type);
 
 	/* No need to set DR6 */
-	set_debugreg((kdr7 | thread->debugreg7), 7);
+	set_debugreg(*dr7, 7);
+
+	return 0;
 }
 
 /*
  * Install the debug register values for just the kernel, no thread.
+ *
+ * Atomic: we hold the counter->ctx->lock
  */
-void arch_uninstall_thread_hw_breakpoint(void)
+void arch_uninstall_hw_breakpoint(struct perf_counter *counter)
 {
-	/* Clear the user-space portion of debugreg7 by setting only kdr7 */
-	set_debugreg(kdr7, 7);
+	struct arch_hw_breakpoint *bp = counter_arch_bp(counter);
+	unsigned long *dr7;
+	int i;
+
+	for (i = 0; i < HBP_NUM; i++) {
+		struct hw_breakpoint **slot = &__get_cpu_var(bp_per_reg[i]);
+
+		if (*slot == counter->hw.bp) {
+			*slot = NULL;
+			break;
+		}
+	}
+
+	if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
+		return;
+	dr7 = &__get_cpu_var(dr7);
+	*dr7 &= ~encode_dr7(i, bp->len, bp->type);
+	set_debugreg(*dr7, 7);
 }
 
 static int get_hbp_len(u8 hbp_len)
@@ -178,15 +198,9 @@ static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len)
 /*
  * Store a breakpoint's encoded address, length, and type.
  */
-static int arch_store_info(struct hw_breakpoint *bp, struct task_struct *tsk)
+static int arch_store_info(struct hw_breakpoint *bp)
 {
 	/*
-	 * User-space requests will always have the address field populated
-	 * Symbol names from user-space are rejected
-	 */
-	if (tsk && bp->info.name)
-		return -EINVAL;
-	/*
 	 * For kernel-addresses, either the address or symbol name can be
 	 * specified.
*/ @@ -202,7 +216,7 @@ static int arch_store_info(struct hw_breakpoint *bp, struct task_struct *tsk) * Validate the arch-specific HW Breakpoint register settings */ int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp, - struct task_struct *tsk) + struct task_struct *tsk) { unsigned int align; int ret = -EINVAL; @@ -247,7 +261,7 @@ int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp, } if (bp->triggered) - ret = arch_store_info(bp, tsk); + ret = arch_store_info(bp); if (ret < 0) return ret; @@ -267,31 +281,27 @@ int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp, bp->info.len)) return -EFAULT; } + return 0; } -void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk) +/* start simple: just set a 1 byte length rw breakpoint to the location */ +void arch_fill_perf_breakpoint(struct perf_counter *counter) { - struct thread_struct *thread = &(tsk->thread); - struct hw_breakpoint *bp = thread->hbp[pos]; - - thread->debugreg7 &= ~dr7_masks[pos]; - if (bp) { - thread->debugreg[pos] = bp->info.address; - thread->debugreg7 |= encode_dr7(pos, bp->info.len, - bp->info.type); - } else - thread->debugreg[pos] = 0; + struct arch_hw_breakpoint *bp = counter_arch_bp(counter); + + bp->address = (unsigned long)counter->attr.config; + bp->len = HW_BREAKPOINT_LEN_1; + bp->type = HW_BREAKPOINT_RW; } -void arch_flush_thread_hw_breakpoint(struct task_struct *tsk) +void flush_thread_hw_breakpoint(struct task_struct *tsk) { int i; - struct thread_struct *thread = &(tsk->thread); + struct thread_struct *t = &tsk->thread; - thread->debugreg7 = 0; for (i = 0; i < HBP_NUM; i++) - thread->debugreg[i] = 0; + kfree(t->ptrace_bps[i]); } /* @@ -325,10 +335,6 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) if ((dr6 & DR_TRAP_BITS) == 0) return NOTIFY_DONE; - /* Lazy debug register switching */ - if (!test_tsk_thread_flag(current, TIF_DEBUG)) - arch_uninstall_thread_hw_breakpoint(); - get_debugreg(dr7, 7); /* Disable breakpoints during exception handling */ set_debugreg(0UL, 7); @@ -344,17 +350,12 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) for (i = 0; i < HBP_NUM; ++i) { if (likely(!(dr6 & (DR_TRAP0 << i)))) continue; - /* - * Find the corresponding hw_breakpoint structure and - * invoke its triggered callback. - */ - if (i >= hbp_kernel_pos) - bp = per_cpu(this_hbp_kernel[i], cpu); - else { - bp = current->thread.hbp[i]; - if (bp) - rc = NOTIFY_DONE; - } + + rcu_read_lock(); + + bp = per_cpu(bp_per_reg[i], cpu); + if (bp) + rc = NOTIFY_DONE; /* * Reset the 'i'th TRAP bit in dr6 to denote completion of * exception handling @@ -362,13 +363,16 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) (*dr6_p) &= ~(DR_TRAP0 << i); /* * bp can be NULL due to lazy debug register switching - * or due to the delay between updates of hbp_kernel_pos - * and this_hbp_kernel. + * or due to concurrent perf counter removing. 
*/ - if (!bp) - continue; + if (!bp) { + rcu_read_unlock(); + break; + } (bp->triggered)(bp, args->regs); + + rcu_read_unlock(); } if (dr6 & (~DR_TRAP_BITS)) rc = NOTIFY_DONE; @@ -389,3 +393,13 @@ int __kprobes hw_breakpoint_exceptions_notify( return hw_breakpoint_handler(data); } + +void hw_breakpoint_pmu_read(struct perf_counter *counter) +{ + /* TODO */ +} + +void hw_breakpoint_pmu_unthrottle(struct perf_counter *counter) +{ + /* TODO */ +} diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 209e748..bb5e689 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -264,9 +264,11 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, p->thread.io_bitmap_ptr = NULL; tsk = current; err = -ENOMEM; - if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG))) - if (copy_thread_hw_breakpoint(tsk, p, clone_flags)) - goto out; + + if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG))) { + memset(tsk->thread.ptrace_bps, 0, + sizeof(tsk->thread.ptrace_bps)); + } if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, @@ -287,7 +289,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, err = do_set_thread_area(p, -1, (struct user_desc __user *)childregs->si, 0); -out: if (err && p->thread.io_bitmap_ptr) { kfree(p->thread.io_bitmap_ptr); p->thread.io_bitmap_max = 0; @@ -437,23 +438,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) lazy_load_gs(next->gs); percpu_write(current_task, next_p); - /* - * There's a problem with moving the arch_install_thread_hw_breakpoint() - * call before current is updated. Suppose a kernel breakpoint is - * triggered in between the two, the hw-breakpoint handler will see that - * the 'current' task does not have TIF_DEBUG flag set and will think it - * is leftover from an old task (lazy switching) and will erase it. Then - * until the next context switch, no user-breakpoints will be installed. - * - * The real problem is that it's impossible to update both current and - * physical debug registers at the same instant, so there will always be - * a window in which they disagree and a breakpoint might get triggered. - * Since we use lazy switching, we are forced to assume that a - * disagreement means that current is correct and the exception is due - * to lazy debug register switching. 
- */ - if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG))) - arch_install_thread_hw_breakpoint(next_p); return prev_p; } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 72edac0..3142e8c 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -244,8 +244,6 @@ void release_thread(struct task_struct *dead_task) BUG(); } } - if (unlikely(dead_task->thread.debugreg7)) - flush_thread_hw_breakpoint(dead_task); } static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr) @@ -309,9 +307,10 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, savesegment(ds, p->thread.ds); err = -ENOMEM; - if (unlikely(test_tsk_thread_flag(me, TIF_DEBUG))) - if (copy_thread_hw_breakpoint(me, p, clone_flags)) - goto out; + if (unlikely(test_tsk_thread_flag(me, TIF_DEBUG))) { + memset(me->thread.ptrace_bps, 0, + sizeof(me->thread.ptrace_bps)); + } if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); @@ -352,7 +351,7 @@ out: p->thread.io_bitmap_max = 0; } if (err) - flush_thread_hw_breakpoint(p); + flush_thread_hw_breakpoint(me); return err; } @@ -508,23 +507,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ if (preload_fpu) __math_state_restore(); - /* - * There's a problem with moving the arch_install_thread_hw_breakpoint() - * call before current is updated. Suppose a kernel breakpoint is - * triggered in between the two, the hw-breakpoint handler will see that - * the 'current' task does not have TIF_DEBUG flag set and will think it - * is leftover from an old task (lazy switching) and will erase it. Then - * until the next context switch, no user-breakpoints will be installed. - * - * The real problem is that it's impossible to update both current and - * physical debug registers at the same instant, so there will always be - * a window in which they disagree and a breakpoint might get triggered. - * Since we use lazy switching, we are forced to assume that a - * disagreement means that current is correct and the exception is due - * to lazy debug register switching. - */ - if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG))) - arch_install_thread_hw_breakpoint(next_p); return prev_p; } diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 113b892..7693354 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -451,20 +451,6 @@ static int genregs_set(struct task_struct *target, return ret; } -/* - * Decode the length and type bits for a particular breakpoint as - * stored in debug register 7. Return the "enabled" status. - */ -static int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, - unsigned *type) -{ - int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE); - - *len = (bp_info & 0xc) | 0x40; - *type = (bp_info & 0x3) | 0x80; - return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3; -} - static void ptrace_triggered(struct hw_breakpoint *bp, struct pt_regs *regs) { struct thread_struct *thread = &(current->thread); @@ -474,31 +460,50 @@ static void ptrace_triggered(struct hw_breakpoint *bp, struct pt_regs *regs) * Store in the virtual DR6 register the fact that the breakpoint * was hit so the thread's debugger will see it. */ - for (i = 0; i < hbp_kernel_pos; i++) + for (i = 0; i < HBP_NUM; i++) { /* * We will check bp->info.address against the address stored in * thread's hbp structure and not debugreg[i]. 
This is to ensure
 		 * that the corresponding bit for 'i' in DR7 register is enabled
 		 */
-		if (bp->info.address == thread->hbp[i]->info.address)
+		if (!thread->ptrace_bps[i])
+			continue;
+
+		if (bp->info.address == thread->ptrace_bps[i]->info.address)
 			break;
+	}
 
 	thread->debugreg6 |= (DR_TRAP0 << i);
 }
 
+static unsigned long ptrace_get_dr7(struct hw_breakpoint *bp[])
+{
+	int i;
+	int dr7 = 0;
+
+	for (i = 0; i < HBP_NUM; i++) {
+		if (bp[i])
+			dr7 |= encode_dr7(i, bp[i]->info.len, bp[i]->info.type);
+	}
+
+	return dr7;
+}
+
 /*
  * Handle ptrace writes to debug register 7.
  */
 static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
 {
 	struct thread_struct *thread = &(tsk->thread);
-	unsigned long old_dr7 = thread->debugreg7;
+	unsigned long old_dr7;
 	int i, orig_ret = 0, rc = 0;
 	int enabled, second_pass = 0;
 	unsigned len, type;
 	struct hw_breakpoint *bp;
 
 	data &= ~DR_CONTROL_RESERVED;
+
+	old_dr7 = ptrace_get_dr7(thread->ptrace_bps);
 restore:
 	/*
 	 * Loop through all the hardware breakpoints, making the
@@ -506,7 +511,7 @@ restore:
 	 */
 	for (i = 0; i < HBP_NUM; i++) {
 		enabled = decode_dr7(data, i, &len, &type);
-		bp = thread->hbp[i];
+		bp = thread->ptrace_bps[i];
 
 		if (!enabled) {
 			if (bp) {
@@ -518,27 +523,31 @@ restore:
 				 */
 				if (!second_pass)
 					continue;
-				unregister_user_hw_breakpoint(tsk, bp);
+				thread->ptrace_bps[i] = NULL;
+				unregister_hw_breakpoint(bp);
 				kfree(bp);
 			}
 			continue;
 		}
-		if (!bp) {
-			rc = -ENOMEM;
-			bp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL);
-			if (bp) {
-				bp->info.address = thread->debugreg[i];
-				bp->triggered = ptrace_triggered;
-				bp->info.len = len;
-				bp->info.type = type;
-				rc = register_user_hw_breakpoint(tsk, bp);
-				if (rc)
-					kfree(bp);
-			}
-		} else
-			rc = modify_user_hw_breakpoint(tsk, bp);
-		if (rc)
+
+		/*
+		 * We should have at least an inactive breakpoint at this
+		 * slot. It means the user is writing dr7 without having
+		 * written the address register first
+		 */
+		if (!bp)
+			return -EINVAL;
+
+		bp->info.len = len;
+		bp->info.type = type;
+		bp->inactive = false;
+
+		rc = modify_user_hw_breakpoint(tsk, bp);
+		if (rc) { /* FIXME */
+			thread->ptrace_bps[i] = NULL;
+			kfree(bp);
 			break;
+		}
 	}
 	/*
 	 * Make a second pass to free the remaining unused breakpoints
@@ -563,15 +572,60 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
 	struct thread_struct *thread = &(tsk->thread);
 	unsigned long val = 0;
 
-	if (n < HBP_NUM)
-		val = thread->debugreg[n];
-	else if (n == 6)
+	if (n < HBP_NUM) {
+		struct hw_breakpoint *bp;
+		bp = thread->ptrace_bps[n];
+		if (!bp)
+			return 0;
+		val = bp->info.address;
+	} else if (n == 6) {
 		val = thread->debugreg6;
-	else if (n == 7)
-		val = thread->debugreg7;
+	} else if (n == 7) {
+		val = ptrace_get_dr7(thread->ptrace_bps);
+	}
 	return val;
 }
 
+int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
+			       unsigned long addr)
+{
+	struct hw_breakpoint *bp;
+	struct thread_struct *t = &tsk->thread;
+	bool new = false;
+	int ret;
+
+	if (!t->ptrace_bps[nr]) {
+		bp = kzalloc(sizeof(*bp), GFP_KERNEL);
+		if (!bp)
+			return -ENOMEM;
+
+		t->ptrace_bps[nr] = bp;
+		/*
+		 * Put stub len and type to register an inactive but
+		 * correct bp
+		 */
+		bp->info.len = HW_BREAKPOINT_LEN_1;
+		bp->info.type = HW_BREAKPOINT_WRITE;
+		bp->triggered = ptrace_triggered;
+		bp->inactive = true;
+		new = true;
+	} else
+		bp = t->ptrace_bps[nr];
+
+	bp->info.address = addr;
+
+	if (new) {
+		ret = register_user_hw_breakpoint(tsk, bp);
+		if (ret) {
+			t->ptrace_bps[nr] = NULL;
+			kfree(bp);
+		}
+	} else
+		ret = modify_user_hw_breakpoint(tsk, bp);
+
+	return ret;
+}
+
 /*
  * Handle PTRACE_POKEUSR calls for the debug register area.
*/ @@ -585,19 +639,13 @@ int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val) return -EIO; if (n == 6) { - tsk->thread.debugreg6 = val; + thread->debugreg6 = val; goto ret_path; } if (n < HBP_NUM) { - if (thread->hbp[n]) { - if (arch_check_va_in_userspace(val, - thread->hbp[n]->info.len) == 0) { - rc = -EIO; - goto ret_path; - } - thread->hbp[n]->info.address = val; - } - thread->debugreg[n] = val; + rc = ptrace_set_breakpoint_addr(tsk, n, val); + if (rc) + return rc; } /* All that's left is DR7 */ if (n == 7) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index a9ccc17..09c5e07 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -64,7 +64,6 @@ #include #include #include -#include #include #include @@ -328,7 +327,6 @@ notrace static void __cpuinit start_secondary(void *unused) x86_cpuinit.setup_percpu_clockev(); wmb(); - load_debug_registers(); cpu_idle(); } @@ -1272,7 +1270,6 @@ void cpu_disable_common(void) remove_cpu_from_maps(cpu); unlock_vector_lock(); fixup_irqs(); - hw_breakpoint_disable(); } int native_cpu_disable(void) diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index bf217b3..414b296 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -105,7 +105,6 @@ static void __save_processor_state(struct saved_context *ctxt) ctxt->cr4 = read_cr4(); ctxt->cr8 = read_cr8(); #endif - hw_breakpoint_disable(); } /* Needed by apm.c */ @@ -144,11 +143,6 @@ static void fix_processor_context(void) #endif load_TR_desc(); /* This does ltr */ load_LDT(¤t->active_mm->context); /* This does lldt */ - - /* - * Now maybe reload the debug registers - */ - load_debug_registers(); } /** diff --git a/include/asm-generic/hw_breakpoint.h b/include/asm-generic/hw_breakpoint.h index 9bf2d12..656b902 100644 --- a/include/asm-generic/hw_breakpoint.h +++ b/include/asm-generic/hw_breakpoint.h @@ -10,6 +10,8 @@ #include #include +struct perf_counter; + /** * struct hw_breakpoint - unified kernel/user-space hardware breakpoint * @triggered: callback invoked after target address access @@ -103,8 +105,13 @@ struct hw_breakpoint { void (*triggered)(struct hw_breakpoint *, struct pt_regs *); struct arch_hw_breakpoint info; + struct list_head list; + struct perf_counter *counter; + bool inactive; }; +struct pmu; + /* * len and type values are defined in include/asm/hw_breakpoint.h. * Available values vary according to the architecture. On i386 the @@ -123,17 +130,17 @@ struct hw_breakpoint { extern int register_user_hw_breakpoint(struct task_struct *tsk, struct hw_breakpoint *bp); -extern int modify_user_hw_breakpoint(struct task_struct *tsk, - struct hw_breakpoint *bp); -extern void unregister_user_hw_breakpoint(struct task_struct *tsk, - struct hw_breakpoint *bp); +extern int +modify_user_hw_breakpoint(struct task_struct *tsk, struct hw_breakpoint *bp); /* * Kernel breakpoints are not associated with any particular thread. 
*/ -extern int register_kernel_hw_breakpoint(struct hw_breakpoint *bp); -extern void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp); +extern int register_kernel_hw_breakpoint(struct hw_breakpoint *bp, int cpu); +extern int register_perf_hw_breakpoint(struct perf_counter *counter); +extern int __register_perf_hw_breakpoint(struct perf_counter *counter); +extern void unregister_hw_breakpoint(struct hw_breakpoint *bp); -extern unsigned int hbp_kernel_pos; +extern struct pmu perf_ops_bp; #endif /* __KERNEL__ */ #endif /* _ASM_GENERIC_HW_BREAKPOINT_H */ diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 972f90d..a2b8fde 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -31,6 +31,7 @@ enum perf_type_id { PERF_TYPE_TRACEPOINT = 2, PERF_TYPE_HW_CACHE = 3, PERF_TYPE_RAW = 4, + PERF_TYPE_BREAKPOINT = 5, PERF_TYPE_MAX, /* non-ABI */ }; @@ -468,6 +469,10 @@ struct hw_perf_counter { atomic64_t count; struct hrtimer hrtimer; }; + struct { /* Hardware breakpoint */ + struct hw_breakpoint *bp; + int counter; + }; }; atomic64_t prev_count; u64 sample_period; @@ -491,6 +496,8 @@ struct pmu { void (*disable) (struct perf_counter *counter); void (*read) (struct perf_counter *counter); void (*unthrottle) (struct perf_counter *counter); + int (*open) (struct perf_counter *counter); + void (*close) (struct perf_counter *counter); }; /** @@ -501,6 +508,7 @@ enum perf_counter_active_state { PERF_COUNTER_STATE_OFF = -1, PERF_COUNTER_STATE_INACTIVE = 0, PERF_COUNTER_STATE_ACTIVE = 1, + PERF_COUNTER_STATE_UNOPENED = 2, }; struct file; @@ -693,10 +701,22 @@ struct perf_cpu_context { extern int perf_max_counters; extern const struct pmu *hw_perf_counter_init(struct perf_counter *counter); - +extern int +__perf_counter_init(struct perf_counter *counter, + struct perf_counter_attr *attr, + int cpu, + struct perf_counter_context *ctx, + struct perf_counter *group_leader, + struct perf_counter *parent_counter); +extern void free_counter(struct perf_counter *counter); extern void perf_counter_task_sched_in(struct task_struct *task, int cpu); extern void perf_counter_task_sched_out(struct task_struct *task, struct task_struct *next, int cpu); +extern struct perf_counter_context *find_get_context(pid_t pid, int cpu); +extern void perf_install_in_context(struct perf_counter_context *ctx, + struct perf_counter *counter, + int cpu); +extern void perf_counter_remove_from_context(struct perf_counter *counter); extern void perf_counter_task_tick(struct task_struct *task, int cpu); extern int perf_counter_init_task(struct task_struct *child); extern void perf_counter_exit_task(struct task_struct *child); @@ -768,6 +788,8 @@ extern int sysctl_perf_counter_sample_rate; extern void perf_counter_init(void); extern void perf_tpcounter_event(int event_id, u64 addr, u64 count, void *record, int entry_size); +extern void +perf_bpcounter_event(struct hw_breakpoint *bp, struct pt_regs *regs); #ifndef perf_misc_flags #define perf_misc_flags(regs) (user_mode(regs) ? 
PERF_EVENT_MISC_USER : \ @@ -801,6 +823,24 @@ static inline void perf_counter_mmap(struct vm_area_struct *vma) { } static inline void perf_counter_comm(struct task_struct *tsk) { } static inline void perf_counter_fork(struct task_struct *tsk) { } static inline void perf_counter_init(void) { } +static inline int +__perf_counter_init(struct perf_counter *counter, + struct perf_counter_attr *attr, + int cpu, + struct perf_counter_context *ctx, + struct perf_counter *group_leader, + struct perf_counter *parent_counter) { return NULL; } +static inline void free_counter(struct perf_counter *counter) { } +static inline struct perf_counter_context * +find_get_context(pid_t pid, int cpu) { return NULL; } +static inline void perf_install_in_context(struct perf_counter_context *ctx, + struct perf_counter *counter, + int cpu) { } +static inline void +perf_counter_remove_from_context(struct perf_counter *counter) { } +static inline void +perf_bpcounter_event(struct hw_breakpoint *bp, struct pt_regs *regs) { } + #endif #endif /* __KERNEL__ */ diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index c1f64e6..bb84889 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -15,6 +15,7 @@ * * Copyright (C) 2007 Alan Stern * Copyright (C) IBM Corporation, 2009 + * Copyright (C) 2009, Frederic Weisbecker */ /* @@ -35,179 +36,278 @@ #include #include +#include + #include #include #ifdef CONFIG_X86 #include #endif + /* - * Spinlock that protects all (un)register operations over kernel/user-space - * breakpoint requests + * Constraints data */ -static DEFINE_SPINLOCK(hw_breakpoint_lock); -/* Array of kernel-space breakpoint structures */ -struct hw_breakpoint *hbp_kernel[HBP_NUM]; +static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned); +static DEFINE_PER_CPU(unsigned int, task_bp_pinned[HBP_NUM]); +static DEFINE_PER_CPU(unsigned int, nr_bp_flexible); -/* - * Per-processor copy of hbp_kernel[]. Used only when hbp_kernel is being - * modified but we need the older copy to handle any hbp exceptions. It will - * sync with hbp_kernel[] value after updation is done through IPIs. - */ -DEFINE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HBP_NUM]); +struct bp_busy_slots { + unsigned int pinned; + unsigned int flexible; +}; -/* - * Kernel breakpoints grow downwards, starting from HBP_NUM - * 'hbp_kernel_pos' denotes lowest numbered breakpoint register occupied for - * kernel-space request. We will initialise it here and not in an __init - * routine because load_debug_registers(), which uses this variable can be - * called very early during CPU initialisation. - */ -unsigned int hbp_kernel_pos = HBP_NUM; +/* Serialize accesses to nr_cpu_bp_pinned, task_bp_pinned, nr_bp_flexible */ +static DEFINE_MUTEX(nr_bp_mutex); -/* - * An array containing refcount of threads using a given bkpt register - * Accesses are synchronised by acquiring hw_breakpoint_lock - */ -unsigned int hbp_user_refcount[HBP_NUM]; +static unsigned int max_task_bp_pinned(int cpu) +{ + int i; + unsigned int *tsk_pinned = per_cpu(task_bp_pinned, cpu); -/* - * Load the debug registers during startup of a CPU. 
- */ -void load_debug_registers(void) + for (i = HBP_NUM -1; i >= 0; i--) { + if (tsk_pinned[i] > 0) + return i + 1; + } + + return 0; +} + +static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu) { - unsigned long flags; - struct task_struct *tsk = current; + if (cpu >= 0) { + slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu); + slots->pinned += max_task_bp_pinned(cpu); + slots->flexible = per_cpu(nr_bp_flexible, cpu); + + return; + } + + for_each_online_cpu(cpu) { + unsigned int nr; - spin_lock_bh(&hw_breakpoint_lock); + nr = per_cpu(nr_cpu_bp_pinned, cpu); + nr += max_task_bp_pinned(cpu); - /* Prevent IPIs for new kernel breakpoint updates */ - local_irq_save(flags); - arch_update_kernel_hw_breakpoint(NULL); - local_irq_restore(flags); + if (nr > slots->pinned) + slots->pinned = nr; - if (test_tsk_thread_flag(tsk, TIF_DEBUG)) - arch_install_thread_hw_breakpoint(tsk); + nr = per_cpu(nr_bp_flexible, cpu); - spin_unlock_bh(&hw_breakpoint_lock); + if (nr > slots->flexible) + slots->flexible = nr; + } } -/* - * Erase all the hardware breakpoint info associated with a thread. - * - * If tsk != current then tsk must not be usable (for example, a - * child being cleaned up from a failed fork). - */ -void flush_thread_hw_breakpoint(struct task_struct *tsk) +static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable) { - int i; - struct thread_struct *thread = &(tsk->thread); + int count = 0; + struct perf_counter *counter; + struct perf_counter_context *ctx = tsk->perf_counter_ctxp; + unsigned int *task_bp_pinned; + struct list_head *list; + unsigned long flags; + + if (WARN_ONCE(!ctx, "No perf context for this task")) + return; - spin_lock_bh(&hw_breakpoint_lock); + list = &ctx->counter_list; - /* The thread no longer has any breakpoints associated with it */ - clear_tsk_thread_flag(tsk, TIF_DEBUG); - for (i = 0; i < HBP_NUM; i++) { - if (thread->hbp[i]) { - hbp_user_refcount[i]--; - kfree(thread->hbp[i]); - thread->hbp[i] = NULL; - } + spin_lock_irqsave(&ctx->lock, flags); + + /* + * The current breakpoint counter is not included in the list + * at the start() callback time + */ + list_for_each_entry(counter, list, list_entry) { + if (counter->attr.type == PERF_TYPE_BREAKPOINT) + count++; } - arch_flush_thread_hw_breakpoint(tsk); + spin_unlock_irqrestore(&ctx->lock, flags); + + if (WARN_ONCE(count < 0, "No breakpoint counter found in the counter list")) + return; - /* Actually uninstall the breakpoints if necessary */ - if (tsk == current) - arch_uninstall_thread_hw_breakpoint(); - spin_unlock_bh(&hw_breakpoint_lock); + task_bp_pinned = per_cpu(task_bp_pinned, cpu); + if (enable) { + task_bp_pinned[count]++; + if (count > 0) + task_bp_pinned[count-1]--; + } else { + task_bp_pinned[count]--; + if (count > 0) + task_bp_pinned[count-1]++; + } } -/* - * Copy the hardware breakpoint info from a thread to its cloned child. - */ -int copy_thread_hw_breakpoint(struct task_struct *tsk, - struct task_struct *child, unsigned long clone_flags) +static void toggle_bp_slot(struct perf_counter *counter, bool enable) { - /* - * We will assume that breakpoint settings are not inherited - * and the child starts out with no debug registers set. - * But what about CLONE_PTRACE? 
- */ - clear_tsk_thread_flag(child, TIF_DEBUG); + int cpu = counter->cpu; + unsigned int *nr; + struct task_struct *tsk = counter->ctx->task; + + /* Flexible */ + if (!counter->attr.pinned) { + if (cpu >= 0) { + nr = &per_cpu(nr_bp_flexible, cpu); + goto toggle; + } + + for_each_online_cpu(cpu) { + nr = &per_cpu(nr_bp_flexible, cpu); + goto toggle; + } + } + /* Pinned counter task profiling */ + if (tsk) { + if (cpu >= 0) { + toggle_bp_task_slot(tsk, cpu, enable); + return; + } + + for_each_online_cpu(cpu) + toggle_bp_task_slot(tsk, cpu, enable); + return; + } - /* We will call flush routine since the debugregs are not inherited */ - arch_flush_thread_hw_breakpoint(child); + /* Pinned counter cpu profiling */ + nr = &per_cpu(nr_bp_flexible, counter->cpu); - return 0; +toggle: + *nr = enable ? *nr + 1 : *nr - 1; } -static int __register_user_hw_breakpoint(int pos, struct task_struct *tsk, - struct hw_breakpoint *bp) +static int reserve_bp_slot(struct perf_counter *counter) { - struct thread_struct *thread = &(tsk->thread); - int rc; + struct bp_busy_slots slots = {0}; + int ret = 0; + + mutex_lock(&nr_bp_mutex); + + fetch_bp_busy_slots(&slots, counter->cpu); + + if (!counter->attr.pinned) { + /* + * If there are already flexible counters here, + * there is at least one slot reserved for all + * of them. Just join the party. + * + * Otherwise, check there is at least one free slot + */ + if (!slots.flexible && slots.pinned == HBP_NUM) { + ret = -ENOSPC; + goto end; + } - /* Do not overcommit. Fail if kernel has used the hbp registers */ - if (pos >= hbp_kernel_pos) - return -ENOSPC; + /* Flexible counters need at least one slot */ + } else if (slots.pinned + (!!slots.flexible) == HBP_NUM) { + ret = -ENOSPC; + goto end; + } - rc = arch_validate_hwbkpt_settings(bp, tsk); - if (rc) - return rc; + toggle_bp_slot(counter, true); - thread->hbp[pos] = bp; - hbp_user_refcount[pos]++; +end: + mutex_unlock(&nr_bp_mutex); - arch_update_user_hw_breakpoint(pos, tsk); - /* - * Does it need to be installed right now? 
- * Otherwise it will get installed the next time tsk runs - */ - if (tsk == current) - arch_install_thread_hw_breakpoint(tsk); + return 0; +} + +static void release_bp_slot(struct perf_counter *counter) +{ + mutex_lock(&nr_bp_mutex); + + toggle_bp_slot(counter, false); - return rc; + mutex_unlock(&nr_bp_mutex); } -/* - * Modify the address of a hbp register already in use by the task - * Do not invoke this in-lieu of a __unregister_user_hw_breakpoint() - */ -static int __modify_user_hw_breakpoint(int pos, struct task_struct *tsk, - struct hw_breakpoint *bp) +int __register_perf_hw_breakpoint(struct perf_counter *counter) { - struct thread_struct *thread = &(tsk->thread); + int ret; + struct hw_breakpoint *bp = counter->hw.bp; - if ((pos >= hbp_kernel_pos) || (arch_validate_hwbkpt_settings(bp, tsk))) - return -EINVAL; + ret = arch_validate_hwbkpt_settings(bp, counter->ctx->task); + if (ret) + return ret; - if (thread->hbp[pos] == NULL) + if (!bp->triggered) return -EINVAL; - thread->hbp[pos] = bp; - /* - * 'pos' must be that of a hbp register already used by 'tsk' - * Otherwise arch_modify_user_hw_breakpoint() will fail - */ - arch_update_user_hw_breakpoint(pos, tsk); + return 0; +} - if (tsk == current) - arch_install_thread_hw_breakpoint(tsk); +int register_perf_hw_breakpoint(struct perf_counter *counter) +{ + counter->hw.bp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL); + if (!counter->hw.bp) + return -ENOMEM; - return 0; + arch_fill_perf_breakpoint(counter); + counter->hw.bp->triggered = perf_bpcounter_event; + + counter->hw.bp->counter = counter; + + return __register_perf_hw_breakpoint(counter); } -static void __unregister_user_hw_breakpoint(int pos, struct task_struct *tsk) +static int register_user_hw_breakpoint_cpu(pid_t pid, + struct hw_breakpoint *bp, + int cpu) { - hbp_user_refcount[pos]--; - tsk->thread.hbp[pos] = NULL; + struct perf_counter_attr *attr; + struct perf_counter_context *ctx; + struct perf_counter *counter; + int ret; + + attr = kzalloc(sizeof(*attr), GFP_KERNEL); + if (!attr) + return -ENOMEM; + + attr->type = PERF_TYPE_BREAKPOINT; + attr->size = sizeof(*attr); + attr->pinned = 1; + + ctx = find_get_context(pid, cpu); + if (IS_ERR(ctx)) { + ret = PTR_ERR(ctx); + goto fail_ctx; + } + + counter = kzalloc(sizeof(*counter), GFP_KERNEL); + if (!counter) { + ret = -ENOMEM; + goto fail_counter; + } + + counter->hw.bp = bp; + bp->counter = counter; - arch_update_user_hw_breakpoint(pos, tsk); + ret = __perf_counter_init(counter, attr, cpu, ctx, NULL, NULL); + if (ret) + goto fail_init; - if (tsk == current) - arch_install_thread_hw_breakpoint(tsk); + counter->owner = current; + get_task_struct(current); + mutex_lock(¤t->perf_counter_mutex); + list_add_tail_rcu(&counter->owner_entry, ¤t->perf_counter_list); + mutex_unlock(¤t->perf_counter_mutex); + + if (!bp->inactive) + perf_install_in_context(counter->ctx, counter, counter->cpu); + + return 0; + +fail_init: + free_counter(counter); +fail_counter: + kfree(ctx); +fail_ctx: + kfree(attr); + + return ret; } /** @@ -220,79 +320,42 @@ static void __unregister_user_hw_breakpoint(int pos, struct task_struct *tsk) * */ int register_user_hw_breakpoint(struct task_struct *tsk, - struct hw_breakpoint *bp) + struct hw_breakpoint *bp) { - struct thread_struct *thread = &(tsk->thread); - int i, rc = -ENOSPC; - - spin_lock_bh(&hw_breakpoint_lock); - - for (i = 0; i < hbp_kernel_pos; i++) { - if (!thread->hbp[i]) { - rc = __register_user_hw_breakpoint(i, tsk, bp); - break; - } - } - if (!rc) - set_tsk_thread_flag(tsk, TIF_DEBUG); - 
- spin_unlock_bh(&hw_breakpoint_lock); - return rc; + return register_user_hw_breakpoint_cpu(tsk->pid, bp, -1); } EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); /** * modify_user_hw_breakpoint - modify a user-space hardware breakpoint - * @tsk: pointer to 'task_struct' of the process to which the address belongs * @bp: the breakpoint structure to unregister * */ int modify_user_hw_breakpoint(struct task_struct *tsk, struct hw_breakpoint *bp) { - struct thread_struct *thread = &(tsk->thread); - int i, ret = -ENOENT; - - spin_lock_bh(&hw_breakpoint_lock); - for (i = 0; i < hbp_kernel_pos; i++) { - if (bp == thread->hbp[i]) { - ret = __modify_user_hw_breakpoint(i, tsk, bp); - break; - } - } - spin_unlock_bh(&hw_breakpoint_lock); - return ret; + /* + * FIXME: do it without unregistering + * - We don't want to lose our slot + * - If the new bp is incorrect, don't lose the older one + */ + unregister_hw_breakpoint(bp); + return register_user_hw_breakpoint(tsk, bp); } EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint); /** - * unregister_user_hw_breakpoint - unregister a user-space hardware breakpoint - * @tsk: pointer to 'task_struct' of the process to which the address belongs + * unregister_hw_breakpoint - unregister a user-space hardware breakpoint * @bp: the breakpoint structure to unregister * */ -void unregister_user_hw_breakpoint(struct task_struct *tsk, - struct hw_breakpoint *bp) +void unregister_hw_breakpoint(struct hw_breakpoint *bp) { - struct thread_struct *thread = &(tsk->thread); - int i, pos = -1, hbp_counter = 0; - - spin_lock_bh(&hw_breakpoint_lock); - for (i = 0; i < hbp_kernel_pos; i++) { - if (thread->hbp[i]) - hbp_counter++; - if (bp == thread->hbp[i]) - pos = i; - } - if (pos >= 0) { - __unregister_user_hw_breakpoint(pos, tsk); - hbp_counter--; - } - if (!hbp_counter) - clear_tsk_thread_flag(tsk, TIF_DEBUG); - - spin_unlock_bh(&hw_breakpoint_lock); + perf_counter_remove_from_context(bp->counter); + /* Hmm, find a way to ensure there are no pending trigger callbacks (rcu) */ + free_counter(bp->counter); } -EXPORT_SYMBOL_GPL(unregister_user_hw_breakpoint); +EXPORT_SYMBOL_GPL(unregister_hw_breakpoint); + /** * register_kernel_hw_breakpoint - register a hardware breakpoint for kernel space @@ -302,67 +365,13 @@ EXPORT_SYMBOL_GPL(unregister_user_hw_breakpoint); * @bp->triggered must be set properly before invocation * */ -int register_kernel_hw_breakpoint(struct hw_breakpoint *bp) +int register_kernel_hw_breakpoint(struct hw_breakpoint *bp, int cpu) { - int rc; - - rc = arch_validate_hwbkpt_settings(bp, NULL); - if (rc) - return rc; - - spin_lock_bh(&hw_breakpoint_lock); - - rc = -ENOSPC; - /* Check if we are over-committing */ - if ((hbp_kernel_pos > 0) && (!hbp_user_refcount[hbp_kernel_pos-1])) { - hbp_kernel_pos--; - hbp_kernel[hbp_kernel_pos] = bp; - on_each_cpu(arch_update_kernel_hw_breakpoint, NULL, 1); - rc = 0; - } - - spin_unlock_bh(&hw_breakpoint_lock); - return rc; + /* FIXME: support system wide (every cpu) */ + return register_user_hw_breakpoint_cpu(-1, bp, cpu); } EXPORT_SYMBOL_GPL(register_kernel_hw_breakpoint); -/** - * unregister_kernel_hw_breakpoint - unregister a HW breakpoint for kernel space - * @bp: the breakpoint structure to unregister - * - * Uninstalls and unregisters @bp. 
- */ -void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp) -{ - int i, j; - - spin_lock_bh(&hw_breakpoint_lock); - - /* Find the 'bp' in our list of breakpoints for kernel */ - for (i = hbp_kernel_pos; i < HBP_NUM; i++) - if (bp == hbp_kernel[i]) - break; - - /* Check if we did not find a match for 'bp'. If so return early */ - if (i == HBP_NUM) { - spin_unlock_bh(&hw_breakpoint_lock); - return; - } - - /* - * We'll shift the breakpoints one-level above to compact if - * unregistration creates a hole - */ - for (j = i; j > hbp_kernel_pos; j--) - hbp_kernel[j] = hbp_kernel[j-1]; - - hbp_kernel[hbp_kernel_pos] = NULL; - on_each_cpu(arch_update_kernel_hw_breakpoint, NULL, 1); - hbp_kernel_pos++; - - spin_unlock_bh(&hw_breakpoint_lock); -} -EXPORT_SYMBOL_GPL(unregister_kernel_hw_breakpoint); static struct notifier_block hw_breakpoint_exceptions_nb = { .notifier_call = hw_breakpoint_exceptions_notify, @@ -376,3 +385,12 @@ static int __init init_hw_breakpoint(void) } core_initcall(init_hw_breakpoint); + +struct pmu perf_ops_bp = { + .enable = arch_install_hw_breakpoint, + .disable = arch_uninstall_hw_breakpoint, + .read = hw_breakpoint_pmu_read, + .unthrottle = hw_breakpoint_pmu_unthrottle, + .open = reserve_bp_slot, + .close = release_bp_slot +}; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index e0d91fd..e2170f8 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -28,6 +28,7 @@ #include #include +#include #include /* @@ -411,7 +412,7 @@ static void __perf_counter_remove_from_context(void *info) * When called from perf_counter_exit_task, it's OK because the * context has been detached from its task. */ -static void perf_counter_remove_from_context(struct perf_counter *counter) +void perf_counter_remove_from_context(struct perf_counter *counter) { struct perf_counter_context *ctx = counter->ctx; struct task_struct *task = ctx->task; @@ -817,7 +818,7 @@ static void __perf_install_in_context(void *info) * * Must be called with ctx->mutex held. 
*/ -static void +void perf_install_in_context(struct perf_counter_context *ctx, struct perf_counter *counter, int cpu) @@ -1581,7 +1582,7 @@ __perf_counter_init_context(struct perf_counter_context *ctx, ctx->task = task; } -static struct perf_counter_context *find_get_context(pid_t pid, int cpu) +struct perf_counter_context *find_get_context(pid_t pid, int cpu) { struct perf_counter_context *ctx; struct perf_cpu_context *cpuctx; @@ -1684,10 +1685,14 @@ static void free_counter_rcu(struct rcu_head *head) static void perf_pending_sync(struct perf_counter *counter); -static void free_counter(struct perf_counter *counter) +void free_counter(struct perf_counter *counter) { perf_pending_sync(counter); + if (counter->pmu->close) + if (counter->state != PERF_COUNTER_STATE_UNOPENED) + counter->pmu->close(counter); + if (!counter->parent) { atomic_dec(&nr_counters); if (counter->attr.mmap) @@ -3995,6 +4000,26 @@ static const struct pmu *tp_perf_counter_init(struct perf_counter *counter) } #endif +static const struct pmu *bp_perf_counter_init(struct perf_counter *counter) +{ + /* + * The breakpoint is already filled if we haven't created the counter + * through perf syscall + */ + if (!counter->hw.bp) + register_perf_hw_breakpoint(counter); + else + __register_perf_hw_breakpoint(counter); + + return &perf_ops_bp; +} + +void +perf_bpcounter_event(struct hw_breakpoint *bp, struct pt_regs *regs) +{ + /* TODO (need to know where we encode the id of the bp counter) */ +} + atomic_t perf_swcounter_enabled[PERF_COUNT_SW_MAX]; static void sw_perf_counter_destroy(struct perf_counter *counter) @@ -4050,26 +4075,18 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter) return pmu; } -/* - * Allocate and initialize a counter structure - */ -static struct perf_counter * -perf_counter_alloc(struct perf_counter_attr *attr, - int cpu, - struct perf_counter_context *ctx, - struct perf_counter *group_leader, - struct perf_counter *parent_counter, - gfp_t gfpflags) + +int __perf_counter_init(struct perf_counter *counter, + struct perf_counter_attr *attr, + int cpu, + struct perf_counter_context *ctx, + struct perf_counter *group_leader, + struct perf_counter *parent_counter) { const struct pmu *pmu; - struct perf_counter *counter; struct hw_perf_counter *hwc; long err; - counter = kzalloc(sizeof(*counter), gfpflags); - if (!counter) - return ERR_PTR(-ENOMEM); - /* * Single counters are their own group leaders, with an * empty sibling list: @@ -4135,6 +4152,10 @@ perf_counter_alloc(struct perf_counter_attr *attr, pmu = tp_perf_counter_init(counter); break; + case PERF_TYPE_BREAKPOINT: + pmu = bp_perf_counter_init(counter); + break; + default: break; } @@ -4145,15 +4166,19 @@ done: else if (IS_ERR(pmu)) err = PTR_ERR(pmu); - if (err) { - if (counter->ns) - put_pid_ns(counter->ns); - kfree(counter); - return ERR_PTR(err); - } + if (err) + goto fail; counter->pmu = pmu; + if (pmu->open) { + err = pmu->open(counter); + if (err) { + counter->state = PERF_COUNTER_STATE_UNOPENED; + goto fail; + } + } + if (!counter->parent) { atomic_inc(&nr_counters); if (counter->attr.mmap) @@ -4164,6 +4189,38 @@ done: atomic_inc(&nr_task_counters); } + return 0; + +fail: + if (counter->ns) + put_pid_ns(counter->ns); + kfree(counter); + return err; +} + +/* + * Allocate and initialize a counter structure + */ +static struct perf_counter * +perf_counter_alloc(struct perf_counter_attr *attr, + int cpu, + struct perf_counter_context *ctx, + struct perf_counter *group_leader, + struct perf_counter *parent_counter, + gfp_t 
gfpflags) +{ + int err; + struct perf_counter *counter; + + counter = kzalloc(sizeof(*counter), gfpflags); + if (!counter) + return ERR_PTR(-ENOMEM); + + err = __perf_counter_init(counter, attr, cpu, ctx, group_leader, + parent_counter); + if (err) + return ERR_PTR(err); + return counter; } @@ -4405,7 +4462,7 @@ SYSCALL_DEFINE5(perf_counter_open, counter->owner = current; get_task_struct(current); mutex_lock(¤t->perf_counter_mutex); - list_add_tail(&counter->owner_entry, ¤t->perf_counter_list); + list_add_tail_rcu(&counter->owner_entry, ¤t->perf_counter_list); mutex_unlock(¤t->perf_counter_mutex); err_fput_free_put_context: @@ -4488,6 +4545,10 @@ inherit_counter(struct perf_counter *parent_counter, list_add_tail(&child_counter->child_list, &parent_counter->child_list); mutex_unlock(&parent_counter->child_mutex); + mutex_lock(&child->perf_counter_mutex); + list_add_tail_rcu(&child_counter->owner_entry, &child->perf_counter_list); + mutex_unlock(&child->perf_counter_mutex); + return child_counter; } diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index 2fde875..b1a17f2 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -208,7 +208,8 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr) #endif entry->ksym_hbp->triggered = (void *)ksym_hbp_handler; - ret = register_kernel_hw_breakpoint(entry->ksym_hbp); + /* TODO: cross CPU */ + ret = register_kernel_hw_breakpoint(entry->ksym_hbp, 0); if (ret < 0) { printk(KERN_INFO "ksym_tracer request failed. Try again" " later!!\n"); @@ -268,7 +269,7 @@ static void __ksym_trace_reset(void) mutex_lock(&ksym_tracer_mutex); hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head, ksym_hlist) { - unregister_kernel_hw_breakpoint(entry->ksym_hbp); + unregister_hw_breakpoint(entry->ksym_hbp); ksym_filter_entry_count--; hlist_del_rcu(&(entry->ksym_hlist)); synchronize_rcu(); @@ -334,10 +335,10 @@ static ssize_t ksym_trace_filter_write(struct file *file, } } if (changed) { - unregister_kernel_hw_breakpoint(entry->ksym_hbp); + unregister_hw_breakpoint(entry->ksym_hbp); entry->ksym_hbp->info.type = op; if (op > 0) { - ret = register_kernel_hw_breakpoint(entry->ksym_hbp); + ret = register_kernel_hw_breakpoint(entry->ksym_hbp, 0); if (ret == 0) goto out; }
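
Note on the DR7 layout used above: decode_dr7() is moved out of ptrace.c and shared with the new per-cpu install/uninstall paths. The following stand-alone harness is illustrative only and not part of the patch; the DR_* constants are copied from arch/x86/include/asm/debugreg.h and the main() driver is made up to show the round trip.

/*
 * Hypothetical user-space harness exercising the decode_dr7() bit layout.
 * Not kernel code; constants mirror asm/debugreg.h for illustration only.
 */
#include <stdio.h>
#include <assert.h>

#define DR_CONTROL_SHIFT	16	/* per-bp len/type nibbles start at bit 16 */
#define DR_CONTROL_SIZE		4	/* 4 control bits per breakpoint */
#define DR_ENABLE_SIZE		2	/* 2 enable bits (local/global) per breakpoint */

/* Same logic as decode_dr7() in arch/x86/kernel/hw_breakpoint.c above */
static int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type)
{
	int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);

	*len  = (bp_info & 0xc) | 0x40;	/* HW_BREAKPOINT_LEN_* encoding */
	*type = (bp_info & 0x3) | 0x80;	/* HW_BREAKPOINT_* type encoding */

	return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
}

int main(void)
{
	/* Breakpoint 1: local-enable bit set, len = 1 byte (00), type = write (01) */
	unsigned long dr7 = (0x1UL << (1 * DR_ENABLE_SIZE)) |
			    (0x1UL << (DR_CONTROL_SHIFT + 1 * DR_CONTROL_SIZE));
	unsigned len, type;
	int enabled = decode_dr7(dr7, 1, &len, &type);

	printf("enabled=%d len=0x%x type=0x%x\n", enabled, len, type);
	assert(enabled == 0x1 && len == 0x40 && type == 0x81);
	return 0;
}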
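A minimal sketch of how a caller would drive the reworked registration API after this patch, assuming only what the diff shows: a struct hw_breakpoint with info.address/len/type and a ->triggered callback, registered with register_user_hw_breakpoint(tsk, bp) (which now pins a perf counter underneath). The names sample_triggered() and sample_install() are hypothetical, not part of the patch.

/* Illustrative kernel-side usage sketch, assuming the API shown in the diff. */
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <asm/hw_breakpoint.h>

static void sample_triggered(struct hw_breakpoint *bp, struct pt_regs *regs)
{
	pr_info("breakpoint hit at 0x%lx\n", bp->info.address);
}

static int sample_install(struct task_struct *tsk, unsigned long addr)
{
	struct hw_breakpoint *bp;
	int ret;

	bp = kzalloc(sizeof(*bp), GFP_KERNEL);
	if (!bp)
		return -ENOMEM;

	bp->info.address = addr;
	bp->info.len = HW_BREAKPOINT_LEN_1;
	bp->info.type = HW_BREAKPOINT_WRITE;
	bp->triggered = sample_triggered;

	/* Task-bound breakpoint; becomes a pinned perf counter underneath */
	ret = register_user_hw_breakpoint(tsk, bp);
	if (ret)
		kfree(bp);
	return ret;
}

Kernel-space users follow the same pattern but call register_kernel_hw_breakpoint(bp, cpu), as the ksym tracer hunk above now does.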
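The pinned/flexible accounting in reserve_bp_slot() reduces to simple arithmetic: flexible counters multiplex and collectively need at most one slot, while each pinned counter needs its own. The model below is a user-space illustration only; bp_slot_available() is a hypothetical helper mirroring the checks in the patch, not kernel code.

/* Stand-alone model (assumption, for illustration) of the reserve_bp_slot() check. */
#include <assert.h>
#include <stdbool.h>

#define HBP_NUM 4

static bool bp_slot_available(unsigned pinned, unsigned flexible, bool want_pinned)
{
	if (!want_pinned) {
		/* Flexible counters share: an existing flexible population or
		 * one free slot is enough for all of them. */
		return flexible || pinned < HBP_NUM;
	}
	/* A pinned counter needs a slot of its own, while still leaving one
	 * slot for flexible counters if any are present. */
	return pinned + (flexible ? 1 : 0) < HBP_NUM;
}

int main(void)
{
	assert(bp_slot_available(3, 0, true));	 /* a 4th pinned bp still fits */
	assert(!bp_slot_available(4, 0, false)); /* all slots pinned: no room left */
	assert(!bp_slot_available(3, 1, true));	 /* must keep a slot for flexible */
	return 0;
}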