Signed-off-by: Andrew Morton

---

 arch/i386/kernel/ptrace.c       |   40 
 arch/powerpc/kernel/ptrace.c    |  242 +++
 arch/powerpc/kernel/signal_32.c |   52 
 arch/powerpc/lib/sstep.c        |    3 
 arch/x86_64/ia32/ia32entry.S    |    2 
 arch/x86_64/ia32/ptrace32.c     |   56 
 arch/x86_64/kernel/ptrace.c     |   46 
 fs/proc/base.c                  |   40 
 include/asm-x86_64/tracehook.h  |    1 
 include/linux/ptrace.h          |  222 +++
 include/linux/sched.h           |    4 
 init/Kconfig                    |   12 
 kernel/Makefile                 |    3 
 kernel/exit.c                   |   13 
 kernel/fork.c                   |    2 
 kernel/ptrace.c                 | 1823 ++++++++++++++++++++++++++++--
 kernel/sys_ni.c                 |    4 
 17 files changed, 2398 insertions(+), 167 deletions(-)

diff -puN arch/i386/kernel/ptrace.c~utrace-utrace-ptrace-compat arch/i386/kernel/ptrace.c
--- a/arch/i386/kernel/ptrace.c~utrace-utrace-ptrace-compat
+++ a/arch/i386/kernel/ptrace.c
@@ -731,6 +731,46 @@ const struct utrace_regset_view utrace_i
 };
 EXPORT_SYMBOL_GPL(utrace_i386_native);
 
+#ifdef CONFIG_PTRACE
+static const struct ptrace_layout_segment i386_uarea[] = {
+	{0, FRAME_SIZE*4, 0, 0},
+	{FRAME_SIZE*4, offsetof(struct user, u_debugreg[0]), -1, 0},
+	{offsetof(struct user, u_debugreg[0]),
+	 offsetof(struct user, u_debugreg[8]), 4, 0},
+	{0, 0, -1, 0}
+};
+
+int arch_ptrace(long *req, struct task_struct *child,
+		struct utrace_attached_engine *engine,
+		unsigned long addr, unsigned long data, long *val)
+{
+	switch (*req) {
+	case PTRACE_PEEKUSR:
+		return ptrace_peekusr(child, engine, i386_uarea, addr, data);
+	case PTRACE_POKEUSR:
+		return ptrace_pokeusr(child, engine, i386_uarea, addr, data);
+	case PTRACE_GETREGS:
+		return ptrace_whole_regset(child, engine, data, 0, 0);
+	case PTRACE_SETREGS:
+		return ptrace_whole_regset(child, engine, data, 0, 1);
+	case PTRACE_GETFPREGS:
+		return ptrace_whole_regset(child, engine, data, 1, 0);
+	case PTRACE_SETFPREGS:
+		return ptrace_whole_regset(child, engine, data, 1, 1);
+	case PTRACE_GETFPXREGS:
+		return ptrace_whole_regset(child, engine, data, 2, 0);
+	case PTRACE_SETFPXREGS:
+		return ptrace_whole_regset(child, engine, data, 2, 1);
+	case PTRACE_GET_THREAD_AREA:
+	case PTRACE_SET_THREAD_AREA:
+		return ptrace_onereg_access(child, engine,
+					    utrace_native_view(current), 3,
+					    addr, (void __user *)data,
+					    *req == PTRACE_SET_THREAD_AREA);
+	}
+	return -ENOSYS;
+}
+#endif
 
 void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
 {
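For readers following the conversion: the i386_uarea table above is what keeps the
classic PTRACE_PEEKUSR/POKEUSR word interface working on top of utrace regsets.
A standalone, illustrative user-space sketch of the request it serves (not part of
the patch; assumes a Linux/x86 host, minimal error handling):

	/* Illustrative only: a tracer reading a child's debug register via
	 * PTRACE_PEEKUSR, the request routed through i386_uarea above. */
	#include <stdio.h>
	#include <stddef.h>
	#include <signal.h>
	#include <sys/ptrace.h>
	#include <sys/user.h>
	#include <sys/wait.h>
	#include <unistd.h>

	int main(void)
	{
		pid_t child = fork();
		if (child == 0) {
			ptrace(PTRACE_TRACEME, 0, NULL, NULL);
			raise(SIGSTOP);		/* stop so the parent can peek */
			_exit(0);
		}
		waitpid(child, NULL, 0);	/* child is now ptrace-stopped */

		/* Offset into the virtual "struct user"; the kernel maps it to
		 * the debug-register regset via the segment table. */
		long dr7 = ptrace(PTRACE_PEEKUSR, child,
				  (void *)offsetof(struct user, u_debugreg[7]),
				  NULL);
		printf("DR7 = %#lx\n", dr7);

		ptrace(PTRACE_DETACH, child, NULL, NULL);
		return 0;
	}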
diff -puN arch/powerpc/kernel/ptrace.c~utrace-utrace-ptrace-compat arch/powerpc/kernel/ptrace.c
--- a/arch/powerpc/kernel/ptrace.c~utrace-utrace-ptrace-compat
+++ a/arch/powerpc/kernel/ptrace.c
@@ -458,13 +458,251 @@ const struct utrace_regset_view utrace_p
 	.regsets = ppc32_regsets, .n = ARRAY_SIZE(ppc32_regsets)
 };
 EXPORT_SYMBOL_GPL(utrace_ppc32_view);
+#endif
+
-long compat_sys_ptrace(int request, int pid, unsigned long addr,
-		       unsigned long data)
+#ifdef CONFIG_PTRACE
+static const struct ptrace_layout_segment ppc_uarea[] = {
+	{0, PT_FPR0 * sizeof(long), 0, 0},
+	{PT_FPR0 * sizeof(long), (PT_FPSCR + 1) * sizeof(long), 1, 0},
+	{0, 0, -1, 0}
+};
+
+int arch_ptrace(long *request, struct task_struct *child,
+		struct utrace_attached_engine *engine,
+		unsigned long addr, unsigned long data, long *val)
 {
+	switch (*request) {
+	case PTRACE_PEEKUSR:
+		return ptrace_peekusr(child, engine, ppc_uarea, addr, data);
+	case PTRACE_POKEUSR:
+		return ptrace_pokeusr(child, engine, ppc_uarea, addr, data);
+	case PPC_PTRACE_GETREGS: /* Get GPRs 0 - 31. */
+	case PPC_PTRACE_SETREGS: /* Set GPRs 0 - 31. */
+		return ptrace_regset_access(child, engine,
+					    utrace_native_view(current), 0,
+					    0, 32 * sizeof(long),
+					    (void __user *)addr,
+					    *request == PPC_PTRACE_SETREGS);
+	case PPC_PTRACE_GETFPREGS: /* Get FPRs 0 - 31. */
+	case PPC_PTRACE_SETFPREGS: /* Set FPRs 0 - 31. */
+		return ptrace_regset_access(child, engine,
+					    utrace_native_view(current), 1,
+					    0, 32 * sizeof(double),
+					    (void __user *)addr,
+					    *request == PPC_PTRACE_SETFPREGS);
+#ifdef CONFIG_PPC64
+	case PTRACE_GET_DEBUGREG:
+	case PTRACE_SET_DEBUGREG:
+		return ptrace_onereg_access(child, engine,
+					    utrace_native_view(current), 3,
+					    addr, (unsigned long __user *)data,
+					    *request == PTRACE_SET_DEBUGREG);
+#endif /* CONFIG_PPC64 */
+#ifdef CONFIG_ALTIVEC
+	case PTRACE_GETVRREGS:
+		return ptrace_whole_regset(child, engine, data, 2, 0);
+	case PTRACE_SETVRREGS:
+		return ptrace_whole_regset(child, engine, data, 2, 1);
+#endif
+#ifdef CONFIG_SPE
+#ifdef CONFIG_ALTIVEC
+#define REGSET_EVR 3
+#else
+#define REGSET_EVR 2
+#endif
+	case PTRACE_GETEVRREGS:
+		return ptrace_whole_regset(child, engine, data, REGSET_EVR, 0);
+	case PTRACE_SETEVRREGS:
+		return ptrace_whole_regset(child, engine, data, REGSET_EVR, 1);
+#endif
+	}
 	return -ENOSYS;
 }
+
+#ifdef CONFIG_COMPAT
+#include
+#include
+
+static const struct ptrace_layout_segment ppc32_uarea[] = {
+	{0, PT_FPR0 * sizeof(u32), 0, 0},
+	{PT_FPR0 * sizeof(u32), (PT_FPSCR32 + 1) * sizeof(u32), 1, 0},
+	{0, 0, -1, 0}
+};
+
+int arch_compat_ptrace(compat_long_t *request,
+		       struct task_struct *child,
+		       struct utrace_attached_engine *engine,
+		       compat_ulong_t addr, compat_ulong_t data,
+		       compat_long_t *val)
+{
+	void __user *uaddr = (void __user *) (unsigned long) addr;
+	int ret = -ENOSYS;
+
+	switch (*request) {
+	case PTRACE_PEEKUSR:
+		return ptrace_compat_peekusr(child, engine, ppc32_uarea,
+					     addr, data);
+	case PTRACE_POKEUSR:
+		return ptrace_compat_pokeusr(child, engine, ppc32_uarea,
+					     addr, data);
+
+	case PPC_PTRACE_GETREGS: /* Get GPRs 0 - 31. */
+	case PPC_PTRACE_SETREGS: /* Set GPRs 0 - 31. */
+		return ptrace_regset_access(child, engine,
+					    utrace_native_view(current), 0,
+					    0, 32 * sizeof(compat_long_t),
+					    uaddr,
+					    *request == PPC_PTRACE_SETREGS);
+	case PPC_PTRACE_GETFPREGS: /* Get FPRs 0 - 31. */
+	case PPC_PTRACE_SETFPREGS: /* Set FPRs 0 - 31. */
+		return ptrace_regset_access(child, engine,
+					    utrace_native_view(current), 1,
+					    0, 32 * sizeof(double),
+					    uaddr,
+					    *request == PPC_PTRACE_SETFPREGS);
+#ifdef CONFIG_ALTIVEC
+	case PTRACE_GETVRREGS:
+		return ptrace_whole_regset(child, engine, data, 2, 0);
+	case PTRACE_SETVRREGS:
+		return ptrace_whole_regset(child, engine, data, 2, 1);
+#endif
+	case PTRACE_GET_DEBUGREG:
+	case PTRACE_SET_DEBUGREG:
+		return ptrace_onereg_access(child, engine,
+					    utrace_native_view(current), 3,
+					    addr,
+					    (unsigned long __user *)
+					    (unsigned long) data,
+					    *request == PTRACE_SET_DEBUGREG);
+
+	/*
+	 * Read 4 bytes of the other process' storage
+	 *  data is a pointer specifying where the user wants the
+	 *	4 bytes copied into
+	 *  addr is a pointer in the user's storage that contains an 8 byte
+	 *	address in the other process of the 4 bytes that is to be read
+	 * (this is run in a 32-bit process looking at a 64-bit process)
+	 * when I and D space are separate, these will need to be fixed.
+	 */
+	case PPC_PTRACE_PEEKTEXT_3264:
+	case PPC_PTRACE_PEEKDATA_3264: {
+		u32 tmp;
+		int copied;
+		u32 __user * addrOthers;
+
+		ret = -EIO;
+
+		/* Get the addr in the other process that we want to read */
+		if (get_user(addrOthers, ((u32 __user * __user *)
+					  (unsigned long) addr)) != 0)
+			break;
+
+		copied = access_process_vm(child, (u64)addrOthers, &tmp,
+					   sizeof(tmp), 0);
+		if (copied != sizeof(tmp))
+			break;
+		ret = put_user(tmp, (u32 __user *)(unsigned long)data);
+		break;
+	}
+
+	/*
+	 * Write 4 bytes into the other process' storage
+	 *  data is the 4 bytes that the user wants written
+	 *  addr is a pointer in the user's storage that contains an
+	 *	8 byte address in the other process where the 4 bytes
+	 *	that is to be written
+	 * (this is run in a 32-bit process looking at a 64-bit process)
+	 * when I and D space are separate, these will need to be fixed.
+	 */
+	case PPC_PTRACE_POKETEXT_3264:
+	case PPC_PTRACE_POKEDATA_3264: {
+		u32 tmp = data;
+		u32 __user * addrOthers;
+
+		/* Get the addr in the other process that we want to write into */
+		ret = -EIO;
+		if (get_user(addrOthers, ((u32 __user * __user *)
+					  (unsigned long) addr)) != 0)
+			break;
+		ret = 0;
+		if (access_process_vm(child, (u64)addrOthers, &tmp,
+				      sizeof(tmp), 1) == sizeof(tmp))
+			break;
+		ret = -EIO;
+		break;
+	}
+
+	/*
+	 * This is like PTRACE_PEEKUSR on a 64-bit process,
+	 * but here we access only 4 bytes at a time.
+	 */
+	case PPC_PTRACE_PEEKUSR_3264: {
+		union
+		{
+			u64 whole;
+			u32 half[2];
+		} reg;
+		int setno;
+		const struct utrace_regset *regset;
+
+		ret = -EIO;
+		if ((addr & 3) || addr > PT_FPSCR*8)
+			break;
+
+		setno = 0;
+		if (addr >= PT_FPR0*8) {
+			setno = 1;
+			addr -= PT_FPR0*8;
+		}
+		regset = utrace_regset(child, NULL,
+				       &utrace_ppc_native_view, setno);
+		ret = (*regset->get)(child, regset, addr &~ 7,
+				     sizeof(reg.whole), &reg.whole, NULL);
+		if (ret == 0)
+			ret = put_user(reg.half[(addr >> 2) & 1],
+				       (u32 __user *)(unsigned long)data);
+		break;
+	}
+
+	/*
+	 * This is like PTRACE_POKEUSR on a 64-bit process,
+	 * but here we access only 4 bytes at a time.
+	 */
+	case PPC_PTRACE_POKEUSR_3264: {
+		union
+		{
+			u64 whole;
+			u32 half[2];
+		} reg;
+		int setno;
+		const struct utrace_regset *regset;
+
+		ret = -EIO;
+		if ((addr & 3) || addr > PT_FPSCR*8)
+			break;
+
+		setno = 0;
+		if (addr >= PT_FPR0*8) {
+			setno = 1;
+			addr -= PT_FPR0*8;
+		}
+		regset = utrace_regset(child, NULL,
+				       &utrace_ppc_native_view, setno);
+		ret = (*regset->get)(child, regset, addr &~ 7,
+				     sizeof(reg.whole), &reg.whole, NULL);
+		BUG_ON(ret);
+		reg.half[(addr >> 2) & 1] = data;
+		ret = (*regset->set)(child, regset, addr &~ 7,
+				     sizeof(reg.whole), &reg.whole, NULL);
+		break;
+	}
+	}
+	return ret;
+}
+#endif /* CONFIG_COMPAT */
+#endif /* CONFIG_PTRACE */
+
 void do_syscall_trace_enter(struct pt_regs *regs)
 {
diff -puN arch/powerpc/kernel/signal_32.c~utrace-utrace-ptrace-compat arch/powerpc/kernel/signal_32.c
--- a/arch/powerpc/kernel/signal_32.c~utrace-utrace-ptrace-compat
+++ a/arch/powerpc/kernel/signal_32.c
@@ -631,6 +631,58 @@ int copy_siginfo_to_user32(struct compat
 
 #define copy_siginfo_to_user	copy_siginfo_to_user32
 
+/* mostly stolen from arch/s390/kernel/compat_signal.c */
+int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
+{
+	int err;
+	u32 tmp;
+
+	if (!access_ok (VERIFY_READ, from, sizeof(compat_siginfo_t)))
+		return -EFAULT;
+
+	err = __get_user(to->si_signo, &from->si_signo);
+	err |= __get_user(to->si_errno, &from->si_errno);
+	err |= __get_user(to->si_code, &from->si_code);
+
+	if (to->si_code < 0)
+		err |= __copy_from_user(&to->_sifields._pad, &from->_sifields._pad, SI_PAD_SIZE);
+	else {
+		switch (to->si_code >> 16) {
+		case __SI_RT >> 16: /* This is not generated by the kernel as of now. */
+		case __SI_MESGQ >> 16:
+			err |= __get_user(to->si_int, &from->si_int);
+			/* fallthrough */
+		case __SI_KILL >> 16:
+			err |= __get_user(to->si_pid, &from->si_pid);
+			err |= __get_user(to->si_uid, &from->si_uid);
+			break;
+		case __SI_CHLD >> 16:
+			err |= __get_user(to->si_pid, &from->si_pid);
+			err |= __get_user(to->si_uid, &from->si_uid);
+			err |= __get_user(to->si_utime, &from->si_utime);
+			err |= __get_user(to->si_stime, &from->si_stime);
+			err |= __get_user(to->si_status, &from->si_status);
+			break;
+		case __SI_FAULT >> 16:
+			err |= __get_user(tmp, &from->si_addr);
+			to->si_addr = (void __user *)(u64) tmp;
+			break;
+		case __SI_POLL >> 16:
+			err |= __get_user(to->si_band, &from->si_band);
+			err |= __get_user(to->si_fd, &from->si_fd);
+			break;
+		case __SI_TIMER >> 16:
+			err |= __get_user(to->si_tid, &from->si_tid);
+			err |= __get_user(to->si_overrun, &from->si_overrun);
+			err |= __get_user(to->si_int, &from->si_int);
+			break;
+		default:
+			break;
+		}
+	}
+	return err;
+}
+
 /*
  * Note: it is necessary to treat pid and sig as unsigned ints, with the
  * corresponding cast to a signed int to insure that the proper conversion
diff -puN arch/powerpc/lib/sstep.c~utrace-utrace-ptrace-compat arch/powerpc/lib/sstep.c
--- a/arch/powerpc/lib/sstep.c~utrace-utrace-ptrace-compat
+++ a/arch/powerpc/lib/sstep.c
@@ -13,6 +13,9 @@
 #include
 #include
 #include
+#ifdef CONFIG_PPC64
+#include
+#endif
 
 extern char system_call_common[];
diff -puN arch/x86_64/ia32/ia32entry.S~utrace-utrace-ptrace-compat arch/x86_64/ia32/ia32entry.S
--- a/arch/x86_64/ia32/ia32entry.S~utrace-utrace-ptrace-compat
+++ a/arch/x86_64/ia32/ia32entry.S
@@ -421,7 +421,7 @@ ia32_sys_call_table:
 	.quad sys_setuid16
 	.quad sys_getuid16
 	.quad compat_sys_stime	/* stime */		/* 25 */
-	.quad sys32_ptrace	/* ptrace */
+	.quad compat_sys_ptrace	/* ptrace */
 	.quad sys_alarm
 	.quad sys_fstat		/* (old)fstat */
 	.quad sys_pause
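The PPC_PTRACE_PEEKUSR_3264/POKEUSR_3264 cases above read or write one 32-bit
half of an aligned 64-bit register slot by overlaying a union on the full value.
A standalone model of that indexing (illustrative only; which array index names
which half depends on the host's endianness -- the kernel code runs on
big-endian ppc64):

	#include <stdint.h>
	#include <stdio.h>

	union reg64 {
		uint64_t whole;
		uint32_t half[2];
	};

	/* The 4-byte word at byte offset 'addr' inside an 8-byte-aligned
	 * slot, indexed the way the patch does: half[(addr >> 2) & 1]. */
	static uint32_t peek_half(uint64_t slot, unsigned long addr)
	{
		union reg64 reg = { .whole = slot };
		return reg.half[(addr >> 2) & 1];
	}

	static uint64_t poke_half(uint64_t slot, unsigned long addr,
				  uint32_t data)
	{
		union reg64 reg = { .whole = slot };
		reg.half[(addr >> 2) & 1] = data;	/* read-modify-write */
		return reg.whole;
	}

	int main(void)
	{
		uint64_t slot = 0x1122334455667788ULL;
		printf("%#x %#x\n", peek_half(slot, 0), peek_half(slot, 4));
		printf("%#llx\n",
		       (unsigned long long)poke_half(slot, 4, 0xdeadbeef));
		return 0;
	}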
diff -puN arch/x86_64/ia32/ptrace32.c~utrace-utrace-ptrace-compat arch/x86_64/ia32/ptrace32.c
--- a/arch/x86_64/ia32/ptrace32.c~utrace-utrace-ptrace-compat
+++ a/arch/x86_64/ia32/ptrace32.c
@@ -166,11 +166,6 @@ static int getreg32(struct task_struct *
 
 #undef R32
 
-asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
-{
-	return -ENOSYS;
-}
-
 static int
 ia32_genregs_get(struct task_struct *target,
 		 const struct utrace_regset *regset,
@@ -602,3 +597,54 @@ const struct utrace_regset_view utrace_i
 	.regsets = ia32_regsets, .n = ARRAY_SIZE(ia32_regsets)
 };
 EXPORT_SYMBOL_GPL(utrace_ia32_view);
+
+
+#ifdef CONFIG_PTRACE
+/*
+ * This matches the arch/i386/kernel/ptrace.c definitions.
+ */
+
+static const struct ptrace_layout_segment ia32_uarea[] = {
+	{0, sizeof(struct user_regs_struct32), 0, 0},
+	{sizeof(struct user_regs_struct32),
+	 offsetof(struct user32, u_debugreg[0]), -1, 0},
+	{offsetof(struct user32, u_debugreg[0]),
+	 offsetof(struct user32, u_debugreg[8]), 4, 0},
+	{0, 0, -1, 0}
+};
+
+int arch_compat_ptrace(compat_long_t *req, struct task_struct *child,
+		       struct utrace_attached_engine *engine,
+		       compat_ulong_t addr, compat_ulong_t data,
+		       compat_long_t *val)
+{
+	switch (*req) {
+	case PTRACE_PEEKUSR:
+		return ptrace_compat_peekusr(child, engine, ia32_uarea,
+					     addr, data);
+	case PTRACE_POKEUSR:
+		return ptrace_compat_pokeusr(child, engine, ia32_uarea,
+					     addr, data);
+	case PTRACE_GETREGS:
+		return ptrace_whole_regset(child, engine, data, 0, 0);
+	case PTRACE_SETREGS:
+		return ptrace_whole_regset(child, engine, data, 0, 1);
+	case PTRACE_GETFPREGS:
+		return ptrace_whole_regset(child, engine, data, 1, 0);
+	case PTRACE_SETFPREGS:
+		return ptrace_whole_regset(child, engine, data, 1, 1);
+	case PTRACE_GETFPXREGS:
+		return ptrace_whole_regset(child, engine, data, 2, 0);
+	case PTRACE_SETFPXREGS:
+		return ptrace_whole_regset(child, engine, data, 2, 1);
+	case PTRACE_GET_THREAD_AREA:
+	case PTRACE_SET_THREAD_AREA:
+		return ptrace_onereg_access(child, engine,
+					    &utrace_ia32_view, 3,
+					    addr,
+					    (void __user *)(unsigned long)data,
+					    *req == PTRACE_SET_THREAD_AREA);
+	}
+	return -ENOSYS;
+}
+#endif	/* CONFIG_PTRACE */
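A note on the casts used throughout the compat paths above, e.g.
(void __user *)(unsigned long)data: a 32-bit user value must be zero-extended,
not sign-extended, before it becomes a 64-bit pointer. A small host-side
demonstration of the pitfall being avoided (illustrative, not kernel code):

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		/* A 32-bit address with the high bit set, as a signed type. */
		int32_t compat_addr = (int32_t)0xbffff000;

		/* Wrong: going through a signed 64-bit type sign-extends,
		 * smearing 0xffffffff into the upper half of the pointer. */
		void *bad = (void *)(long)compat_addr;

		/* Right: zero-extend first, as the patch's double cast does. */
		void *good = (void *)(unsigned long)(uint32_t)compat_addr;

		printf("bad  = %p\ngood = %p\n", bad, good);
		return 0;
	}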
diff -puN arch/x86_64/kernel/ptrace.c~utrace-utrace-ptrace-compat arch/x86_64/kernel/ptrace.c
--- a/arch/x86_64/kernel/ptrace.c~utrace-utrace-ptrace-compat
+++ a/arch/x86_64/kernel/ptrace.c
@@ -703,6 +703,52 @@ const struct utrace_regset_view utrace_x
 
 EXPORT_SYMBOL_GPL(utrace_x86_64_native);
 
+#ifdef CONFIG_PTRACE
+static const struct ptrace_layout_segment x86_64_uarea[] = {
+	{0, sizeof(struct user_regs_struct), 0, 0},
+	{sizeof(struct user_regs_struct),
+	 offsetof(struct user, u_debugreg[0]), -1, 0},
+	{offsetof(struct user, u_debugreg[0]),
+	 offsetof(struct user, u_debugreg[8]), 3, 0},
+	{0, 0, -1, 0}
+};
+
+int arch_ptrace(long *req, struct task_struct *child,
+		struct utrace_attached_engine *engine,
+		unsigned long addr, unsigned long data, long *val)
+{
+	switch (*req) {
+	case PTRACE_PEEKUSR:
+		return ptrace_peekusr(child, engine, x86_64_uarea, addr, data);
+	case PTRACE_POKEUSR:
+		return ptrace_pokeusr(child, engine, x86_64_uarea, addr, data);
+	case PTRACE_GETREGS:
+		return ptrace_whole_regset(child, engine, data, 0, 0);
+	case PTRACE_SETREGS:
+		return ptrace_whole_regset(child, engine, data, 0, 1);
+	case PTRACE_GETFPREGS:
+		return ptrace_whole_regset(child, engine, data, 1, 0);
+	case PTRACE_SETFPREGS:
+		return ptrace_whole_regset(child, engine, data, 1, 1);
+#ifdef CONFIG_IA32_EMULATION
+	case PTRACE_GET_THREAD_AREA:
+	case PTRACE_SET_THREAD_AREA:
+		return ptrace_onereg_access(child, engine,
+					    &utrace_ia32_view, 3,
+					    addr, (void __user *)data,
+					    *req == PTRACE_SET_THREAD_AREA);
+#endif
+	/* normal 64bit interface to access TLS data.
+	   Works just like arch_prctl, except that the arguments
+	   are reversed. */
+	case PTRACE_ARCH_PRCTL:
+		return do_arch_prctl(child, data, addr);
+	}
+	return -ENOSYS;
+}
+#endif	/* CONFIG_PTRACE */
+
+
 asmlinkage void syscall_trace_enter(struct pt_regs *regs)
 {
 	/* do the secure computing check first */
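PTRACE_ARCH_PRCTL above forwards to do_arch_prctl with the arguments swapped
relative to the arch_prctl syscall, as the comment says. A user-space sketch of
what that looks like from the tracer's side (illustrative; assumes an x86-64
Linux host, with the PTRACE_ARCH_PRCTL value taken from the kernel headers of
this era):

	#include <stdio.h>
	#include <signal.h>
	#include <sys/ptrace.h>
	#include <sys/types.h>
	#include <sys/wait.h>
	#include <unistd.h>
	#include <asm/prctl.h>		/* ARCH_GET_FS */

	#ifndef PTRACE_ARCH_PRCTL
	#define PTRACE_ARCH_PRCTL 30	/* from asm-x86_64/ptrace.h */
	#endif

	int main(void)
	{
		unsigned long fsbase = 0;
		pid_t child = fork();

		if (child == 0) {
			ptrace(PTRACE_TRACEME, 0, NULL, NULL);
			raise(SIGSTOP);
			_exit(0);
		}
		waitpid(child, NULL, 0);	/* child is ptrace-stopped */

		/* arch_prctl(ARCH_GET_FS, &fsbase) reads the caller's own FS
		 * base; through ptrace the pair is reversed: addr is the
		 * destination buffer in the tracer, data is the prctl code. */
		if (ptrace(PTRACE_ARCH_PRCTL, child, &fsbase, ARCH_GET_FS) == 0)
			printf("child FS base = %#lx\n", fsbase);

		ptrace(PTRACE_DETACH, child, NULL, NULL);
		return 0;
	}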
diff -puN fs/proc/base.c~utrace-utrace-ptrace-compat fs/proc/base.c
--- a/fs/proc/base.c~utrace-utrace-ptrace-compat
+++ a/fs/proc/base.c
@@ -151,6 +151,46 @@ static int get_nr_threads(struct task_st
 	return count;
 }
 
+static int __ptrace_may_attach(struct task_struct *task)
+{
+	/* May we inspect the given task?
+	 * This check is used both for attaching with ptrace
+	 * and for allowing access to sensitive information in /proc.
+	 *
+	 * ptrace_attach denies several cases that /proc allows
+	 * because setting up the necessary parent/child relationship
+	 * or halting the specified task is impossible.
+	 */
+	int dumpable = 0;
+	/* Don't let security modules deny introspection */
+	if (task == current)
+		return 0;
+	if (((current->uid != task->euid) ||
+	     (current->uid != task->suid) ||
+	     (current->uid != task->uid) ||
+	     (current->gid != task->egid) ||
+	     (current->gid != task->sgid) ||
+	     (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
+		return -EPERM;
+	smp_rmb();
+	if (task->mm)
+		dumpable = task->mm->dumpable;
+	if (!dumpable && !capable(CAP_SYS_PTRACE))
+		return -EPERM;
+
+	return security_ptrace(current, task);
+}
+
+int ptrace_may_attach(struct task_struct *task)
+{
+	int err;
+	task_lock(task);
+	err = __ptrace_may_attach(task);
+	task_unlock(task);
+	return !err;
+}
+
+
 static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
 {
 	struct task_struct *task = get_proc_task(inode);
diff -puN include/asm-x86_64/tracehook.h~utrace-utrace-ptrace-compat include/asm-x86_64/tracehook.h
--- a/include/asm-x86_64/tracehook.h~utrace-utrace-ptrace-compat
+++ a/include/asm-x86_64/tracehook.h
@@ -15,6 +15,7 @@
 
 #include
 #include
+#include
 
 /*
  * See linux/tracehook.h for the descriptions of what these need to do.
diff -puN include/linux/ptrace.h~utrace-utrace-ptrace-compat include/linux/ptrace.h
--- a/include/linux/ptrace.h~utrace-utrace-ptrace-compat
+++ a/include/linux/ptrace.h
@@ -49,50 +49,198 @@
 #include
 
 #ifdef __KERNEL__
-/*
- * Ptrace flags
- *
- * The owner ship rules for task->ptrace which holds the ptrace
- * flags is simple. When a task is running it owns it's task->ptrace
- * flags. When the a task is stopped the ptracer owns task->ptrace.
- */
-
-#define PT_PTRACED	0x00000001
-#define PT_DTRACE	0x00000002	/* delayed trace (used on m68k, i386) */
-#define PT_TRACESYSGOOD	0x00000004
-#define PT_PTRACE_CAP	0x00000008	/* ptracer can follow suid-exec */
-#define PT_TRACE_FORK	0x00000010
-#define PT_TRACE_VFORK	0x00000020
-#define PT_TRACE_CLONE	0x00000040
-#define PT_TRACE_EXEC	0x00000080
-#define PT_TRACE_VFORK_DONE	0x00000100
-#define PT_TRACE_EXIT	0x00000200
-
-#define PT_TRACE_MASK	0x000003f4
-
-/* single stepping state bits (used on ARM and PA-RISC) */
-#define PT_SINGLESTEP_BIT	31
-#define PT_SINGLESTEP	(1<
 
 #include	/* For unlikely. */
 #include	/* For struct task_struct. */
+#include
+#include
+struct siginfo;
+struct rusage;
 
-extern long arch_ptrace(struct task_struct *child, long request, long addr, long data);
-extern struct task_struct *ptrace_get_task_struct(pid_t pid);
-extern int ptrace_traceme(void);
-extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len);
-extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len);
-extern int ptrace_attach(struct task_struct *tsk);
-extern int ptrace_detach(struct task_struct *, unsigned int);
-extern void ptrace_disable(struct task_struct *);
-extern int ptrace_check_attach(struct task_struct *task, int kill);
-extern int ptrace_request(struct task_struct *child, long request, long addr, long data);
 extern int ptrace_may_attach(struct task_struct *task);
 
+#ifdef CONFIG_PTRACE
+#include
+struct utrace_attached_engine;
+struct utrace_regset_view;
+
+/*
+ * These must be defined by arch code to handle machine-specific ptrace
+ * requests such as PTRACE_PEEKUSR and PTRACE_GETREGS.  Returns -ENOSYS
+ * for any request it does not handle, which is then handled by the
+ * machine-independent code.  This can change *request and then return
+ * -ENOSYS to handle a machine-specific alias for a generic request.
+ *
+ * This code should NOT access task machine state directly.  Instead it
+ * should use the utrace_regset accessors.  The functions below make
+ * this easy.
+ *
+ * Any nonzero return value should be for an error.  If the return value
+ * of the ptrace syscall should be a nonzero success value, this returns
+ * zero and sets *retval to the value--which might have any bit pattern
+ * at all, including one that looks like -ENOSYS or another error code.
+ */
+extern int arch_ptrace(long *request, struct task_struct *child,
+		       struct utrace_attached_engine *engine,
+		       unsigned long addr, unsigned long data,
+		       long *retval);
+#ifdef CONFIG_COMPAT
+#include
+
+extern int arch_compat_ptrace(compat_long_t *request,
+			      struct task_struct *child,
+			      struct utrace_attached_engine *engine,
+			      compat_ulong_t a, compat_ulong_t d,
+			      compat_long_t *retval);
+#endif
+
+/*
+ * Convenience function doing access to a single utrace_regset for ptrace.
+ * The offset and size are in bytes, giving the location in the regset data.
+ */
+extern int ptrace_regset_access(struct task_struct *child,
+				struct utrace_attached_engine *engine,
+				const struct utrace_regset_view *view,
+				int setno, unsigned long offset,
+				unsigned int size, void __user *data,
+				int write);
+
+/*
+ * Convenience wrapper for doing access to a whole utrace_regset for ptrace.
+ */
+static inline int ptrace_whole_regset(struct task_struct *child,
+				      struct utrace_attached_engine *engine,
+				      long data, int setno, int write)
+{
+	return ptrace_regset_access(child, engine, utrace_native_view(current),
+				    setno, 0, -1, (void __user *)data, write);
+}
+
+/*
+ * Convenience function doing access to a single slot in a utrace_regset.
+ * The regno value gives a slot number plus regset->bias.
+ * The value accessed is regset->size bytes long.
+ */
+extern int ptrace_onereg_access(struct task_struct *child,
+				struct utrace_attached_engine *engine,
+				const struct utrace_regset_view *view,
+				int setno, unsigned long regno,
+				void __user *data, int write);
+
+
+/*
+ * An array of these describes the layout of the virtual struct user
+ * accessed by PEEKUSR/POKEUSR, or the structure used by GETREGS et al.
+ * The array is terminated by an element with .end of zero.
+ * An element describes the range [.start, .end) of struct user offsets,
+ * measured in bytes; it maps to the regset in the view's regsets array
+ * at the index given by .regset, at .offset bytes into that regset's data.
+ * If .regset is -1, then the [.start, .end) range reads as zero
+ * if .offset is zero, and is skipped on read (user's buffer unchanged)
+ * if .offset is -1.
+ */
+struct ptrace_layout_segment {
+	unsigned int start, end, regset, offset;
+};
+
+/*
+ * Convenience function for doing access to a ptrace compatibility layout.
+ * The offset and size are in bytes.
+ */
+extern int ptrace_layout_access(struct task_struct *child,
+				struct utrace_attached_engine *engine,
+				const struct utrace_regset_view *view,
+				const struct ptrace_layout_segment layout[],
+				unsigned long offset, unsigned int size,
+				void __user *data, void *kdata, int write);
+
+
+/* Convenience wrapper for the common PTRACE_PEEKUSR implementation. */
+static inline int ptrace_peekusr(struct task_struct *child,
+				 struct utrace_attached_engine *engine,
+				 const struct ptrace_layout_segment layout[],
+				 unsigned long addr, long data)
+{
+	return ptrace_layout_access(child, engine, utrace_native_view(current),
+				    layout, addr, sizeof(long),
+				    (unsigned long __user *)data, NULL, 0);
+}
+
+/* Convenience wrapper for the common PTRACE_POKEUSR implementation. */
+static inline int ptrace_pokeusr(struct task_struct *child,
+				 struct utrace_attached_engine *engine,
+				 const struct ptrace_layout_segment layout[],
+				 unsigned long addr, long data)
+{
+	return ptrace_layout_access(child, engine, utrace_native_view(current),
+				    layout, addr, sizeof(long),
+				    NULL, &data, 1);
+}
+
+#ifdef CONFIG_COMPAT
+/* Convenience wrapper for the common PTRACE_PEEKUSR implementation. */
+static inline int ptrace_compat_peekusr(
+	struct task_struct *child, struct utrace_attached_engine *engine,
+	const struct ptrace_layout_segment layout[],
+	compat_ulong_t addr, compat_ulong_t data)
+{
+	compat_ulong_t __user *udata =
+		(compat_ulong_t __user *) (unsigned long) data;
+	return ptrace_layout_access(child, engine, utrace_native_view(current),
+				    layout, addr, sizeof(compat_ulong_t),
+				    udata, NULL, 0);
+}
+
+/* Convenience wrapper for the common PTRACE_POKEUSR implementation. */
+static inline int ptrace_compat_pokeusr(
+	struct task_struct *child, struct utrace_attached_engine *engine,
+	const struct ptrace_layout_segment layout[],
+	compat_ulong_t addr, compat_ulong_t data)
+{
+	return ptrace_layout_access(child, engine, utrace_native_view(current),
+				    layout, addr, sizeof(compat_ulong_t),
+				    NULL, &data, 1);
+}
+#endif
+
+
+/*
+ * Called in copy_process.
+ */
+static inline void ptrace_init_task(struct task_struct *tsk)
+{
+	INIT_LIST_HEAD(&tsk->ptracees);
+}
+
+/*
+ * Called in do_exit, after setting PF_EXITING, no locks are held.
+ */
+void ptrace_exit(struct task_struct *tsk);
+
+/*
+ * Called in do_wait, with tasklist_lock held for reading.
+ * This reports any ptrace-child that is ready as do_wait would a normal child.
+ * If there are no ptrace children, returns -ECHILD.
+ * If there are some ptrace children but none reporting now, returns 0.
+ * In those cases the tasklist_lock is still held so next_thread(tsk) works.
+ * For any other return value, tasklist_lock is released before return.
+ */
+int ptrace_do_wait(struct task_struct *tsk,
+		   pid_t pid, int options, struct siginfo __user *infop,
+		   int __user *stat_addr, struct rusage __user *rusagep);
+#else
+static inline void ptrace_init_task(struct task_struct *tsk) { }
+static inline void ptrace_exit(struct task_struct *tsk) { }
+static inline int ptrace_do_wait(struct task_struct *tsk,
+				 pid_t pid, int options,
+				 struct siginfo __user *infop,
+				 int __user *stat_addr,
+				 struct rusage __user *rusagep)
+{
+	return -ECHILD;
+}
+#endif
+
+
 #ifndef force_successful_syscall_return
 /*
  * System call handlers that, upon successful completion, need to return a
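To make the ptrace_layout_segment semantics above concrete, here is a
simplified stand-alone model of the segment lookup that ptrace_layout_access
performs. The table values are hypothetical stand-ins for an i386-style
layout, not the kernel's actual tables:

	#include <stdio.h>

	struct layout_segment {
		unsigned int start, end, regset, offset;
	};

	/* Hypothetical: GP regs, a zero-fill hole, then debug registers. */
	static const struct layout_segment uarea[] = {
		{0, 68, 0, 0},		/* offsets 0..68 -> regset 0 */
		{68, 252, -1, 0},	/* hole: reads as zero */
		{252, 284, 4, 0},	/* debug regs -> regset 4 */
		{0, 0, -1, 0}		/* terminator: .end == 0 */
	};

	/* Same walk as ptrace_layout_access: advance past segments that end
	 * at or before addr, then check addr falls inside the one found. */
	static const struct layout_segment *find_segment(unsigned int addr)
	{
		const struct layout_segment *seg = uarea;

		while (addr >= seg->end && seg->end != 0)
			++seg;
		return (addr >= seg->start && addr < seg->end) ? seg : NULL;
	}

	int main(void)
	{
		unsigned int probes[] = {0, 100, 260, 400};
		for (int i = 0; i < 4; ++i) {
			const struct layout_segment *seg = find_segment(probes[i]);
			if (!seg)
				printf("%3u -> -EIO\n", probes[i]);
			else if (seg->regset == (unsigned int)-1)
				printf("%3u -> zero-fill hole\n", probes[i]);
			else
				printf("%3u -> regset %u, offset %u\n",
				       probes[i], seg->regset,
				       probes[i] - seg->start + seg->offset);
		}
		return 0;
	}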
diff -puN include/linux/sched.h~utrace-utrace-ptrace-compat include/linux/sched.h
--- a/include/linux/sched.h~utrace-utrace-ptrace-compat
+++ a/include/linux/sched.h
@@ -1075,6 +1075,10 @@ struct task_struct {
 	atomic_t fs_excl;	/* holding fs exclusive resources */
 	struct rcu_head rcu;
 
+#ifdef CONFIG_PTRACE
+	struct list_head ptracees;
+#endif
+
 	/*
 	 * cache last used pipe for splice
 	 */
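The new ptracees list head uses the kernel's usual intrusive-list pattern:
each tracee's ptrace_state (added in kernel/ptrace.c below) embeds a
list_head that links into its tracer's task_struct. A minimal user-space
model of that pattern (simplified re-implementations; the real primitives
live in <linux/list.h>):

	#include <stdio.h>
	#include <stddef.h>

	struct list_head { struct list_head *next, *prev; };

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	static void list_init(struct list_head *h) { h->next = h->prev = h; }

	static void list_add(struct list_head *new, struct list_head *head)
	{
		new->next = head->next;
		new->prev = head;
		head->next->prev = new;
		head->next = new;
	}

	struct task { struct list_head ptracees; };	/* task_struct stand-in */
	struct ptrace_state {
		int pid;			/* stands in for ->task */
		struct list_head entry;		/* links into parent->ptracees */
	};

	int main(void)
	{
		struct task tracer;
		struct ptrace_state a = { .pid = 101 }, b = { .pid = 102 };

		list_init(&tracer.ptracees);
		list_add(&a.entry, &tracer.ptracees);
		list_add(&b.entry, &tracer.ptracees);

		for (struct list_head *p = tracer.ptracees.next;
		     p != &tracer.ptracees; p = p->next)
			printf("tracee pid %d\n",
			       container_of(p, struct ptrace_state, entry)->pid);
		return 0;
	}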
diff -puN init/Kconfig~utrace-utrace-ptrace-compat init/Kconfig
--- a/init/Kconfig~utrace-utrace-ptrace-compat
+++ a/init/Kconfig
@@ -645,6 +645,18 @@ config UTRACE
 	  applications.  Unless you are making a specially stripped-down
 	  kernel and are very sure you don't need these facilitiies, say Y.
 
+
+config PTRACE
+	bool "Legacy ptrace system call interface"
+	default y
+	depends on UTRACE && PROC_FS
+	help
+	  Enable the ptrace system call.
+	  This is traditionally used by debuggers like GDB,
+	  and is used by UML and some other applications.
+	  Unless you are very sure you won't run anything that needs it,
+	  say Y.
+
 endmenu
 
 menu "Block layer"
diff -puN kernel/Makefile~utrace-utrace-ptrace-compat kernel/Makefile
--- a/kernel/Makefile~utrace-utrace-ptrace-compat
+++ a/kernel/Makefile
@@ -4,7 +4,7 @@
 
 obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
 	    exit.o itimer.o time.o softirq.o resource.o \
-	    sysctl.o capability.o ptrace.o timer.o user.o \
+	    sysctl.o capability.o timer.o user.o \
 	    signal.o sys.o kmod.o workqueue.o pid.o \
 	    rcupdate.o extable.o params.o posix-timers.o \
 	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
@@ -53,6 +53,7 @@ obj-$(CONFIG_UTS_NS) += utsname.o
 obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
 obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
 obj-$(CONFIG_UTRACE) += utrace.o
+obj-$(CONFIG_PTRACE) += ptrace.o
 
 ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
 # According to Alan Modra , the -fno-omit-frame-pointer is
diff -puN kernel/exit.c~utrace-utrace-ptrace-compat kernel/exit.c
--- a/kernel/exit.c~utrace-utrace-ptrace-compat
+++ a/kernel/exit.c
@@ -23,6 +23,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -827,6 +828,8 @@ fastcall NORET_TYPE void do_exit(long co
 
 	tsk->flags |= PF_EXITING;
 
+	ptrace_exit(tsk);
+
 	if (unlikely(in_atomic()))
 		printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
 		       current->comm, current->pid,
@@ -1442,9 +1445,15 @@ check_continued:
 				break;
 			}
 		}
-		if (!flag) {
-			// XXX set flag if we have ptracees
+
+		retval = ptrace_do_wait(tsk, pid, options,
+					infop, stat_addr, ru);
+		if (retval != -ECHILD) {
+			flag = 1;
+			if (retval != 0) /* He released the lock. */
+				goto end;
 		}
+
 		if (options & __WNOTHREAD)
 			break;
 		tsk = next_thread(tsk);
diff -puN kernel/fork.c~utrace-utrace-ptrace-compat kernel/fork.c
--- a/kernel/fork.c~utrace-utrace-ptrace-compat
+++ a/kernel/fork.c
@@ -49,6 +49,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
@@ -1028,6 +1029,7 @@ static struct task_struct *copy_process(
 	INIT_LIST_HEAD(&p->sibling);
 	p->vfork_done = NULL;
 	spin_lock_init(&p->alloc_lock);
+	ptrace_init_task(p);
 
 	clear_tsk_thread_flag(p, TIF_SIGPENDING);
 	init_sigpending(&p->pending);
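The rewritten kernel/ptrace.c below has to preserve the traditional
user-visible attach/stop/wait/detach cycle. For reference, a minimal tracer
exercising exactly the paths implemented below (ptrace_attach, ptrace_do_wait,
ptrace_detach); illustrative user-space code, not part of the patch:

	#include <stdio.h>
	#include <stdlib.h>
	#include <signal.h>
	#include <sys/ptrace.h>
	#include <sys/wait.h>
	#include <unistd.h>

	int main(void)
	{
		pid_t child = fork();
		if (child == 0) {		/* a busy tracee */
			for (;;)
				sleep(1);
		}

		if (ptrace(PTRACE_ATTACH, child, NULL, NULL) != 0) {
			perror("PTRACE_ATTACH");
			exit(1);
		}

		int status;
		waitpid(child, &status, 0);	/* serviced by ptrace_do_wait */
		if (WIFSTOPPED(status))
			printf("tracee stopped by signal %d\n",
			       WSTOPSIG(status));

		ptrace(PTRACE_DETACH, child, NULL, NULL); /* resumes tracee */
		kill(child, SIGKILL);
		return 0;
	}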
diff -puN kernel/ptrace.c~utrace-utrace-ptrace-compat kernel/ptrace.c
--- a/kernel/ptrace.c~utrace-utrace-ptrace-compat
+++ a/kernel/ptrace.c
@@ -19,191 +19,1776 @@
 #include
 #include
 #include
+#include
+#include
+#include
 #include
 #include
 
+struct ptrace_state
+{
+	struct rcu_head rcu;
+
+	/*
+	 * These elements are always available, even when the struct is
+	 * awaiting destruction at the next RCU callback point.
+	 */
+	struct utrace_attached_engine *engine;
+	struct task_struct *task; /* Target task. */
+	struct task_struct *parent; /* Whom we report to. */
+	struct list_head entry;	/* Entry on parent->ptracees list. */
+
+	u8 options;		/* PTRACE_SETOPTIONS bits. */
+	unsigned int syscall:1;	/* Reporting for syscall. */
+#ifdef PTRACE_SYSEMU
+	unsigned int sysemu:1;	/* PTRACE_SYSEMU in progress. */
+#endif
+	unsigned int have_eventmsg:1; /* u.eventmsg valid. */
+	unsigned int cap_sys_ptrace:1; /* Tracer capable. */
+
+	union
+	{
+		unsigned long eventmsg;
+		siginfo_t *siginfo;
+	} u;
+};
+
+static const struct utrace_engine_ops ptrace_utrace_ops; /* Initialized below. */
+
+static void
+ptrace_state_unlink(struct ptrace_state *state)
+{
+	task_lock(state->parent);
+	list_del_rcu(&state->entry);
+	task_unlock(state->parent);
+}
+
+static struct ptrace_state *
+ptrace_setup(struct task_struct *target, struct utrace_attached_engine *engine,
+	     struct task_struct *parent, u8 options, int cap_sys_ptrace,
+	     struct ptrace_state *state)
+{
+	if (state == NULL) {
+		state = kzalloc(sizeof *state, GFP_USER);
+		if (unlikely(state == NULL))
+			return ERR_PTR(-ENOMEM);
+	}
+
+	state->engine = engine;
+	state->task = target;
+	state->parent = parent;
+	state->options = options;
+	state->cap_sys_ptrace = cap_sys_ptrace;
+
+	task_lock(parent);
+	if (unlikely(parent->flags & PF_EXITING)) {
+		task_unlock(parent);
+		kfree(state);
+		return ERR_PTR(-EALREADY);
+	}
+	list_add_rcu(&state->entry, &state->parent->ptracees);
+	task_unlock(state->parent);
+
+	BUG_ON(engine->data != 0);
+	rcu_assign_pointer(engine->data, (unsigned long) state);
+
+	return state;
+}
+
+static void
+ptrace_state_free(struct rcu_head *rhead)
+{
+	struct ptrace_state *state = container_of(rhead,
+						  struct ptrace_state, rcu);
+	kfree(state);
+}
+
+static void
+ptrace_done(struct ptrace_state *state)
+{
+	INIT_RCU_HEAD(&state->rcu);
+	call_rcu(&state->rcu, ptrace_state_free);
+}
 
 /*
- * Check that we have indeed attached to the thing..
+ * Update the tracing engine state to match the new ptrace state.
  */
-int ptrace_check_attach(struct task_struct *child, int kill)
+static int __must_check
+ptrace_update(struct task_struct *target,
+	      struct utrace_attached_engine *engine,
+	      unsigned long flags, int from_stopped)
 {
-	return -ENOSYS;
+	struct ptrace_state *state = (struct ptrace_state *) engine->data;
+
+	/*
+	 * These events are always reported.
+	 */
+	flags |= (UTRACE_EVENT(DEATH) | UTRACE_EVENT(EXEC)
+		  | UTRACE_EVENT_SIGNAL_ALL | UTRACE_EVENT(JCTL));
+
+	/*
+	 * We always have to examine clone events to check for CLONE_PTRACE.
+	 */
+	flags |= UTRACE_EVENT(CLONE);
+
+	/*
+	 * PTRACE_SETOPTIONS can request more events.
+	 */
+	if (state->options & PTRACE_O_TRACEEXIT)
+		flags |= UTRACE_EVENT(EXIT);
+	if (state->options & PTRACE_O_TRACEVFORKDONE)
+		flags |= UTRACE_EVENT(VFORK_DONE);
+
+	/*
+	 * ptrace always inhibits normal parent reaping.
+	 * But for a corner case we sometimes see the REAP event anyway.
+	 */
+	flags |= UTRACE_ACTION_NOREAP | UTRACE_EVENT(REAP);
+
+	if (from_stopped && !(flags & UTRACE_ACTION_QUIESCE)) {
+		/*
+		 * We're letting the thread resume from ptrace stop.
+		 * If SIGKILL is waking it up, it can be racing with us here
+		 * to set its own exit_code in do_exit. Though we clobber
+		 * it here, we check for the case in ptrace_report_death.
+		 */
+		if (!unlikely(target->flags & PF_SIGNALED))
+			target->exit_code = 0;
+
+		if (!state->have_eventmsg)
+			state->u.siginfo = NULL;
+
+		if (target->state == TASK_STOPPED) {
+			/*
+			 * We have to double-check for naughty de_thread
+			 * reaping despite NOREAP, before we can get siglock.
+			 */
+			read_lock(&tasklist_lock);
+			if (!target->exit_state) {
+				spin_lock_irq(&target->sighand->siglock);
+				if (target->state == TASK_STOPPED)
+					target->signal->flags &=
+						~SIGNAL_STOP_STOPPED;
+				spin_unlock_irq(&target->sighand->siglock);
+			}
+			read_unlock(&tasklist_lock);
+		}
+	}
+
+	return utrace_set_flags(target, engine, flags);
 }
 
-static int may_attach(struct task_struct *task)
+static int ptrace_traceme(void)
 {
-	/* May we inspect the given task?
-	 * This check is used both for attaching with ptrace
-	 * and for allowing access to sensitive information in /proc.
-	 *
-	 * ptrace_attach denies several cases that /proc allows
-	 * because setting up the necessary parent/child relationship
-	 * or halting the specified task is impossible.
-	 */
-	int dumpable = 0;
-	/* Don't let security modules deny introspection */
-	if (task == current)
-		return 0;
-	if (((current->uid != task->euid) ||
-	     (current->uid != task->suid) ||
-	     (current->uid != task->uid) ||
-	     (current->gid != task->egid) ||
-	     (current->gid != task->sgid) ||
-	     (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
-		return -EPERM;
-	smp_rmb();
-	if (task->mm)
-		dumpable = task->mm->dumpable;
-	if (!dumpable && !capable(CAP_SYS_PTRACE))
-		return -EPERM;
-
-	return security_ptrace(current, task);
-}
-
-int ptrace_may_attach(struct task_struct *task)
-{
-	int err;
-	task_lock(task);
-	err = may_attach(task);
-	task_unlock(task);
-	return !err;
+	struct utrace_attached_engine *engine;
+	struct ptrace_state *state;
+	struct task_struct *parent;
+	int retval;
+
+	engine = utrace_attach(current, (UTRACE_ATTACH_CREATE
+					 | UTRACE_ATTACH_EXCLUSIVE
+					 | UTRACE_ATTACH_MATCH_OPS),
+			       &ptrace_utrace_ops, 0UL);
+
+	if (IS_ERR(engine)) {
+		retval = PTR_ERR(engine);
+		if (retval == -EEXIST)
+			retval = -EPERM;
+	}
+	else {
+		/*
+		 * We need to preallocate so that we can hold
+		 * rcu_read_lock from extracting ->parent through
+		 * ptrace_setup using it.
+		 */
+		state = kzalloc(sizeof *state, GFP_USER);
+		if (unlikely(state == NULL)) {
+			(void) utrace_detach(current, engine);
+			printk(KERN_ERR
+			       "ptrace out of memory, lost child %d of %d\n",
+			       current->pid, current->parent->pid);
+			return -ENOMEM;
+		}
+
+		rcu_read_lock();
+		parent = rcu_dereference(current->parent);
+
+		task_lock(current);
+		retval = security_ptrace(parent, current);
+		task_unlock(current);
+
+		if (retval) {
+			kfree(state);
+			(void) utrace_detach(current, engine);
+		}
+		else {
+			state = ptrace_setup(current, engine, parent, 0, 0,
+					     state);
+			if (IS_ERR(state))
+				retval = PTR_ERR(state);
+		}
+		rcu_read_unlock();
+
+		if (!retval) {
+			/*
+			 * This can't fail because we can't die while we
+			 * are here doing this.
+			 */
+			retval = ptrace_update(current, engine, 0, 0);
+			BUG_ON(retval);
+		}
+		else if (unlikely(retval == -EALREADY))
+			/*
+			 * We raced with our parent's exit, which would
+			 * have detached us just after our attach if
+			 * we'd won the race. Pretend we got attached
+			 * and then detached immediately, no error.
+			 */
+			retval = 0;
+	}
+
+	return retval;
 }
 
-int ptrace_attach(struct task_struct *task)
+static int ptrace_attach(struct task_struct *task)
 {
+	struct utrace_attached_engine *engine;
+	struct ptrace_state *state;
 	int retval;
 
 	retval = -EPERM;
 	if (task->pid <= 1)
-		goto out;
+		goto bad;
 	if (task->tgid == current->tgid)
 		goto bad;
-	retval = may_attach(task);
-	if (retval)
+	if (!task->mm)		/* kernel threads */
 		goto bad;
 
-	retval = -ENOSYS;
+	pr_debug("%d ptrace_attach %d state %lu exit_code %x\n",
+		 current->pid, task->pid, task->state, task->exit_code);
+
+	engine = utrace_attach(task, (UTRACE_ATTACH_CREATE
+				      | UTRACE_ATTACH_EXCLUSIVE
+				      | UTRACE_ATTACH_MATCH_OPS),
+			       &ptrace_utrace_ops, 0);
+	if (IS_ERR(engine)) {
+		retval = PTR_ERR(engine);
+		if (retval == -EEXIST)
+			retval = -EPERM;
+		goto bad;
+	}
+
+	pr_debug("%d ptrace_attach %d after utrace_attach: %lu exit_code %x\n",
+		 current->pid, task->pid, task->state, task->exit_code);
+
+	if (ptrace_may_attach(task)) {
+		state = ptrace_setup(task, engine, current, 0,
+				     capable(CAP_SYS_PTRACE), NULL);
+		if (IS_ERR(state))
+			retval = PTR_ERR(state);
+		else {
+			retval = ptrace_update(task, engine, 0, 0);
+
+			pr_debug("%d ptrace_attach %d after ptrace_update (%d)"
+				 " %lu exit_code %x\n",
+				 current->pid, task->pid, retval,
+				 task->state, task->exit_code);
+
+			if (retval) {
+				/*
+				 * It died before we enabled any callbacks.
+				 */
+				if (retval == -EALREADY)
+					retval = -ESRCH;
+				BUG_ON(retval != -ESRCH);
+				ptrace_state_unlink(state);
+				ptrace_done(state);
+			}
+		}
+	}
+	if (retval)
+		(void) utrace_detach(task, engine);
+	else {
+		int stopped = 0;
+
+		/*
+		 * We must double-check that task has not just died and
+		 * been reaped (after ptrace_update succeeded).
+		 * This happens when exec (de_thread) ignores NOREAP.
+		 * We cannot call into the signal code if it's dead.
+		 */
+		read_lock(&tasklist_lock);
+		if (likely(!task->exit_state)) {
+			force_sig_specific(SIGSTOP, task);
+
+			spin_lock_irq(&task->sighand->siglock);
+			stopped = (task->state == TASK_STOPPED);
+			spin_unlock_irq(&task->sighand->siglock);
+		}
+		read_unlock(&tasklist_lock);
+
+		if (stopped) {
+			const struct utrace_regset *regset;
+
+			/*
+			 * Set QUIESCE immediately, so we can allow
+			 * ptrace requests while he's in TASK_STOPPED.
+			 */
+			retval = ptrace_update(task, engine,
+					       UTRACE_ACTION_QUIESCE, 0);
+			if (retval)
+				BUG_ON(retval != -ESRCH);
+			retval = 0;
+
+			/*
+			 * Do now the regset 0 writeback that we do on every
+			 * stop, since it's never been done.  On register
+			 * window machines, this makes sure the user memory
+			 * backing the register data is up to date.
+			 */
+			regset = utrace_regset(task, engine,
+					       utrace_native_view(task), 0);
+			if (regset->writeback)
+				(*regset->writeback)(task, regset, 1);
+		}
+
+		pr_debug("%d ptrace_attach %d complete (%sstopped)"
+			 " state %lu code %x\n",
+			 current->pid, task->pid, stopped ? "" : "not ",
+			 task->state, task->exit_code);
+	}
 
 bad:
-out:
 	return retval;
 }
 
-int ptrace_detach(struct task_struct *child, unsigned int data)
+/*
+ * The task might be dying or being reaped in parallel, in which case
+ * engine and state may no longer be valid. utrace_detach checks for us.
+ */
+static int ptrace_detach(struct task_struct *task,
+			 struct utrace_attached_engine *engine,
+			 struct ptrace_state *state)
+{
+
+	int error;
+
+#ifdef HAVE_ARCH_PTRACE_DETACH
+	/*
+	 * Some funky compatibility code in arch_ptrace may have
+	 * needed to install special state it should clean up now.
+	 */
+	arch_ptrace_detach(task);
+#endif
+
+	/*
+	 * Traditional ptrace behavior does wake_up_process no matter what
+	 * in ptrace_detach. But utrace_detach will not do a wakeup if
+	 * it's in a proper job control stop. We need it to wake up from
+	 * TASK_STOPPED and either resume or process more signals. A
+	 * pending stop signal will just leave it stopped again, but will
+	 * consume the signal, and reset task->exit_code for the next wait
+	 * call to see. This is important to userland if ptrace_do_wait
+	 * "stole" the previous unwaited-for-ness (clearing exit_code), but
+	 * there is a pending SIGSTOP, e.g. sent by a PTRACE_ATTACH done
+	 * while already in job control stop.
+	 */
+	read_lock(&tasklist_lock);
+	if (likely(task->signal != NULL)) {
+		spin_lock_irq(&task->sighand->siglock);
+		task->signal->flags &= ~SIGNAL_STOP_STOPPED;
+		spin_unlock_irq(&task->sighand->siglock);
+	}
+	read_unlock(&tasklist_lock);
+
+	error = utrace_detach(task, engine);
+	if (!error) {
+		/*
+		 * We can only get here from the ptracer itself or via
+		 * detach_zombie from another thread in its group.
+		 */
+		BUG_ON(state->parent->tgid != current->tgid);
+		ptrace_state_unlink(state);
+		ptrace_done(state);
+
+		/*
+		 * Wake up any other threads that might be blocked in
+		 * wait. Though traditional ptrace does not guarantee
+		 * this wakeup on PTRACE_DETACH, it does prevent
+		 * erroneous blocking in wait when another racing
+		 * thread's wait call reap-detaches the last child.
+		 * Without this wakeup, another thread might stay
+		 * blocked when it should return -ECHILD.
+		 */
+		spin_lock_irq(&current->sighand->siglock);
+		wake_up_interruptible(&current->signal->wait_chldexit);
+		spin_unlock_irq(&current->sighand->siglock);
+	}
+	return error;
+}
+
+
+/*
+ * This is called when we are exiting. We must stop all our ptracing.
+ */
+void
+ptrace_exit(struct task_struct *tsk)
+{
+	struct list_head *pos, *n;
+
+	/*
+	 * Taking the task_lock after PF_EXITING is set ensures that a
+	 * child in ptrace_traceme will not put itself on our list when
+	 * we might already be tearing it down.
+	 */
+	task_lock(tsk);
+	if (likely(list_empty(&tsk->ptracees))) {
+		task_unlock(tsk);
+		return;
+	}
+	task_unlock(tsk);
+
+restart:
+	rcu_read_lock();
+
+	list_for_each_safe_rcu(pos, n, &tsk->ptracees) {
+		struct ptrace_state *state = list_entry(pos,
+							struct ptrace_state,
+							entry);
+		int error = utrace_detach(state->task, state->engine);
+		BUG_ON(state->parent != tsk);
+		if (likely(error == 0)) {
+			ptrace_state_unlink(state);
+			ptrace_done(state);
+		}
+		else if (unlikely(error == -EALREADY)) {
+			/*
+			 * It's still doing report_death callbacks.
+			 * Just wait for it to settle down.
+			 * Since wait_task_inactive might yield,
+			 * we must go out of rcu_read_lock and restart.
+			 */
+			struct task_struct *p = state->task;
+			get_task_struct(p);
+			rcu_read_unlock();
+			wait_task_inactive(p);
+			put_task_struct(p);
+			goto restart;
+		}
+		else
+			BUG_ON(error != -ESRCH);
+	}
+
+	rcu_read_unlock();
+
+	BUG_ON(!list_empty(&tsk->ptracees));
+}
+
+static int
+ptrace_induce_signal(struct task_struct *target,
+		     struct utrace_attached_engine *engine,
+		     long signr)
 {
-	if (!valid_signal(data))
+	struct ptrace_state *state = (struct ptrace_state *) engine->data;
+
+	if (signr == 0)
+		return 0;
+
+	if (!valid_signal(signr))
 		return -EIO;
 
-	return -ENOSYS;
+	if (state->syscall) {
+		/*
+		 * This is the traditional ptrace behavior when given
+		 * a signal to resume from a syscall tracing stop.
+		 */
+		send_sig(signr, target, 1);
+	}
+	else if (!state->have_eventmsg && state->u.siginfo) {
+		siginfo_t *info = state->u.siginfo;
+
+		/* Update the siginfo structure if the signal has
+		   changed. If the debugger wanted something
+		   specific in the siginfo structure then it should
+		   have updated *info via PTRACE_SETSIGINFO. */
+		if (signr != info->si_signo) {
+			info->si_signo = signr;
+			info->si_errno = 0;
+			info->si_code = SI_USER;
+			info->si_pid = current->pid;
+			info->si_uid = current->uid;
+		}
+
+		return utrace_inject_signal(target, engine,
+					    UTRACE_ACTION_RESUME, info, NULL);
+	}
+
+	return 0;
 }
 
-int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len)
+int
+ptrace_regset_access(struct task_struct *target,
+		     struct utrace_attached_engine *engine,
+		     const struct utrace_regset_view *view,
+		     int setno, unsigned long offset, unsigned int size,
+		     void __user *data, int write)
 {
-	int copied = 0;
+	const struct utrace_regset *regset = utrace_regset(target, engine,
+							   view, setno);
+	int ret;
 
-	while (len > 0) {
-		char buf[128];
-		int this_len, retval;
+	if (unlikely(regset == NULL))
+		return -EIO;
 
-		this_len = (len > sizeof(buf)) ? sizeof(buf) : len;
-		retval = access_process_vm(tsk, src, buf, this_len, 0);
-		if (!retval) {
-			if (copied)
-				break;
+	if (size == (unsigned int) -1)
+		size = regset->size * regset->n;
+
+	if (write) {
+		if (!access_ok(VERIFY_READ, data, size))
+			ret = -EIO;
+		else
+			ret = (*regset->set)(target, regset,
+					     offset, size, NULL, data);
+	}
+	else {
+		if (!access_ok(VERIFY_WRITE, data, size))
+			ret = -EIO;
+		else
+			ret = (*regset->get)(target, regset,
+					     offset, size, NULL, data);
+	}
+
+	return ret;
+}
+
+int
+ptrace_onereg_access(struct task_struct *target,
+		     struct utrace_attached_engine *engine,
+		     const struct utrace_regset_view *view,
+		     int setno, unsigned long regno,
+		     void __user *data, int write)
+{
+	const struct utrace_regset *regset = utrace_regset(target, engine,
+							   view, setno);
+	unsigned int pos;
+	int ret;
+
+	if (unlikely(regset == NULL))
+		return -EIO;
+
+	if (regno < regset->bias || regno >= regset->bias + regset->n)
+		return -EINVAL;
+
+	pos = (regno - regset->bias) * regset->size;
+
+	if (write) {
+		if (!access_ok(VERIFY_READ, data, regset->size))
+			ret = -EIO;
+		else
+			ret = (*regset->set)(target, regset, pos, regset->size,
+					     NULL, data);
+	}
+	else {
+		if (!access_ok(VERIFY_WRITE, data, regset->size))
+			ret = -EIO;
+		else
+			ret = (*regset->get)(target, regset, pos, regset->size,
+					     NULL, data);
+	}
+
+	return ret;
+}
+
+int
+ptrace_layout_access(struct task_struct *target,
+		     struct utrace_attached_engine *engine,
+		     const struct utrace_regset_view *view,
+		     const struct ptrace_layout_segment layout[],
+		     unsigned long addr, unsigned int size,
+		     void __user *udata, void *kdata, int write)
+{
+	const struct ptrace_layout_segment *seg;
+	int ret = -EIO;
+
+	if (kdata == NULL &&
+	    !access_ok(write ? VERIFY_READ : VERIFY_WRITE, udata, size))
+		return -EIO;
+
+	seg = layout;
+	do {
+		unsigned int pos, n;
+
+		while (addr >= seg->end && seg->end != 0)
+			++seg;
+
+		if (addr < seg->start || addr >= seg->end)
 			return -EIO;
+
+		pos = addr - seg->start + seg->offset;
+		n = min(size, seg->end - (unsigned int) addr);
+
+		if (unlikely(seg->regset == (unsigned int) -1)) {
+			/*
+			 * This is a no-op/zero-fill portion of struct user.
+			 */
+			ret = 0;
+			if (!write && seg->offset == 0) {
+				if (kdata)
+					memset(kdata, 0, n);
+				else if (clear_user(udata, n))
+					ret = -EFAULT;
+			}
 		}
-		if (copy_to_user(dst, buf, retval))
-			return -EFAULT;
-		copied += retval;
-		src += retval;
-		dst += retval;
-		len -= retval;
+		else {
+			unsigned int align;
+			const struct utrace_regset *regset = utrace_regset(
+				target, engine, view, seg->regset);
+			if (unlikely(regset == NULL))
+				return -EIO;
+
+			/*
+			 * A ptrace compatibility layout can do a misaligned
+			 * regset access, e.g. word access to larger data.
+			 * An arch's compat layout can be this way only if
+			 * it is actually ok with the regset code despite the
+			 * regset->align setting.
+			 */
+			align = min(regset->align, size);
+			if ((pos & (align - 1))
+			    || pos >= regset->n * regset->size)
+				return -EIO;
+
+			if (write)
+				ret = (*regset->set)(target, regset,
+						     pos, n, kdata, udata);
+			else
+				ret = (*regset->get)(target, regset,
+						     pos, n, kdata, udata);
+		}
+
+		if (kdata)
+			kdata += n;
+		else
+			udata += n;
+		addr += n;
+		size -= n;
+	} while (ret == 0 && size > 0);
+
+	return ret;
+}
+
+
+static int
+ptrace_start(long pid, long request,
+	     struct task_struct **childp,
+	     struct utrace_attached_engine **enginep,
+	     struct ptrace_state **statep)
+
+{
+	struct task_struct *child;
+	struct utrace_attached_engine *engine;
+	struct ptrace_state *state;
+	int ret;
+
+	if (request == PTRACE_TRACEME)
+		return ptrace_traceme();
+
+	ret = -ESRCH;
+	read_lock(&tasklist_lock);
+	child = find_task_by_pid(pid);
+	if (child)
+		get_task_struct(child);
+	read_unlock(&tasklist_lock);
+	pr_debug("ptrace pid %ld => %p\n", pid, child);
+	if (!child)
+		goto out;
+
+	ret = -EPERM;
+	if (pid == 1)		/* you may not mess with init */
+		goto out_tsk;
+
+	if (request == PTRACE_ATTACH) {
+		ret = ptrace_attach(child);
+		goto out_tsk;
+	}
+
+	rcu_read_lock();
+	engine = utrace_attach(child, UTRACE_ATTACH_MATCH_OPS,
+			       &ptrace_utrace_ops, 0);
+	ret = -ESRCH;
+	if (IS_ERR(engine) || engine == NULL)
+		goto out_tsk_rcu;
+	state = rcu_dereference((struct ptrace_state *) engine->data);
+	if (state == NULL || state->parent != current)
+		goto out_tsk_rcu;
+	rcu_read_unlock();
+
+	/*
+	 * Traditional ptrace behavior demands that the target already be
+	 * quiescent, but not dead.
+	 */
+	if (request != PTRACE_KILL
+	    && !(engine->flags & UTRACE_ACTION_QUIESCE)) {
+		pr_debug("%d not stopped (%lu)\n", child->pid, child->state);
+		goto out_tsk;
 	}
-	return copied;
+
+	/*
+	 * We do this for all requests to match traditional ptrace behavior.
+	 * If the machine state synchronization done at context switch time
+	 * includes e.g. writing back to user memory, we want to make sure
+	 * that has finished before a PTRACE_PEEKDATA can fetch the results.
+	 * On most machines, only regset data is affected by context switch
+	 * and calling utrace_regset later on will take care of that, so
+	 * this is superfluous.
+	 *
+	 * To do this purely in utrace terms, we could do:
+	 *  (void) utrace_regset(child, engine, utrace_native_view(child), 0);
+	 */
+	wait_task_inactive(child);
+
+	if (child->exit_state)
+		goto out_tsk;
+
+	*childp = child;
+	*enginep = engine;
+	*statep = state;
+	return -EIO;
+
+out_tsk_rcu:
+	rcu_read_unlock();
+out_tsk:
+	put_task_struct(child);
+out:
+	return ret;
 }
 
-int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len)
+static int
+ptrace_common(long request, struct task_struct *child,
+	      struct utrace_attached_engine *engine,
+	      struct ptrace_state *state,
+	      unsigned long addr, long data)
 {
-	int copied = 0;
+	unsigned long flags;
+	int ret = -EIO;
 
-	while (len > 0) {
-		char buf[128];
-		int this_len, retval;
+	switch (request) {
+	case PTRACE_DETACH:
+		/*
+		 * Detach a process that was attached.
+		 */
+		ret = ptrace_induce_signal(child, engine, data);
+		if (!ret) {
+			ret = ptrace_detach(child, engine, state);
+			if (ret == -EALREADY)	/* Already a zombie. */
+				ret = -ESRCH;
+			if (ret)
+				BUG_ON(ret != -ESRCH);
+		}
+		break;
 
-		this_len = (len > sizeof(buf)) ? sizeof(buf) : len;
-		if (copy_from_user(buf, src, this_len))
-			return -EFAULT;
-		retval = access_process_vm(tsk, dst, buf, this_len, 1);
-		if (!retval) {
-			if (copied)
+	/*
+	 * These are the operations that resume the child running.
+	 */
+	case PTRACE_KILL:
+		data = SIGKILL;
+	case PTRACE_CONT:
+	case PTRACE_SYSCALL:
+#ifdef PTRACE_SYSEMU
+	case PTRACE_SYSEMU:
+	case PTRACE_SYSEMU_SINGLESTEP:
+#endif
+#ifdef PTRACE_SINGLEBLOCK
+	case PTRACE_SINGLEBLOCK:
+# ifdef ARCH_HAS_BLOCK_STEP
+		if (! ARCH_HAS_BLOCK_STEP)
+# endif
+			if (request == PTRACE_SINGLEBLOCK)
 				break;
-			return -EIO;
+#endif
+	case PTRACE_SINGLESTEP:
+#ifdef ARCH_HAS_SINGLE_STEP
+		if (! ARCH_HAS_SINGLE_STEP)
+#endif
+			if (request == PTRACE_SINGLESTEP
+#ifdef PTRACE_SYSEMU_SINGLESTEP
+			    || request == PTRACE_SYSEMU_SINGLESTEP
+#endif
+			    )
+				break;
+
+		ret = ptrace_induce_signal(child, engine, data);
+		if (ret)
+			break;
+
+
+		/*
+		 * Reset the action flags without QUIESCE, so it resumes.
+		 */
+		flags = 0;
+#ifdef PTRACE_SYSEMU
+		state->sysemu = (request == PTRACE_SYSEMU_SINGLESTEP
+				 || request == PTRACE_SYSEMU);
+#endif
+		if (request == PTRACE_SINGLESTEP
+#ifdef PTRACE_SYSEMU
+		    || request == PTRACE_SYSEMU_SINGLESTEP
+#endif
+		    )
+			flags |= UTRACE_ACTION_SINGLESTEP;
+#ifdef PTRACE_SINGLEBLOCK
+		else if (request == PTRACE_SINGLEBLOCK)
+			flags |= UTRACE_ACTION_BLOCKSTEP;
+#endif
+		if (request == PTRACE_SYSCALL)
+			flags |= UTRACE_EVENT_SYSCALL;
+#ifdef PTRACE_SYSEMU
+		else if (request == PTRACE_SYSEMU
+			 || request == PTRACE_SYSEMU_SINGLESTEP)
+			flags |= UTRACE_EVENT(SYSCALL_ENTRY);
+#endif
+		ret = ptrace_update(child, engine, flags, 1);
+		if (ret)
+			BUG_ON(ret != -ESRCH);
+		ret = 0;
+		break;
+
+#ifdef PTRACE_OLDSETOPTIONS
+	case PTRACE_OLDSETOPTIONS:
+#endif
+	case PTRACE_SETOPTIONS:
+		ret = -EINVAL;
+		if (data & ~PTRACE_O_MASK)
+			break;
+		state->options = data;
+		ret = ptrace_update(child, engine, UTRACE_ACTION_QUIESCE, 1);
+		if (ret)
+			BUG_ON(ret != -ESRCH);
+		ret = 0;
+		break;
+	}
+
+	return ret;
+}
+
+
+asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
+{
+	struct task_struct *child;
+	struct utrace_attached_engine *engine;
+	struct ptrace_state *state;
+	long ret, val;
+
+	pr_debug("%d sys_ptrace(%ld, %ld, %lx, %lx)\n",
+		 current->pid, request, pid, addr, data);
+
+	ret = ptrace_start(pid, request, &child, &engine, &state);
+	if (ret != -EIO)
+		goto out;
+
+	val = 0;
+	ret = arch_ptrace(&request, child, engine, addr, data, &val);
+	if (ret != -ENOSYS) {
+		if (ret == 0) {
+			ret = val;
+			force_successful_syscall_return();
+		}
+		goto out_tsk;
+	}
+
+	switch (request) {
+	default:
+		ret = ptrace_common(request, child, engine, state, addr, data);
+		break;
+
+	case PTRACE_PEEKTEXT: /* read word at location addr. */
+	case PTRACE_PEEKDATA: {
+		unsigned long tmp;
+		int copied;
+
+		copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
+		ret = -EIO;
+		if (copied != sizeof(tmp))
+			break;
+		ret = put_user(tmp, (unsigned long __user *) data);
+		break;
+	}
+
+	case PTRACE_POKETEXT: /* write the word at location addr. */
+	case PTRACE_POKEDATA:
+		ret = 0;
+		if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data))
+			break;
+		ret = -EIO;
+		break;
+
+	case PTRACE_GETEVENTMSG:
+		ret = put_user(state->have_eventmsg
+			       ? state->u.eventmsg : 0L,
+			       (unsigned long __user *) data);
+		break;
+	case PTRACE_GETSIGINFO:
+		ret = -EINVAL;
+		if (!state->have_eventmsg && state->u.siginfo)
+			ret = copy_siginfo_to_user((siginfo_t __user *) data,
+						   state->u.siginfo);
+		break;
+	case PTRACE_SETSIGINFO:
+		ret = -EINVAL;
+		if (!state->have_eventmsg && state->u.siginfo) {
+			ret = 0;
+			if (copy_from_user(state->u.siginfo,
+					   (siginfo_t __user *) data,
+					   sizeof(siginfo_t)))
+				ret = -EFAULT;
 		}
-		copied += retval;
-		src += retval;
-		dst += retval;
-		len -= retval;
+		break;
 	}
-	return copied;
+
+out_tsk:
+	put_task_struct(child);
+out:
+	pr_debug("%d ptrace -> %lx\n", current->pid, ret);
+	return ret;
 }
 
-int ptrace_request(struct task_struct *child, long request,
-		   long addr, long data)
+
+#ifdef CONFIG_COMPAT
+#include
+
+asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
+				  compat_ulong_t addr, compat_long_t cdata)
 {
-	return -ENOSYS;
+	const unsigned long data = (unsigned long) (compat_ulong_t) cdata;
+	struct task_struct *child;
+	struct utrace_attached_engine *engine;
+	struct ptrace_state *state;
+	compat_long_t ret, val;
+
+	pr_debug("%d compat_sys_ptrace(%d, %d, %x, %x)\n",
+		 current->pid, request, pid, addr, cdata);
+	ret = ptrace_start(pid, request, &child, &engine, &state);
+	if (ret != -EIO)
+		goto out;
+
+	val = 0;
+	ret = arch_compat_ptrace(&request, child, engine, addr, cdata, &val);
+	if (ret != -ENOSYS) {
+		if (ret == 0) {
+			ret = val;
+			force_successful_syscall_return();
+		}
+		goto out_tsk;
+	}
+
+	switch (request) {
+	default:
+		ret = ptrace_common(request, child, engine, state, addr, data);
+		break;
+
+	case PTRACE_PEEKTEXT: /* read word at location addr. */
+	case PTRACE_PEEKDATA: {
+		compat_ulong_t tmp;
+		int copied;
+
+		copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
+		ret = -EIO;
+		if (copied != sizeof(tmp))
+			break;
+		ret = put_user(tmp, (compat_ulong_t __user *) data);
+		break;
+	}
+
+	case PTRACE_POKETEXT: /* write the word at location addr. */
+	case PTRACE_POKEDATA:
+		ret = 0;
+		if (access_process_vm(child, addr, &cdata, sizeof(cdata), 1) == sizeof(cdata))
+			break;
+		ret = -EIO;
+		break;
+
+	case PTRACE_GETEVENTMSG:
+		ret = put_user(state->have_eventmsg
+			       ? state->u.eventmsg : 0L,
+			       (compat_long_t __user *) data);
+		break;
+	case PTRACE_GETSIGINFO:
+		ret = -EINVAL;
+		if (!state->have_eventmsg && state->u.siginfo)
+			ret = copy_siginfo_to_user32(
+				(struct compat_siginfo __user *) data,
+				state->u.siginfo);
+		break;
+	case PTRACE_SETSIGINFO:
+		ret = -EINVAL;
+		if (!state->have_eventmsg && state->u.siginfo) {
+			ret = 0;
+			if (copy_siginfo_from_user32(
+				    state->u.siginfo,
+				    (struct compat_siginfo __user *) data))
+				ret = -EFAULT;
+		}
+		break;
+	}
+
+out_tsk:
+	put_task_struct(child);
+out:
+	pr_debug("%d ptrace -> %lx\n", current->pid, (long)ret);
+	return ret;
 }
+#endif
+
 
-/**
- * ptrace_traceme  --  helper for PTRACE_TRACEME
- *
- * Performs checks and sets PT_PTRACED.
- * Should be used by all ptrace implementations for PTRACE_TRACEME.
+/*
+ * Detach the zombie being reported for wait.
  */
-int ptrace_traceme(void)
+static inline void
+detach_zombie(struct task_struct *tsk,
+	      struct task_struct *p, struct ptrace_state *state)
 {
-	int ret = -EPERM;
+	int detach_error;
+	struct utrace_attached_engine *engine;
 
-	ret = security_ptrace(current->parent, current);
-	if (ret)
-		return -EPERM;
-
-	return -ENOSYS;
+restart:
+	detach_error = 0;
+	rcu_read_lock();
+	if (tsk == current)
+		engine = state->engine;
+	else {
+		/*
+		 * We've excluded other ptrace_do_wait calls.  But the
-int ptrace_request(struct task_struct *child, long request,
-		   long addr, long data)
+
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+
+asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
+				  compat_ulong_t addr, compat_long_t cdata)
 {
-	return -ENOSYS;
+	const unsigned long data = (unsigned long) (compat_ulong_t) cdata;
+	struct task_struct *child;
+	struct utrace_attached_engine *engine;
+	struct ptrace_state *state;
+	compat_long_t ret, val;
+
+	pr_debug("%d compat_sys_ptrace(%d, %d, %x, %x)\n",
+		 current->pid, request, pid, addr, cdata);
+	ret = ptrace_start(pid, request, &child, &engine, &state);
+	if (ret != -EIO)
+		goto out;
+
+	val = 0;
+	ret = arch_compat_ptrace(&request, child, engine, addr, cdata, &val);
+	if (ret != -ENOSYS) {
+		if (ret == 0) {
+			ret = val;
+			force_successful_syscall_return();
+		}
+		goto out_tsk;
+	}
+
+	switch (request) {
+	default:
+		ret = ptrace_common(request, child, engine, state, addr, data);
+		break;
+
+	case PTRACE_PEEKTEXT: /* read word at location addr. */
+	case PTRACE_PEEKDATA: {
+		compat_ulong_t tmp;
+		int copied;
+
+		copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
+		ret = -EIO;
+		if (copied != sizeof(tmp))
+			break;
+		ret = put_user(tmp, (compat_ulong_t __user *) data);
+		break;
+	}
+
+	case PTRACE_POKETEXT: /* write the word at location addr. */
+	case PTRACE_POKEDATA:
+		ret = 0;
+		if (access_process_vm(child, addr, &cdata, sizeof(cdata), 1) == sizeof(cdata))
+			break;
+		ret = -EIO;
+		break;
+
+	case PTRACE_GETEVENTMSG:
+		ret = put_user(state->have_eventmsg
+			       ? state->u.eventmsg : 0L,
+			       (compat_long_t __user *) data);
+		break;
+	case PTRACE_GETSIGINFO:
+		ret = -EINVAL;
+		if (!state->have_eventmsg && state->u.siginfo)
+			ret = copy_siginfo_to_user32(
+				(struct compat_siginfo __user *) data,
+				state->u.siginfo);
+		break;
+	case PTRACE_SETSIGINFO:
+		ret = -EINVAL;
+		if (!state->have_eventmsg && state->u.siginfo) {
+			ret = 0;
+			if (copy_siginfo_from_user32(
+				    state->u.siginfo,
+				    (struct compat_siginfo __user *) data))
+				ret = -EFAULT;
+		}
+		break;
+	}
+
+out_tsk:
+	put_task_struct(child);
+out:
+	pr_debug("%d ptrace -> %lx\n", current->pid, (long)ret);
+	return ret;
 }
+#endif
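
One subtlety worth noting above: cdata is signed, so the double cast through compat_ulong_t zero-extends a 32-bit tracer's pointer instead of sign-extending it. An illustrative sketch (assuming 64-bit unsigned long):

	compat_long_t cdata = -0x7fffffff - 1;	/* user pointer 0x80000000 */
	unsigned long sign_extended = (unsigned long) cdata;
	unsigned long zero_extended = (unsigned long) (compat_ulong_t) cdata;
	/* sign_extended == 0xffffffff80000000, zero_extended == 0x0000000080000000 */
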
+
+
-/**
- * ptrace_traceme  --  helper for PTRACE_TRACEME
- *
- * Performs checks and sets PT_PTRACED.
- * Should be used by all ptrace implementations for PTRACE_TRACEME.
+/*
+ * Detach the zombie being reported for wait.
  */
-int ptrace_traceme(void)
+static inline void
+detach_zombie(struct task_struct *tsk,
+	      struct task_struct *p, struct ptrace_state *state)
 {
-	int ret = -EPERM;
+	int detach_error;
+	struct utrace_attached_engine *engine;
-	ret = security_ptrace(current->parent, current);
-	if (ret)
-		return -EPERM;
-
-	return -ENOSYS;
+restart:
+	detach_error = 0;
+	rcu_read_lock();
+	if (tsk == current)
+		engine = state->engine;
+	else {
+		/*
+		 * We've excluded other ptrace_do_wait calls.  But the
+		 * ptracer itself might have done ptrace_detach while we
+		 * did not have rcu_read_lock.  So double-check that state
+		 * is still valid.
+		 */
+		engine = utrace_attach(
+			p, (UTRACE_ATTACH_MATCH_OPS
+			    | UTRACE_ATTACH_MATCH_DATA),
+			&ptrace_utrace_ops,
+			(unsigned long) state);
+		if (IS_ERR(engine) || state->parent != tsk)
+			detach_error = -ESRCH;
+		else
+			BUG_ON(state->engine != engine);
+	}
+	rcu_read_unlock();
+	if (likely(!detach_error))
+		detach_error = ptrace_detach(p, engine, state);
+	if (unlikely(detach_error == -EALREADY)) {
+		/*
+		 * It's still doing report_death callbacks.
+		 * Just wait for it to settle down.
+		 */
+		wait_task_inactive(p); /* Might block. */
+		goto restart;
+	}
+	/*
+	 * A failure with -ESRCH means that report_reap is
+	 * already running and will do the cleanup, or that
+	 * we lost a race with ptrace_detach in another
+	 * thread or with the automatic detach in
+	 * report_death.
+	 */
+	if (detach_error)
+		BUG_ON(detach_error != -ESRCH);
 }
-/**
- * ptrace_get_task_struct  --  grab a task struct reference for ptrace
- * @pid:       process id to grab a task_struct reference of
- *
- * This function is a helper for ptrace implementations.  It checks
- * permissions and then grabs a task struct for use of the actual
- * ptrace implementation.
- *
- * Returns the task_struct for @pid or an ERR_PTR() on failure.
+/*
+ * We're called with tasklist_lock held for reading.
+ * If we return -ECHILD or zero, next_thread(tsk) must still be valid to use.
+ * If we return another error code, or a successful PID value, we
+ * release tasklist_lock first.
  */
-struct task_struct *ptrace_get_task_struct(pid_t pid)
+int
+ptrace_do_wait(struct task_struct *tsk,
+	       pid_t pid, int options, struct siginfo __user *infop,
+	       int __user *stat_addr, struct rusage __user *rusagep)
+	__releases(tasklist_lock)
 {
-	struct task_struct *child;
+	struct ptrace_state *state;
+	struct task_struct *p;
+	int err = -ECHILD;
+	int exit_code, why, status;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(state, &tsk->ptracees, entry) {
+		p = state->task;
+
+		if (pid > 0) {
+			if (p->pid != pid)
+				continue;
+		} else if (!pid) {
+			if (process_group(p) != process_group(current))
+				continue;
+		} else if (pid != -1) {
+			if (process_group(p) != -pid)
+				continue;
+		}
+		if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0))
+		    && !(options & __WALL))
+			continue;
+		if (security_task_wait(p))
+			continue;
+
+		/*
+		 * This is a matching child.  If we don't win now, tell
+		 * our caller to block and repeat.  From this point we
+		 * must ensure that wait_chldexit will get a wakeup for
+		 * any tracee stopping, dying, or being detached.
+		 * For death, tasklist_lock guarantees this already.
+		 */
+		err = 0;
+
+		switch (p->exit_state) {
+		case EXIT_ZOMBIE:
+			if (!likely(options & WEXITED))
+				continue;
+			if (delay_group_leader(p)) {
+				struct task_struct *next = next_thread(p);
+				pr_debug("%d ptrace_do_wait leaving %d "
+					 "zombie code %x "
+					 "delay_group_leader (%d/%lu)\n",
+					 current->pid, p->pid, p->exit_code,
+					 next->pid, next->state);
+				continue;
+			}
+			exit_code = p->exit_code;
+			goto found;
+		case EXIT_DEAD:
+			continue;
+		default:
+			/*
+			 * tasklist_lock holds up any transitions to
+			 * EXIT_ZOMBIE.  After releasing it we are
+			 * guaranteed a wakeup on wait_chldexit after
+			 * any new deaths.
+			 */
+			if (p->flags & PF_EXITING)
+				/*
+				 * It's in do_exit and might have set
+				 * p->exit_code already, but it's not quite
+				 * dead yet.  It will get to report_death
+				 * and wake us up when it finishes.
+				 */
+				continue;
+			break;
+		}
+
+		/*
+		 * This xchg atomically ensures that only one do_wait
+		 * call can report this thread.  Because exit_code is
+		 * always set before do_notify wakes us up, after this
+		 * check fails we are sure to get a wakeup if it stops.
+		 */
+		exit_code = xchg(&p->exit_code, 0);
+		if (exit_code)
+			goto found;
+
+		/* XXX: should handle WCONTINUED */
+
+		pr_debug("%d ptrace_do_wait leaving %d state %lu code %x\n",
+			 current->pid, p->pid, p->state, p->exit_code);
+	}
+	rcu_read_unlock();
+	if (err == 0)
+		pr_debug("%d ptrace_do_wait blocking\n", current->pid);
+
+	return err;
+
+found:
+	BUG_ON(state->parent != tsk);
+	rcu_read_unlock();
+
+	pr_debug("%d ptrace_do_wait (%d) found %d code %x (%lu/%d)\n",
+		 current->pid, tsk->pid, p->pid, exit_code,
+		 p->exit_state, p->exit_signal);
 	/*
-	 * Tracing init is not allowed.
+	 * If there was a group exit in progress, all threads report that
+	 * status.  Most will have SIGKILL in their own exit_code.
 	 */
-	if (pid == 1)
-		return ERR_PTR(-EPERM);
+	if (p->signal->flags & SIGNAL_GROUP_EXIT)
+		exit_code = p->signal->group_exit_code;
+
+	if (p->exit_state) {
+		if (unlikely(p->parent == tsk && p->exit_signal != -1))
+			/*
+			 * This is our natural child we were ptracing.
+			 * When it dies it detaches (see ptrace_report_death).
+			 * So we're seeing it here in a race.  When it
+			 * finishes detaching it will become reapable in
+			 * the normal wait_task_zombie path instead.
+			 */
+			return 0;
+		if ((exit_code & 0x7f) == 0) {
+			why = CLD_EXITED;
+			status = exit_code >> 8;
+		}
+		else {
+			why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED;
+			status = exit_code & 0x7f;
+		}
+	}
+	else {
+		why = CLD_TRAPPED;
+		status = exit_code;
+		exit_code = (status << 8) | 0x7f;
+	}
+
+	/*
+	 * At this point we are committed to a successful return
+	 * or a user error return.  Release the tasklist_lock.
+	 */
+	get_task_struct(p);
+	read_unlock(&tasklist_lock);
+
+	if (rusagep)
+		err = getrusage(p, RUSAGE_BOTH, rusagep);
+	if (infop) {
+		if (!err)
+			err = put_user(SIGCHLD, &infop->si_signo);
+		if (!err)
+			err = put_user(0, &infop->si_errno);
+		if (!err)
+			err = put_user((short)why, &infop->si_code);
+		if (!err)
+			err = put_user(p->pid, &infop->si_pid);
+		if (!err)
+			err = put_user(p->uid, &infop->si_uid);
+		if (!err)
+			err = put_user(status, &infop->si_status);
+	}
+	if (!err && stat_addr)
+		err = put_user(exit_code, stat_addr);
+
+	if (!err) {
+		if (why != CLD_TRAPPED)
+			/*
+			 * This was a death report.  The ptracer's wait
+			 * does an implicit detach, so the zombie reports
+			 * to its real parent now.
+			 */
+			detach_zombie(tsk, p, state);
+		err = p->pid;
+	}
+
+	put_task_struct(p);
+
+	return err;
+}
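
The why/status computation above is the kernel side of the user-visible wait status encoding. An illustrative user-space check (not part of the patch) of how the three cases decode:

	#include <assert.h>
	#include <signal.h>
	#include <sys/wait.h>

	static void wait_status_examples(void)
	{
		int exited  = 3 << 8;                 /* why = CLD_EXITED, status 3 */
		int killed  = SIGKILL;                /* why = CLD_KILLED */
		int trapped = (SIGTRAP << 8) | 0x7f;  /* why = CLD_TRAPPED */

		assert(WIFEXITED(exited) && WEXITSTATUS(exited) == 3);
		assert(WIFSIGNALED(killed) && WTERMSIG(killed) == SIGKILL);
		assert(WIFSTOPPED(trapped) && WSTOPSIG(trapped) == SIGTRAP);
	}
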
+
+
+/*
+ * All the report callbacks (except death and reap) are subject to a race
+ * with ptrace_exit doing a quick detach and ptrace_done.  It can do this
+ * even when the target is not quiescent, so a callback may already be in
+ * progress when it does ptrace_done.  Callbacks use this function to fetch
+ * the struct ptrace_state while ensuring it doesn't disappear until
+ * put_ptrace_state is called.  This just uses RCU, since state and
+ * anything we try to do to state->parent is safe under rcu_read_lock.
+ */
+static struct ptrace_state *
+get_ptrace_state(struct utrace_attached_engine *engine,
+		 struct task_struct *tsk)
+	__acquires(RCU)
+{
+	struct ptrace_state *state;
+
+	rcu_read_lock();
+	state = rcu_dereference((struct ptrace_state *) engine->data);
+	if (likely(state != NULL))
+		return state;
+
+	rcu_read_unlock();
+	return NULL;
+}
+
+static inline void
+put_ptrace_state(struct ptrace_state *state)
+	__releases(RCU)
+{
+	rcu_read_unlock();
+}
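
Every report callback below brackets its use of the state with this pair; the shape is always the same. A condensed sketch of the pattern (using the names defined in this file, not a new function in the patch):

	static u32 example_report_callback(struct utrace_attached_engine *engine,
					   struct task_struct *tsk)
	{
		struct ptrace_state *state = get_ptrace_state(engine, tsk);
		if (unlikely(state == NULL))	/* lost a race with detach */
			return UTRACE_ACTION_RESUME;
		/* ... state->parent et al. are stable only here ... */
		put_ptrace_state(state);
		return UTRACE_ACTION_RESUME;
	}
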
+
+
+static void
+do_notify(struct task_struct *tsk, struct task_struct *parent, int why)
+{
+	struct siginfo info;
+	unsigned long flags;
+	struct sighand_struct *sighand;
+	int sa_mask;
+
+	info.si_signo = SIGCHLD;
+	info.si_errno = 0;
+	info.si_pid = tsk->pid;
+	info.si_uid = tsk->uid;
+
+	/* FIXME: find out whether or not this is supposed to be c*time. */
+	info.si_utime = cputime_to_jiffies(tsk->utime);
+	info.si_stime = cputime_to_jiffies(tsk->stime);
+
+	sa_mask = SA_NOCLDSTOP;
+	info.si_code = why;
+	info.si_status = tsk->exit_code & 0x7f;
+	if (why == CLD_CONTINUED)
+		info.si_status = SIGCONT;
+	else if (why == CLD_STOPPED)
+		info.si_status = tsk->signal->group_exit_code & 0x7f;
+	else if (why == CLD_EXITED) {
+		sa_mask = SA_NOCLDWAIT;
+		if (tsk->exit_code & 0x80)
+			info.si_code = CLD_DUMPED;
+		else if (tsk->exit_code & 0x7f)
+			info.si_code = CLD_KILLED;
+		else {
+			info.si_code = CLD_EXITED;
+			info.si_status = tsk->exit_code >> 8;
+		}
+	}
+
 	read_lock(&tasklist_lock);
-	child = find_task_by_pid(pid);
-	if (child)
-		get_task_struct(child);
+	if (unlikely(parent->signal == NULL))
+		goto out;
+	sighand = parent->sighand;
+	spin_lock_irqsave(&sighand->siglock, flags);
+	if (sighand->action[SIGCHLD-1].sa.sa_handler != SIG_IGN &&
+	    !(sighand->action[SIGCHLD-1].sa.sa_flags & sa_mask))
+		__group_send_sig_info(SIGCHLD, &info, parent);
+	/*
+	 * Even if SIGCHLD is not generated, we must wake up wait4 calls.
+	 */
+	wake_up_interruptible_sync(&parent->signal->wait_chldexit);
+	spin_unlock_irqrestore(&sighand->siglock, flags);
+
+out:
 	read_unlock(&tasklist_lock);
-	if (!child)
-		return ERR_PTR(-ESRCH);
-	return child;
 }
-asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
+static u32
+ptrace_report(struct utrace_attached_engine *engine,
+	      struct task_struct *tsk,
+	      struct ptrace_state *state,
+	      int code)
+	__releases(RCU)
 {
-	return -ENOSYS;
+	const struct utrace_regset *regset;
+
+	pr_debug("%d ptrace_report %d engine %p"
+		 " state %p code %x parent %d (%p)\n",
+		 current->pid, tsk->pid, engine, state, code,
+		 state->parent->pid, state->parent);
+	if (!state->have_eventmsg && state->u.siginfo) {
+		const siginfo_t *si = state->u.siginfo;
+		pr_debug("  si %d code %x errno %d addr %p\n",
+			 si->si_signo, si->si_code, si->si_errno,
+			 si->si_addr);
+	}
+
+	/*
+	 * Set our QUIESCE flag right now, before notifying the tracer.
+	 * We do this before setting tsk->exit_code rather than
+	 * by using UTRACE_ACTION_NEWSTATE in our return value, to
+	 * ensure that the tracer can't get the notification and then
+	 * try to resume us with PTRACE_CONT before we set the flag.
+	 */
+	utrace_set_flags(tsk, engine, engine->flags | UTRACE_ACTION_QUIESCE);
+
+	/*
+	 * If regset 0 has a writeback call, do it now.  On register window
+	 * machines, this makes sure the user memory backing the register
+	 * data is up to date by the time wait_task_inactive returns to
+	 * ptrace_start in our tracer doing a PTRACE_PEEKDATA or the like.
+	 */
+	regset = utrace_regset(tsk, engine, utrace_native_view(tsk), 0);
+	if (regset->writeback)
+		(*regset->writeback)(tsk, regset, 0);
+
+	BUG_ON(code == 0);
+	tsk->exit_code = code;
+	do_notify(tsk, state->parent, CLD_TRAPPED);
+
+	pr_debug("%d ptrace_report quiescing exit_code %x\n",
+		 current->pid, current->exit_code);
+
+	put_ptrace_state(state);
+
+	return UTRACE_ACTION_RESUME;
+}
+
+static inline u32
+ptrace_event(struct utrace_attached_engine *engine,
+	     struct task_struct *tsk,
+	     struct ptrace_state *state,
+	     int event)
+	__releases(RCU)
+{
+	state->syscall = 0;
+	return ptrace_report(engine, tsk, state, (event << 8) | SIGTRAP);
+}
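
A stop reported through ptrace_event() reaches the tracer as a wait status of (((event << 8) | SIGTRAP) << 8) | 0x7f, so user space recovers the event number with the usual status >> 16 idiom. Illustrative sketch (not part of the patch):

	#include <signal.h>
	#include <sys/wait.h>

	/* Returns the PTRACE_EVENT_* number for an event stop, else 0. */
	static int event_of_status(int status)
	{
		if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP)
			return status >> 16;
		return 0;
	}
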
+
+/*
+ * Unlike other report callbacks, this can't be called while ptrace_exit
+ * is doing ptrace_done in parallel, so we don't need get_ptrace_state.
+ */
+static u32
+ptrace_report_death(struct utrace_attached_engine *engine,
+		    struct task_struct *tsk)
+{
+	struct ptrace_state *state = (struct ptrace_state *) engine->data;
+
+	if (tsk->exit_code == 0 && unlikely(tsk->flags & PF_SIGNALED))
+		/*
+		 * This can only mean that tsk->exit_code was clobbered
+		 * by ptrace_update or ptrace_do_wait in a race with
+		 * an asynchronous wakeup and exit for SIGKILL.
+		 */
+		tsk->exit_code = SIGKILL;
+
+	if (tsk->parent == state->parent && tsk->exit_signal != -1) {
+		/*
+		 * This is a natural child (excluding clone siblings of a
+		 * child group_leader), so we detach and let the normal
+		 * reporting happen once our NOREAP action is gone.  But
+		 * first, generate a SIGCHLD for those cases where normal
+		 * behavior won't.  A ptrace'd child always generates SIGCHLD.
+		 */
+		pr_debug("ptrace %d death natural parent %d exit_code %x\n",
+			 tsk->pid, state->parent->pid, tsk->exit_code);
+		if (!thread_group_empty(tsk))
+			do_notify(tsk, state->parent, CLD_EXITED);
+		ptrace_state_unlink(state);
+		rcu_assign_pointer(engine->data, 0UL);
+		ptrace_done(state);
+		return UTRACE_ACTION_DETACH;
+	}
+
+	/*
+	 * This might be a second report_death callback for a group leader
+	 * that was delayed when its original report_death callback was made.
+	 * Repeating do_notify is exactly what we need for that case too.
+	 * After the wakeup, ptrace_do_wait will see delay_group_leader false.
+	 */
+
+	pr_debug("ptrace %d death notify %d exit_code %x: ",
+		 tsk->pid, state->parent->pid, tsk->exit_code);
+	do_notify(tsk, state->parent, CLD_EXITED);
+	pr_debug("%d notified %d\n", tsk->pid, state->parent->pid);
+	return UTRACE_ACTION_RESUME;
+}
+
+/*
+ * We get this only in the case where our UTRACE_ACTION_NOREAP was ignored.
+ * That happens solely when a non-leader exec reaps the old leader.
+ */
+static void
+ptrace_report_reap(struct utrace_attached_engine *engine,
+		   struct task_struct *tsk)
+{
+	struct ptrace_state *state = get_ptrace_state(engine, tsk);
+	if (state != NULL) {
+		ptrace_state_unlink(state);
+		rcu_assign_pointer(engine->data, 0UL);
+		ptrace_done(state);
+		put_ptrace_state(state);
+	}
+}
+
+/*
+ * Start tracing the child.  This has to do put_ptrace_state before it can
+ * do allocation that might block.
+ */
+static void
+ptrace_clone_setup(struct utrace_attached_engine *engine,
+		   struct task_struct *parent,
+		   struct ptrace_state *state,
+		   struct task_struct *child)
+	__releases(RCU)
+{
+	struct task_struct *tracer;
+	struct utrace_attached_engine *child_engine;
+	struct ptrace_state *child_state;
+	int ret;
+	u8 options;
+	int cap_sys_ptrace;
+
+	tracer = state->parent;
+	options = state->options;
+	cap_sys_ptrace = state->cap_sys_ptrace;
+	get_task_struct(tracer);
+	put_ptrace_state(state);
+
+	child_engine = utrace_attach(child, (UTRACE_ATTACH_CREATE
+					     | UTRACE_ATTACH_EXCLUSIVE
+					     | UTRACE_ATTACH_MATCH_OPS),
+				     &ptrace_utrace_ops, 0UL);
+	if (unlikely(IS_ERR(child_engine))) {
+		BUG_ON(PTR_ERR(child_engine) != -ENOMEM);
+		put_task_struct(tracer);
+		goto nomem;
+	}
+
+	child_state = ptrace_setup(child, child_engine,
+				   tracer, options, cap_sys_ptrace, NULL);
+
+	put_task_struct(tracer);
+
+	if (unlikely(IS_ERR(child_state))) {
+		(void) utrace_detach(child, child_engine);
+
+		if (PTR_ERR(child_state) == -ENOMEM)
+			goto nomem;
+
+		/*
+		 * Our tracer has started exiting.  It's
+		 * too late to set it up tracing the child.
+		 */
+		BUG_ON(PTR_ERR(child_state) != -EALREADY);
+	}
+	else {
+		sigaddset(&child->pending.signal, SIGSTOP);
+		set_tsk_thread_flag(child, TIF_SIGPENDING);
+		ret = ptrace_update(child, child_engine, 0, 0);
+
+		/*
+		 * The child hasn't run yet, it can't have died already.
+		 */
+		BUG_ON(ret);
+	}
+
+	return;
+
+nomem:
+	printk(KERN_ERR "ptrace out of memory, lost child %d of %d\n",
+	       child->pid, parent->pid);
+}
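
For reference, the path above is what a tracer exercises with PTRACE_O_TRACEFORK: the new child arrives already attached and with a SIGSTOP pending, and its PID is fetched with PTRACE_GETEVENTMSG. Illustrative user-space sketch (not part of the patch):

	unsigned long new_pid;

	ptrace(PTRACE_SETOPTIONS, pid, 0, PTRACE_O_TRACEFORK);
	/* ... resume, then wait for a stop whose status >> 16 is PTRACE_EVENT_FORK ... */
	ptrace(PTRACE_GETEVENTMSG, pid, 0, &new_pid);
	/* new_pid is now a stopped, traced child of this tracer. */
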
+
+static u32
+ptrace_report_clone(struct utrace_attached_engine *engine,
+		    struct task_struct *parent,
+		    unsigned long clone_flags, struct task_struct *child)
+{
+	int event, option;
+	struct ptrace_state *state = get_ptrace_state(engine, parent);
+	if (unlikely(state == NULL))
+		return UTRACE_ACTION_RESUME;
+
+	pr_debug("%d (%p) engine %p"
+		 " ptrace_report_clone child %d (%p) fl %lx\n",
+		 parent->pid, parent, engine, child->pid, child, clone_flags);
+
+	event = PTRACE_EVENT_FORK;
+	option = PTRACE_O_TRACEFORK;
+	if (clone_flags & CLONE_VFORK) {
+		event = PTRACE_EVENT_VFORK;
+		option = PTRACE_O_TRACEVFORK;
+	}
+	else if ((clone_flags & CSIGNAL) != SIGCHLD) {
+		event = PTRACE_EVENT_CLONE;
+		option = PTRACE_O_TRACECLONE;
+	}
+
+	if (state->options & option) {
+		state->have_eventmsg = 1;
+		state->u.eventmsg = child->pid;
+	}
+	else
+		event = 0;
+
+	if (!(clone_flags & CLONE_UNTRACED)
+	    && (event || (clone_flags & CLONE_PTRACE))) {
+		/*
+		 * Have our tracer start following the child too.
+		 */
+		ptrace_clone_setup(engine, parent, state, child);
+
+		/*
+		 * That did put_ptrace_state, so we have to check
+		 * again in case our tracer just started exiting.
+		 */
+		state = get_ptrace_state(engine, parent);
+		if (unlikely(state == NULL))
+			return UTRACE_ACTION_RESUME;
+	}
+
+	if (event)
+		return ptrace_event(engine, parent, state, event);
+
+	put_ptrace_state(state);
+
+	return UTRACE_ACTION_RESUME;
+}
+
+
+static u32
+ptrace_report_vfork_done(struct utrace_attached_engine *engine,
+			 struct task_struct *parent, pid_t child_pid)
+{
+	struct ptrace_state *state = get_ptrace_state(engine, parent);
+	if (unlikely(state == NULL))
+		return UTRACE_ACTION_RESUME;
+
+	state->have_eventmsg = 1;
+	state->u.eventmsg = child_pid;
+	return ptrace_event(engine, parent, state, PTRACE_EVENT_VFORK_DONE);
+}
+
+
+static u32
+ptrace_report_signal(struct utrace_attached_engine *engine,
+		     struct task_struct *tsk, struct pt_regs *regs,
+		     u32 action, siginfo_t *info,
+		     const struct k_sigaction *orig_ka,
+		     struct k_sigaction *return_ka)
+{
+	int signo = info == NULL ? SIGTRAP : info->si_signo;
+	struct ptrace_state *state = get_ptrace_state(engine, tsk);
+	if (unlikely(state == NULL))
+		return UTRACE_ACTION_RESUME;
+
+	state->syscall = 0;
+	state->have_eventmsg = 0;
+	state->u.siginfo = info;
+	return ptrace_report(engine, tsk, state, signo) | UTRACE_SIGNAL_IGN;
+}
+
+static u32
+ptrace_report_jctl(struct utrace_attached_engine *engine,
+		   struct task_struct *tsk, int type)
+{
+	struct ptrace_state *state = get_ptrace_state(engine, tsk);
+	if (unlikely(state == NULL))
+		return UTRACE_ACTION_RESUME;
+
+	pr_debug("ptrace %d jctl notify %d type %x exit_code %x\n",
+		 tsk->pid, state->parent->pid, type, tsk->exit_code);
+
+	do_notify(tsk, state->parent, type);
+	put_ptrace_state(state);
+
+	return UTRACE_JCTL_NOSIGCHLD;
+}
+
+static u32
+ptrace_report_exec(struct utrace_attached_engine *engine,
+		   struct task_struct *tsk,
+		   const struct linux_binprm *bprm,
+		   struct pt_regs *regs)
+{
+	struct ptrace_state *state = get_ptrace_state(engine, tsk);
+	if (unlikely(state == NULL))
+		return UTRACE_ACTION_RESUME;
+
+	return ptrace_event(engine, tsk, state,
+			    (state->options & PTRACE_O_TRACEEXEC)
+			    ? PTRACE_EVENT_EXEC : 0);
+}
+
+static u32
+ptrace_report_syscall(struct utrace_attached_engine *engine,
+		      struct task_struct *tsk, struct pt_regs *regs,
+		      int entry)
+{
+	struct ptrace_state *state = get_ptrace_state(engine, tsk);
+	if (unlikely(state == NULL))
+		return UTRACE_ACTION_RESUME;
+
+#ifdef PTRACE_SYSEMU
+	if (entry && state->sysemu)
+		tracehook_abort_syscall(regs);
+#endif
+	state->syscall = 1;
+	return ptrace_report(engine, tsk, state,
+			     ((state->options & PTRACE_O_TRACESYSGOOD)
+			      ? 0x80 : 0) | SIGTRAP);
+}
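
The 0x80 OR-ed into the stop signal above is what lets a PTRACE_O_TRACESYSGOOD tracer distinguish syscall stops from genuine SIGTRAPs without a PTRACE_GETSIGINFO round trip. Illustrative check (not part of the patch):

	if (WIFSTOPPED(status) && WSTOPSIG(status) == (SIGTRAP | 0x80))
		/* syscall entry or exit stop */;
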
+
+static u32
+ptrace_report_syscall_entry(struct utrace_attached_engine *engine,
+			    struct task_struct *tsk, struct pt_regs *regs)
+{
+	return ptrace_report_syscall(engine, tsk, regs, 1);
+}
+
+static u32
+ptrace_report_syscall_exit(struct utrace_attached_engine *engine,
+			   struct task_struct *tsk, struct pt_regs *regs)
+{
+	return ptrace_report_syscall(engine, tsk, regs, 0);
+}
+
+static u32
+ptrace_report_exit(struct utrace_attached_engine *engine,
+		   struct task_struct *tsk, long orig_code, long *code)
+{
+	struct ptrace_state *state = get_ptrace_state(engine, tsk);
+	if (unlikely(state == NULL))
+		return UTRACE_ACTION_RESUME;
+
+	state->have_eventmsg = 1;
+	state->u.eventmsg = *code;
+	return ptrace_event(engine, tsk, state, PTRACE_EVENT_EXIT);
+}
+
+static int
+ptrace_unsafe_exec(struct utrace_attached_engine *engine,
+		   struct task_struct *tsk)
+{
+	int unsafe = LSM_UNSAFE_PTRACE;
+	struct ptrace_state *state = get_ptrace_state(engine, tsk);
+	if (likely(state != NULL)) {
+		if (state->cap_sys_ptrace)
+			unsafe = LSM_UNSAFE_PTRACE_CAP;
+		put_ptrace_state(state);
+	}
+	return unsafe;
+}
+
+static struct task_struct *
+ptrace_tracer_task(struct utrace_attached_engine *engine,
+		   struct task_struct *target)
+{
+	struct task_struct *parent = NULL;
+	struct ptrace_state *state = get_ptrace_state(engine, target);
+	if (likely(state != NULL)) {
+		parent = state->parent;
+		put_ptrace_state(state);
+	}
+	return parent;
+}
+
+static int
+ptrace_allow_access_process_vm(struct utrace_attached_engine *engine,
+			       struct task_struct *target,
+			       struct task_struct *caller)
+{
+	struct ptrace_state *state;
+	int ours = 0;
+
+	state = get_ptrace_state(engine, target);
+	if (likely(state != NULL)) {
+		ours = (((engine->flags & UTRACE_ACTION_QUIESCE)
+			 || target->state == TASK_STOPPED)
+			&& state->parent == caller);
+		put_ptrace_state(state);
+	}
+
+	return ours && security_ptrace(caller, target) == 0;
+}
+
+
+static const struct utrace_engine_ops ptrace_utrace_ops =
+{
+	.report_syscall_entry = ptrace_report_syscall_entry,
+	.report_syscall_exit = ptrace_report_syscall_exit,
+	.report_exec = ptrace_report_exec,
+	.report_jctl = ptrace_report_jctl,
+	.report_signal = ptrace_report_signal,
+	.report_vfork_done = ptrace_report_vfork_done,
+	.report_clone = ptrace_report_clone,
+	.report_exit = ptrace_report_exit,
+	.report_death = ptrace_report_death,
+	.report_reap = ptrace_report_reap,
+	.unsafe_exec = ptrace_unsafe_exec,
+	.tracer_task = ptrace_tracer_task,
+	.allow_access_process_vm = ptrace_allow_access_process_vm,
+};
diff -puN kernel/sys_ni.c~utrace-utrace-ptrace-compat kernel/sys_ni.c
--- a/kernel/sys_ni.c~utrace-utrace-ptrace-compat
+++ a/kernel/sys_ni.c
@@ -114,6 +114,10 @@ cond_syscall(sys_vm86);
 cond_syscall(compat_sys_ipc);
 cond_syscall(compat_sys_sysctl);
 
+/* CONFIG_PTRACE syscalls */
+cond_syscall(sys_ptrace);
+cond_syscall(compat_sys_ptrace);
+
 /* arch-specific weak syscall entries */
 cond_syscall(sys_pciconfig_read);
 cond_syscall(sys_pciconfig_write);
_