[PATCH 4] utrace: ptrace compatibility ptrace compatibility on top of interfaces. This attempts to be precisely compatible with existing ptrace behavior. It does not extend, improve, or change it. The ptrace support is made an option, CONFIG_PTRACE. For now, no one will want to turn this off except maybe a bizarre embedded configuration. But it looks forward to a day when we can punt the ptrace system call completely. Signed-off-by: Roland McGrath --- include/asm-x86_64/tracehook.h | 1 include/linux/sched.h | 4 include/linux/ptrace.h | 222 ++++- kernel/fork.c | 2 kernel/sys_ni.c | 4 kernel/exit.c | 13 kernel/ptrace.c | 1823 ++++++++++++++++++++++++++++++++++++--- kernel/Makefile | 3 fs/proc/base.c | 40 + init/Kconfig | 12 arch/i386/kernel/ptrace.c | 40 + arch/powerpc/lib/sstep.c | 3 arch/powerpc/kernel/signal_32.c | 52 + arch/powerpc/kernel/ptrace.c | 242 +++++ arch/x86_64/kernel/ptrace.c | 46 + arch/x86_64/ia32/ptrace32.c | 56 + arch/x86_64/ia32/ia32entry.S | 2 17 files changed, 2398 insertions(+), 167 deletions(-) --- linux-2.6/include/asm-x86_64/tracehook.h +++ linux-2.6/include/asm-x86_64/tracehook.h @@ -15,6 +15,7 @@ #include #include +#include /* * See linux/tracehook.h for the descriptions of what these need to do. --- linux-2.6/include/linux/sched.h +++ linux-2.6/include/linux/sched.h @@ -1036,6 +1036,10 @@ struct task_struct { atomic_t fs_excl; /* holding fs exclusive resources */ struct rcu_head rcu; +#ifdef CONFIG_PTRACE + struct list_head ptracees; +#endif + /* * cache last used pipe for splice */ --- linux-2.6/include/linux/ptrace.h +++ linux-2.6/include/linux/ptrace.h @@ -49,50 +49,198 @@ #include #ifdef __KERNEL__ -/* - * Ptrace flags - * - * The owner ship rules for task->ptrace which holds the ptrace - * flags is simple. When a task is running it owns it's task->ptrace - * flags. When the a task is stopped the ptracer owns task->ptrace. 
- */ - -#define PT_PTRACED 0x00000001 -#define PT_DTRACE 0x00000002 /* delayed trace (used on m68k, i386) */ -#define PT_TRACESYSGOOD 0x00000004 -#define PT_PTRACE_CAP 0x00000008 /* ptracer can follow suid-exec */ -#define PT_TRACE_FORK 0x00000010 -#define PT_TRACE_VFORK 0x00000020 -#define PT_TRACE_CLONE 0x00000040 -#define PT_TRACE_EXEC 0x00000080 -#define PT_TRACE_VFORK_DONE 0x00000100 -#define PT_TRACE_EXIT 0x00000200 - -#define PT_TRACE_MASK 0x000003f4 - -/* single stepping state bits (used on ARM and PA-RISC) */ -#define PT_SINGLESTEP_BIT 31 -#define PT_SINGLESTEP (1< /* For unlikely. */ #include /* For struct task_struct. */ +#include +#include +struct siginfo; +struct rusage; -extern long arch_ptrace(struct task_struct *child, long request, long addr, long data); -extern struct task_struct *ptrace_get_task_struct(pid_t pid); -extern int ptrace_traceme(void); -extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len); -extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len); -extern int ptrace_attach(struct task_struct *tsk); -extern int ptrace_detach(struct task_struct *, unsigned int); -extern void ptrace_disable(struct task_struct *); -extern int ptrace_check_attach(struct task_struct *task, int kill); -extern int ptrace_request(struct task_struct *child, long request, long addr, long data); extern int ptrace_may_attach(struct task_struct *task); +#ifdef CONFIG_PTRACE +#include +struct utrace_attached_engine; +struct utrace_regset_view; + +/* + * These must be defined by arch code to handle machine-specific ptrace + * requests such as PTRACE_PEEKUSR and PTRACE_GETREGS. Returns -ENOSYS for + * any request it does not handle, then handled by machine-independent code. + * This can change *request and then return -ENOSYS to handle a + * machine-specific alias for a generic request. + * + * This code should NOT access task machine state directly. 
Instead it + * should use the utrace_regset accessors. The functions below make this easy. + * + * Any nonzero return value should be for an error. If the return value of + * the ptrace syscall should be a nonzero success value, this returns zero + * and sets *retval to the value--which might have any bit pattern at all, + * including one that looks like -ENOSYS or another error code. + */ +extern int arch_ptrace(long *request, struct task_struct *child, + struct utrace_attached_engine *engine, + unsigned long addr, unsigned long data, + long *retval); +#ifdef CONFIG_COMPAT +#include + +extern int arch_compat_ptrace(compat_long_t *request, + struct task_struct *child, + struct utrace_attached_engine *engine, + compat_ulong_t a, compat_ulong_t d, + compat_long_t *retval); +#endif + +/* + * Convenience function doing access to a single utrace_regset for ptrace. + * The offset and size are in bytes, giving the location in the regset data. + */ +extern int ptrace_regset_access(struct task_struct *child, + struct utrace_attached_engine *engine, + const struct utrace_regset_view *view, + int setno, unsigned long offset, + unsigned int size, void __user *data, + int write); + +/* + * Convenience wrapper for doing access to a whole utrace_regset for ptrace. + */ +static inline int ptrace_whole_regset(struct task_struct *child, + struct utrace_attached_engine *engine, + long data, int setno, int write) +{ + return ptrace_regset_access(child, engine, utrace_native_view(current), + setno, 0, -1, (void __user *)data, write); +} + +/* + * Convenience function doing access to a single slot in a utrace_regset. + * The regno value gives a slot number plus regset->bias. + * The value accessed is regset->size bytes long. 
+ */ +extern int ptrace_onereg_access(struct task_struct *child, + struct utrace_attached_engine *engine, + const struct utrace_regset_view *view, + int setno, unsigned long regno, + void __user *data, int write); + + +/* + * An array of these describes the layout of the virtual struct user + * accessed by PEEKUSR/POKEUSR, or the structure used by GETREGS et al. + * The array is terminated by an element with .end of zero. + * An element describes the range [.start, .end) of struct user offsets, + * measured in bytes; it maps to the regset in the view's regsets array + * at the index given by .regset, at .offset bytes into that regset's data. + * If .regset is -1, then the [.start, .end) range reads as zero + * if .offset is zero, and is skipped on read (user's buffer unchanged) + * if .offset is -1. + */ +struct ptrace_layout_segment { + unsigned int start, end, regset, offset; +}; + +/* + * Convenience function for doing access to a ptrace compatibility layout. + * The offset and size are in bytes. + */ +extern int ptrace_layout_access(struct task_struct *child, + struct utrace_attached_engine *engine, + const struct utrace_regset_view *view, + const struct ptrace_layout_segment layout[], + unsigned long offset, unsigned int size, + void __user *data, void *kdata, int write); + + +/* Convenience wrapper for the common PTRACE_PEEKUSR implementation. */ +static inline int ptrace_peekusr(struct task_struct *child, + struct utrace_attached_engine *engine, + const struct ptrace_layout_segment layout[], + unsigned long addr, long data) +{ + return ptrace_layout_access(child, engine, utrace_native_view(current), + layout, addr, sizeof(long), + (unsigned long __user *)data, NULL, 0); +} + +/* Convenience wrapper for the common PTRACE_POKEUSR implementation. 
*/ +static inline int ptrace_pokeusr(struct task_struct *child, + struct utrace_attached_engine *engine, + const struct ptrace_layout_segment layout[], + unsigned long addr, long data) +{ + return ptrace_layout_access(child, engine, utrace_native_view(current), + layout, addr, sizeof(long), + NULL, &data, 1); +} + +#ifdef CONFIG_COMPAT +/* Convenience wrapper for the common PTRACE_PEEKUSR implementation. */ +static inline int ptrace_compat_peekusr( + struct task_struct *child, struct utrace_attached_engine *engine, + const struct ptrace_layout_segment layout[], + compat_ulong_t addr, compat_ulong_t data) +{ + compat_ulong_t *udata = (compat_ulong_t __user *) (unsigned long) data; + return ptrace_layout_access(child, engine, utrace_native_view(current), + layout, addr, sizeof(compat_ulong_t), + udata, NULL, 0); +} + +/* Convenience wrapper for the common PTRACE_POKEUSR implementation. */ +static inline int ptrace_compat_pokeusr( + struct task_struct *child, struct utrace_attached_engine *engine, + const struct ptrace_layout_segment layout[], + compat_ulong_t addr, compat_ulong_t data) +{ + return ptrace_layout_access(child, engine, utrace_native_view(current), + layout, addr, sizeof(compat_ulong_t), + NULL, &data, 1); +} +#endif + + +/* + * Called in copy_process. + */ +static inline void ptrace_init_task(struct task_struct *tsk) +{ + INIT_LIST_HEAD(&tsk->ptracees); +} + +/* + * Called in do_exit, after setting PF_EXITING, no locks are held. + */ +void ptrace_exit(struct task_struct *tsk); + +/* + * Called in do_wait, with tasklist_lock held for reading. + * This reports any ptrace-child that is ready as do_wait would a normal child. + * If there are no ptrace children, returns -ECHILD. + * If there are some ptrace children but none reporting now, returns 0. + * In those cases the tasklist_lock is still held so next_thread(tsk) works. + * For any other return value, tasklist_lock is released before return. 
+ */ +int ptrace_do_wait(struct task_struct *tsk, + pid_t pid, int options, struct siginfo __user *infop, + int __user *stat_addr, struct rusage __user *rusagep); +#else +static inline void ptrace_init_task(struct task_struct *tsk) { } +static inline void ptrace_exit(struct task_struct *tsk) { } +static inline int ptrace_do_wait(struct task_struct *tsk, + pid_t pid, int options, + struct siginfo __user *infop, + int __user *stat_addr, + struct rusage __user *rusagep) +{ + return -ECHILD; +} +#endif + + #ifndef force_successful_syscall_return /* * System call handlers that, upon successful completion, need to return a --- linux-2.6/kernel/fork.c +++ linux-2.6/kernel/fork.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -1030,6 +1031,7 @@ static struct task_struct *copy_process( INIT_LIST_HEAD(&p->sibling); p->vfork_done = NULL; spin_lock_init(&p->alloc_lock); + ptrace_init_task(p); clear_tsk_thread_flag(p, TIF_SIGPENDING); init_sigpending(&p->pending); --- linux-2.6/kernel/sys_ni.c +++ linux-2.6/kernel/sys_ni.c @@ -113,6 +113,10 @@ cond_syscall(sys_vm86); cond_syscall(compat_sys_ipc); cond_syscall(compat_sys_sysctl); +/* CONFIG_PTRACE syscalls */ +cond_syscall(sys_ptrace); +cond_syscall(compat_sys_ptrace); + /* arch-specific weak syscall entries */ cond_syscall(sys_pciconfig_read); cond_syscall(sys_pciconfig_write); --- linux-2.6/kernel/exit.c +++ linux-2.6/kernel/exit.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -825,6 +826,8 @@ fastcall NORET_TYPE void do_exit(long co tsk->flags |= PF_EXITING; + ptrace_exit(tsk); + if (unlikely(in_atomic())) printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n", current->comm, current->pid, @@ -1426,9 +1429,15 @@ check_continued: break; } } - if (!flag) { - // XXX set flag if we have ptracees + + retval = ptrace_do_wait(tsk, pid, options, + infop, stat_addr, ru); + if (retval != -ECHILD) { + flag = 1; + if (retval != 0) /* He released the lock. 
*/ + goto end; } + if (options & __WNOTHREAD) break; tsk = next_thread(tsk); --- linux-2.6/kernel/ptrace.c +++ linux-2.6/kernel/ptrace.c @@ -19,191 +19,1776 @@ #include #include #include +#include +#include +#include #include #include +struct ptrace_state +{ + struct rcu_head rcu; + + /* + * These elements are always available, even when the struct is + * awaiting destruction at the next RCU callback point. + */ + struct utrace_attached_engine *engine; + struct task_struct *task; /* Target task. */ + struct task_struct *parent; /* Whom we report to. */ + struct list_head entry; /* Entry on parent->ptracees list. */ + + u8 options; /* PTRACE_SETOPTIONS bits. */ + unsigned int syscall:1; /* Reporting for syscall. */ +#ifdef PTRACE_SYSEMU + unsigned int sysemu:1; /* PTRACE_SYSEMU in progress. */ +#endif + unsigned int have_eventmsg:1; /* u.eventmsg valid. */ + unsigned int cap_sys_ptrace:1; /* Tracer capable. */ + + union + { + unsigned long eventmsg; + siginfo_t *siginfo; + } u; +}; + +static const struct utrace_engine_ops ptrace_utrace_ops; /* Initialized below. 
*/ + +static void +ptrace_state_unlink(struct ptrace_state *state) +{ + task_lock(state->parent); + list_del_rcu(&state->entry); + task_unlock(state->parent); +} + +static struct ptrace_state * +ptrace_setup(struct task_struct *target, struct utrace_attached_engine *engine, + struct task_struct *parent, u8 options, int cap_sys_ptrace, + struct ptrace_state *state) +{ + if (state == NULL) { + state = kzalloc(sizeof *state, GFP_USER); + if (unlikely(state == NULL)) + return ERR_PTR(-ENOMEM); + } + + state->engine = engine; + state->task = target; + state->parent = parent; + state->options = options; + state->cap_sys_ptrace = cap_sys_ptrace; + + task_lock(parent); + if (unlikely(parent->flags & PF_EXITING)) { + task_unlock(parent); + kfree(state); + return ERR_PTR(-EALREADY); + } + list_add_rcu(&state->entry, &state->parent->ptracees); + task_unlock(state->parent); + + BUG_ON(engine->data != 0); + rcu_assign_pointer(engine->data, (unsigned long) state); + + return state; +} + +static void +ptrace_state_free(struct rcu_head *rhead) +{ + struct ptrace_state *state = container_of(rhead, + struct ptrace_state, rcu); + kfree(state); +} + +static void +ptrace_done(struct ptrace_state *state) +{ + INIT_RCU_HEAD(&state->rcu); + call_rcu(&state->rcu, ptrace_state_free); +} /* - * Check that we have indeed attached to the thing.. + * Update the tracing engine state to match the new ptrace state. */ -int ptrace_check_attach(struct task_struct *child, int kill) +static int __must_check +ptrace_update(struct task_struct *target, + struct utrace_attached_engine *engine, + unsigned long flags, int from_stopped) { - return -ENOSYS; + struct ptrace_state *state = (struct ptrace_state *) engine->data; + + /* + * These events are always reported. + */ + flags |= (UTRACE_EVENT(DEATH) | UTRACE_EVENT(EXEC) + | UTRACE_EVENT_SIGNAL_ALL | UTRACE_EVENT(JCTL)); + + /* + * We always have to examine clone events to check for CLONE_PTRACE. 
+ */ + flags |= UTRACE_EVENT(CLONE); + + /* + * PTRACE_SETOPTIONS can request more events. + */ + if (state->options & PTRACE_O_TRACEEXIT) + flags |= UTRACE_EVENT(EXIT); + if (state->options & PTRACE_O_TRACEVFORKDONE) + flags |= UTRACE_EVENT(VFORK_DONE); + + /* + * ptrace always inhibits normal parent reaping. + * But for a corner case we sometimes see the REAP event anyway. + */ + flags |= UTRACE_ACTION_NOREAP | UTRACE_EVENT(REAP); + + if (from_stopped && !(flags & UTRACE_ACTION_QUIESCE)) { + /* + * We're letting the thread resume from ptrace stop. + * If SIGKILL is waking it up, it can be racing with us here + * to set its own exit_code in do_exit. Though we clobber + * it here, we check for the case in ptrace_report_death. + */ + if (!unlikely(target->flags & PF_SIGNALED)) + target->exit_code = 0; + + if (!state->have_eventmsg) + state->u.siginfo = NULL; + + if (target->state == TASK_STOPPED) { + /* + * We have to double-check for naughty de_thread + * reaping despite NOREAP, before we can get siglock. + */ + read_lock(&tasklist_lock); + if (!target->exit_state) { + spin_lock_irq(&target->sighand->siglock); + if (target->state == TASK_STOPPED) + target->signal->flags &= + ~SIGNAL_STOP_STOPPED; + spin_unlock_irq(&target->sighand->siglock); + } + read_unlock(&tasklist_lock); + } + } + + return utrace_set_flags(target, engine, flags); } -static int may_attach(struct task_struct *task) +static int ptrace_traceme(void) { - /* May we inspect the given task? - * This check is used both for attaching with ptrace - * and for allowing access to sensitive information in /proc. - * - * ptrace_attach denies several cases that /proc allows - * because setting up the necessary parent/child relationship - * or halting the specified task is impossible. 
- */ - int dumpable = 0; - /* Don't let security modules deny introspection */ - if (task == current) - return 0; - if (((current->uid != task->euid) || - (current->uid != task->suid) || - (current->uid != task->uid) || - (current->gid != task->egid) || - (current->gid != task->sgid) || - (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE)) - return -EPERM; - smp_rmb(); - if (task->mm) - dumpable = task->mm->dumpable; - if (!dumpable && !capable(CAP_SYS_PTRACE)) - return -EPERM; - - return security_ptrace(current, task); -} - -int ptrace_may_attach(struct task_struct *task) -{ - int err; - task_lock(task); - err = may_attach(task); - task_unlock(task); - return !err; + struct utrace_attached_engine *engine; + struct ptrace_state *state; + struct task_struct *parent; + int retval; + + engine = utrace_attach(current, (UTRACE_ATTACH_CREATE + | UTRACE_ATTACH_EXCLUSIVE + | UTRACE_ATTACH_MATCH_OPS), + &ptrace_utrace_ops, 0UL); + + if (IS_ERR(engine)) { + retval = PTR_ERR(engine); + if (retval == -EEXIST) + retval = -EPERM; + } + else { + /* + * We need to preallocate so that we can hold + * rcu_read_lock from extracting ->parent through + * ptrace_setup using it. + */ + state = kzalloc(sizeof *state, GFP_USER); + if (unlikely(state == NULL)) { + (void) utrace_detach(current, engine); + printk(KERN_ERR + "ptrace out of memory, lost child %d of %d", + current->pid, current->parent->pid); + return -ENOMEM; + } + + rcu_read_lock(); + parent = rcu_dereference(current->parent); + + task_lock(current); + retval = security_ptrace(parent, current); + task_unlock(current); + + if (retval) { + kfree(state); + (void) utrace_detach(current, engine); + } + else { + state = ptrace_setup(current, engine, parent, 0, 0, + state); + if (IS_ERR(state)) + retval = PTR_ERR(state); + } + rcu_read_unlock(); + + if (!retval) { + /* + * This can't fail because we can't die while we + * are here doing this. 
+ */ + retval = ptrace_update(current, engine, 0, 0); + BUG_ON(retval); + } + else if (unlikely(retval == -EALREADY)) + /* + * We raced with our parent's exit, which would + * have detached us just after our attach if + * we'd won the race. Pretend we got attached + * and then detached immediately, no error. + */ + retval = 0; + } + + return retval; } -int ptrace_attach(struct task_struct *task) +static int ptrace_attach(struct task_struct *task) { + struct utrace_attached_engine *engine; + struct ptrace_state *state; int retval; retval = -EPERM; if (task->pid <= 1) - goto out; + goto bad; if (task->tgid == current->tgid) goto bad; - retval = may_attach(task); - if (retval) + if (!task->mm) /* kernel threads */ goto bad; - retval = -ENOSYS; + pr_debug("%d ptrace_attach %d state %lu exit_code %x\n", + current->pid, task->pid, task->state, task->exit_code); + + engine = utrace_attach(task, (UTRACE_ATTACH_CREATE + | UTRACE_ATTACH_EXCLUSIVE + | UTRACE_ATTACH_MATCH_OPS), + &ptrace_utrace_ops, 0); + if (IS_ERR(engine)) { + retval = PTR_ERR(engine); + if (retval == -EEXIST) + retval = -EPERM; + goto bad; + } + + pr_debug("%d ptrace_attach %d after utrace_attach: %lu exit_code %x\n", + current->pid, task->pid, task->state, task->exit_code); + + if (ptrace_may_attach(task)) { + state = ptrace_setup(task, engine, current, 0, + capable(CAP_SYS_PTRACE), NULL); + if (IS_ERR(state)) + retval = PTR_ERR(state); + else { + retval = ptrace_update(task, engine, 0, 0); + + pr_debug("%d ptrace_attach %d after ptrace_update (%d)" + " %lu exit_code %x\n", + current->pid, task->pid, retval, + task->state, task->exit_code); + + if (retval) { + /* + * It died before we enabled any callbacks. 
+ */ + if (retval == -EALREADY) + retval = -ESRCH; + BUG_ON(retval != -ESRCH); + ptrace_state_unlink(state); + ptrace_done(state); + } + } + } + if (retval) + (void) utrace_detach(task, engine); + else { + int stopped = 0; + + /* + * We must double-check that task has not just died and + * been reaped (after ptrace_update succeeded). + * This happens when exec (de_thread) ignores NOREAP. + * We cannot call into the signal code if it's dead. + */ + read_lock(&tasklist_lock); + if (likely(!task->exit_state)) { + force_sig_specific(SIGSTOP, task); + + spin_lock_irq(&task->sighand->siglock); + stopped = (task->state == TASK_STOPPED); + spin_unlock_irq(&task->sighand->siglock); + } + read_unlock(&tasklist_lock); + + if (stopped) { + const struct utrace_regset *regset; + + /* + * Set QUIESCE immediately, so we can allow + * ptrace requests while he's in TASK_STOPPED. + */ + retval = ptrace_update(task, engine, + UTRACE_ACTION_QUIESCE, 0); + if (retval) + BUG_ON(retval != -ESRCH); + retval = 0; + + /* + * Do now the regset 0 writeback that we do on every + * stop, since it's never been done. On register + * window machines, this makes sure the user memory + * backing the register data is up to date. + */ + regset = utrace_regset(task, engine, + utrace_native_view(task), 0); + if (regset->writeback) + (*regset->writeback)(task, regset, 1); + } + + pr_debug("%d ptrace_attach %d complete (%sstopped)" + " state %lu code %x", + current->pid, task->pid, stopped ? "" : "not ", + task->state, task->exit_code); + } bad: -out: return retval; } -int ptrace_detach(struct task_struct *child, unsigned int data) +/* + * The task might be dying or being reaped in parallel, in which case + * engine and state may no longer be valid. utrace_detach checks for us. 
+ */ +static int ptrace_detach(struct task_struct *task, + struct utrace_attached_engine *engine, + struct ptrace_state *state) +{ + + int error; + +#ifdef HAVE_ARCH_PTRACE_DETACH + /* + * Some funky compatibility code in arch_ptrace may have + * needed to install special state it should clean up now. + */ + arch_ptrace_detach(task); +#endif + + /* + * Traditional ptrace behavior does wake_up_process no matter what + * in ptrace_detach. But utrace_detach will not do a wakeup if + * it's in a proper job control stop. We need it to wake up from + * TASK_STOPPED and either resume or process more signals. A + * pending stop signal will just leave it stopped again, but will + * consume the signal, and reset task->exit_code for the next wait + * call to see. This is important to userland if ptrace_do_wait + * "stole" the previous unwaited-for-ness (clearing exit_code), but + * there is a pending SIGSTOP, e.g. sent by a PTRACE_ATTACH done + * while already in job control stop. + */ + read_lock(&tasklist_lock); + if (likely(task->signal != NULL)) { + spin_lock_irq(&task->sighand->siglock); + task->signal->flags &= ~SIGNAL_STOP_STOPPED; + spin_unlock_irq(&task->sighand->siglock); + } + read_unlock(&tasklist_lock); + + error = utrace_detach(task, engine); + if (!error) { + /* + * We can only get here from the ptracer itself or via + * detach_zombie from another thread in its group. + */ + BUG_ON(state->parent->tgid != current->tgid); + ptrace_state_unlink(state); + ptrace_done(state); + + /* + * Wake up any other threads that might be blocked in + * wait. Though traditional ptrace does not guarantee + * this wakeup on PTRACE_DETACH, it does prevent + * erroneous blocking in wait when another racing + * thread's wait call reap-detaches the last child. + * Without this wakeup, another thread might stay + * blocked when it should return -ECHILD. 
+ */ + spin_lock_irq(¤t->sighand->siglock); + wake_up_interruptible(¤t->signal->wait_chldexit); + spin_unlock_irq(¤t->sighand->siglock); + } + return error; +} + + +/* + * This is called when we are exiting. We must stop all our ptracing. + */ +void +ptrace_exit(struct task_struct *tsk) +{ + struct list_head *pos, *n; + + /* + * Taking the task_lock after PF_EXITING is set ensures that a + * child in ptrace_traceme will not put itself on our list when + * we might already be tearing it down. + */ + task_lock(tsk); + if (likely(list_empty(&tsk->ptracees))) { + task_unlock(tsk); + return; + } + task_unlock(tsk); + +restart: + rcu_read_lock(); + + list_for_each_safe_rcu(pos, n, &tsk->ptracees) { + struct ptrace_state *state = list_entry(pos, + struct ptrace_state, + entry); + int error = utrace_detach(state->task, state->engine); + BUG_ON(state->parent != tsk); + if (likely(error == 0)) { + ptrace_state_unlink(state); + ptrace_done(state); + } + else if (unlikely(error == -EALREADY)) { + /* + * It's still doing report_death callbacks. + * Just wait for it to settle down. + * Since wait_task_inactive might yield, + * we must go out of rcu_read_lock and restart. + */ + struct task_struct *p = state->task; + get_task_struct(p); + rcu_read_unlock(); + wait_task_inactive(p); + put_task_struct(p); + goto restart; + } + else + BUG_ON(error != -ESRCH); + } + + rcu_read_unlock(); + + BUG_ON(!list_empty(&tsk->ptracees)); +} + +static int +ptrace_induce_signal(struct task_struct *target, + struct utrace_attached_engine *engine, + long signr) { - if (!valid_signal(data)) + struct ptrace_state *state = (struct ptrace_state *) engine->data; + + if (signr == 0) + return 0; + + if (!valid_signal(signr)) return -EIO; - return -ENOSYS; + if (state->syscall) { + /* + * This is the traditional ptrace behavior when given + * a signal to resume from a syscall tracing stop. 
+ */ + send_sig(signr, target, 1); + } + else if (!state->have_eventmsg && state->u.siginfo) { + siginfo_t *info = state->u.siginfo; + + /* Update the siginfo structure if the signal has + changed. If the debugger wanted something + specific in the siginfo structure then it should + have updated *info via PTRACE_SETSIGINFO. */ + if (signr != info->si_signo) { + info->si_signo = signr; + info->si_errno = 0; + info->si_code = SI_USER; + info->si_pid = current->pid; + info->si_uid = current->uid; + } + + return utrace_inject_signal(target, engine, + UTRACE_ACTION_RESUME, info, NULL); + } + + return 0; } -int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len) +int +ptrace_regset_access(struct task_struct *target, + struct utrace_attached_engine *engine, + const struct utrace_regset_view *view, + int setno, unsigned long offset, unsigned int size, + void __user *data, int write) { - int copied = 0; + const struct utrace_regset *regset = utrace_regset(target, engine, + view, setno); + int ret; - while (len > 0) { - char buf[128]; - int this_len, retval; + if (unlikely(regset == NULL)) + return -EIO; - this_len = (len > sizeof(buf)) ? 
sizeof(buf) : len; - retval = access_process_vm(tsk, src, buf, this_len, 0); - if (!retval) { - if (copied) - break; + if (size == (unsigned int) -1) + size = regset->size * regset->n; + + if (write) { + if (!access_ok(VERIFY_READ, data, size)) + ret = -EIO; + else + ret = (*regset->set)(target, regset, + offset, size, NULL, data); + } + else { + if (!access_ok(VERIFY_WRITE, data, size)) + ret = -EIO; + else + ret = (*regset->get)(target, regset, + offset, size, NULL, data); + } + + return ret; +} + +int +ptrace_onereg_access(struct task_struct *target, + struct utrace_attached_engine *engine, + const struct utrace_regset_view *view, + int setno, unsigned long regno, + void __user *data, int write) +{ + const struct utrace_regset *regset = utrace_regset(target, engine, + view, setno); + unsigned int pos; + int ret; + + if (unlikely(regset == NULL)) + return -EIO; + + if (regno < regset->bias || regno >= regset->bias + regset->n) + return -EINVAL; + + pos = (regno - regset->bias) * regset->size; + + if (write) { + if (!access_ok(VERIFY_READ, data, regset->size)) + ret = -EIO; + else + ret = (*regset->set)(target, regset, pos, regset->size, + NULL, data); + } + else { + if (!access_ok(VERIFY_WRITE, data, regset->size)) + ret = -EIO; + else + ret = (*regset->get)(target, regset, pos, regset->size, + NULL, data); + } + + return ret; +} + +int +ptrace_layout_access(struct task_struct *target, + struct utrace_attached_engine *engine, + const struct utrace_regset_view *view, + const struct ptrace_layout_segment layout[], + unsigned long addr, unsigned int size, + void __user *udata, void *kdata, int write) +{ + const struct ptrace_layout_segment *seg; + int ret = -EIO; + + if (kdata == NULL && + !access_ok(write ? 
VERIFY_READ : VERIFY_WRITE, udata, size)) + return -EIO; + + seg = layout; + do { + unsigned int pos, n; + + while (addr >= seg->end && seg->end != 0) + ++seg; + + if (addr < seg->start || addr >= seg->end) return -EIO; + + pos = addr - seg->start + seg->offset; + n = min(size, seg->end - (unsigned int) addr); + + if (unlikely(seg->regset == (unsigned int) -1)) { + /* + * This is a no-op/zero-fill portion of struct user. + */ + ret = 0; + if (!write && seg->offset == 0) { + if (kdata) + memset(kdata, 0, n); + else if (clear_user(udata, n)) + ret = -EFAULT; + } } - if (copy_to_user(dst, buf, retval)) - return -EFAULT; - copied += retval; - src += retval; - dst += retval; - len -= retval; + else { + unsigned int align; + const struct utrace_regset *regset = utrace_regset( + target, engine, view, seg->regset); + if (unlikely(regset == NULL)) + return -EIO; + + /* + * A ptrace compatibility layout can do a misaligned + * regset access, e.g. word access to larger data. + * An arch's compat layout can be this way only if + * it is actually ok with the regset code despite the + * regset->align setting. 
+ */ + align = min(regset->align, size); + if ((pos & (align - 1)) + || pos >= regset->n * regset->size) + return -EIO; + + if (write) + ret = (*regset->set)(target, regset, + pos, n, kdata, udata); + else + ret = (*regset->get)(target, regset, + pos, n, kdata, udata); + } + + if (kdata) + kdata += n; + else + udata += n; + addr += n; + size -= n; + } while (ret == 0 && size > 0); + + return ret; +} + + +static int +ptrace_start(long pid, long request, + struct task_struct **childp, + struct utrace_attached_engine **enginep, + struct ptrace_state **statep) + +{ + struct task_struct *child; + struct utrace_attached_engine *engine; + struct ptrace_state *state; + int ret; + + if (request == PTRACE_TRACEME) + return ptrace_traceme(); + + ret = -ESRCH; + read_lock(&tasklist_lock); + child = find_task_by_pid(pid); + if (child) + get_task_struct(child); + read_unlock(&tasklist_lock); + pr_debug("ptrace pid %ld => %p\n", pid, child); + if (!child) + goto out; + + ret = -EPERM; + if (pid == 1) /* you may not mess with init */ + goto out_tsk; + + if (request == PTRACE_ATTACH) { + ret = ptrace_attach(child); + goto out_tsk; + } + + rcu_read_lock(); + engine = utrace_attach(child, UTRACE_ATTACH_MATCH_OPS, + &ptrace_utrace_ops, 0); + ret = -ESRCH; + if (IS_ERR(engine) || engine == NULL) + goto out_tsk_rcu; + state = rcu_dereference((struct ptrace_state *) engine->data); + if (state == NULL || state->parent != current) + goto out_tsk_rcu; + rcu_read_unlock(); + + /* + * Traditional ptrace behavior demands that the target already be + * quiescent, but not dead. + */ + if (request != PTRACE_KILL + && !(engine->flags & UTRACE_ACTION_QUIESCE)) { + pr_debug("%d not stopped (%lu)\n", child->pid, child->state); + goto out_tsk; } - return copied; + + /* + * We do this for all requests to match traditional ptrace behavior. + * If the machine state synchronization done at context switch time + * includes e.g. 
writing back to user memory, we want to make sure + * that has finished before a PTRACE_PEEKDATA can fetch the results. + * On most machines, only regset data is affected by context switch + * and calling utrace_regset later on will take care of that, so + * this is superfluous. + * + * To do this purely in utrace terms, we could do: + * (void) utrace_regset(child, engine, utrace_native_view(child), 0); + */ + wait_task_inactive(child); + + if (child->exit_state) + goto out_tsk; + + *childp = child; + *enginep = engine; + *statep = state; + return -EIO; + +out_tsk_rcu: + rcu_read_unlock(); +out_tsk: + put_task_struct(child); +out: + return ret; } -int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len) +static int +ptrace_common(long request, struct task_struct *child, + struct utrace_attached_engine *engine, + struct ptrace_state *state, + unsigned long addr, long data) { - int copied = 0; + unsigned long flags; + int ret = -EIO; - while (len > 0) { - char buf[128]; - int this_len, retval; + switch (request) { + case PTRACE_DETACH: + /* + * Detach a process that was attached. + */ + ret = ptrace_induce_signal(child, engine, data); + if (!ret) { + ret = ptrace_detach(child, engine, state); + if (ret == -EALREADY) /* Already a zombie. */ + ret = -ESRCH; + if (ret) + BUG_ON(ret != -ESRCH); + } + break; - this_len = (len > sizeof(buf)) ? sizeof(buf) : len; - if (copy_from_user(buf, src, this_len)) - return -EFAULT; - retval = access_process_vm(tsk, dst, buf, this_len, 1); - if (!retval) { - if (copied) + /* + * These are the operations that resume the child running. + */ + case PTRACE_KILL: + data = SIGKILL; + case PTRACE_CONT: + case PTRACE_SYSCALL: +#ifdef PTRACE_SYSEMU + case PTRACE_SYSEMU: + case PTRACE_SYSEMU_SINGLESTEP: +#endif +#ifdef PTRACE_SINGLEBLOCK + case PTRACE_SINGLEBLOCK: +# ifdef ARCH_HAS_BLOCK_STEP + if (! 
ARCH_HAS_BLOCK_STEP) +# endif + if (request == PTRACE_SINGLEBLOCK) break; - return -EIO; +#endif + case PTRACE_SINGLESTEP: +#ifdef ARCH_HAS_SINGLE_STEP + if (! ARCH_HAS_SINGLE_STEP) +#endif + if (request == PTRACE_SINGLESTEP +#ifdef PTRACE_SYSEMU_SINGLESTEP + || request == PTRACE_SYSEMU_SINGLESTEP +#endif + ) + break; + + ret = ptrace_induce_signal(child, engine, data); + if (ret) + break; + + + /* + * Reset the action flags without QUIESCE, so it resumes. + */ + flags = 0; +#ifdef PTRACE_SYSEMU + state->sysemu = (request == PTRACE_SYSEMU_SINGLESTEP + || request == PTRACE_SYSEMU); +#endif + if (request == PTRACE_SINGLESTEP +#ifdef PTRACE_SYSEMU + || request == PTRACE_SYSEMU_SINGLESTEP +#endif + ) + flags |= UTRACE_ACTION_SINGLESTEP; +#ifdef PTRACE_SINGLEBLOCK + else if (request == PTRACE_SINGLEBLOCK) + flags |= UTRACE_ACTION_BLOCKSTEP; +#endif + if (request == PTRACE_SYSCALL) + flags |= UTRACE_EVENT_SYSCALL; +#ifdef PTRACE_SYSEMU + else if (request == PTRACE_SYSEMU + || request == PTRACE_SYSEMU_SINGLESTEP) + flags |= UTRACE_EVENT(SYSCALL_ENTRY); +#endif + ret = ptrace_update(child, engine, flags, 1); + if (ret) + BUG_ON(ret != -ESRCH); + ret = 0; + break; + +#ifdef PTRACE_OLDSETOPTIONS + case PTRACE_OLDSETOPTIONS: +#endif + case PTRACE_SETOPTIONS: + ret = -EINVAL; + if (data & ~PTRACE_O_MASK) + break; + state->options = data; + ret = ptrace_update(child, engine, UTRACE_ACTION_QUIESCE, 1); + if (ret) + BUG_ON(ret != -ESRCH); + ret = 0; + break; + } + + return ret; +} + + +asmlinkage long sys_ptrace(long request, long pid, long addr, long data) +{ + struct task_struct *child; + struct utrace_attached_engine *engine; + struct ptrace_state *state; + long ret, val; + + pr_debug("%d sys_ptrace(%ld, %ld, %lx, %lx)\n", + current->pid, request, pid, addr, data); + + ret = ptrace_start(pid, request, &child, &engine, &state); + if (ret != -EIO) + goto out; + + val = 0; + ret = arch_ptrace(&request, child, engine, addr, data, &val); + if (ret != -ENOSYS) { + if (ret == 0) { + 
ret = val; + force_successful_syscall_return(); + } + goto out_tsk; + } + + switch (request) { + default: + ret = ptrace_common(request, child, engine, state, addr, data); + break; + + case PTRACE_PEEKTEXT: /* read word at location addr. */ + case PTRACE_PEEKDATA: { + unsigned long tmp; + int copied; + + copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); + ret = -EIO; + if (copied != sizeof(tmp)) + break; + ret = put_user(tmp, (unsigned long __user *) data); + break; + } + + case PTRACE_POKETEXT: /* write the word at location addr. */ + case PTRACE_POKEDATA: + ret = 0; + if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data)) + break; + ret = -EIO; + break; + + case PTRACE_GETEVENTMSG: + ret = put_user(state->have_eventmsg + ? state->u.eventmsg : 0L, + (unsigned long __user *) data); + break; + case PTRACE_GETSIGINFO: + ret = -EINVAL; + if (!state->have_eventmsg && state->u.siginfo) + ret = copy_siginfo_to_user((siginfo_t __user *) data, + state->u.siginfo); + break; + case PTRACE_SETSIGINFO: + ret = -EINVAL; + if (!state->have_eventmsg && state->u.siginfo) { + ret = 0; + if (copy_from_user(state->u.siginfo, + (siginfo_t __user *) data, + sizeof(siginfo_t))) + ret = -EFAULT; } - copied += retval; - src += retval; - dst += retval; - len -= retval; + break; } - return copied; + +out_tsk: + put_task_struct(child); +out: + pr_debug("%d ptrace -> %lx\n", current->pid, ret); + return ret; } -int ptrace_request(struct task_struct *child, long request, - long addr, long data) + +#ifdef CONFIG_COMPAT +#include + +asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, + compat_ulong_t addr, compat_long_t cdata) { - return -ENOSYS; + const unsigned long data = (unsigned long) (compat_ulong_t) cdata; + struct task_struct *child; + struct utrace_attached_engine *engine; + struct ptrace_state *state; + compat_long_t ret, val; + + pr_debug("%d compat_sys_ptrace(%d, %d, %x, %x)\n", + current->pid, request, pid, addr, cdata); + 
ret = ptrace_start(pid, request, &child, &engine, &state); + if (ret != -EIO) + goto out; + + val = 0; + ret = arch_compat_ptrace(&request, child, engine, addr, cdata, &val); + if (ret != -ENOSYS) { + if (ret == 0) { + ret = val; + force_successful_syscall_return(); + } + goto out_tsk; + } + + switch (request) { + default: + ret = ptrace_common(request, child, engine, state, addr, data); + break; + + case PTRACE_PEEKTEXT: /* read word at location addr. */ + case PTRACE_PEEKDATA: { + compat_ulong_t tmp; + int copied; + + copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); + ret = -EIO; + if (copied != sizeof(tmp)) + break; + ret = put_user(tmp, (compat_ulong_t __user *) data); + break; + } + + case PTRACE_POKETEXT: /* write the word at location addr. */ + case PTRACE_POKEDATA: + ret = 0; + if (access_process_vm(child, addr, &cdata, sizeof(cdata), 1) == sizeof(cdata)) + break; + ret = -EIO; + break; + + case PTRACE_GETEVENTMSG: + ret = put_user(state->have_eventmsg + ? state->u.eventmsg : 0L, + (compat_long_t __user *) data); + break; + case PTRACE_GETSIGINFO: + ret = -EINVAL; + if (!state->have_eventmsg && state->u.siginfo) + ret = copy_siginfo_to_user32( + (struct compat_siginfo __user *) data, + state->u.siginfo); + break; + case PTRACE_SETSIGINFO: + ret = -EINVAL; + if (!state->have_eventmsg && state->u.siginfo + && copy_siginfo_from_user32( + state->u.siginfo, + (struct compat_siginfo __user *) data)) + ret = -EFAULT; + break; + } + +out_tsk: + put_task_struct(child); +out: + pr_debug("%d ptrace -> %lx\n", current->pid, (long)ret); + return ret; } +#endif + -/** - * ptrace_traceme -- helper for PTRACE_TRACEME - * - * Performs checks and sets PT_PTRACED. - * Should be used by all ptrace implementations for PTRACE_TRACEME. +/* + * Detach the zombie being reported for wait. 
*/ -int ptrace_traceme(void) +static inline void +detach_zombie(struct task_struct *tsk, + struct task_struct *p, struct ptrace_state *state) { - int ret = -EPERM; + int detach_error; + struct utrace_attached_engine *engine; - ret = security_ptrace(current->parent, current); - if (ret) - return -EPERM; - - return -ENOSYS; +restart: + detach_error = 0; + rcu_read_lock(); + if (tsk == current) + engine = state->engine; + else { + /* + * We've excluded other ptrace_do_wait calls. But the + * ptracer itself might have done ptrace_detach while we + * did not have rcu_read_lock. So double-check that state + * is still valid. + */ + engine = utrace_attach( + p, (UTRACE_ATTACH_MATCH_OPS + | UTRACE_ATTACH_MATCH_DATA), + &ptrace_utrace_ops, + (unsigned long) state); + if (IS_ERR(engine) || state->parent != tsk) + detach_error = -ESRCH; + else + BUG_ON(state->engine != engine); + } + rcu_read_unlock(); + if (likely(!detach_error)) + detach_error = ptrace_detach(p, engine, state); + if (unlikely(detach_error == -EALREADY)) { + /* + * It's still doing report_death callbacks. + * Just wait for it to settle down. + */ + wait_task_inactive(p); /* Might block. */ + goto restart; + } + /* + * A failure with -ESRCH means that report_reap is + * already running and will do the cleanup, or that + * we lost a race with ptrace_detach in another + * thread or with the automatic detach in + * report_death. + */ + if (detach_error) + BUG_ON(detach_error != -ESRCH); } -/** - * ptrace_get_task_struct -- grab a task struct reference for ptrace - * @pid: process id to grab a task_struct reference of - * - * This function is a helper for ptrace implementations. It checks - * permissions and then grabs a task struct for use of the actual - * ptrace implementation. - * - * Returns the task_struct for @pid or an ERR_PTR() on failure. +/* + * We're called with tasklist_lock held for reading. + * If we return -ECHILD or zero, next_thread(tsk) must still be valid to use. 
+ * If we return another error code, or a successful PID value, we + * release tasklist_lock first. */ -struct task_struct *ptrace_get_task_struct(pid_t pid) +int +ptrace_do_wait(struct task_struct *tsk, + pid_t pid, int options, struct siginfo __user *infop, + int __user *stat_addr, struct rusage __user *rusagep) + __releases(tasklist_lock) { - struct task_struct *child; + struct ptrace_state *state; + struct task_struct *p; + int err = -ECHILD; + int exit_code, why, status; + + rcu_read_lock(); + list_for_each_entry_rcu(state, &tsk->ptracees, entry) { + p = state->task; + + if (pid > 0) { + if (p->pid != pid) + continue; + } else if (!pid) { + if (process_group(p) != process_group(current)) + continue; + } else if (pid != -1) { + if (process_group(p) != -pid) + continue; + } + if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0)) + && !(options & __WALL)) + continue; + if (security_task_wait(p)) + continue; + + /* + * This is a matching child. If we don't win now, tell + * our caller to block and repeat. From this point we + * must ensure that wait_chldexit will get a wakeup for + * any tracee stopping, dying, or being detached. + * For death, tasklist_lock guarantees this already. + */ + err = 0; + + switch (p->exit_state) { + case EXIT_ZOMBIE: + if (!likely(options & WEXITED)) + continue; + if (delay_group_leader(p)) { + struct task_struct *next = next_thread(p); + pr_debug("%d ptrace_do_wait leaving %d " + "zombie code %x " + "delay_group_leader (%d/%lu)\n", + current->pid, p->pid, p->exit_code, + next->pid, next->state); + continue; + } + exit_code = p->exit_code; + goto found; + case EXIT_DEAD: + continue; + default: + /* + * tasklist_lock holds up any transitions to + * EXIT_ZOMBIE. After releasing it we are + * guaranteed a wakeup on wait_chldexit after + * any new deaths. + */ + if (p->flags & PF_EXITING) + /* + * It's in do_exit and might have set + * p->exit_code already, but it's not quite + * dead yet. 
It will get to report_death + * and wakes us up when it finishes. + */ + continue; + break; + } + + /* + * This xchg atomically ensures that only one do_wait + * call can report this thread. Because exit_code is + * always set before do_notify wakes us up, after this + * check fails we are sure to get a wakeup if it stops. + */ + exit_code = xchg(&p->exit_code, 0); + if (exit_code) + goto found; + + // XXX should handle WCONTINUED + + pr_debug("%d ptrace_do_wait leaving %d state %lu code %x\n", + current->pid, p->pid, p->state, p->exit_code); + } + rcu_read_unlock(); + if (err == 0) + pr_debug("%d ptrace_do_wait blocking\n", current->pid); + + return err; + +found: + BUG_ON(state->parent != tsk); + rcu_read_unlock(); + + pr_debug("%d ptrace_do_wait (%d) found %d code %x (%lu/%d)\n", + current->pid, tsk->pid, p->pid, exit_code, + p->exit_state, p->exit_signal); /* - * Tracing init is not allowed. + * If there was a group exit in progress, all threads report that + * status. Most will have SIGKILL in their own exit_code. */ - if (pid == 1) - return ERR_PTR(-EPERM); + if (p->signal->flags & SIGNAL_GROUP_EXIT) + exit_code = p->signal->group_exit_code; + + if (p->exit_state) { + if (unlikely(p->parent == tsk && p->exit_signal != -1)) + /* + * This is our natural child we were ptracing. + * When it dies it detaches (see ptrace_report_death). + * So we're seeing it here in a race. When it + * finishes detaching it will become reapable in + * the normal wait_task_zombie path instead. + */ + return 0; + if ((exit_code & 0x7f) == 0) { + why = CLD_EXITED; + status = exit_code >> 8; + } + else { + why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED; + status = exit_code & 0x7f; + } + } + else { + why = CLD_TRAPPED; + status = exit_code; + exit_code = (status << 8) | 0x7f; + } + + /* + * At this point we are committed to a successful return + * or a user error return. Release the tasklist_lock. 
+ */ + get_task_struct(p); + read_unlock(&tasklist_lock); + + if (rusagep) + err = getrusage(p, RUSAGE_BOTH, rusagep); + if (infop) { + if (!err) + err = put_user(SIGCHLD, &infop->si_signo); + if (!err) + err = put_user(0, &infop->si_errno); + if (!err) + err = put_user((short)why, &infop->si_code); + if (!err) + err = put_user(p->pid, &infop->si_pid); + if (!err) + err = put_user(p->uid, &infop->si_uid); + if (!err) + err = put_user(status, &infop->si_status); + } + if (!err && stat_addr) + err = put_user(exit_code, stat_addr); + + if (!err) { + if (why != CLD_TRAPPED) + /* + * This was a death report. The ptracer's wait + * does an implicit detach, so the zombie reports + * to its real parent now. + */ + detach_zombie(tsk, p, state); + err = p->pid; + } + + put_task_struct(p); + + return err; +} + + +/* + * All the report callbacks (except death and reap) are subject to a race + * with ptrace_exit doing a quick detach and ptrace_done. It can do this + * even when the target is not quiescent, so a callback may already be in + * progress when it does ptrace_done. Callbacks use this function to fetch + * the struct ptrace_state while ensuring it doesn't disappear until + * put_ptrace_state is called. This just uses RCU, since state and + * anything we try to do to state->parent is safe under rcu_read_lock. 
+ */ +static struct ptrace_state * +get_ptrace_state(struct utrace_attached_engine *engine, + struct task_struct *tsk) + __acquires(RCU) +{ + struct ptrace_state *state; + + rcu_read_lock(); + state = rcu_dereference((struct ptrace_state *) engine->data); + if (likely(state != NULL)) + return state; + + rcu_read_unlock(); + return NULL; +} + +static inline void +put_ptrace_state(struct ptrace_state *state) + __releases(RCU) +{ + rcu_read_unlock(); +} + + +static void +do_notify(struct task_struct *tsk, struct task_struct *parent, int why) +{ + struct siginfo info; + unsigned long flags; + struct sighand_struct *sighand; + int sa_mask; + + info.si_signo = SIGCHLD; + info.si_errno = 0; + info.si_pid = tsk->pid; + info.si_uid = tsk->uid; + + /* FIXME: find out whether or not this is supposed to be c*time. */ + info.si_utime = cputime_to_jiffies(tsk->utime); + info.si_stime = cputime_to_jiffies(tsk->stime); + + sa_mask = SA_NOCLDSTOP; + info.si_code = why; + info.si_status = tsk->exit_code & 0x7f; + if (why == CLD_CONTINUED) + info.si_status = SIGCONT; + else if (why == CLD_STOPPED) + info.si_status = tsk->signal->group_exit_code & 0x7f; + else if (why == CLD_EXITED) { + sa_mask = SA_NOCLDWAIT; + if (tsk->exit_code & 0x80) + info.si_code = CLD_DUMPED; + else if (tsk->exit_code & 0x7f) + info.si_code = CLD_KILLED; + else { + info.si_code = CLD_EXITED; + info.si_status = tsk->exit_code >> 8; + } + } read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); + if (unlikely(parent->signal == NULL)) + goto out; + sighand = parent->sighand; + spin_lock_irqsave(&sighand->siglock, flags); + if (sighand->action[SIGCHLD-1].sa.sa_handler != SIG_IGN && + !(sighand->action[SIGCHLD-1].sa.sa_flags & sa_mask)) + __group_send_sig_info(SIGCHLD, &info, parent); + /* + * Even if SIGCHLD is not generated, we must wake up wait4 calls. 
+ */ + wake_up_interruptible_sync(&parent->signal->wait_chldexit); + spin_unlock_irqrestore(&sighand->siglock, flags); + +out: read_unlock(&tasklist_lock); - if (!child) - return ERR_PTR(-ESRCH); - return child; } -asmlinkage long sys_ptrace(long request, long pid, long addr, long data) +static u32 +ptrace_report(struct utrace_attached_engine *engine, + struct task_struct *tsk, + struct ptrace_state *state, + int code) + __releases(RCU) { - return -ENOSYS; + const struct utrace_regset *regset; + + pr_debug("%d ptrace_report %d engine %p" + " state %p code %x parent %d (%p)\n", + current->pid, tsk->pid, engine, state, code, + state->parent->pid, state->parent); + if (!state->have_eventmsg && state->u.siginfo) { + const siginfo_t *si = state->u.siginfo; + pr_debug(" si %d code %x errno %d addr %p\n", + si->si_signo, si->si_code, si->si_errno, + si->si_addr); + } + + /* + * Set our QUIESCE flag right now, before notifying the tracer. + * We do this before setting tsk->exit_code rather than + * by using UTRACE_ACTION_NEWSTATE in our return value, to + * ensure that the tracer can't get the notification and then + * try to resume us with PTRACE_CONT before we set the flag. + */ + utrace_set_flags(tsk, engine, engine->flags | UTRACE_ACTION_QUIESCE); + + /* + * If regset 0 has a writeback call, do it now. On register window + * machines, this makes sure the user memory backing the register + * data is up to date by the time wait_task_inactive returns to + * ptrace_start in our tracer doing a PTRACE_PEEKDATA or the like. 
+ */ + regset = utrace_regset(tsk, engine, utrace_native_view(tsk), 0); + if (regset->writeback) + (*regset->writeback)(tsk, regset, 0); + + BUG_ON(code == 0); + tsk->exit_code = code; + do_notify(tsk, state->parent, CLD_TRAPPED); + + pr_debug("%d ptrace_report quiescing exit_code %x\n", + current->pid, current->exit_code); + + put_ptrace_state(state); + + return UTRACE_ACTION_RESUME; +} + +static inline u32 +ptrace_event(struct utrace_attached_engine *engine, + struct task_struct *tsk, + struct ptrace_state *state, + int event) + __releases(RCU) +{ + state->syscall = 0; + return ptrace_report(engine, tsk, state, (event << 8) | SIGTRAP); +} + +/* + * Unlike other report callbacks, this can't be called while ptrace_exit + * is doing ptrace_done in parallel, so we don't need get_ptrace_state. + */ +static u32 +ptrace_report_death(struct utrace_attached_engine *engine, + struct task_struct *tsk) +{ + struct ptrace_state *state = (struct ptrace_state *) engine->data; + + if (tsk->exit_code == 0 && unlikely(tsk->flags & PF_SIGNALED)) + /* + * This can only mean that tsk->exit_code was clobbered + * by ptrace_update or ptrace_do_wait in a race with + * an asynchronous wakeup and exit for SIGKILL. + */ + tsk->exit_code = SIGKILL; + + if (tsk->parent == state->parent && tsk->exit_signal != -1) { + /* + * This is a natural child (excluding clone siblings of a + * child group_leader), so we detach and let the normal + * reporting happen once our NOREAP action is gone. But + * first, generate a SIGCHLD for those cases where normal + * behavior won't. A ptrace'd child always generates SIGCHLD. 
+ */ + pr_debug("ptrace %d death natural parent %d exit_code %x\n", + tsk->pid, state->parent->pid, tsk->exit_code); + if (!thread_group_empty(tsk)) + do_notify(tsk, state->parent, CLD_EXITED); + ptrace_state_unlink(state); + rcu_assign_pointer(engine->data, 0UL); + ptrace_done(state); + return UTRACE_ACTION_DETACH; + } + + /* + * This might be a second report_death callback for a group leader + * that was delayed when its original report_death callback was made. + * Repeating do_notify is exactly what we need for that case too. + * After the wakeup, ptrace_do_wait will see delay_group_leader false. + */ + + pr_debug("ptrace %d death notify %d exit_code %x: ", + tsk->pid, state->parent->pid, tsk->exit_code); + do_notify(tsk, state->parent, CLD_EXITED); + pr_debug("%d notified %d\n", tsk->pid, state->parent->pid); + return UTRACE_ACTION_RESUME; +} + +/* + * We get this only in the case where our UTRACE_ACTION_NOREAP was ignored. + * That happens solely when a non-leader exec reaps the old leader. + */ +static void +ptrace_report_reap(struct utrace_attached_engine *engine, + struct task_struct *tsk) +{ + struct ptrace_state *state = get_ptrace_state(engine, tsk); + if (state != NULL) { + ptrace_state_unlink(state); + rcu_assign_pointer(engine->data, 0UL); + ptrace_done(state); + put_ptrace_state(state); + } +} + +/* + * Start tracing the child. This has to do put_ptrace_state before it can + * do allocation that might block. 
+ */ +static void +ptrace_clone_setup(struct utrace_attached_engine *engine, + struct task_struct *parent, + struct ptrace_state *state, + struct task_struct *child) + __releases(RCU) +{ + struct task_struct *tracer; + struct utrace_attached_engine *child_engine; + struct ptrace_state *child_state; + int ret; + u8 options; + int cap_sys_ptrace; + + tracer = state->parent; + options = state->options; + cap_sys_ptrace = state->cap_sys_ptrace; + get_task_struct(tracer); + put_ptrace_state(state); + + child_engine = utrace_attach(child, (UTRACE_ATTACH_CREATE + | UTRACE_ATTACH_EXCLUSIVE + | UTRACE_ATTACH_MATCH_OPS), + &ptrace_utrace_ops, 0UL); + if (unlikely(IS_ERR(child_engine))) { + BUG_ON(PTR_ERR(child_engine) != -ENOMEM); + put_task_struct(tracer); + goto nomem; + } + + child_state = ptrace_setup(child, child_engine, + tracer, options, cap_sys_ptrace, NULL); + + put_task_struct(tracer); + + if (unlikely(IS_ERR(child_state))) { + (void) utrace_detach(child, child_engine); + + if (PTR_ERR(child_state) == -ENOMEM) + goto nomem; + + /* + * Our tracer has started exiting. It's + * too late to set it up tracing the child. + */ + BUG_ON(PTR_ERR(child_state) != -EALREADY); + } + else { + sigaddset(&child->pending.signal, SIGSTOP); + set_tsk_thread_flag(child, TIF_SIGPENDING); + ret = ptrace_update(child, child_engine, 0, 0); + + /* + * The child hasn't run yet, it can't have died already. 
+ */ + BUG_ON(ret); + } + + return; + +nomem: + printk(KERN_ERR "ptrace out of memory, lost child %d of %d", + child->pid, parent->pid); +} + +static u32 +ptrace_report_clone(struct utrace_attached_engine *engine, + struct task_struct *parent, + unsigned long clone_flags, struct task_struct *child) +{ + int event, option; + struct ptrace_state *state = get_ptrace_state(engine, parent); + if (unlikely(state == NULL)) + return UTRACE_ACTION_RESUME; + + pr_debug("%d (%p) engine %p" + " ptrace_report_clone child %d (%p) fl %lx\n", + parent->pid, parent, engine, child->pid, child, clone_flags); + + event = PTRACE_EVENT_FORK; + option = PTRACE_O_TRACEFORK; + if (clone_flags & CLONE_VFORK) { + event = PTRACE_EVENT_VFORK; + option = PTRACE_O_TRACEVFORK; + } + else if ((clone_flags & CSIGNAL) != SIGCHLD) { + event = PTRACE_EVENT_CLONE; + option = PTRACE_O_TRACECLONE; + } + + if (state->options & option) { + state->have_eventmsg = 1; + state->u.eventmsg = child->pid; + } + else + event = 0; + + if (!(clone_flags & CLONE_UNTRACED) + && (event || (clone_flags & CLONE_PTRACE))) { + /* + * Have our tracer start following the child too. + */ + ptrace_clone_setup(engine, parent, state, child); + + /* + * That did put_ptrace_state, so we have to check + * again in case our tracer just started exiting. 
+ */ + state = get_ptrace_state(engine, parent); + if (unlikely(state == NULL)) + return UTRACE_ACTION_RESUME; + } + + if (event) + return ptrace_event(engine, parent, state, event); + + put_ptrace_state(state); + + return UTRACE_ACTION_RESUME; +} + + +static u32 +ptrace_report_vfork_done(struct utrace_attached_engine *engine, + struct task_struct *parent, pid_t child_pid) +{ + struct ptrace_state *state = get_ptrace_state(engine, parent); + if (unlikely(state == NULL)) + return UTRACE_ACTION_RESUME; + + state->have_eventmsg = 1; + state->u.eventmsg = child_pid; + return ptrace_event(engine, parent, state, PTRACE_EVENT_VFORK_DONE); +} + + +static u32 +ptrace_report_signal(struct utrace_attached_engine *engine, + struct task_struct *tsk, struct pt_regs *regs, + u32 action, siginfo_t *info, + const struct k_sigaction *orig_ka, + struct k_sigaction *return_ka) +{ + int signo = info == NULL ? SIGTRAP : info->si_signo; + struct ptrace_state *state = get_ptrace_state(engine, tsk); + if (unlikely(state == NULL)) + return UTRACE_ACTION_RESUME; + + state->syscall = 0; + state->have_eventmsg = 0; + state->u.siginfo = info; + return ptrace_report(engine, tsk, state, signo) | UTRACE_SIGNAL_IGN; +} + +static u32 +ptrace_report_jctl(struct utrace_attached_engine *engine, + struct task_struct *tsk, int type) +{ + struct ptrace_state *state = get_ptrace_state(engine, tsk); + if (unlikely(state == NULL)) + return UTRACE_ACTION_RESUME; + + pr_debug("ptrace %d jctl notify %d type %x exit_code %x\n", + tsk->pid, state->parent->pid, type, tsk->exit_code); + + do_notify(tsk, state->parent, type); + put_ptrace_state(state); + + return UTRACE_JCTL_NOSIGCHLD; +} + +static u32 +ptrace_report_exec(struct utrace_attached_engine *engine, + struct task_struct *tsk, + const struct linux_binprm *bprm, + struct pt_regs *regs) +{ + struct ptrace_state *state = get_ptrace_state(engine, tsk); + if (unlikely(state == NULL)) + return UTRACE_ACTION_RESUME; + + return ptrace_event(engine, tsk, state, + 
(state->options & PTRACE_O_TRACEEXEC) + ? PTRACE_EVENT_EXEC : 0); +} + +static u32 +ptrace_report_syscall(struct utrace_attached_engine *engine, + struct task_struct *tsk, struct pt_regs *regs, + int entry) +{ + struct ptrace_state *state = get_ptrace_state(engine, tsk); + if (unlikely(state == NULL)) + return UTRACE_ACTION_RESUME; + +#ifdef PTRACE_SYSEMU + if (entry && state->sysemu) + tracehook_abort_syscall(regs); +#endif + state->syscall = 1; + return ptrace_report(engine, tsk, state, + ((state->options & PTRACE_O_TRACESYSGOOD) + ? 0x80 : 0) | SIGTRAP); +} + +static u32 +ptrace_report_syscall_entry(struct utrace_attached_engine *engine, + struct task_struct *tsk, struct pt_regs *regs) +{ + return ptrace_report_syscall(engine, tsk, regs, 1); +} + +static u32 +ptrace_report_syscall_exit(struct utrace_attached_engine *engine, + struct task_struct *tsk, struct pt_regs *regs) +{ + return ptrace_report_syscall(engine, tsk, regs, 0); +} + +static u32 +ptrace_report_exit(struct utrace_attached_engine *engine, + struct task_struct *tsk, long orig_code, long *code) +{ + struct ptrace_state *state = get_ptrace_state(engine, tsk); + if (unlikely(state == NULL)) + return UTRACE_ACTION_RESUME; + + state->have_eventmsg = 1; + state->u.eventmsg = *code; + return ptrace_event(engine, tsk, state, PTRACE_EVENT_EXIT); +} + +static int +ptrace_unsafe_exec(struct utrace_attached_engine *engine, + struct task_struct *tsk) +{ + int unsafe = LSM_UNSAFE_PTRACE; + struct ptrace_state *state = get_ptrace_state(engine, tsk); + if (likely(state != NULL) && state->cap_sys_ptrace) + unsafe = LSM_UNSAFE_PTRACE_CAP; + put_ptrace_state(state); + return unsafe; } + +static struct task_struct * +ptrace_tracer_task(struct utrace_attached_engine *engine, + struct task_struct *target) +{ + struct task_struct *parent = NULL; + struct ptrace_state *state = get_ptrace_state(engine, target); + if (likely(state != NULL)) { + parent = state->parent; + put_ptrace_state(state); + } + return parent; +} + 
+static int +ptrace_allow_access_process_vm(struct utrace_attached_engine *engine, + struct task_struct *target, + struct task_struct *caller) +{ + struct ptrace_state *state; + int ours = 0; + + state = get_ptrace_state(engine, target); + if (likely(state != NULL)) { + ours = (((engine->flags & UTRACE_ACTION_QUIESCE) + || target->state == TASK_STOPPED) + && state->parent == caller); + put_ptrace_state(state); + } + + return ours && security_ptrace(caller, target) == 0; +} + + +static const struct utrace_engine_ops ptrace_utrace_ops = +{ + .report_syscall_entry = ptrace_report_syscall_entry, + .report_syscall_exit = ptrace_report_syscall_exit, + .report_exec = ptrace_report_exec, + .report_jctl = ptrace_report_jctl, + .report_signal = ptrace_report_signal, + .report_vfork_done = ptrace_report_vfork_done, + .report_clone = ptrace_report_clone, + .report_exit = ptrace_report_exit, + .report_death = ptrace_report_death, + .report_reap = ptrace_report_reap, + .unsafe_exec = ptrace_unsafe_exec, + .tracer_task = ptrace_tracer_task, + .allow_access_process_vm = ptrace_allow_access_process_vm, +}; --- linux-2.6/kernel/Makefile +++ linux-2.6/kernel/Makefile @@ -4,7 +4,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ exit.o itimer.o time.o softirq.o resource.o \ - sysctl.o capability.o ptrace.o timer.o user.o \ + sysctl.o capability.o timer.o user.o \ signal.o sys.o kmod.o workqueue.o pid.o \ rcupdate.o extable.o params.o posix-timers.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ @@ -52,6 +52,7 @@ obj-$(CONFIG_UTS_NS) += utsname.o obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o obj-$(CONFIG_UTRACE) += utrace.o +obj-$(CONFIG_PTRACE) += ptrace.o ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) # According to Alan Modra , the -fno-omit-frame-pointer is --- linux-2.6/fs/proc/base.c +++ linux-2.6/fs/proc/base.c @@ -148,6 +148,46 @@ static int get_nr_threads(struct task_st return count; } 
+static int __ptrace_may_attach(struct task_struct *task) +{ + /* May we inspect the given task? + * This check is used both for attaching with ptrace + * and for allowing access to sensitive information in /proc. + * + * ptrace_attach denies several cases that /proc allows + * because setting up the necessary parent/child relationship + * or halting the specified task is impossible. + */ + int dumpable = 0; + /* Don't let security modules deny introspection */ + if (task == current) + return 0; + if (((current->uid != task->euid) || + (current->uid != task->suid) || + (current->uid != task->uid) || + (current->gid != task->egid) || + (current->gid != task->sgid) || + (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE)) + return -EPERM; + smp_rmb(); + if (task->mm) + dumpable = task->mm->dumpable; + if (!dumpable && !capable(CAP_SYS_PTRACE)) + return -EPERM; + + return security_ptrace(current, task); +} + +int ptrace_may_attach(struct task_struct *task) +{ + int err; + task_lock(task); + err = __ptrace_may_attach(task); + task_unlock(task); + return !err; +} + + static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) { struct task_struct *task = get_proc_task(inode); --- linux-2.6/init/Kconfig +++ linux-2.6/init/Kconfig @@ -610,6 +610,18 @@ config UTRACE applications. Unless you are making a specially stripped-down kernel and are very sure you don't need these facilitiies, say Y. + +config PTRACE + bool "Legacy ptrace system call interface" + default y + depends on UTRACE && PROC_FS + help + Enable the ptrace system call. + This is traditionally used by debuggers like GDB, + and is used by UML and some other applications. + Unless you are very sure you won't run anything that needs it, + say Y. 
+ endmenu menu "Block layer" --- linux-2.6/arch/i386/kernel/ptrace.c +++ linux-2.6/arch/i386/kernel/ptrace.c @@ -724,6 +724,46 @@ const struct utrace_regset_view utrace_i }; EXPORT_SYMBOL_GPL(utrace_i386_native); +#ifdef CONFIG_PTRACE +static const struct ptrace_layout_segment i386_uarea[] = { + {0, FRAME_SIZE*4, 0, 0}, + {FRAME_SIZE*4, offsetof(struct user, u_debugreg[0]), -1, 0}, + {offsetof(struct user, u_debugreg[0]), + offsetof(struct user, u_debugreg[8]), 4, 0}, + {0, 0, -1, 0} +}; + +int arch_ptrace(long *req, struct task_struct *child, + struct utrace_attached_engine *engine, + unsigned long addr, unsigned long data, long *val) +{ + switch (*req) { + case PTRACE_PEEKUSR: + return ptrace_peekusr(child, engine, i386_uarea, addr, data); + case PTRACE_POKEUSR: + return ptrace_pokeusr(child, engine, i386_uarea, addr, data); + case PTRACE_GETREGS: + return ptrace_whole_regset(child, engine, data, 0, 0); + case PTRACE_SETREGS: + return ptrace_whole_regset(child, engine, data, 0, 1); + case PTRACE_GETFPREGS: + return ptrace_whole_regset(child, engine, data, 1, 0); + case PTRACE_SETFPREGS: + return ptrace_whole_regset(child, engine, data, 1, 1); + case PTRACE_GETFPXREGS: + return ptrace_whole_regset(child, engine, data, 2, 0); + case PTRACE_SETFPXREGS: + return ptrace_whole_regset(child, engine, data, 2, 1); + case PTRACE_GET_THREAD_AREA: + case PTRACE_SET_THREAD_AREA: + return ptrace_onereg_access(child, engine, + utrace_native_view(current), 3, + addr, (void __user *)data, + *req == PTRACE_SET_THREAD_AREA); + } + return -ENOSYS; +} +#endif void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) { --- linux-2.6/arch/powerpc/lib/sstep.c +++ linux-2.6/arch/powerpc/lib/sstep.c @@ -13,6 +13,9 @@ #include #include #include +#ifdef CONFIG_PPC64 +#include +#endif extern char system_call_common[]; --- linux-2.6/arch/powerpc/kernel/signal_32.c +++ linux-2.6/arch/powerpc/kernel/signal_32.c @@ -632,6 +632,58 @@ int copy_siginfo_to_user32(struct compat 
#define copy_siginfo_to_user copy_siginfo_to_user32 +/* mostly stolen from arch/s390/kernel/compat_signal.c */ +int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) +{ + int err; + u32 tmp; + + if (!access_ok (VERIFY_READ, from, sizeof(compat_siginfo_t))) + return -EFAULT; + + err = __get_user(to->si_signo, &from->si_signo); + err |= __get_user(to->si_errno, &from->si_errno); + err |= __get_user(to->si_code, &from->si_code); + + if (to->si_code < 0) + err |= __copy_from_user(&to->_sifields._pad, &from->_sifields._pad, SI_PAD_SIZE); + else { + switch (to->si_code >> 16) { + case __SI_RT >> 16: /* This is not generated by the kernel as of now. */ + case __SI_MESGQ >> 16: + err |= __get_user(to->si_int, &from->si_int); + /* fallthrough */ + case __SI_KILL >> 16: + err |= __get_user(to->si_pid, &from->si_pid); + err |= __get_user(to->si_uid, &from->si_uid); + break; + case __SI_CHLD >> 16: + err |= __get_user(to->si_pid, &from->si_pid); + err |= __get_user(to->si_uid, &from->si_uid); + err |= __get_user(to->si_utime, &from->si_utime); + err |= __get_user(to->si_stime, &from->si_stime); + err |= __get_user(to->si_status, &from->si_status); + break; + case __SI_FAULT >> 16: + err |= __get_user(tmp, &from->si_addr); + to->si_addr = (void __user *)(u64) tmp; + break; + case __SI_POLL >> 16: + err |= __get_user(to->si_band, &from->si_band); + err |= __get_user(to->si_fd, &from->si_fd); + break; + case __SI_TIMER >> 16: + err |= __get_user(to->si_tid, &from->si_tid); + err |= __get_user(to->si_overrun, &from->si_overrun); + err |= __get_user(to->si_int, &from->si_int); + break; + default: + break; + } + } + return err; +} + /* * Note: it is necessary to treat pid and sig as unsigned ints, with the * corresponding cast to a signed int to insure that the proper conversion --- linux-2.6/arch/powerpc/kernel/ptrace.c +++ linux-2.6/arch/powerpc/kernel/ptrace.c @@ -459,13 +459,251 @@ const struct utrace_regset_view utrace_p .regsets = ppc32_regsets, .n = 
ARRAY_SIZE(ppc32_regsets) }; EXPORT_SYMBOL_GPL(utrace_ppc32_view); +#endif + -long compat_sys_ptrace(int request, int pid, unsigned long addr, - unsigned long data) +#ifdef CONFIG_PTRACE +static const struct ptrace_layout_segment ppc_uarea[] = { + {0, PT_FPR0 * sizeof(long), 0, 0}, + {PT_FPR0 * sizeof(long), (PT_FPSCR + 1) * sizeof(long), 1, 0}, + {0, 0, -1, 0} +}; + +int arch_ptrace(long *request, struct task_struct *child, + struct utrace_attached_engine *engine, + unsigned long addr, unsigned long data, long *val) { + switch (*request) { + case PTRACE_PEEKUSR: + return ptrace_peekusr(child, engine, ppc_uarea, addr, data); + case PTRACE_POKEUSR: + return ptrace_pokeusr(child, engine, ppc_uarea, addr, data); + case PPC_PTRACE_GETREGS: /* Get GPRs 0 - 31. */ + case PPC_PTRACE_SETREGS: /* Set GPRs 0 - 31. */ + return ptrace_regset_access(child, engine, + utrace_native_view(current), 0, + 0, 32 * sizeof(long), + (void __user *)addr, + *request == PPC_PTRACE_SETREGS); + case PPC_PTRACE_GETFPREGS: /* Get FPRs 0 - 31. */ + case PPC_PTRACE_SETFPREGS: /* Set FPRs 0 - 31. 
*/ + return ptrace_regset_access(child, engine, + utrace_native_view(current), 1, + 0, 32 * sizeof(double), + (void __user *)addr, + *request == PPC_PTRACE_SETFPREGS); +#ifdef CONFIG_PPC64 + case PTRACE_GET_DEBUGREG: + case PTRACE_SET_DEBUGREG: + return ptrace_onereg_access(child, engine, + utrace_native_view(current), 3, + addr, (unsigned long __user *)data, + *request == PTRACE_SET_DEBUGREG); +#endif /* CONFIG_PPC64 */ +#ifdef CONFIG_ALTIVEC + case PTRACE_GETVRREGS: + return ptrace_whole_regset(child, engine, data, 2, 0); + case PTRACE_SETVRREGS: + return ptrace_whole_regset(child, engine, data, 2, 1); +#endif +#ifdef CONFIG_SPE +#ifdef CONFIG_ALTIVEC +#define REGSET_EVR 3 +#else +#define REGSET_EVR 2 +#endif + case PTRACE_GETEVRREGS: + return ptrace_whole_regset(child, engine, data, REGSET_EVR, 0); + case PTRACE_SETEVRREGS: + return ptrace_whole_regset(child, engine, data, REGSET_EVR, 1); +#endif + } return -ENOSYS; } + +#ifdef CONFIG_COMPAT +#include +#include + +static const struct ptrace_layout_segment ppc32_uarea[] = { + {0, PT_FPR0 * sizeof(u32), 0, 0}, + {PT_FPR0 * sizeof(u32), (PT_FPSCR32 + 1) * sizeof(u32), 1, 0}, + {0, 0, -1, 0} +}; + +int arch_compat_ptrace(compat_long_t *request, + struct task_struct *child, + struct utrace_attached_engine *engine, + compat_ulong_t addr, compat_ulong_t data, + compat_long_t *val) +{ + void __user *uaddr = (void __user *) (unsigned long) addr; + int ret = -ENOSYS; + + switch (*request) { + case PTRACE_PEEKUSR: + return ptrace_compat_peekusr(child, engine, ppc32_uarea, + addr, data); + case PTRACE_POKEUSR: + return ptrace_compat_pokeusr(child, engine, ppc32_uarea, + addr, data); + + case PPC_PTRACE_GETREGS: /* Get GPRs 0 - 31. */ + case PPC_PTRACE_SETREGS: /* Set GPRs 0 - 31. */ + return ptrace_regset_access(child, engine, + utrace_native_view(current), 0, + 0, 32 * sizeof(compat_long_t), + uaddr, + *request == PPC_PTRACE_SETREGS); + case PPC_PTRACE_GETFPREGS: /* Get FPRs 0 - 31. 
*/ + case PPC_PTRACE_SETFPREGS: /* Set FPRs 0 - 31. */ + return ptrace_regset_access(child, engine, + utrace_native_view(current), 1, + 0, 32 * sizeof(double), + uaddr, + *request == PPC_PTRACE_SETFPREGS); +#ifdef CONFIG_ALTIVEC + case PTRACE_GETVRREGS: + return ptrace_whole_regset(child, engine, data, 2, 0); + case PTRACE_SETVRREGS: + return ptrace_whole_regset(child, engine, data, 2, 1); #endif + case PTRACE_GET_DEBUGREG: + case PTRACE_SET_DEBUGREG: + return ptrace_onereg_access(child, engine, + utrace_native_view(current), 3, + addr, + (unsigned long __user *) + (unsigned long) data, + *request == PTRACE_SET_DEBUGREG); + + /* + * Read 4 bytes of the other process' storage + * data is a pointer specifying where the user wants the + * 4 bytes copied into + * addr is a pointer in the user's storage that contains an 8 byte + * address in the other process of the 4 bytes that is to be read + * (this is run in a 32-bit process looking at a 64-bit process) + * when I and D space are separate, these will need to be fixed. + */ + case PPC_PTRACE_PEEKTEXT_3264: + case PPC_PTRACE_PEEKDATA_3264: { + u32 tmp; + int copied; + u32 __user * addrOthers; + + ret = -EIO; + + /* Get the addr in the other process that we want to read */ + if (get_user(addrOthers, ((u32 __user * __user *) + (unsigned long) addr)) != 0) + break; + + copied = access_process_vm(child, (u64)addrOthers, &tmp, + sizeof(tmp), 0); + if (copied != sizeof(tmp)) + break; + ret = put_user(tmp, (u32 __user *)(unsigned long)data); + break; + } + + /* + * Write 4 bytes into the other process' storage + * data is the 4 bytes that the user wants written + * addr is a pointer in the user's storage that contains an + * 8 byte address in the other process where the 4 bytes + * that is to be written + * (this is run in a 32-bit process looking at a 64-bit process) + * when I and D space are separate, these will need to be fixed. 
+ */ + case PPC_PTRACE_POKETEXT_3264: + case PPC_PTRACE_POKEDATA_3264: { + u32 tmp = data; + u32 __user * addrOthers; + + /* Get the addr in the other process that we want to write into */ + ret = -EIO; + if (get_user(addrOthers, ((u32 __user * __user *) + (unsigned long) addr)) != 0) + break; + ret = 0; + if (access_process_vm(child, (u64)addrOthers, &tmp, + sizeof(tmp), 1) == sizeof(tmp)) + break; + ret = -EIO; + break; + } + + /* + * This is like PTRACE_PEEKUSR on a 64-bit process, + * but here we access only 4 bytes at a time. + */ + case PPC_PTRACE_PEEKUSR_3264: { + union + { + u64 whole; + u32 half[2]; + } reg; + int setno; + const struct utrace_regset *regset; + + ret = -EIO; + if ((addr & 3) || addr > PT_FPSCR*8) + break; + + setno = 0; + if (addr >= PT_FPR0*8) { + setno = 1; + addr -= PT_FPR0*8; + } + regset = utrace_regset(child, NULL, + &utrace_ppc_native_view, setno); + ret = (*regset->get)(child, regset, addr &~ 7, + sizeof(reg.whole), ®.whole, NULL); + if (ret == 0) + ret = put_user(reg.half[(addr >> 2) & 1], + (u32 __user *)(unsigned long)data); + break; + } + + /* + * This is like PTRACE_POKEUSR on a 64-bit process, + * but here we access only 4 bytes at a time. 
+ */ + case PPC_PTRACE_POKEUSR_3264: { + union + { + u64 whole; + u32 half[2]; + } reg; + int setno; + const struct utrace_regset *regset; + + ret = -EIO; + if ((addr & 3) || addr > PT_FPSCR*8) + break; + + setno = 0; + if (addr >= PT_FPR0*8) { + setno = 1; + addr -= PT_FPR0*8; + } + regset = utrace_regset(child, NULL, + &utrace_ppc_native_view, setno); + ret = (*regset->get)(child, regset, addr &~ 7, + sizeof(reg.whole), ®.whole, NULL); + BUG_ON(ret); + reg.half[(addr >> 2) & 1] = data; + ret = (*regset->set)(child, regset, addr &~ 7, + sizeof(reg.whole), ®.whole, NULL); + break; + } + } + return ret; +} +#endif /* CONFIG_COMPAT */ +#endif /* CONFIG_PTRACE */ + void do_syscall_trace_enter(struct pt_regs *regs) { --- linux-2.6/arch/x86_64/kernel/ptrace.c +++ linux-2.6/arch/x86_64/kernel/ptrace.c @@ -680,6 +680,52 @@ const struct utrace_regset_view utrace_x EXPORT_SYMBOL_GPL(utrace_x86_64_native); +#ifdef CONFIG_PTRACE +static const struct ptrace_layout_segment x86_64_uarea[] = { + {0, sizeof(struct user_regs_struct), 0, 0}, + {sizeof(struct user_regs_struct), + offsetof(struct user, u_debugreg[0]), -1, 0}, + {offsetof(struct user, u_debugreg[0]), + offsetof(struct user, u_debugreg[8]), 3, 0}, + {0, 0, -1, 0} +}; + +int arch_ptrace(long *req, struct task_struct *child, + struct utrace_attached_engine *engine, + unsigned long addr, unsigned long data, long *val) +{ + switch (*req) { + case PTRACE_PEEKUSR: + return ptrace_peekusr(child, engine, x86_64_uarea, addr, data); + case PTRACE_POKEUSR: + return ptrace_pokeusr(child, engine, x86_64_uarea, addr, data); + case PTRACE_GETREGS: + return ptrace_whole_regset(child, engine, data, 0, 0); + case PTRACE_SETREGS: + return ptrace_whole_regset(child, engine, data, 0, 1); + case PTRACE_GETFPREGS: + return ptrace_whole_regset(child, engine, data, 1, 0); + case PTRACE_SETFPREGS: + return ptrace_whole_regset(child, engine, data, 1, 1); +#ifdef CONFIG_IA32_EMULATION + case PTRACE_GET_THREAD_AREA: + case PTRACE_SET_THREAD_AREA: + 
return ptrace_onereg_access(child, engine, + &utrace_ia32_view, 3, + addr, (void __user *)data, + *req == PTRACE_SET_THREAD_AREA); +#endif + /* normal 64bit interface to access TLS data. + Works just like arch_prctl, except that the arguments + are reversed. */ + case PTRACE_ARCH_PRCTL: + return do_arch_prctl(child, data, addr); + } + return -ENOSYS; +} +#endif /* CONFIG_PTRACE */ + + asmlinkage void syscall_trace_enter(struct pt_regs *regs) { /* do the secure computing check first */ --- linux-2.6/arch/x86_64/ia32/ptrace32.c +++ linux-2.6/arch/x86_64/ia32/ptrace32.c @@ -150,11 +150,6 @@ static int getreg32(struct task_struct * #undef R32 -asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) -{ - return -ENOSYS; -} - static int ia32_genregs_get(struct task_struct *target, const struct utrace_regset *regset, @@ -586,3 +581,54 @@ const struct utrace_regset_view utrace_i .regsets = ia32_regsets, .n = ARRAY_SIZE(ia32_regsets) }; EXPORT_SYMBOL_GPL(utrace_ia32_view); + + +#ifdef CONFIG_PTRACE +/* + * This matches the arch/i386/kernel/ptrace.c definitions. 
+ */ + +static const struct ptrace_layout_segment ia32_uarea[] = { + {0, sizeof(struct user_regs_struct32), 0, 0}, + {sizeof(struct user_regs_struct32), + offsetof(struct user32, u_debugreg[0]), -1, 0}, + {offsetof(struct user32, u_debugreg[0]), + offsetof(struct user32, u_debugreg[8]), 4, 0}, + {0, 0, -1, 0} +}; + +int arch_compat_ptrace(compat_long_t *req, struct task_struct *child, + struct utrace_attached_engine *engine, + compat_ulong_t addr, compat_ulong_t data, + compat_long_t *val) +{ + switch (*req) { + case PTRACE_PEEKUSR: + return ptrace_compat_peekusr(child, engine, ia32_uarea, + addr, data); + case PTRACE_POKEUSR: + return ptrace_compat_pokeusr(child, engine, ia32_uarea, + addr, data); + case PTRACE_GETREGS: + return ptrace_whole_regset(child, engine, data, 0, 0); + case PTRACE_SETREGS: + return ptrace_whole_regset(child, engine, data, 0, 1); + case PTRACE_GETFPREGS: + return ptrace_whole_regset(child, engine, data, 1, 0); + case PTRACE_SETFPREGS: + return ptrace_whole_regset(child, engine, data, 1, 1); + case PTRACE_GETFPXREGS: + return ptrace_whole_regset(child, engine, data, 2, 0); + case PTRACE_SETFPXREGS: + return ptrace_whole_regset(child, engine, data, 2, 1); + case PTRACE_GET_THREAD_AREA: + case PTRACE_SET_THREAD_AREA: + return ptrace_onereg_access(child, engine, + &utrace_ia32_view, 3, + addr, + (void __user *)(unsigned long)data, + *req == PTRACE_SET_THREAD_AREA); + } + return -ENOSYS; +} +#endif /* CONFIG_PTRACE */ --- linux-2.6/arch/x86_64/ia32/ia32entry.S +++ linux-2.6/arch/x86_64/ia32/ia32entry.S @@ -421,7 +421,7 @@ ia32_sys_call_table: .quad sys_setuid16 .quad sys_getuid16 .quad compat_sys_stime /* stime */ /* 25 */ - .quad sys32_ptrace /* ptrace */ + .quad compat_sys_ptrace /* ptrace */ .quad sys_alarm .quad sys_fstat /* (old)fstat */ .quad sys_pause