Subject: [PATCH] First stab and localizing pids --- arch/i386/kernel/init_task.c | 1 arch/i386/kernel/ptrace.c | 2 drivers/char/tty_io.c | 4 + fs/fcntl.c | 3 + fs/proc/array.c | 18 +++- fs/proc/base.c | 13 ++- fs/proc/proc_misc.c | 3 - include/linux/init_task.h | 1 include/linux/pid.h | 6 + include/linux/pspace.h | 74 +++++++++++++++++ include/linux/sched.h | 4 + kernel/exit.c | 12 ++- kernel/fork.c | 26 +++++- kernel/pid.c | 179 ++++++++++++++++++++++++++++++++++-------- kernel/sched.c | 3 - kernel/signal.c | 12 ++- kernel/sys.c | 10 ++ kernel/sysctl.c | 4 - kernel/timer.c | 5 + 19 files changed, 313 insertions(+), 67 deletions(-) create mode 100644 include/linux/pspace.h 24081f8733559d9037fb080b26f4769920725f73 diff --git a/arch/i386/kernel/init_task.c b/arch/i386/kernel/init_task.c index 9caa8e8..4037eb2 100644 --- a/arch/i386/kernel/init_task.c +++ b/arch/i386/kernel/init_task.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index 7b6368b..bc6646b 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -361,6 +362,7 @@ asmlinkage int sys_ptrace(long request, int i, ret; unsigned long __user *datap = (unsigned long __user *)data; + pid = pid_from_user(pid); lock_kernel(); ret = -EPERM; if (request == PTRACE_TRACEME) { diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index e5953f3..c9eb74a 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -95,6 +95,7 @@ #include #include #include +#include #include #include @@ -2161,7 +2162,7 @@ static int tiocgpgrp(struct tty_struct * */ if (tty == real_tty && current->signal->tty != real_tty) return -ENOTTY; - return put_user(real_tty->pgrp, p); + return put_user(pid_to_user(real_tty->pgrp), p); } static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p) @@ -2181,6 +2182,7 @@ static int 
tiocspgrp(struct tty_struct * return -EFAULT; if (pgrp < 0) return -EINVAL; + pgrp = pid_from_user(pgrp); if (session_of_pgrp(pgrp) != current->signal->session) return -EPERM; real_tty->pgrp = pgrp; diff --git a/fs/fcntl.c b/fs/fcntl.c index 863b46e..b79c2ab 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -317,9 +318,11 @@ static long do_fcntl(int fd, unsigned in * to fix this will be in libc. */ err = filp->f_owner.pid; + err = (err >= 0)? pid_to_user(err) : -pid_to_user(-err); force_successful_syscall_return(); break; case F_SETOWN: + arg = (arg >= 0)? pid_from_user(arg) : -pid_from_user(-arg); err = f_setown(filp, arg, 1); break; case F_GETSIG: diff --git a/fs/proc/array.c b/fs/proc/array.c index d84eeca..6dfba7f 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -75,6 +75,7 @@ #include #include #include +#include #include #include @@ -160,9 +161,14 @@ static inline char * task_state(struct t { struct group_info *group_info; int g; + pid_t pid, ptgid, tppid, tgid; struct fdtable *fdt = NULL; read_lock(&tasklist_lock); + tgid = pid_to_user(p->tgid); + pid = pid_to_user(p->pid); + ptgid = pid_alive(p) ? pid_to_user(p->group_leader->real_parent->tgid) : 0; + tppid = pid_alive(p) && p->ptrace ? pid_to_user(p->parent->pid) : 0; buffer += sprintf(buffer, "State:\t%s\n" "SleepAVG:\t%lu%%\n" @@ -174,9 +180,7 @@ static inline char * task_state(struct t "Gid:\t%d\t%d\t%d\t%d\n", get_task_state(p), (p->sleep_avg/1024)*100/(1020000000/1024), - p->tgid, - p->pid, pid_alive(p) ? p->group_leader->real_parent->tgid : 0, - pid_alive(p) && p->ptrace ? 
p->parent->pid : 0, + tgid, pid, ptgid, tppid, p->uid, p->euid, p->suid, p->fsuid, p->gid, p->egid, p->sgid, p->fsgid); read_unlock(&tasklist_lock); @@ -373,8 +377,8 @@ static int do_task_stat(struct task_stru tty_pgrp = task->signal->tty->pgrp; tty_nr = new_encode_dev(tty_devnum(task->signal->tty)); } - pgid = process_group(task); - sid = task->signal->session; + pgid = pid_to_user(process_group(task)); + sid = pid_to_user(task->signal->session); cmin_flt = task->signal->cmin_flt; cmaj_flt = task->signal->cmaj_flt; cutime = task->signal->cutime; @@ -388,7 +392,7 @@ static int do_task_stat(struct task_stru } it_real_value = task->signal->it_real_value; } - ppid = pid_alive(task) ? task->group_leader->real_parent->tgid : 0; + ppid = pid_alive(task) ? pid_to_user(task->group_leader->real_parent->tgid) : 0; read_unlock(&tasklist_lock); if (!whole || num_threads<2) @@ -415,7 +419,7 @@ static int do_task_stat(struct task_stru res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \ %lu %lu %lu %lu %lu %ld %ld %ld %ld %d %ld %llu %lu %ld %lu %lu %lu %lu %lu \ %lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu\n", - task->pid, + pid_to_user(task->pid), tcomm, state, ppid, diff --git a/fs/proc/base.c b/fs/proc/base.c index 3b33f94..d8d1f10 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -70,6 +70,7 @@ #include #include #include +#include #include "internal.h" /* @@ -1834,14 +1835,14 @@ static int proc_self_readlink(struct den int buflen) { char tmp[30]; - sprintf(tmp, "%d", current->tgid); + sprintf(tmp, "%d", pid_to_user(current->tgid)); return vfs_readlink(dentry,buffer,buflen,tmp); } static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) { char tmp[30]; - sprintf(tmp, "%d", current->tgid); + sprintf(tmp, "%d", pid_to_user(current->tgid)); return ERR_PTR(vfs_follow_link(nd,tmp)); } @@ -2059,9 +2060,11 @@ static int get_tgid_list(int index, unsi int tgid = p->pid; if (!pid_alive(p)) continue; + if (!pid_visible(p)) + continue; if (--index >= 0) 
continue; - tgids[nr_tgids] = tgid; + tgids[nr_tgids] = pid_to_user(tgid); nr_tgids++; if (nr_tgids >= PROC_MAXPIDS) break; @@ -2088,13 +2091,13 @@ static int get_tid_list(int index, unsig * unlinked task, which cannot be used to access the task-list * via next_thread(). */ - if (pid_alive(task)) do { + if (pid_alive(task) && pid_visible(task)) do { int tid = task->pid; if (--index >= 0) continue; if (tids != NULL) - tids[nr_tids] = tid; + tids[nr_tids] = pid_to_user(tid); nr_tids++; if (nr_tids >= PROC_MAXPIDS) break; diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index a345355..ea3690f 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -92,7 +93,7 @@ static int loadavg_read_proc(char *page, LOAD_INT(a), LOAD_FRAC(a), LOAD_INT(b), LOAD_FRAC(b), LOAD_INT(c), LOAD_FRAC(c), - nr_running(), nr_threads, last_pid); + nr_running(), nr_threads, current->pspace->last_pid); return proc_calc_metrics(page, start, off, count, eof, len); } diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 68ab5f2..113ca4f 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -111,6 +111,7 @@ extern struct group_info init_groups; .thread = INIT_THREAD, \ .fs = &init_fs, \ .files = &init_files, \ + .pspace = &init_pspace, \ .signal = &init_signals, \ .sighand = &init_sighand, \ .pending = { \ diff --git a/include/linux/pid.h b/include/linux/pid.h index 5b2fcb1..fedeced 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -19,6 +19,8 @@ struct pid struct list_head pid_list; }; +struct pspace; + #define pid_task(elem, type) \ list_entry(elem, struct task_struct, pids[type].pid_list) @@ -36,8 +38,8 @@ extern void FASTCALL(detach_pid(struct t */ extern struct pid *FASTCALL(find_pid(enum pid_type, int)); -extern int alloc_pidmap(void); -extern void FASTCALL(free_pidmap(int)); +extern int alloc_pidmap(struct pspace *pspace, int count); +extern void 
FASTCALL(free_pidmap(struct pspace *pspace, int pid, int count)); extern void switch_exec_pids(struct task_struct *leader, struct task_struct *thread); #define do_each_task_pid(who, type, task) \ diff --git a/include/linux/pspace.h b/include/linux/pspace.h new file mode 100644 index 0000000..3735906 --- /dev/null +++ b/include/linux/pspace.h @@ -0,0 +1,74 @@ +#ifndef _LINUX_PSPACE_H +#define _LINUX_PSPACE_H + +#include +#include + +struct pidmap +{ + atomic_t nr_free; + void *page; +}; + +#define PIDMAP_ENTRIES ((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8) + +struct pspace +{ + atomic_t count; + struct pspace *parent; + int last_pid; + int offset; + int min; + int max; + struct pidmap pidmap[PIDMAP_ENTRIES]; +}; + +extern struct pspace init_pspace; + +#define INVALID_PID 0x7fffffff + +/* max is a pspace-local limit, so test p's pid relative to the current pspace's offset. */ +static inline int pid_visible(struct task_struct *p) +{ + int pid = p->pid - current->pspace->offset; + return (pid > 0) && (pid < current->pspace->max); +} + +static inline int pid_from_user(int pid) +{ + if (pid < current->pspace->max) + pid += current->pspace->offset; + else + pid = INVALID_PID; + return pid; +} + +static inline int pid_to_user(int pid) +{ + pid -= current->pspace->offset; + return pid; +} + +static inline void get_pspace(struct pspace *pspace) +{ + atomic_inc(&pspace->count); +} + +extern void __put_pspace(struct pspace *pspace); + +static inline void put_pspace(struct pspace *pspace) +{ + if (atomic_dec_and_test(&pspace->count)) { + __put_pspace(pspace); + } +} + +extern int copy_pspace(int flags, struct task_struct *p); + +static inline void exit_pspace(struct task_struct *tsk) +{ + put_pspace(tsk->pspace); + tsk->pspace = NULL; +} + +#endif /* _LINUX_PSPACE_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index c3ba31f..aab32c1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -60,6 +60,7 @@ struct exec_domain; #define CLONE_UNTRACED 0x00800000 /* set if the tracing process can't force CLONE_PTRACE on this clone */ #define CLONE_CHILD_SETTID
0x01000000 /* set the TID in the child */ #define CLONE_STOPPED 0x02000000 /* Start in stopped state */ +#define CLONE_NEWPSPACE 0x04000000 /* New process space groupe? */ /* * List of flags we want to share for kernel threads, @@ -231,6 +232,7 @@ extern signed long schedule_timeout_unin asmlinkage void schedule(void); struct namespace; +struct pspace; /* Maximum number of active map areas.. This is a random (large) number */ #define DEFAULT_MAX_MAP_COUNT 65536 @@ -751,6 +753,8 @@ struct task_struct { struct files_struct *files; /* namespace */ struct namespace *namespace; +/* pid namespace */ + struct pspace *pspace; /* signal handlers */ struct signal_struct *signal; struct sighand_struct *sighand; diff --git a/kernel/exit.c b/kernel/exit.c index 4307773..e4a807e 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -52,6 +53,7 @@ static void __unhash_process(struct task if (p->pid) __get_cpu_var(process_counts)--; } + exit_pspace(p); REMOVE_LINKS(p); } @@ -997,7 +999,7 @@ static int wait_noreap_copyout(task_t *p if (!retval) retval = put_user((short)why, &infop->si_code); if (!retval) - retval = put_user(pid, &infop->si_pid); + retval = put_user(pid_to_user(pid), &infop->si_pid); if (!retval) retval = put_user(uid, &infop->si_uid); if (!retval) @@ -1129,7 +1131,7 @@ static int wait_task_zombie(task_t *p, i retval = put_user(status, &infop->si_status); } if (!retval && infop) - retval = put_user(p->pid, &infop->si_pid); + retval = put_user(pid_to_user(p->pid), &infop->si_pid); if (!retval && infop) retval = put_user(p->uid, &infop->si_uid); if (retval) { @@ -1268,7 +1270,7 @@ bail_ref: if (!retval && infop) retval = put_user(exit_code, &infop->si_status); if (!retval && infop) - retval = put_user(p->pid, &infop->si_pid); + retval = put_user(pid_to_user(p->pid), &infop->si_pid); if (!retval && infop) retval = put_user(p->uid, &infop->si_uid); if (!retval) @@ -1472,7 +1474,7 @@ end: 
remove_wait_queue(&current->signal->wait_chldexit,&wait); if (infop) { if (retval > 0) - retval = 0; + retval = 0; else { /* * For a WNOHANG return, clear out all the fields @@ -1493,6 +1495,8 @@ end: retval = put_user(0, &infop->si_status); } } + if (retval > 0) + retval = pid_to_user(retval); return retval; } diff --git a/kernel/fork.c b/kernel/fork.c index 533ce27..5ec88d3 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -859,7 +860,7 @@ asmlinkage long sys_set_tid_address(int { current->clear_child_tid = tidptr; - return current->pid; + return pid_to_user(current->pid); } /* @@ -884,6 +885,14 @@ static task_t *copy_process(unsigned lon if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); + /* For the moment don't allow sharing in the context of CLONE_NEWPSPACE + * Some sharing is likely safe but I haven't looked closely to see what that + * is yet. CLONE_THREAD is clearly not. + */ + if ((clone_flags & CLONE_NEWPSPACE) && + (clone_flags & (CLONE_THREAD|CLONE_FS|CLONE_SIGHAND|CLONE_VM))) + return ERR_PTR(-EINVAL); + /* * Thread groups must share signals as well, and detached threads * can only be started up within the thread group. @@ -939,7 +948,7 @@ static task_t *copy_process(unsigned lon p->pid = pid; retval = -EFAULT; if (clone_flags & CLONE_PARENT_SETTID) - if (put_user(p->pid, parent_tidptr)) + if (put_user(pid_to_user(p->pid), parent_tidptr)) goto bad_fork_cleanup; p->proc_dentry = NULL; @@ -1126,6 +1135,13 @@ static task_t *copy_process(unsigned lon spin_unlock(&current->sighand->siglock); } + /* It is important that we don't have an error + * handling path after this or the original + * pid will be freed twice.
+ */ + if ((retval = copy_pspace(clone_flags, p))) + goto bad_fork_cleanup_namespace; + /* * inherit ioprio */ @@ -1248,7 +1264,9 @@ long do_fork(unsigned long clone_flags, { struct task_struct *p; int trace = 0; - long pid = alloc_pidmap(); + long pid; + + pid = alloc_pidmap(current->pspace, 1); if (pid < 0) return -EAGAIN; @@ -1295,7 +1313,7 @@ long do_fork(unsigned long clone_flags, ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP); } } else { - free_pidmap(pid); + free_pidmap(current->pspace, pid, 1); pid = PTR_ERR(p); } return pid; diff --git a/kernel/pid.c b/kernel/pid.c index edba31c..70feecd 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -26,23 +26,22 @@ #include #include #include +#include +#include +#include #define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift) static struct hlist_head *pid_hash[PIDTYPE_MAX]; static int pidhash_shift; -int pid_max = PID_MAX_DEFAULT; -int last_pid; - #define RESERVED_PIDS 300 int pid_max_min = RESERVED_PIDS + 1; int pid_max_max = PID_MAX_LIMIT; -#define PIDMAP_ENTRIES ((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8) #define BITS_PER_PAGE (PAGE_SIZE*8) #define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1) -#define mk_pid(map, off) (((map) - pidmap_array)*BITS_PER_PAGE + (off)) +#define mk_pid(map, off) (((map) - pspace->pidmap)*BITS_PER_PAGE + (off)) #define find_next_offset(map, off) \ find_next_zero_bit((map)->page, BITS_PER_PAGE, off) @@ -52,36 +51,53 @@ int pid_max_max = PID_MAX_LIMIT; * value does not cause lots of bitmaps to be allocated, but * the scheme scales to up to 4 million PIDs, runtime. */ -typedef struct pidmap { - atomic_t nr_free; - void *page; -} pidmap_t; - -static pidmap_t pidmap_array[PIDMAP_ENTRIES] = - { [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } }; +struct pspace init_pspace = { + .count = ATOMIC_INIT(1), + .parent = NULL, + .last_pid = 0, + .offset = 0, + .min = RESERVED_PIDS, + .max = PID_MAX_DEFAULT, + .pidmap = { + [ 0 ... 
PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } + }, +}; static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock); -fastcall void free_pidmap(int pid) +fastcall void free_pidmap(struct pspace *pspace, int pid, int count) { - pidmap_t *map = pidmap_array + pid / BITS_PER_PAGE; - int offset = pid & BITS_PER_PAGE_MASK; + struct pidmap *map; + int offset; + if (pid == 0) + return; + pid -= pspace->offset; + map = pspace->pidmap + pid/BITS_PER_PAGE; + offset = pid & BITS_PER_PAGE_MASK; - clear_bit(offset, map->page); - atomic_inc(&map->nr_free); + for(;count; --count) { + clear_bit(offset, map->page); + atomic_inc(&map->nr_free); + + if (++offset == BITS_PER_PAGE) { + offset = 0; + ++map; + } + } } -int alloc_pidmap(void) +int alloc_pidmap(struct pspace *pspace, int count) { - int i, offset, max_scan, pid, last = last_pid; - pidmap_t *map; + int i, offset, max_scan, pid, spid, lpid, last = pspace->last_pid; + struct pidmap *map; + spid = lpid = 0; pid = last + 1; - if (pid >= pid_max) - pid = RESERVED_PIDS; + if (pid >= pspace->max) + pid = pspace->min; offset = pid & BITS_PER_PAGE_MASK; - map = &pidmap_array[pid/BITS_PER_PAGE]; - max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset; + map = &pspace->pidmap[pid/BITS_PER_PAGE]; + max_scan = (pspace->max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset; for (i = 0; i <= max_scan; ++i) { if (unlikely(!map->page)) { unsigned long page = get_zeroed_page(GFP_KERNEL); @@ -101,9 +117,20 @@ int alloc_pidmap(void) if (likely(atomic_read(&map->nr_free))) { do { if (!test_and_set_bit(offset, map->page)) { + /* If there was a hole free the previous allocation */ + if (spid && (pid != (lpid + 1))) { + free_pidmap(pspace, + pspace->offset + spid, + lpid - spid + 1); + spid = 0; + } + if (!spid) spid = pid; + lpid = pid; atomic_dec(&map->nr_free); - last_pid = pid; - return pid; + if ((lpid - spid + 1) == count) { + pspace->last_pid = lpid; + return pspace->offset + spid; + } } offset = find_next_offset(map, 
offset); pid = mk_pid(map, offset); @@ -113,16 +140,16 @@ int alloc_pidmap(void) * bitmap block and the final block was the same * as the starting point, pid is before last_pid. */ - } while (offset < BITS_PER_PAGE && pid < pid_max && + } while (offset < BITS_PER_PAGE && pid < pspace->max && (i != max_scan || pid < last || !((last+1) & BITS_PER_PAGE_MASK))); } - if (map < &pidmap_array[(pid_max-1)/BITS_PER_PAGE]) { + if (map < &pspace->pidmap[(pspace->max-1)/BITS_PER_PAGE]) { ++map; offset = 0; } else { - map = &pidmap_array[0]; - offset = RESERVED_PIDS; + map = &pspace->pidmap[0]; + offset = pspace->min; if (unlikely(last == offset)) break; } @@ -201,7 +228,7 @@ void fastcall detach_pid(task_t *task, e if (tmp != type && find_pid(tmp, nr)) return; - free_pidmap(nr); + free_pidmap(task->pspace, nr, 1); } task_t *find_task_by_pid_type(int type, int nr) @@ -247,6 +274,90 @@ void switch_exec_pids(task_t *leader, ta attach_pid(leader, PIDTYPE_SID, leader->signal->session); } +static struct pspace *new_pspace(int pids, int offset) +{ + struct pspace *pspace; + int pages, i; + size_t bytes; + pages = (pids + 8*PAGE_SIZE - 1)/PAGE_SIZE/8; + bytes = offsetof(struct pspace, pidmap) + pages*sizeof(struct pidmap); + pspace = kmalloc(bytes, GFP_KERNEL); + if (!pspace) + return NULL; + atomic_set(&pspace->count, 1); + pspace->parent = NULL; + pspace->last_pid = 0; + pspace->offset = offset; + pspace->min = 1; + pspace->max = pids; + for (i = 0; i < pages; i++) { + atomic_set(&pspace->pidmap[i].nr_free, BITS_PER_PAGE); + pspace->pidmap[i].page = NULL; + } + return pspace; +} + +int copy_pspace(int flags, struct task_struct *p) +{ + struct pspace *new; + int pids, pid; + get_pspace(p->pspace); + + if (!(flags & CLONE_NEWPSPACE)) + return 0; + + /* For now allocate 1/256 of the pidspace */ + pids = p->pspace->max >> 8; + pid = -1; + if (pids > 0) + pid = alloc_pidmap(p->pspace, pids); + if (pid <= 0) { + put_pspace(p->pspace); + return -EAGAIN; + } + + /* Allocate the new pidspace 
structure */ + new = new_pspace(pids, pid); + if (!new) { + free_pidmap(p->pspace, pid, pids); + put_pspace(p->pspace); + return -ENOMEM; + } + + /* Free the original pid */ + free_pidmap(p->pspace, p->pid, 1); + + /* Setup the new pspace and pid */ + new->parent = p->pspace; + p->pspace = new; + p->pid = pid; + p->tgid = pid; + + return 0; +} + +void __put_pspace(struct pspace *pspace) +{ + struct pspace *parent; + struct pidmap *map; + int i, offset, pids, pages; + + BUG_ON(atomic_read(&pspace->count) != 0); + /* Read parent, offset and size now; they are still needed after kfree(pspace). */ + map = pspace->pidmap; + parent = pspace->parent; + offset = pspace->offset; + pids = pspace->max; + pages = (pids + 8*PAGE_SIZE - 1)/PAGE_SIZE/8; + for (i = 0; i < pages; i++) { + BUG_ON(atomic_read(&map[i].nr_free) != BITS_PER_PAGE); + free_page((unsigned long)map[i].page); + } + kfree(pspace); + free_pidmap(parent, offset, pids); + put_pspace(parent); +} + /* * The pid hash table is scaled according to the amount of memory in the * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or @@ -279,9 +390,9 @@ void __init pidmap_init(void) { int i; - pidmap_array->page = (void *)get_zeroed_page(GFP_KERNEL); - set_bit(0, pidmap_array->page); - atomic_dec(&pidmap_array->nr_free); + init_pspace.pidmap->page = (void *)get_zeroed_page(GFP_KERNEL); + set_bit(0, init_pspace.pidmap->page); + atomic_dec(&init_pspace.pidmap->nr_free); /* * Allocate PID 0, and hash it via all PID types: diff --git a/kernel/sched.c b/kernel/sched.c index 1f31a52..db0f769 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -1555,7 +1556,7 @@ asmlinkage void schedule_tail(task_t *pr preempt_enable(); #endif if (current->set_child_tid) - put_user(current->pid, current->set_child_tid); + put_user(pid_to_user(current->pid), current->set_child_tid); } /* diff --git a/kernel/signal.c b/kernel/signal.c index cba193c..5b89b8f 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -25,6 +25,7 @@ #include
#include #include +#include #include #include #include @@ -1211,7 +1212,7 @@ static int kill_something_info(int sig, read_lock(&tasklist_lock); for_each_process(p) { - if (p->pid > 1 && p->tgid != current->tgid) { + if (p->pid > 1 && p->tgid != current->tgid && pid_visible(p)) { int err = group_send_sig_info(sig, info, p); ++count; if (err != -EPERM) @@ -1221,9 +1222,9 @@ static int kill_something_info(int sig, read_unlock(&tasklist_lock); return count ? retval : -ESRCH; } else if (pid < 0) { - return kill_pg_info(sig, info, -pid); + return kill_pg_info(sig, info, pid_from_user(-pid)); } else { - return kill_proc_info(sig, info, pid); + return kill_proc_info(sig, info, pid_from_user(pid)); } } @@ -2281,6 +2282,9 @@ asmlinkage long sys_tgkill(int tgid, int if (pid <= 0 || tgid <= 0) return -EINVAL; + pid = pid_from_user(pid); + tgid = pid_from_user(tgid); + info.si_signo = sig; info.si_errno = 0; info.si_code = SI_TKILL; @@ -2321,6 +2325,8 @@ sys_tkill(int pid, int sig) if (pid <= 0) return -EINVAL; + pid = pid_from_user(pid); + info.si_signo = sig; info.si_errno = 0; info.si_code = SI_TKILL; diff --git a/kernel/sys.c b/kernel/sys.c index 2fa1ed1..35524b5 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -1079,6 +1080,11 @@ asmlinkage long sys_setpgid(pid_t pid, p pgid = pid; if (pgid < 0) return -EINVAL; + if (pid < 0) + return -EINVAL; + pid = pid_from_user(pid); + pgid = pid_from_user(pgid); + /* From this point forward we keep holding onto the tasklist lock * so that our parent does not change from under us. 
-DaveM @@ -1142,11 +1148,12 @@ out: asmlinkage long sys_getpgid(pid_t pid) { if (!pid) { - return process_group(current); + return pid_to_user(process_group(current)); } else { int retval; struct task_struct *p; + pid = pid_from_user(pid); read_lock(&tasklist_lock); p = find_task_by_pid(pid); @@ -1155,6 +1162,7 @@ asmlinkage long sys_getpgid(pid_t pid) retval = security_task_getpgid(p); if (!retval) retval = process_group(p); + retval = (retval < 0) ? retval : pid_to_user(retval); /* leave a failure code from security_task_getpgid() untouched */ } read_unlock(&tasklist_lock); return retval; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 8e56e24..889eea9 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -62,7 +63,6 @@ extern int core_uses_pid; extern int suid_dumpable; extern char core_pattern[]; extern int cad_pid; -extern int pid_max; extern int min_free_kbytes; extern int printk_ratelimit_jiffies; extern int printk_ratelimit_burst; @@ -577,7 +577,7 @@ static ctl_table kern_table[] = { { .ctl_name = KERN_PIDMAX, .procname = "pid_max", - .data = &pid_max, + .data = &init_pspace.max, .maxlen = sizeof (int), .mode = 0644, .proc_handler = &proc_dointvec_minmax, diff --git a/kernel/timer.c b/kernel/timer.c index 3ba10fa..cc1378b 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -996,7 +997,7 @@ asmlinkage unsigned long sys_alarm(unsig */ asmlinkage long sys_getpid(void) { - return current->tgid; + return pid_to_user(current->tgid); } /* @@ -1040,7 +1041,7 @@ asmlinkage long sys_getppid(void) #endif break; } - return pid; + return pid_to_user(pid); } asmlinkage long sys_getuid(void) -- 1.0.GIT