Subject: [PATCH] pspace: Rework to include the notion of system-wide unique kernel pid. From: Eric W. Biederman Date: 1134673946 -0700 With totally disjoint pid spaces it becomes hard to use the pid in hash tables and such. So, to add a static limit on the number of pspaces and to allow simpler hash computations, give each pspace a kernel-wide unique number. This also modifies struct pid so the nr is an unsigned long instead of a signed int. The signedness does not matter, and given that nr is followed by a pointer, making it an unsigned long does not increase the amount of space taken up on either a 32bit or a 64bit arch. What it does do is give me 32 + 10 bits in which I can store my pspace number on a 64bit arch and 17 bits in which I can store my pspace number on a 32bit arch. With only a single comparison the hash lookup should be noticeably simpler. Also this gives me a cheap backdoor method to get my kernel pid from the process. --- arch/i386/kernel/ptrace.c | 1 include/linux/pid.h | 20 +++++---- include/linux/pspace.h | 43 ++++++++++++++++++ include/linux/sched.h | 2 - kernel/exit.c | 4 +- kernel/fork.c | 10 ++-- kernel/kthread.c | 1 kernel/pid.c | 105 ++++++++++++++++++++++++++++----------------- kernel/sys.c | 2 - mm/vmscan.c | 1 10 files changed, 129 insertions(+), 60 deletions(-) cf932b70c3933a988cf5a6ea542ad875ba9667b2 diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index e5a8e2b..e77a4a1 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/include/linux/pid.h b/include/linux/pid.h index fff7805..94c072c 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -15,8 +15,7 @@ struct pspace; struct pid { /* Try to keep pid_chain in the same cacheline as nr for find_pid */ - int nr; - struct pspace *pspace; + unsigned long nr; struct hlist_node pid_chain; /* list of pids with the same nr, only one of them is in the hash */ struct list_head pid_list; @@ 
-25,14 +24,13 @@ struct pid #define pid_task(elem, type) \ list_entry(elem, struct task_struct, pids[type].pid_list) -#define pid_nr(tsk, type) (tsk->pids[type].nr) -#define pid_pspace(tsk, type) (tsk->pids[type].pspace) +#define pid_nr(tsk, type) ((tsk)->pids[type].nr) /* * attach_pid() and detach_pid() must be called with the tasklist_lock * write-held. */ -extern int FASTCALL(attach_pid(struct task_struct *task, struct pspace *pspace, enum pid_type type, int nr)); +extern int FASTCALL(attach_pid(struct task_struct *task, enum pid_type type, int nr)); extern void FASTCALL(detach_pid(struct task_struct *task, enum pid_type)); @@ -40,18 +38,22 @@ extern void FASTCALL(detach_pid(struct t * look up a PID in the hash table. Must be called with the tasklist_lock * held. */ -extern struct pid *FASTCALL(find_pid(struct pspace *, enum pid_type, int)); +extern struct pid *FASTCALL(find_kpid(enum pid_type, unsigned long)); + +extern struct task_struct *find_task_by_kpid(unsigned long nr); +extern struct task_struct *find_task_by_kpid_type(int type, unsigned long nr); extern int alloc_pidmap(struct pspace *pspace); extern void FASTCALL(free_pidmap(struct pspace *pspace, int pid)); extern void switch_exec_pids(struct task_struct *leader, struct task_struct *thread); -#define do_each_task_pid(pspace, who, type, task) \ - if ((task = find_task_by_pid_type(pspace, type, who))) { \ +#define do_each_task_kpid(who, type, task) \ + if ((task = find_task_by_kpid_type(type, who))) { \ prefetch((task)->pids[type].pid_list.next); \ do { -#define while_each_task_pid(pspace, who, type, task) \ + +#define while_each_task_kpid(who, type, task) \ } while (task = pid_task((task)->pids[type].pid_list.next,\ type), \ prefetch((task)->pids[type].pid_list.next), \ diff --git a/include/linux/pspace.h b/include/linux/pspace.h index 21d3a40..1e35948 100644 --- a/include/linux/pspace.h +++ b/include/linux/pspace.h @@ -10,6 +10,22 @@ struct pidmap void *page; }; +#if PID_MAX_LIMIT == 32768 +# define 
PSPACE_SHIFT 15 +#elif PID_MAX_LIMIT == (4 * 1024 * 1024) +# define PSPACE_SHIFT 22 +#else +# error "Unexpected value for PID_MAX_LIMIT" +#endif + +/* Careful ((MAX_PSPACES - 1) << PSPACE_SHIFT) | (PID_MAX_LIMIT - 1) must fit + * in an unsigned long. The allocator is currently a simple bitmap so + * making MAX_PSPaCES large can waste a lot of space when not used. + */ +#define MAX_PSPACES 512 +#define MK_KPID(PSPACE_NR, PID) (((PSPACE_NR) << PSPACE_SHIFT) | (PID)) +#define KPID_PID(KPID) (KPID & (PID_MAX_LIMIT - 1)) +#define KPID_PSPACE_NR(KPID) ((KPID >> PSPACE_SHIFT) & (MAX_PSPACES - 1)) #define PIDMAP_ENTRIES ((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8) struct pspace @@ -19,6 +35,7 @@ struct pspace #define PSPACE_EXIT 0x00000001 /* pspace exit in progress */ struct pspace *parent; struct task_struct *child_reaper; + unsigned int nr; int nr_threads; int nr_processes; int last_pid; @@ -85,9 +102,33 @@ static inline int in_pspace(struct pspac { struct pspace *test; test = tsk->pspace; - while(test && (test != pspace)) + while((test != &init_pspace) && (test != pspace)) test = test->parent; return test == pspace; } + +static inline struct pid *find_pid(struct pspace *pspace, enum pid_type type, int nr) +{ + return find_kpid(type, MK_KPID(pspace->nr, nr)); +} + +static inline struct task_struct *find_task_by_pid_type(struct pspace *pspace, int type, int nr) +{ + return find_task_by_kpid_type(type, MK_KPID(pspace->nr, nr)); +} + +static inline struct task_struct *find_task_by_pid(struct pspace *pspace, int nr) +{ + return find_task_by_kpid(MK_KPID(pspace->nr, nr)); +} + + +#define do_each_task_pid(pspace, who, type, task) \ + do_each_task_kpid(MK_KPID((pspace)->nr, who), type, task) + +#define while_each_task_pid(pspace, who, type, task) \ + while_each_task_kpid(MK_KPID((pspace)->nr, who), type, task) + + #endif /* _LINUX_PSPACE_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 3a18d73..daed38c 100644 --- a/include/linux/sched.h +++ 
b/include/linux/sched.h @@ -966,8 +966,6 @@ extern struct task_struct init_task; extern struct mm_struct init_mm; -extern struct task_struct *find_task_by_pid(struct pspace *, int pid); -extern struct task_struct *find_task_by_pid_type(struct pspace *, int type, int pid); extern void set_special_pids(pid_t session, pid_t pgrp); extern void __set_special_pids(pid_t session, pid_t pgrp); diff --git a/kernel/exit.c b/kernel/exit.c index 2e91f52..f130e9b 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -269,12 +269,12 @@ void __set_special_pids(pid_t session, p if (curr->signal->session != session) { detach_pid(curr, PIDTYPE_SID); curr->signal->session = session; - attach_pid(curr, curr->pspace, PIDTYPE_SID, session); + attach_pid(curr, PIDTYPE_SID, session); } if (process_group(curr) != pgrp) { detach_pid(curr, PIDTYPE_PGID); curr->signal->pgrp = pgrp; - attach_pid(curr, curr->pspace, PIDTYPE_PGID, pgrp); + attach_pid(curr, PIDTYPE_PGID, pgrp); } } diff --git a/kernel/fork.c b/kernel/fork.c index 073041f..5b83c07 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1162,14 +1162,14 @@ static task_t *copy_process(unsigned lon p->signal->session = 0; p->signal->tty = NULL; p->signal->tty_old_pgrp = 0; - attach_pid(p, p->pspace->parent, PIDTYPE_WID, p->wid); + attach_pid(p, PIDTYPE_WID, p->wid); p->pspace->parent->nr_processes++; } - attach_pid(p, p->pspace, PIDTYPE_TID, p->tid); - attach_pid(p, p->pspace, PIDTYPE_TGID, p->tgid); + attach_pid(p, PIDTYPE_TID, p->tid); + attach_pid(p, PIDTYPE_TGID, p->tgid); if (thread_group_leader(p)) { - attach_pid(p, p->pspace, PIDTYPE_PGID, process_group(p)); - attach_pid(p, p->pspace, PIDTYPE_SID, p->signal->session); + attach_pid(p, PIDTYPE_PGID, process_group(p)); + attach_pid(p, PIDTYPE_SID, p->signal->session); if (p->tid) p->pspace->nr_processes++; } diff --git a/kernel/kthread.c b/kernel/kthread.c index c75ce8a..f1c342f 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -12,6 +12,7 @@ #include #include #include +#include 
#include /* diff --git a/kernel/pid.c b/kernel/pid.c index 7054adb..a0fd168 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -31,8 +31,8 @@ #include #include -#define pid_hashfn(pspace, nr) \ - hash_long(((unsigned long)pspace) ^ ((unsigned long)nr), pidhash_shift) +#define pid_hashfn(nr) \ + hash_long(nr, pidhash_shift) static struct hlist_head *pid_hash[PIDTYPE_MAX]; static int pidhash_shift; @@ -55,8 +55,9 @@ int pid_max_max = PID_MAX_LIMIT; */ struct pspace init_pspace = { .count = ATOMIC_INIT(1), - .parent = NULL, + .parent = &init_pspace, .child_reaper = &init_task, + .nr = 0, .nr_threads = 0, .nr_processes = 0, .last_pid = 0, @@ -69,6 +70,8 @@ struct pspace init_pspace = { static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock); +static DECLARE_BITMAP(pspaces, MAX_PSPACES); + fastcall void free_pidmap(struct pspace *pspace, int pid) { struct pidmap *map = pspace->pidmap + pid / BITS_PER_PAGE; @@ -138,43 +141,50 @@ int alloc_pidmap(struct pspace *pspace) return -1; } -struct pid * fastcall find_pid(struct pspace *pspace, enum pid_type type, int nr) +struct pid * fastcall find_kpid(enum pid_type type, unsigned long nr) { struct hlist_node *elem; struct pid *pid; hlist_for_each_entry(pid, elem, - &pid_hash[type][pid_hashfn(pspace, nr)], pid_chain) { - if ((pid->nr == nr) && (pid->pspace == pspace)) + &pid_hash[type][pid_hashfn(nr)], pid_chain) { + if (pid->nr == nr) return pid; } return NULL; } -int fastcall attach_pid(task_t *task, struct pspace *pspace, enum pid_type type, int nr) +int fastcall attach_pid(task_t *task, enum pid_type type, int nr) { + struct pspace *pspace; struct pid *pid, *task_pid; + unsigned long knr; + pspace = task->pspace; + if (type == PIDTYPE_WID) + pspace = pspace->parent; + if (!pspace) + return -EINVAL; + knr = MK_KPID(pspace->nr, nr); task_pid = &task->pids[type]; - pid = find_pid(pspace, type, nr); + pid = find_kpid(type, knr); if (pid == NULL) { hlist_add_head(&task_pid->pid_chain, - &pid_hash[type][pid_hashfn(pspace, 
nr)]); + &pid_hash[type][pid_hashfn(knr)]); INIT_LIST_HEAD(&task_pid->pid_list); } else { INIT_HLIST_NODE(&task_pid->pid_chain); list_add_tail(&task_pid->pid_list, &pid->pid_list); } - task_pid->nr = nr; - task_pid->pspace = pspace; + task_pid->nr = knr; return 0; } -static fastcall int __detach_pid(task_t *task, enum pid_type type) +static fastcall unsigned long __detach_pid(task_t *task, enum pid_type type) { struct pid *pid, *pid_next; - int nr = 0; + unsigned long nr = 0; pid = &task->pids[type]; if (!hlist_unhashed(&pid->pid_chain)) { @@ -187,55 +197,59 @@ static fastcall int __detach_pid(task_t struct pid, pid_list); /* insert next pid from pid_list to hash */ hlist_add_head(&pid_next->pid_chain, - &pid_hash[type][pid_hashfn(pid->pspace, pid_next->nr)]); + &pid_hash[type][pid_hashfn(pid_next->nr)]); } } list_del(&pid->pid_list); - pid->nr = -1; return nr; } void fastcall detach_pid(task_t *task, enum pid_type type) { - struct pspace *pspace = pid_pspace(task, type); - int tmp, nr; + struct pspace *pspace; + unsigned long knr; + int tmp, pid; - nr = __detach_pid(task, type); - if (!nr) + knr = __detach_pid(task, type); + pid = KPID_PID(knr); + if (!pid) return; for (tmp = PIDTYPE_MAX; --tmp >= 0; ) - if (tmp != type && find_pid(pspace, tmp, nr)) + if (tmp != type && find_kpid(tmp, knr)) return; - free_pidmap(pspace, nr); + pspace = task->pspace; + if (type == PIDTYPE_WID) + pspace = pspace->parent; + free_pidmap(pspace, pid); } -struct task_struct *find_task_by_pid_type(struct pspace *pspace, int type, int nr) +struct task_struct *find_task_by_kpid_type(int type, unsigned long nr) { struct pid *pid; - pid = find_pid(pspace, type, nr); + pid = find_kpid(type, nr); if (!pid) return NULL; return pid_task(&pid->pid_list, type); } -EXPORT_SYMBOL(find_task_by_pid_type); +EXPORT_SYMBOL(find_task_by_kpid_type); -struct task_struct *find_task_by_pid(struct pspace *pspace, int nr) +struct task_struct *find_task_by_kpid(unsigned long nr) { struct task_struct *task; - 
task = find_task_by_pid_type(pspace, PIDTYPE_TID, nr); + task = find_task_by_kpid_type(PIDTYPE_TID, nr); if (!task) - task = find_task_by_pid_type(pspace, PIDTYPE_WID, nr); + task = find_task_by_kpid_type(PIDTYPE_WID, nr); return task; } -EXPORT_SYMBOL(find_task_by_pid); +EXPORT_SYMBOL(find_task_by_kpid); /* * This function switches the PIDs if a non-leader thread calls @@ -259,24 +273,24 @@ void switch_exec_pids(task_t *leader, ta thread->tid = thread->tgid; thread->wid = wid; - attach_pid(thread, thread->pspace, PIDTYPE_TID, thread->tid); - attach_pid(thread, thread->pspace, PIDTYPE_TGID, thread->tgid); - attach_pid(thread, thread->pspace, PIDTYPE_PGID, thread->signal->pgrp); - attach_pid(thread, thread->pspace, PIDTYPE_SID, thread->signal->session); + attach_pid(thread, PIDTYPE_TID, thread->tid); + attach_pid(thread, PIDTYPE_TGID, thread->tgid); + attach_pid(thread, PIDTYPE_PGID, thread->signal->pgrp); + attach_pid(thread, PIDTYPE_SID, thread->signal->session); list_add_tail(&thread->tasks, &init_task.tasks); - attach_pid(leader, leader->pspace, PIDTYPE_TID, leader->tid); - attach_pid(leader, leader->pspace, PIDTYPE_TGID, leader->tgid); - attach_pid(leader, leader->pspace, PIDTYPE_PGID, leader->signal->pgrp); - attach_pid(leader, leader->pspace, PIDTYPE_SID, leader->signal->session); + attach_pid(leader, PIDTYPE_TID, leader->tid); + attach_pid(leader, PIDTYPE_TGID, leader->tgid); + attach_pid(leader, PIDTYPE_PGID, leader->signal->pgrp); + attach_pid(leader, PIDTYPE_SID, leader->signal->session); if (pspace_leader(leader)) { - attach_pid(thread, thread->pspace->parent, PIDTYPE_WID, thread->wid); + attach_pid(thread, PIDTYPE_WID, thread->wid); thread->pspace->child_reaper = thread; } } -static struct pspace *new_pspace(void) +static struct pspace *new_pspace(unsigned int nr) { struct pspace *pspace; int i; @@ -286,6 +300,7 @@ static struct pspace *new_pspace(void) atomic_set(&pspace->count, 1); pspace->parent = NULL; pspace->child_reaper = &init_task; + pspace->nr 
= nr; pspace->nr_threads = 0; pspace->nr_processes = 0; pspace->last_pid = 0; @@ -302,14 +317,22 @@ static struct pspace *new_pspace(void) int copy_pspace(int flags, struct task_struct *p) { struct pspace *new; + unsigned int nr; int pid; get_pspace(p->pspace); if (!(flags & CLONE_NPSPACE)) return 0; + /* Allocate the new pspace nr */ + do { + nr = find_first_zero_bit(pspaces, MAX_PSPACES); + if (nr >= MAX_PSPACES) + return -EAGAIN; + } while(test_and_set_bit(nr, pspaces)); + /* Allocate the new pidspace structure */ - new = new_pspace(); + new = new_pspace(nr); if (!new) { put_pspace(p->pspace); return -ENOMEM; @@ -335,6 +358,7 @@ void __put_pspace(struct pspace *pspace) BUG_ON(atomic_read(&pspace->count) != 0); + clear_bit(pspace->nr, pspaces); parent = pspace->parent; map = pspace->pidmap; for (i = 0; i < PIDMAP_ENTRIES; i++) { @@ -377,6 +401,7 @@ void __init pidmap_init(void) { int i; + set_bit(0, pspaces); init_pspace.pidmap->page = (void *)get_zeroed_page(GFP_KERNEL); set_bit(0, init_pspace.pidmap->page); atomic_dec(&init_pspace.pidmap->nr_free); @@ -386,5 +411,5 @@ void __init pidmap_init(void) */ for (i = 0; i < PIDTYPE_MAX; i++) - attach_pid(current, current->pspace, i, 0); + attach_pid(current, i, 0); } diff --git a/kernel/sys.c b/kernel/sys.c index 3d6e1e8..f7ec565 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1134,7 +1134,7 @@ ok_pgid: if (process_group(p) != pgid) { detach_pid(p, PIDTYPE_PGID); p->signal->pgrp = pgid; - attach_pid(p, p->pspace, PIDTYPE_PGID, pgid); + attach_pid(p, PIDTYPE_PGID, pgid); } err = 0; diff --git a/mm/vmscan.c b/mm/vmscan.c index 5b2e0c3..ecb17d2 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include -- 1.0.GIT