Subject: [PATCH] Improve the /proc private pidspace implementation. From: Eric W. Biederman Date: 1129536785 -0600 - Add nr_threads to struct pspace - Make the root directory of proc a magic symlink that points into my process space. This gives better results and is simpler code than a magic hash function. Finding the dcache entries is still problematic. Currently I put them in struct pspace but that is a fairly serious layering violation. --- arch/parisc/kernel/sys_parisc32.c | 3 +- arch/s390/appldata/appldata_os.c | 3 +- fs/proc/base.c | 46 +-------------------------------- fs/proc/inode.c | 34 +++++++++++++++++++++--- fs/proc/internal.h | 3 ++ fs/proc/proc_misc.c | 2 + fs/proc/root.c | 52 +++++++++---------------------------- include/linux/proc_fs.h | 8 ++++++ include/linux/pspace.h | 3 ++ kernel/exit.c | 1 + kernel/fork.c | 1 + kernel/pid.c | 16 ++++++++--- kernel/timer.c | 2 + 13 files changed, 77 insertions(+), 97 deletions(-) 2ad18c7ef5b9c5cb6234d29dfa6f508061e99955 diff --git a/arch/parisc/kernel/sys_parisc32.c b/arch/parisc/kernel/sys_parisc32.c index 6135690..4244aa9 100644 --- a/arch/parisc/kernel/sys_parisc32.c +++ b/arch/parisc/kernel/sys_parisc32.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -663,7 +664,7 @@ asmlinkage int sys32_sysinfo(struct sysi val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT); val.loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT); - val.procs = nr_threads; + val.procs = current->pspace->nr_threads; } while (read_seqretry(&xtime_lock, seq)); diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c index e0a476b..e6e0197 100644 --- a/arch/s390/appldata/appldata_os.c +++ b/arch/s390/appldata/appldata_os.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include "appldata.h" @@ -131,7 +132,7 @@ static void appldata_get_os_data(void *d os_data->nr_cpus = num_online_cpus(); - os_data->nr_threads = nr_threads; + os_data->nr_threads = current->pspace->nr_threads; 
os_data->nr_running = nr_running(); os_data->nr_iowait = nr_iowait(); os_data->avenrun[0] = avenrun[0] + (FIXED_1/200); diff --git a/fs/proc/base.c b/fs/proc/base.c index 6b1062d..7d16b66 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1377,28 +1377,6 @@ static int pid_delete_dentry(struct dent return !pid_alive(proc_task(dentry->d_inode)); } -static int pid_task_hash(struct dentry *dentry, struct qstr *qstr) -{ - int tgid = proc_name_to_int(qstr); - /* Just use the pid as the hash value */ - tgid = pid_from_user(tgid); - qstr->hash = tgid; - return 0; -} - -static int pid_task_cmp(struct dentry *dentry, struct qstr *cached, struct qstr *user) -{ - int cached_tgid, user_tgid; - cached_tgid = proc_name_to_int(cached); - user_tgid = proc_name_to_int(user); - if (cached_tgid == user_tgid) { - /* 2 equal pids */ - return 0; - } - return 1; -} - - static struct dentry_operations tid_fd_dentry_operations = { .d_revalidate = tid_fd_revalidate, @@ -1411,14 +1389,6 @@ static struct dentry_operations pid_dent .d_delete = pid_delete_dentry, }; -static struct dentry_operations pid_task_dentry_operations = -{ - .d_hash = pid_task_hash, - .d_compare = pid_task_cmp, - .d_revalidate = pid_revalidate, - .d_delete = pid_delete_dentry, -}; - static struct dentry_operations pid_base_dentry_operations = { .d_revalidate = pid_revalidate, @@ -1623,7 +1593,6 @@ static struct dentry *proc_pident_lookup ei = PROC_I(inode); inode->i_mode = p->mode; - dentry->d_op = &pid_dentry_operations; /* * Yes, it does not scale. And it should not. Don't add * new entries into /proc// without very good reasons. 
@@ -1633,7 +1602,6 @@ static struct dentry *proc_pident_lookup inode->i_nlink = 2 + get_tid_list(2, NULL, dir); inode->i_op = &proc_task_inode_operations; inode->i_fop = &proc_task_operations; - dentry->d_op = &pid_task_dentry_operations; break; case PROC_TID_FD: case PROC_TGID_FD: @@ -1780,6 +1748,7 @@ static struct dentry *proc_pident_lookup iput(inode); return ERR_PTR(-EINVAL); } + dentry->d_op = &pid_dentry_operations; d_add(dentry, inode); return NULL; @@ -1965,13 +1934,6 @@ struct dentry *proc_pid_lookup(struct in goto out; tgid = pid_from_user(tgid); - /* Normalize the dentry name */ - if (dentry->d_name.name != dentry->d_iname) - kfree(dentry->d_name.name); - dentry->d_name.name = dentry->d_iname; - dentry->d_name.len = snprintf(dentry->d_iname, sizeof(dentry->d_iname), - "%d", tgid); - read_lock(&tasklist_lock); task = find_task_by_pid(tgid); if (task) @@ -2031,12 +1993,6 @@ static struct dentry *proc_task_lookup(s if (tid == ~0U) goto out; tid = pid_from_user(tid); - /* Normalize the dentry name */ - if (dentry->d_name.name != dentry->d_iname) - kfree(dentry->d_name.name); - dentry->d_name.name = dentry->d_iname; - dentry->d_name.len = snprintf(dentry->d_iname, sizeof(dentry->d_iname), - "%d", tid); read_lock(&tasklist_lock); task = find_task_by_pid(tid); diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 8d2f330..27b6df7 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -15,12 +15,12 @@ #include #include #include +#include +#include "internal.h" #include #include -extern void free_proc_entry(struct proc_dir_entry *); -extern struct dentry_operations proc_root_dops; static inline struct proc_dir_entry * de_get(struct proc_dir_entry *de) { @@ -188,6 +188,31 @@ out_fail: goto out; } +static int do_proc_pspace_root_dentry(struct super_block *sb, struct pspace *pspace) +{ + struct inode *root_inode; + struct dentry *root; + root_inode = iget(sb, PROC_ROOT_INO); + if (!root_inode) + goto out_no_root; + + root = d_alloc_root(root_inode); + if (!root) + 
goto out_no_root; + root->d_fsdata = pspace; + pspace->proc_root = root; + return 0; + out_no_root: + iput(root_inode); + return -ENOMEM; + +} + +int proc_pspace_root_dentry(struct pspace *pspace) +{ + return do_proc_pspace_root_dentry(proc_sb(), pspace); +} + int proc_fill_super(struct super_block *s, void *data, int silent) { struct inode * root_inode; @@ -205,13 +230,14 @@ int proc_fill_super(struct super_block * /* * Fixup the root inode's nlink value */ - root_inode->i_nlink += nr_processes(); root_inode->i_uid = 0; root_inode->i_gid = 0; s->s_root = d_alloc_root(root_inode); if (!s->s_root) goto out_no_root; - s->s_root->d_op = &proc_root_dops; + + if (do_proc_pspace_root_dentry(s, &init_pspace)) + goto out_no_root; return 0; out_no_root: diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 4d582ae..1fca472 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -47,3 +47,6 @@ static inline int proc_type(struct inode { return PROC_I(inode)->type; } + +extern void free_proc_entry(struct proc_dir_entry *); +extern struct super_block *proc_sb(void); diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index ea3690f..9c1c92f 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -93,7 +93,7 @@ static int loadavg_read_proc(char *page, LOAD_INT(a), LOAD_FRAC(a), LOAD_INT(b), LOAD_FRAC(b), LOAD_INT(c), LOAD_FRAC(c), - nr_running(), nr_threads, current->pspace->last_pid); + nr_running(), current->pspace->nr_threads, current->pspace->last_pid); return proc_calc_metrics(page, start, off, count, eof, len); } diff --git a/fs/proc/root.c b/fs/proc/root.c index ed26e10..6102d6d 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "internal.h" struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc_root_driver; @@ -38,6 +39,11 @@ static struct file_system_type proc_fs_t .kill_sb = kill_anon_super, }; +struct super_block *proc_sb(void) +{ + return 
get_sb_single(&proc_fs_type, 0, NULL, proc_fill_super); +} + extern int __init proc_init_inodecache(void); void __init proc_root_init(void) { @@ -81,47 +87,14 @@ void __init proc_root_init(void) proc_bus = proc_mkdir("bus", NULL); } -static int proc_root_hash(struct dentry *dentry, struct qstr *qstr) +static void *proc_root_follow_link(struct dentry *dentry, struct nameidata *nd) { - int tgid = proc_name_to_int(qstr); - if (tgid == ~0) { - return 0; /* Use the default hash */ - } - /* Just use the pid as the hash value */ - tgid = pid_from_user(tgid); - qstr->hash = tgid; - return 0; + /* Follow the pseudo link to the per pspace root of the /proc filesystem */ + dput(nd->dentry); + nd->dentry = dget(current->pspace->proc_root); + return NULL; } -static int proc_root_cmp(struct dentry *dentry, struct qstr *cached, struct qstr *user) -{ - int cached_tgid, user_tgid; - cached_tgid = proc_name_to_int(cached); - user_tgid = proc_name_to_int(user); - if (user_tgid != ~0) { - user_tgid = pid_from_user(user_tgid); - } - if (cached_tgid == user_tgid) { - if (cached_tgid == ~0) { - /* 2 non pids see if their names are equal */ - if ((cached->len == user->len) && - (memcmp(cached->name, user->name, cached->len) == 0)) - { - return 0; - } - } else { - /* 2 equal pids */ - return 0; - } - } - return 1; -} - -struct dentry_operations proc_root_dops = { - .d_hash = proc_root_hash, - .d_compare = proc_root_cmp, -}; - static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd) { /* @@ -130,7 +103,7 @@ static struct dentry *proc_root_lookup(s * reporting, without any locking whatsoever. */ if (dir->i_ino == PROC_ROOT_INO) /* check for safety... 
*/ - dir->i_nlink = proc_root.nlink + nr_threads; + dir->i_nlink = proc_root.nlink + current->pspace->nr_threads; if (!proc_lookup(dir, dentry, nd)) { return NULL; @@ -176,6 +149,7 @@ static struct file_operations proc_root_ */ static struct inode_operations proc_root_inode_operations = { .lookup = proc_root_lookup, + .follow_link = proc_root_follow_link, }; /* diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 0563581..3f640ea 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -263,4 +263,12 @@ static inline struct proc_dir_entry *PDE return PROC_I(inode)->pde; } +struct pspace; +static inline struct pspace *PROC_P(const struct dentry *dentry) +{ + return dentry->d_fsdata; +} + +extern int proc_pspace_root_dentry(struct pspace *pspace); + #endif /* _LINUX_PROC_FS_H */ diff --git a/include/linux/pspace.h b/include/linux/pspace.h index 2a17045..deeaac8 100644 --- a/include/linux/pspace.h +++ b/include/linux/pspace.h @@ -12,10 +12,13 @@ struct pidmap #define PIDMAP_ENTRIES ((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8) +struct dentry; struct pspace { atomic_t count; struct pspace *parent; + struct dentry *proc_root; + int nr_threads; int last_pid; int offset; int min; diff --git a/kernel/exit.c b/kernel/exit.c index e4a807e..4a608ba 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -44,6 +44,7 @@ static void exit_mm(struct task_struct * static void __unhash_process(struct task_struct *p) { + current->pspace->nr_threads--; nr_threads--; detach_pid(p, PIDTYPE_PID); detach_pid(p, PIDTYPE_TGID); diff --git a/kernel/fork.c b/kernel/fork.c index e6d81ed..fce637f 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1172,6 +1172,7 @@ static task_t *copy_process(unsigned lon if (!current->signal->tty && p->signal->tty) p->signal->tty = NULL; + current->pspace->nr_threads++; nr_threads++; total_forks++; write_unlock_irq(&tasklist_lock); diff --git a/kernel/pid.c b/kernel/pid.c index fa4be23..615ed9b 100644 --- a/kernel/pid.c +++ 
b/kernel/pid.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift) @@ -285,11 +286,16 @@ static struct pspace *new_pspace(int pid if (!pspace) return NULL; atomic_set(&pspace->count, 1); - pspace->parent = NULL; - pspace->last_pid = 0; - pspace->offset = offset; - pspace->min = 1; - pspace->max = pids; + pspace->parent = NULL; + pspace->nr_threads = 0; + pspace->last_pid = 0; + pspace->offset = offset; + pspace->min = 1; + pspace->max = pids; + if (proc_pspace_root_dentry(pspace)) { + kfree(pspace); + return NULL; + } for (i = 0; i < pages; i++) { atomic_set(&pspace->pidmap[i].nr_free, BITS_PER_PAGE); pspace->pidmap[i].page = NULL; diff --git a/kernel/timer.c b/kernel/timer.c index efd2c03..cdd6080 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1270,7 +1270,7 @@ asmlinkage long sys_sysinfo(struct sysin val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT); val.loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT); - val.procs = nr_threads; + val.procs = current->pspace->nr_threads; } while (read_seqretry(&xtime_lock, seq)); si_meminfo(&val); -- 1.0.GIT