From: Eric W. Biederman To keep the dcache from filling up with dead /proc entries we flush them on process exit. However over the years that code has gotten hairy with a dentry_pointer and a lock in task_struct and misdocumented as a correctness feature. I have rewritten this code to look and see if we have a corresponding entry in the dcache and if so flush it on process exit. This removes the extra fields in the task_struct and allows me to trivially handle the case of a /proc//task/ entry as well as the current /proc/ entries. Signed-off-by: Eric W. Biederman Signed-off-by: Andrew Morton --- fs/exec.c | 10 -- fs/proc/base.c | 134 ++++++++++++++++-------------------- include/linux/init_task.h | 1 include/linux/proc_fs.h | 6 - include/linux/sched.h | 3 kernel/exit.c | 7 - kernel/fork.c | 3 7 files changed, 64 insertions(+), 100 deletions(-) diff -puN fs/exec.c~proc-rewrite-the-proc-dentry-flush-on-exit fs/exec.c --- a/fs/exec.c~proc-rewrite-the-proc-dentry-flush-on-exit +++ a/fs/exec.c @@ -666,8 +666,6 @@ static int de_thread(struct task_struct * and to assume its PID: */ if (!thread_group_leader(current)) { - struct dentry *proc_dentry1, *proc_dentry2; - /* * Wait for the thread group leader to be a zombie. * It should already be zombie at this point, most @@ -689,10 +687,6 @@ static int de_thread(struct task_struct */ current->start_time = leader->start_time; - spin_lock(&leader->proc_lock); - spin_lock(¤t->proc_lock); - proc_dentry1 = proc_pid_unhash(current); - proc_dentry2 = proc_pid_unhash(leader); write_lock_irq(&tasklist_lock); BUG_ON(leader->tgid != current->tgid); @@ -729,10 +723,6 @@ static int de_thread(struct task_struct leader->exit_state = EXIT_DEAD; write_unlock_irq(&tasklist_lock); - spin_unlock(&leader->proc_lock); - spin_unlock(¤t->proc_lock); - proc_pid_flush(proc_dentry1); - proc_pid_flush(proc_dentry2); } /* diff -puN fs/proc/base.c~proc-rewrite-the-proc-dentry-flush-on-exit fs/proc/base.c --- a/fs/proc/base.c~proc-rewrite-the-proc-dentry-flush-on-exit +++ a/fs/proc/base.c @@ -1352,16 +1352,6 @@ static int tid_fd_revalidate(struct dent return 0; } -static void pid_base_iput(struct dentry *dentry, struct inode *inode) -{ - struct task_struct *task = proc_task(inode); - spin_lock(&task->proc_lock); - if (task->proc_dentry == dentry) - task->proc_dentry = NULL; - spin_unlock(&task->proc_lock); - iput(inode); -} - static int pid_delete_dentry(struct dentry * dentry) { /* Is the task we represent dead? @@ -1383,13 +1373,6 @@ static struct dentry_operations pid_dent .d_delete = pid_delete_dentry, }; -static struct dentry_operations pid_base_dentry_operations = -{ - .d_revalidate = pid_revalidate, - .d_iput = pid_base_iput, - .d_delete = pid_delete_dentry, -}; - /* Lookups */ static unsigned name_to_int(struct dentry *dentry) @@ -1859,57 +1842,70 @@ static struct inode_operations proc_self }; /** - * proc_pid_unhash - Unhash /proc/@pid entry from the dcache. - * @p: task that should be flushed. + * proc_flush_task - Remove dcache entries for @task from the /proc dcache. + * + * @task: task that should be flushed. + * + * Looks in the dcache for + * /proc/@pid + * /proc/@tgid/task/@pid + * if either directory is present flushes it and all of it'ts children + * from the dcache. * - * Drops the /proc/@pid dcache entry from the hash chains. + * It is safe and reasonable to cache /proc entries for a task until + * that task exits. After that they just clog up the dcache with + * useless entries, possibly causing useful dcache entries to be + * flushed instead. This routine is proved to flush those useless + * dcache entries at process exit time. * - * Dropping /proc/@pid entries and detach_pid must be synchroneous, - * otherwise e.g. /proc/@pid/exe might point to the wrong executable, - * if the pid value is immediately reused. This is enforced by - * - caller must acquire spin_lock(p->proc_lock) - * - must be called before detach_pid() - * - proc_pid_lookup acquires proc_lock, and checks that - * the target is not dead by looking at the attach count - * of PIDTYPE_PID. + * NOTE: This routine is just an optimization so it does not guarantee + * that no dcache entries will exist at process exit time it + * just makes it very unlikely that any will persist. */ - -struct dentry *proc_pid_unhash(struct task_struct *p) +void proc_flush_task(struct task_struct *task) { - struct dentry *proc_dentry; - - proc_dentry = p->proc_dentry; - if (proc_dentry != NULL) { + struct dentry *dentry, *leader, *dir; + char buf[30]; + struct qstr name; - spin_lock(&dcache_lock); - spin_lock(&proc_dentry->d_lock); - if (!d_unhashed(proc_dentry)) { - dget_locked(proc_dentry); - __d_drop(proc_dentry); - spin_unlock(&proc_dentry->d_lock); - } else { - spin_unlock(&proc_dentry->d_lock); - proc_dentry = NULL; - } - spin_unlock(&dcache_lock); + name.name = buf; + name.len = snprintf(buf, sizeof(buf), "%d", task->pid); + dentry = d_hash_and_lookup(proc_mnt->mnt_root, &name); + if (dentry) { + shrink_dcache_parent(dentry); + d_drop(dentry); + dput(dentry); } - return proc_dentry; -} -/** - * proc_pid_flush - recover memory used by stale /proc/@pid/x entries - * @proc_dentry: directoy to prune. - * - * Shrink the /proc directory that was used by the just killed thread. - */ - -void proc_pid_flush(struct dentry *proc_dentry) -{ - might_sleep(); - if(proc_dentry != NULL) { - shrink_dcache_parent(proc_dentry); - dput(proc_dentry); + if (thread_group_leader(task)) + goto out; + + name.name = buf; + name.len = snprintf(buf, sizeof(buf), "%d", task->tgid); + leader = d_hash_and_lookup(proc_mnt->mnt_root, &name); + if (!leader) + goto out; + + name.name = "task"; + name.len = strlen(name.name); + dir = d_hash_and_lookup(leader, &name); + if (!dir) + goto out_put_leader; + + name.name = buf; + name.len = snprintf(buf, sizeof(buf), "%d", task->pid); + dentry = d_hash_and_lookup(dir, &name); + if (dentry) { + shrink_dcache_parent(dentry); + d_drop(dentry); + dput(dentry); } + + dput(dir); +out_put_leader: + dput(leader); +out: + return; } /* SMP-safe */ @@ -1919,7 +1915,6 @@ struct dentry *proc_pid_lookup(struct in struct inode *inode; struct proc_inode *ei; unsigned tgid; - int died; if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) { inode = new_inode(dir->i_sb); @@ -1965,23 +1960,16 @@ struct dentry *proc_pid_lookup(struct in inode->i_nlink = 4; #endif - dentry->d_op = &pid_base_dentry_operations; + dentry->d_op = &pid_dentry_operations; - died = 0; d_add(dentry, inode); - spin_lock(&task->proc_lock); - task->proc_dentry = dentry; if (!pid_alive(task)) { - dentry = proc_pid_unhash(task); - died = 1; + d_drop(dentry); + shrink_dcache_parent(dentry); + goto out; } - spin_unlock(&task->proc_lock); put_task_struct(task); - if (died) { - proc_pid_flush(dentry); - goto out; - } return NULL; out: return ERR_PTR(-ENOENT); @@ -2024,7 +2012,7 @@ static struct dentry *proc_task_lookup(s inode->i_nlink = 3; #endif - dentry->d_op = &pid_base_dentry_operations; + dentry->d_op = &pid_dentry_operations; d_add(dentry, inode); diff -puN include/linux/init_task.h~proc-rewrite-the-proc-dentry-flush-on-exit include/linux/init_task.h --- a/include/linux/init_task.h~proc-rewrite-the-proc-dentry-flush-on-exit +++ a/include/linux/init_task.h @@ -120,7 +120,6 @@ extern struct group_info init_groups; .signal = {{0}}}, \ .blocked = {{0}}, \ .alloc_lock = SPIN_LOCK_UNLOCKED, \ - .proc_lock = SPIN_LOCK_UNLOCKED, \ .journal_info = NULL, \ .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ .fs_excl = ATOMIC_INIT(0), \ diff -puN include/linux/proc_fs.h~proc-rewrite-the-proc-dentry-flush-on-exit include/linux/proc_fs.h --- a/include/linux/proc_fs.h~proc-rewrite-the-proc-dentry-flush-on-exit +++ a/include/linux/proc_fs.h @@ -99,9 +99,8 @@ extern void proc_misc_init(void); struct mm_struct; +void proc_flush_task(struct task_struct *task); struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *); -struct dentry *proc_pid_unhash(struct task_struct *p); -void proc_pid_flush(struct dentry *proc_dentry); int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir); unsigned long task_vsize(struct mm_struct *); int task_statm(struct mm_struct *, int *, int *, int *, int *); @@ -211,8 +210,7 @@ static inline void proc_net_remove(const #define proc_net_create(name, mode, info) ({ (void)(mode), NULL; }) static inline void proc_net_remove(const char *name) {} -static inline struct dentry *proc_pid_unhash(struct task_struct *p) { return NULL; } -static inline void proc_pid_flush(struct dentry *proc_dentry) { } +static inline void proc_flush_task(struct task_struct *task) { } static inline struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, struct proc_dir_entry *parent) { return NULL; } diff -puN include/linux/sched.h~proc-rewrite-the-proc-dentry-flush-on-exit include/linux/sched.h --- a/include/linux/sched.h~proc-rewrite-the-proc-dentry-flush-on-exit +++ a/include/linux/sched.h @@ -905,8 +905,6 @@ struct task_struct { u32 self_exec_id; /* Protection of (de-)allocation: mm, files, fs, tty, keyrings */ spinlock_t alloc_lock; -/* Protection of proc_dentry: nesting proc_lock, dcache_lock, write_lock_irq(&tasklist_lock); */ - spinlock_t proc_lock; /* Protection of the PI data structures: */ spinlock_t pi_lock; @@ -933,7 +931,6 @@ struct task_struct { /* VM state */ struct reclaim_state *reclaim_state; - struct dentry *proc_dentry; struct backing_dev_info *backing_dev_info; struct io_context *io_context; diff -puN kernel/exit.c~proc-rewrite-the-proc-dentry-flush-on-exit kernel/exit.c --- a/kernel/exit.c~proc-rewrite-the-proc-dentry-flush-on-exit +++ a/kernel/exit.c @@ -139,12 +139,8 @@ void release_task(struct task_struct * p { int zap_leader; task_t *leader; - struct dentry *proc_dentry; - repeat: atomic_dec(&p->user->processes); - spin_lock(&p->proc_lock); - proc_dentry = proc_pid_unhash(p); write_lock_irq(&tasklist_lock); ptrace_unlink(p); BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); @@ -173,8 +169,7 @@ repeat: sched_exit(p); write_unlock_irq(&tasklist_lock); - spin_unlock(&p->proc_lock); - proc_pid_flush(proc_dentry); + proc_flush_task(p); release_thread(p); call_rcu(&p->rcu, delayed_put_task_struct); diff -puN kernel/fork.c~proc-rewrite-the-proc-dentry-flush-on-exit kernel/fork.c --- a/kernel/fork.c~proc-rewrite-the-proc-dentry-flush-on-exit +++ a/kernel/fork.c @@ -1008,13 +1008,10 @@ static task_t *copy_process(unsigned lon if (put_user(p->pid, parent_tidptr)) goto bad_fork_cleanup; - p->proc_dentry = NULL; - INIT_LIST_HEAD(&p->children); INIT_LIST_HEAD(&p->sibling); p->vfork_done = NULL; spin_lock_init(&p->alloc_lock); - spin_lock_init(&p->proc_lock); clear_tsk_thread_flag(p, TIF_SIGPENDING); init_sigpending(&p->pending); _