Subject: [PATCH] Enhance to do a better job reporting pspaces. From: Eric W. Biederman Date: 1130615683 -0600 - All inodes now use the generic inode sequence so they are unique. - No more magic symlinks are used. - All references are again only to tasks so pspaces can go away. - Symlinks now only appear if we are in our pspace or our parent's pspace. - Everything that depends on pspaces is now in base.c --- fs/proc/base.c | 434 ++++++++++++++++++++++++++++++++++------------- fs/proc/inode.c | 7 - fs/proc/internal.h | 7 + fs/proc/proc_misc.c | 22 -- fs/proc/root.c | 155 ----------------- include/linux/proc_fs.h | 4 6 files changed, 316 insertions(+), 313 deletions(-) e042975b4e5d2c83b73dc4c43df4d1d879296a58 diff --git a/fs/proc/base.c b/fs/proc/base.c index 5e139a6..8d0160c 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -85,12 +85,15 @@ * except to return them to user space. */ -#define fake_ino(pid,ino) (((pid)<<16)|(ino)) enum pid_directory_inos { - PROC_TGID_INO = 2, + PROC_SELF = 2, + PROC_MOUNTS, + PROC_SYMLINK_LAST, + PROC_LOADAVG = PROC_SYMLINK_LAST, + + PROC_TGID_INO, PROC_TGID_TASK, - PROC_TGID_PSPACE, PROC_TGID_STATUS, PROC_TGID_MEM, #ifdef CONFIG_SECCOMP @@ -128,6 +131,7 @@ enum pid_directory_inos { #endif PROC_TGID_OOM_SCORE, PROC_TGID_OOM_ADJUST, + PROC_TID_INO, PROC_TID_STATUS, PROC_TID_MEM, @@ -180,6 +184,13 @@ struct pid_entry { #define E(type,name,mode) {(type),sizeof(name)-1,(name),(mode)} +static struct pid_entry proc_base_stuff[] = { + E(PROC_SELF, "self", S_IFLNK|S_IRWXUGO), + E(PROC_MOUNTS, "mounts", S_IFLNK|S_IRWXUGO), + E(PROC_LOADAVG, "loadavg", S_IFREG|S_IRUGO), + {0,0,NULL,0} +}; + static struct pid_entry tgid_base_stuff[] = { E(PROC_TGID_TASK, "task", S_IFDIR|S_IRUGO|S_IXUGO), E(PROC_TGID_FD, "fd", S_IFDIR|S_IRUSR|S_IXUSR), @@ -263,7 +274,7 @@ static struct pid_entry tid_base_stuff[] }; static struct pid_entry pspace_base_stuff[] = { - E(PROC_TGID_PSPACE, "pspace", S_IFDIR|S_IRUGO|S_IXUGO), + E(PROC_ROOT_INO, "pspace", 
S_IFDIR|S_IRUGO|S_IXUGO), {0,0,NULL,0} }; @@ -1035,6 +1046,120 @@ static struct file_operations proc_secco }; #endif /* CONFIG_SECCOMP */ + +/* + * /proc/self: + */ +struct pspace *child_pspace(struct pspace *pspace, struct task_struct *tsk) +{ + struct pspace *child; + child = tsk->pspace; + while(child && (child->parent != pspace)) { + child = child->parent; + } + return child; +} + +static int proc_self_readlink(struct dentry *dentry, char __user *buffer, + int buflen) +{ + struct pspace *pspace = proc_pspace(dentry->d_inode); + char tmp[30]; + int result, len = 0; + while(buflen && pspace && (pspace != current->pspace)) { + pspace = child_pspace(pspace, current); + sprintf(tmp, "%d/pspace/", pspace->child_reaper->wid); + result = vfs_readlink(dentry, buffer, buflen, tmp); + if (result < 0) + goto out; + len += result; + buffer += result; + buflen -= result; + } + sprintf(tmp, "%d", current->tgid); + result = vfs_readlink(dentry, buffer, buflen, tmp); + if (result < 0) + goto out; + len += result; + result = len; + out: + return result; +} + +static void *proc_self_follow_link(struct vfsmount *mnt, struct dentry *dentry, struct nameidata *nd) +{ + struct pspace *pspace = proc_pspace(dentry->d_inode); + char tmp[30]; + int result; + while(pspace && (pspace != current->pspace)) { + pspace = child_pspace(pspace, current); + sprintf(tmp, "%d/pspace/", pspace->child_reaper->wid); + result = vfs_follow_link(nd, tmp); + if (result < 0) + goto out; + } + sprintf(tmp, "%d", current->tgid); + result = vfs_follow_link(nd,tmp); + out: + return ERR_PTR(result); +} + +static int self_revalidate(struct dentry *dentry, struct nameidata *nd) +{ + d_drop(dentry); + return 0; +} + +static int self_delete_dentry(struct dentry * dentry) +{ + return 1; +} + +static struct inode_operations proc_self_inode_operations = { + .readlink = proc_self_readlink, + .follow_link = proc_self_follow_link, +}; + +static struct dentry_operations self_dentry_operations = +{ + .d_revalidate = 
self_revalidate, + .d_delete = self_delete_dentry, +}; + +static void *proc_mounts_follow_link(struct vfsmount *mnt, struct dentry *dentry, struct nameidata *nd) +{ + static const char *mounts = "self/mounts"; + nd_set_link(nd, (char *)mounts); + return NULL; +} + +static struct inode_operations proc_mounts_inode_operations = { + .readlink = generic_readlink, + .follow_link = proc_mounts_follow_link, +}; + +#define LOAD_INT(x) ((x) >> FSHIFT) +#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) + +static int proc_loadavg(struct task_struct *task, char * buffer) +{ + struct pspace *pspace = task->pspace; + int a, b, c; + int len; + + a = avenrun[0] + (FIXED_1/200); + b = avenrun[1] + (FIXED_1/200); + c = avenrun[2] + (FIXED_1/200); + len = sprintf(buffer,"%d.%02d %d.%02d %d.%02d %ld/%d %d\n", + LOAD_INT(a), LOAD_FRAC(a), + LOAD_INT(b), LOAD_FRAC(b), + LOAD_INT(c), LOAD_FRAC(c), + nr_running(), pspace->nr_threads, pspace->last_pid); + + return len; +} + + static void *proc_pid_follow_link(struct vfsmount *mnt, struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; @@ -1113,15 +1238,39 @@ static struct inode_operations proc_pid_ .follow_link = proc_pid_follow_link }; +static ino_t proc_ino(struct dentry *dir, const char *name) +{ + /* The directory inode semaphore should be held here + * The entries in /proc directories don't change so it is + * safe for readdir to drop i_sem. 
+ */ + struct nameidata nd; + ino_t ino; + ino = ~0; + up(&dir->d_inode->i_sem); + memset(&nd, 0, sizeof(nd)); + nd.mnt = mntget(proc_mnt); + nd.dentry = dget(dir); + if (link_path_walk(name, &nd)) + goto out; + ino = nd.dentry->d_inode->i_ino; + path_release(&nd); + out: + down(&dir->d_inode->i_sem); + return ino; +} + + #define NUMBUF 10 static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) { - struct inode *inode = filp->f_dentry->d_inode; + struct dentry *dentry = filp->f_dentry; + struct inode *inode = dentry->d_inode; struct task_struct *p = proc_task(inode); unsigned int fd, tid, ino; int retval; - char buf[NUMBUF]; + char buf[NUMBUF + 1]; struct files_struct * files; struct fdtable *fdt; @@ -1138,7 +1287,7 @@ static int proc_readfd(struct file * fil goto out; filp->f_pos++; case 1: - ino = fake_ino(tid, PROC_TID_INO); + ino = parent_ino(dentry); if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) goto out; filp->f_pos++; @@ -1157,15 +1306,16 @@ static int proc_readfd(struct file * fil continue; rcu_read_unlock(); - j = NUMBUF; i = fd; + j = NUMBUF; + buf[j] = '\0'; do { j--; buf[j] = '0' + (i % 10); i /= 10; } while (i); - ino = fake_ino(tid, PROC_TID_FD_DIR + fd); + ino = proc_ino(dentry, buf+j); if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) { rcu_read_lock(); break; @@ -1220,12 +1370,11 @@ static int proc_pident_readdir(struct fi goto out; } p = ents + i; - while (p->name) { + for(; p->name; filp->f_pos++, p++) { + ino = proc_ino(dentry, p->name); if (filldir(dirent, p->name, p->len, filp->f_pos, - fake_ino(pid, p->type), p->mode >> 12) < 0) + ino, p->mode >> 12) < 0) goto out; - filp->f_pos++; - p++; } } @@ -1255,6 +1404,72 @@ static int proc_pspace_base_readdir(stru pspace_base_stuff,ARRAY_SIZE(pspace_base_stuff)); } +/* pspace root operations */ +static int proc_pspace_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +{ + struct inode *inode = dentry->d_inode; + struct task_struct *p = 
proc_task(inode); + generic_fillattr(inode, stat); + /* + * nr_processes is actually protected by the tasklist_lock; + * however, it's conventional to do reads, especially for + * reporting, without any locking whatsoever. + */ + stat->nlink = proc_root.nlink; + if (task_alive(p)) + stat->nlink += p->pspace->nr_processes; + return 0; +} + +static struct dentry *proc_pspace_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd) +{ + if (!proc_lookup(dir, dentry, nd)) { + return NULL; + } + + return proc_pid_lookup(dir, dentry, nd); +} + +static int proc_pspace_readdir(struct file * filp, + void * dirent, filldir_t filldir) +{ + unsigned int nr = filp->f_pos; + int ret; + + lock_kernel(); + + if (nr < FIRST_PROCESS_ENTRY) { + int error = proc_readdir(filp, dirent, filldir); + if (error <= 0) { + unlock_kernel(); + return error; + } + filp->f_pos = FIRST_PROCESS_ENTRY; + } + unlock_kernel(); + + ret = proc_pid_readdir(filp, dirent, filldir); + return ret; +} + +/* + * The root /proc directory is special, as it has the + * directories. Thus we don't use the generic + * directory handling functions for that.. + */ +static struct file_operations proc_pspace_operations = { + .read = generic_read_dir, + .readdir = proc_pspace_readdir, +}; + +/* + * proc root can do almost nothing.. 
+ */ +static struct inode_operations proc_pspace_inode_operations = { + .lookup = proc_pspace_lookup, + .getattr = proc_pspace_getattr, +}; + /* building an inode */ static int task_dumpable(struct task_struct *task) @@ -1273,8 +1488,8 @@ static int task_dumpable(struct task_str } -static struct inode *proc_pid_make_inode(struct super_block * sb, - struct pspace *pspace, struct task_struct *task, int ino) +static struct inode *proc_pid_make_inode(struct super_block * sb, + struct task_struct *task, int ino) { struct inode * inode; struct proc_inode *ei; @@ -1289,17 +1504,14 @@ static struct inode *proc_pid_make_inode ei = PROC_I(inode); ei->task = NULL; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; - inode->i_ino = fake_ino(task->tid, ino); if (!task_alive(task)) goto out_unlock; /* - * grab references to the task and pspace; + * grab references to the task */ - get_pspace(pspace); get_task_struct(task); - ei->pspace = pspace; ei->task = task; ei->type = ino; inode->i_uid = 0; @@ -1319,6 +1531,21 @@ out_unlock: return NULL; } +struct inode *proc_pspace_make_inode(struct super_block *sb, struct pspace *pspace) +{ + struct inode *inode; + inode = proc_pid_make_inode(sb, pspace->child_reaper, PROC_ROOT_INO); + if (inode) { + struct proc_inode *ei = PROC_I(inode); + inode->i_op = &proc_pspace_inode_operations; + inode->i_fop = &proc_pspace_operations; + inode->i_nlink = proc_root.nlink; + inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; + ei->pde = &proc_root; + } + return inode; +} + /* dentry stuff */ /* @@ -1443,7 +1670,6 @@ out: /* SMP-safe */ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd) { - struct pspace *pspace = proc_pspace(dir); struct task_struct *task = proc_task(dir); unsigned fd = name_to_int(&dentry->d_name); struct file * file; @@ -1456,7 +1682,7 @@ static struct dentry *proc_lookupfd(stru if (!task_alive(task)) goto out; - inode = proc_pid_make_inode(dir->i_sb, pspace, task, 
PROC_TID_FD_DIR+fd); + inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_FD_DIR+fd); if (!inode) goto out; ei = PROC_I(inode); @@ -1490,34 +1716,6 @@ out: return ERR_PTR(-ENOENT); } -static void *proc_pspace_follow_link(struct vfsmount *mnt, struct dentry *parent, struct nameidata *nd) -{ - /* Follow the pseudo link to the per pspace root of the /proc - * filesystem - */ - struct task_struct *task = proc_task(nd->dentry->d_inode); - struct dentry *dentry; - dentry = get_pspace_root_dentry(parent->d_sb, task->pspace); - - if (!IS_ERR(dentry)) { - struct vfsmount *cover; - cover = alloc_vfsmnt(NULL); - cover->mnt_sb = dentry->d_sb; - cover->mnt_root = dget(dentry); - cover->mnt_mountpoint = parent; - cover->mnt_parent = mnt; - mntget(mnt); - mntput(nd->mnt); - nd->mnt = cover; - atomic_inc(&dentry->d_sb->s_active); - } - /* Forget nd->dentry */ - dput(nd->dentry); - nd->dentry = dentry; - - return NULL; -} - static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir); static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd); @@ -1544,13 +1742,6 @@ static struct inode_operations proc_task .permission = proc_task_permission, }; -/* - * Magic pspace link/mount - */ -static struct inode_operations proc_pspace_inode_operations = { - .follow_link = proc_pspace_follow_link, -}; - #ifdef CONFIG_SECURITY static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) @@ -1643,28 +1834,39 @@ static struct dentry *proc_pident_lookup if (!p->name) goto out; + if ((p->type < PROC_SYMLINK_LAST) && !in_pspace(pspace, current)) + goto out; + error = -EINVAL; - inode = proc_pid_make_inode(dir->i_sb, pspace, task, p->type); + inode = proc_pid_make_inode(dir->i_sb, task, p->type); if (!inode) goto out; ei = PROC_I(inode); inode->i_mode = p->mode; + dentry->d_op = &pid_dentry_operations; /* * Yes, it does not scale. And it should not. 
Don't add * new entries into /proc// without very good reasons. */ switch(p->type) { + case PROC_SELF: + inode->i_op = &proc_self_inode_operations; + dentry->d_op = &self_dentry_operations; + break; + case PROC_MOUNTS: + inode->i_op = &proc_mounts_inode_operations; + dentry->d_op = &self_dentry_operations; + break; + case PROC_LOADAVG: + inode->i_fop = &proc_info_file_operations; + ei->op.proc_read = proc_loadavg; + break; case PROC_TGID_TASK: inode->i_nlink = 2 + get_tid_list(pspace, 2, NULL, dir); inode->i_op = &proc_task_inode_operations; inode->i_fop = &proc_task_operations; break; - case PROC_TGID_PSPACE: - inode->i_nlink = 2; - inode->i_op = &proc_pspace_inode_operations; - inode->i_ino = PROC_ROOT_INO; - break; case PROC_TID_FD: case PROC_TGID_FD: inode->i_nlink = 2; @@ -1810,7 +2012,6 @@ static struct dentry *proc_pident_lookup iput(inode); return ERR_PTR(-EINVAL); } - dentry->d_op = &pid_dentry_operations; d_add(dentry, inode); return NULL; @@ -1826,8 +2027,22 @@ static struct dentry *proc_tid_base_look return proc_pident_lookup(dir, dentry, tid_base_stuff); } -static struct dentry *proc_pspace_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ - return proc_pident_lookup(dir, dentry, pspace_base_stuff); +static struct dentry *proc_pspace_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +{ + struct inode *inode; + int error; + error = -ENOENT; + if (memcmp(dentry->d_name.name, "pspace", 6) != 0) + goto out; + error = -EINVAL; + inode = proc_pspace_make_inode(dir->i_sb, proc_pspace(dir)); + if (!inode) + goto out; + dentry->d_op = &pid_dentry_operations; + d_add(dentry, inode); + return NULL; + out: + return ERR_PTR(error); } static struct file_operations proc_tgid_base_operations = { @@ -1903,29 +2118,6 @@ static struct inode_operations proc_tid_ }; #endif -/* - * /proc/self: - */ -static int proc_self_readlink(struct dentry *dentry, char __user *buffer, - int buflen) -{ - char tmp[30]; - sprintf(tmp, 
"%d", current->tgid); - return vfs_readlink(dentry,buffer,buflen,tmp); -} - -static void *proc_self_follow_link(struct vfsmount *mnt, struct dentry *dentry, struct nameidata *nd) -{ - char tmp[30]; - sprintf(tmp, "%d", current->tgid); - return ERR_PTR(vfs_follow_link(nd,tmp)); -} - -static struct inode_operations proc_self_inode_operations = { - .readlink = proc_self_readlink, - .follow_link = proc_self_follow_link, -}; - /** * proc_pid_unhash - Unhash /proc/@pid entry from the dcache. * @p: task that should be flushed. @@ -1984,29 +2176,16 @@ void proc_pid_flush(struct dentry *proc_ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) { struct pspace *pspace = proc_pspace(nd->dentry->d_inode); + struct dentry *result; struct task_struct *task; struct inode *inode; - struct proc_inode *ei; unsigned tgid; int died; - if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4) && - (pspace == current->pspace)) - { - inode = new_inode(dir->i_sb); - if (!inode) - return ERR_PTR(-ENOMEM); - ei = PROC_I(inode); - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; - inode->i_ino = fake_ino(0, PROC_TGID_INO); - ei->pde = NULL; - inode->i_mode = S_IFLNK|S_IRWXUGO; - inode->i_uid = inode->i_gid = 0; - inode->i_size = 64; - inode->i_op = &proc_self_inode_operations; - d_add(dentry, inode); - return NULL; - } + result = proc_pident_lookup(dir, dentry, proc_base_stuff); + if (!IS_ERR(result) || (PTR_ERR(result) != -ENOENT)) + return result; + tgid = name_to_int(&dentry->d_name); if (tgid == ~0U) goto out; @@ -2019,7 +2198,7 @@ struct dentry *proc_pid_lookup(struct in if (!task) goto out; - inode = proc_pid_make_inode(dir->i_sb, pspace, task, PROC_TGID_INO); + inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO); if (!inode) { @@ -2087,7 +2266,7 @@ static struct dentry *proc_task_lookup(s if (leader->tgid != task->tgid) goto out_drop_task; - inode = proc_pid_make_inode(dir->i_sb, pspace, task, 
PROC_TID_INO); + inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_INO); if (!inode) @@ -2127,7 +2306,6 @@ static int get_tgid_list(struct pspace * struct task_struct *p; int nr_tgids = 0; - index--; read_lock(&tasklist_lock); p = NULL; if (version) { @@ -2193,21 +2371,28 @@ static int get_tid_list(struct pspace *p /* for the /proc/ directory itself, after non-process stuff has been done */ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) { - struct pspace *pspace = proc_pspace(filp->f_dentry->d_inode); + struct dentry *dentry = filp->f_dentry; + struct task_struct *task = proc_task(dentry->d_inode); + struct pspace *pspace; unsigned int tgid_array[PROC_MAXPIDS]; - char buf[PROC_NUMBUF]; + char buf[PROC_NUMBUF + 1]; unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; unsigned int nr_tgids, i; + ino_t ino; int next_tgid; - if (!nr) { - if (current->pspace == pspace) { - ino_t ino = fake_ino(0,PROC_TGID_INO); - if (filldir(dirent, "self", 4, filp->f_pos, ino, DT_LNK) < 0) - return 0; - } - filp->f_pos++; - nr++; + if (!task_alive(task)) + goto out; + + pspace = task->pspace; + for(; nr < (ARRAY_SIZE(proc_base_stuff) - 1); filp->f_pos++, nr++) { + struct pid_entry *p = proc_base_stuff + nr; + if ((p->type < PROC_SYMLINK_LAST) && !in_pspace(pspace, current)) + continue; + ino = proc_ino(dentry, p->name); + if (filldir(dirent, p->name, p->len, filp->f_pos, + ino, p->mode >> 12) < 0) + goto out; } /* f_version caches the tgid value that the last readdir call couldn't @@ -2215,6 +2400,7 @@ int proc_pid_readdir(struct file * filp, */ next_tgid = filp->f_version; filp->f_version = 0; + nr -= (ARRAY_SIZE(proc_base_stuff) - 1); for (;;) { nr_tgids = get_tgid_list(pspace, nr, next_tgid, tgid_array); if (!nr_tgids) { @@ -2231,14 +2417,14 @@ int proc_pid_readdir(struct file * filp, for (i=0;if_pos, ino, DT_DIR) < 0) { /* returning this tgid failed, save it as the first * pid for the next readir call */ @@ -2258,7 +2444,7 @@ static int 
proc_task_readdir(struct file { struct pspace *pspace = proc_pspace(filp->f_dentry->d_inode); unsigned int tid_array[PROC_MAXPIDS]; - char buf[PROC_NUMBUF]; + char buf[PROC_NUMBUF + 1]; unsigned int nr_tids, i; struct dentry *dentry = filp->f_dentry; struct inode *inode = dentry->d_inode; @@ -2292,12 +2478,12 @@ static int proc_task_readdir(struct file unsigned long j = PROC_NUMBUF; int tid = tid_array[i]; - ino = fake_ino(tid,PROC_TID_INO); - + buf[j] = '\0'; do buf[--j] = '0' + (tid % 10); while ((tid /= 10) != 0); + ino = proc_ino(dentry, buf+j); if (filldir(dirent, buf+j, PROC_NUMBUF-j, pos, ino, DT_DIR) < 0) break; pos++; diff --git a/fs/proc/inode.c b/fs/proc/inode.c index d620da9..70527e0 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -199,14 +199,9 @@ int proc_fill_super(struct super_block * s->s_op = &proc_sops; s->s_time_gran = 1; - root_inode = proc_get_inode(s, PROC_ROOT_INO, &proc_root); + root_inode = proc_pspace_make_inode(s, &init_pspace); if (!root_inode) goto out_no_root; - /* - * Fixup the root inode's nlink value - */ - root_inode->i_uid = 0; - root_inode->i_gid = 0; s->s_root = d_alloc_root(root_inode); if (!s->s_root) goto out_no_root; diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 18bab3f..715d479 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -36,7 +36,6 @@ extern int proc_tid_stat(struct task_str extern int proc_tgid_stat(struct task_struct *, char *); extern int proc_pid_status(struct task_struct *, char *); extern int proc_pid_statm(struct task_struct *, char *); -extern struct dentry *get_pspace_root_dentry(struct super_block *, struct pspace *); static inline struct task_struct *proc_task(struct inode *inode) { @@ -50,7 +49,11 @@ static inline int proc_type(struct inode static inline struct pspace *proc_pspace(struct inode *inode) { - return PROC_I(inode)->pspace; + struct task_struct *task = proc_task(inode); + return task ? 
task->pspace : NULL; } extern void free_proc_entry(struct proc_dir_entry *); + +extern struct inode *proc_pspace_make_inode(struct super_block *sb, struct pspace *pspace); + diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 9c1c92f..b262abd 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -53,8 +53,6 @@ #include #include "internal.h" -#define LOAD_INT(x) ((x) >> FSHIFT) -#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) /* * Warning: stuff below (imported functions) assumes that its output will fit * into one page. For some of those functions it may be wrong. Moreover, we @@ -80,23 +78,6 @@ static int proc_calc_metrics(char *page, return len; } -static int loadavg_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - int a, b, c; - int len; - - a = avenrun[0] + (FIXED_1/200); - b = avenrun[1] + (FIXED_1/200); - c = avenrun[2] + (FIXED_1/200); - len = sprintf(page,"%d.%02d %d.%02d %d.%02d %ld/%d %d\n", - LOAD_INT(a), LOAD_FRAC(a), - LOAD_INT(b), LOAD_FRAC(b), - LOAD_INT(c), LOAD_FRAC(c), - nr_running(), current->pspace->nr_threads, current->pspace->last_pid); - return proc_calc_metrics(page, start, off, count, eof, len); -} - static int uptime_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { @@ -571,7 +552,6 @@ void __init proc_misc_init(void) char *name; int (*read_proc)(char*,char**,off_t,int,int*,void*); } *p, simple_ones[] = { - {"loadavg", loadavg_read_proc}, {"uptime", uptime_read_proc}, {"meminfo", meminfo_read_proc}, {"version", version_read_proc}, @@ -591,8 +571,6 @@ void __init proc_misc_init(void) for (p = simple_ones; p->name; p++) create_proc_read_entry(p->name, 0, NULL, p->read_proc, NULL); - proc_symlink("mounts", NULL, "self/mounts"); - /* And now for trickier ones */ entry = create_proc_entry("kmsg", S_IRUSR, &proc_root); if (entry) diff --git a/fs/proc/root.c b/fs/proc/root.c index 042a4ca..e7f765a 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ 
-22,9 +22,6 @@ #include #include "internal.h" -static struct file_operations pspace_root_operations; -static struct inode_operations pspace_root_inode_operations; - struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc_root_driver; #ifdef CONFIG_SYSCTL @@ -86,157 +83,6 @@ void __init proc_root_init(void) proc_bus = proc_mkdir("bus", NULL); } -struct dentry *get_pspace_root_dentry(struct super_block *sb, struct pspace *pspace) -{ - /* Find a pspace root of the /proc filesystem - */ - struct dentry *parent = sb->s_root; - struct inode *root = parent->d_inode; - struct dentry *dentry; - struct qstr str; - - /* Each pspace root is stored in the main /proc directory - * it's name is the pointer to it's pspace. - * - * To keep user space from detecting the redirection an - * additional vfsmount is added, which allows me to skip - * the dentry name when building the d_path name. - */ - str.name = (const unsigned char *)&pspace; - str.len = sizeof(pspace); - str.hash = full_name_hash(str.name, str.len); - - /* See if I already have the pspace dentry and if not create it */ - down(&root->i_sem); - dentry = d_lookup(parent, &str); - if (!dentry) { - struct inode *inode; - dentry = d_alloc(parent, &str); - if (dentry) { - inode = new_inode(parent->d_sb); - } - if (dentry && inode) { - struct proc_inode *ei = PROC_I(inode); - get_pspace(pspace); - ei->pspace = pspace; - ei->task = NULL; - ei->pde = PDE(root); - inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; - inode->i_uid = 0; - inode->i_gid = 0; - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; - inode->i_op = &pspace_root_inode_operations; - inode->i_fop = &pspace_root_operations; - inode->i_ino = PROC_ROOT_INO; - inode->i_nlink = proc_root.nlink; - d_add(dentry, inode); - } else { - dput(dentry); - dentry = ERR_PTR(-ENOMEM); - } - } - up(&root->i_sem); - return dentry; -} - - -static void *proc_root_follow_link(struct vfsmount *mnt, struct dentry *parent, struct nameidata *nd) -{ - 
/* Follow the pseudo link to the per pspace root of the /proc - * filesystem - */ - struct dentry *dentry; - dentry = get_pspace_root_dentry(parent->d_sb, current->pspace); - - if (!IS_ERR(dentry)) { - struct vfsmount *cover; - cover = alloc_vfsmnt(NULL); - cover->mnt_sb = dentry->d_sb; - cover->mnt_root = dget(dentry); - cover->mnt_mountpoint = parent; - cover->mnt_parent = mnt; - mntget(mnt); - mntput(nd->mnt); - nd->mnt = cover; - atomic_inc(&dentry->d_sb->s_active); - } - /* Forget nd->dentry */ - dput(nd->dentry); - nd->dentry = dentry; - - return NULL; -} - -static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) -{ - struct inode *inode = dentry->d_inode; - struct pspace *pspace = proc_pspace(inode); - generic_fillattr(inode, stat); - /* - * nr_processes is actually protected by the tasklist_lock; - * however, it's conventional to do reads, especially for - * reporting, without any locking whatsoever. - */ - stat->nlink = proc_root.nlink + pspace->nr_processes; - return 0; -} - -static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd) -{ - if (!proc_lookup(dir, dentry, nd)) { - return NULL; - } - - return proc_pid_lookup(dir, dentry, nd); -} - -static int proc_root_readdir(struct file * filp, - void * dirent, filldir_t filldir) -{ - unsigned int nr = filp->f_pos; - int ret; - - lock_kernel(); - - if (nr < FIRST_PROCESS_ENTRY) { - int error = proc_readdir(filp, dirent, filldir); - if (error <= 0) { - unlock_kernel(); - return error; - } - filp->f_pos = FIRST_PROCESS_ENTRY; - } - unlock_kernel(); - - ret = proc_pid_readdir(filp, dirent, filldir); - return ret; -} - -/* - * The root /proc directory is special, as it has the - * directories. Thus we don't use the generic - * directory handling functions for that.. 
- */ -static struct file_operations pspace_root_operations = { - .read = generic_read_dir, - .readdir = proc_root_readdir, -}; - -/* - * proc root can do almost nothing.. - */ -static struct inode_operations pspace_root_inode_operations = { - .lookup = proc_root_lookup, - .getattr = proc_root_getattr, -}; - -/* - * proc root can do almost nothing.. - */ -static struct inode_operations proc_root_inode_operations = { - .follow_link = proc_root_follow_link, -}; - /* * This is the root "inode" in the /proc tree.. */ @@ -246,7 +92,6 @@ struct proc_dir_entry proc_root = { .name = "/proc", .mode = S_IFDIR | S_IRUGO | S_IXUGO, .nlink = 2, - .proc_iops = &proc_root_inode_operations, .parent = &proc_root, }; diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 1cf2680..0563581 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -242,9 +242,7 @@ static inline void kclist_add(struct kco extern void kclist_add(struct kcore_list *, void *, size_t); #endif -struct pspace; struct proc_inode { - struct pspace *pspace; struct task_struct *task; int type; union { @@ -265,6 +263,4 @@ static inline struct proc_dir_entry *PDE return PROC_I(inode)->pde; } -extern int proc_pspace_root_dentry(struct pspace *pspace); - #endif /* _LINUX_PROC_FS_H */ -- 1.0.GIT