From: Ulrich Drepper Here is a series of patches which introduce in total 13 new system calls which take a file descriptor/filename pair instead of a single file name. These functions, openat etc, have been discussed on numerous occasions. They are needed to implement race-free filesystem traversal, they are necessary to implement a virtual per-thread current working directory (think multi-threaded backup software), etc. We have in glibc today implementations of the interfaces which use the /proc/self/fd magic. But this code is rather expensive. Here are some results (similar to what Jim Meyering posted before). The test creates a deep directory hierarchy on a tmpfs filesystem. Then rm -fr is used to remove all directories. Without syscall support I get this: real 0m31.921s user 0m0.688s sys 0m31.234s With syscall support the results are much better: real 0m20.699s user 0m0.536s sys 0m20.149s The interfaces are for obvious reasons currently not much used. But they'll be used. coreutils (and Jeff's posixutils) are already using them. Furthermore, code like ftw/fts in libc (maybe even glob) will also start using them. I expect a patch to make follow soon. Every program which is walking the filesystem tree will benefit. Signed-off-by: Ulrich Drepper Signed-off-by: Alexey Dobriyan Acked-by: Ingo Molnar Signed-off-by: Andrew Morton --- arch/alpha/kernel/osf_sys.c | 2 arch/sparc64/kernel/sys_sparc32.c | 4 fs/compat.c | 48 ++++++- fs/exec.c | 2 fs/namei.c | 169 ++++++++++++++++++++++------ fs/open.c | 83 +++++++++++-- fs/stat.c | 54 +++++++- include/linux/fcntl.h | 7 + include/linux/fs.h | 7 - include/linux/namei.h | 7 - include/linux/time.h | 2 11 files changed, 308 insertions(+), 77 deletions(-) diff -puN fs/compat.c~vfa-at-functions-core fs/compat.c --- 25/fs/compat.c~vfa-at-functions-core Fri Jan 13 17:54:38 2006 +++ 25-akpm/fs/compat.c Fri Jan 13 17:54:38 2006 @@ -68,10 +68,10 @@ asmlinkage long compat_sys_utime(char __ tv[0].tv_usec = 0; tv[1].tv_usec = 0; } - return do_utimes(filename, t ? tv : NULL); + return do_utimes(AT_FDCWD, filename, t ? tv : NULL); } -asmlinkage long compat_sys_utimes(char __user *filename, struct compat_timeval __user *t) +asmlinkage long compat_sys_futimesat(int dfd, char __user *filename, struct compat_timeval __user *t) { struct timeval tv[2]; @@ -82,14 +82,19 @@ asmlinkage long compat_sys_utimes(char _ get_user(tv[1].tv_usec, &t[1].tv_usec)) return -EFAULT; } - return do_utimes(filename, t ? tv : NULL); + return do_utimes(dfd, filename, t ? tv : NULL); +} + +asmlinkage long compat_sys_utimes(char __user *filename, struct compat_timeval __user *t) +{ + return compat_sys_futimesat(AT_FDCWD, filename, t); } asmlinkage long compat_sys_newstat(char __user * filename, struct compat_stat __user *statbuf) { struct kstat stat; - int error = vfs_stat(filename, &stat); + int error = vfs_stat_fd(AT_FDCWD, filename, &stat); if (!error) error = cp_compat_stat(&stat, statbuf); @@ -100,13 +105,34 @@ asmlinkage long compat_sys_newlstat(char struct compat_stat __user *statbuf) { struct kstat stat; - int error = vfs_lstat(filename, &stat); + int error = vfs_lstat_fd(AT_FDCWD, filename, &stat); if (!error) error = cp_compat_stat(&stat, statbuf); return error; } +asmlinkage long compat_sys_newfstatat(int dfd, char __user *filename, + struct compat_stat __user *statbuf, int flag) +{ + struct kstat stat; + int error = -EINVAL; + + if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) + goto out; + + if (flag & AT_SYMLINK_NOFOLLOW) + error = vfs_lstat_fd(dfd, filename, &stat); + else + error = vfs_stat_fd(dfd, filename, &stat); + + if (!error) + error = cp_compat_stat(&stat, statbuf); + +out: + return error; +} + asmlinkage long compat_sys_newfstat(unsigned int fd, struct compat_stat __user * statbuf) { @@ -1290,7 +1316,17 @@ out: asmlinkage long compat_sys_open(const char __user *filename, int flags, int mode) { - return do_sys_open(filename, flags, mode); + return do_sys_open(AT_FDCWD, filename, flags, mode); +} + +/* + * Exactly like fs/open.c:sys_openat(), except that it doesn't set the + * O_LARGEFILE flag. + */ +asmlinkage long +compat_sys_openat(int dfd, const char __user *filename, int flags, int mode) +{ + return do_sys_open(dfd, filename, flags, mode); } /* diff -puN fs/exec.c~vfa-at-functions-core fs/exec.c --- 25/fs/exec.c~vfa-at-functions-core Fri Jan 13 17:54:38 2006 +++ 25-akpm/fs/exec.c Fri Jan 13 17:54:38 2006 @@ -477,7 +477,7 @@ struct file *open_exec(const char *name) int err; struct file *file; - err = path_lookup_open(name, LOOKUP_FOLLOW, &nd, FMODE_READ); + err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd, FMODE_READ); file = ERR_PTR(err); if (!err) { diff -puN fs/namei.c~vfa-at-functions-core fs/namei.c --- 25/fs/namei.c~vfa-at-functions-core Fri Jan 13 17:54:38 2006 +++ 25-akpm/fs/namei.c Fri Jan 13 17:54:38 2006 @@ -30,6 +30,8 @@ #include #include #include +#include +#include #include #include @@ -1063,7 +1065,7 @@ set_it: } /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ -int fastcall path_lookup(const char *name, unsigned int flags, struct nameidata *nd) +static int fastcall do_path_lookup(int dfd, const char *name, unsigned int flags, struct nameidata *nd) { int retval = 0; @@ -1083,9 +1085,37 @@ int fastcall path_lookup(const char *nam } nd->mnt = mntget(current->fs->rootmnt); nd->dentry = dget(current->fs->root); - } else { + } else if (dfd == AT_FDCWD) { nd->mnt = mntget(current->fs->pwdmnt); nd->dentry = dget(current->fs->pwd); + } else { + struct file *file; + int fput_needed; + struct dentry *dentry; + + file = fget_light(dfd, &fput_needed); + if (!file) { + retval = -EBADF; + goto out_fail; + } + + dentry = file->f_dentry; + + if (!S_ISDIR(dentry->d_inode->i_mode)) { + retval = -ENOTDIR; + out_fail2: + fput_light(file, fput_needed); + goto out_fail; + } + + retval = file_permission(file, MAY_EXEC); + if (retval) + goto out_fail2; + + nd->mnt = mntget(file->f_vfsmnt); + nd->dentry = dget(dentry); + + fput_light(file, fput_needed); } read_unlock(¤t->fs->lock); current->total_link_count = 0; @@ -1093,11 +1123,18 @@ int fastcall path_lookup(const char *nam out: if (nd && nd->dentry && nd->dentry->d_inode) audit_inode(name, nd->dentry->d_inode, flags); +out_fail: return retval; } -static int __path_lookup_intent_open(const char *name, unsigned int lookup_flags, - struct nameidata *nd, int open_flags, int create_mode) +int fastcall path_lookup(const char *name, unsigned int flags, struct nameidata *nd) +{ + return do_path_lookup(AT_FDCWD, name, flags, nd); +} + +static int __path_lookup_intent_open(int dfd, const char *name, + unsigned int lookup_flags, struct nameidata *nd, + int open_flags, int create_mode) { struct file *filp = get_empty_filp(); int err; @@ -1107,7 +1144,7 @@ static int __path_lookup_intent_open(con nd->intent.open.file = filp; nd->intent.open.flags = open_flags; nd->intent.open.create_mode = create_mode; - err = path_lookup(name, lookup_flags|LOOKUP_OPEN, nd); + err = do_path_lookup(dfd, name, lookup_flags|LOOKUP_OPEN, nd); if (IS_ERR(nd->intent.open.file)) { if (err == 0) { err = PTR_ERR(nd->intent.open.file); @@ -1125,10 +1162,10 @@ static int __path_lookup_intent_open(con * @nd: pointer to nameidata * @open_flags: open intent flags */ -int path_lookup_open(const char *name, unsigned int lookup_flags, +int path_lookup_open(int dfd, const char *name, unsigned int lookup_flags, struct nameidata *nd, int open_flags) { - return __path_lookup_intent_open(name, lookup_flags, nd, + return __path_lookup_intent_open(dfd, name, lookup_flags, nd, open_flags, 0); } @@ -1140,12 +1177,12 @@ int path_lookup_open(const char *name, u * @open_flags: open intent flags * @create_mode: create intent flags */ -static int path_lookup_create(const char *name, unsigned int lookup_flags, - struct nameidata *nd, int open_flags, - int create_mode) +static int path_lookup_create(int dfd, const char *name, + unsigned int lookup_flags, struct nameidata *nd, + int open_flags, int create_mode) { - return __path_lookup_intent_open(name, lookup_flags|LOOKUP_CREATE, nd, - open_flags, create_mode); + return __path_lookup_intent_open(dfd, name, lookup_flags|LOOKUP_CREATE, + nd, open_flags, create_mode); } int __user_path_lookup_open(const char __user *name, unsigned int lookup_flags, @@ -1155,7 +1192,7 @@ int __user_path_lookup_open(const char _ int err = PTR_ERR(tmp); if (!IS_ERR(tmp)) { - err = __path_lookup_intent_open(tmp, lookup_flags, nd, open_flags, 0); + err = __path_lookup_intent_open(AT_FDCWD, tmp, lookup_flags, nd, open_flags, 0); putname(tmp); } return err; @@ -1247,18 +1284,24 @@ access: * that namei follows links, while lnamei does not. * SMP-safe */ -int fastcall __user_walk(const char __user *name, unsigned flags, struct nameidata *nd) +int fastcall __user_walk_fd(int dfd, const char __user *name, unsigned flags, + struct nameidata *nd) { char *tmp = getname(name); int err = PTR_ERR(tmp); if (!IS_ERR(tmp)) { - err = path_lookup(tmp, flags, nd); + err = do_path_lookup(dfd, tmp, flags, nd); putname(tmp); } return err; } +int fastcall __user_walk(const char __user *name, unsigned flags, struct nameidata *nd) +{ + return __user_walk_fd(AT_FDCWD, name, flags, nd); +} + /* * It's inline, so penalty for filesystems that don't use sticky bit is * minimal. @@ -1518,7 +1561,8 @@ int may_open(struct nameidata *nd, int a * for symlinks (where the permissions are checked later). * SMP-safe */ -int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd) +int open_namei(int dfd, const char *pathname, int flag, int mode, + struct nameidata *nd) { int acc_mode, error; struct path path; @@ -1540,7 +1584,8 @@ int open_namei(const char * pathname, in * The simplest case - just a plain lookup. */ if (!(flag & O_CREAT)) { - error = path_lookup_open(pathname, lookup_flags(flag), nd, flag); + error = path_lookup_open(dfd, pathname, lookup_flags(flag), + nd, flag); if (error) return error; goto ok; @@ -1549,7 +1594,7 @@ int open_namei(const char * pathname, in /* * Create - we need to know the parent. */ - error = path_lookup_create(pathname, LOOKUP_PARENT, nd, flag, mode); + error = path_lookup_create(dfd, pathname, LOOKUP_PARENT, nd, flag, mode); if (error) return error; @@ -1744,7 +1789,8 @@ int vfs_mknod(struct inode *dir, struct return error; } -asmlinkage long sys_mknod(const char __user * filename, int mode, unsigned dev) +asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode, + unsigned dev) { int error = 0; char * tmp; @@ -1757,7 +1803,7 @@ asmlinkage long sys_mknod(const char __u if (IS_ERR(tmp)) return PTR_ERR(tmp); - error = path_lookup(tmp, LOOKUP_PARENT, &nd); + error = do_path_lookup(dfd, tmp, LOOKUP_PARENT, &nd); if (error) goto out; dentry = lookup_create(&nd, 0); @@ -1793,6 +1839,11 @@ out: return error; } +asmlinkage long sys_mknod(const char __user *filename, int mode, unsigned dev) +{ + return sys_mknodat(AT_FDCWD, filename, mode, dev); +} + int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) { int error = may_create(dir, dentry, NULL); @@ -1815,7 +1866,7 @@ int vfs_mkdir(struct inode *dir, struct return error; } -asmlinkage long sys_mkdir(const char __user * pathname, int mode) +asmlinkage long sys_mkdirat(int dfd, const char __user *pathname, int mode) { int error = 0; char * tmp; @@ -1826,7 +1877,7 @@ asmlinkage long sys_mkdir(const char __u struct dentry *dentry; struct nameidata nd; - error = path_lookup(tmp, LOOKUP_PARENT, &nd); + error = do_path_lookup(dfd, tmp, LOOKUP_PARENT, &nd); if (error) goto out; dentry = lookup_create(&nd, 1); @@ -1846,6 +1897,11 @@ out: return error; } +asmlinkage long sys_mkdir(const char __user *pathname, int mode) +{ + return sys_mkdirat(AT_FDCWD, pathname, mode); +} + /* * We try to drop the dentry early: we should have * a usage count of 2 if we're the only user of this @@ -1907,7 +1963,7 @@ int vfs_rmdir(struct inode *dir, struct return error; } -asmlinkage long sys_rmdir(const char __user * pathname) +static long do_rmdir(int dfd, const char __user *pathname) { int error = 0; char * name; @@ -1918,7 +1974,7 @@ asmlinkage long sys_rmdir(const char __u if(IS_ERR(name)) return PTR_ERR(name); - error = path_lookup(name, LOOKUP_PARENT, &nd); + error = do_path_lookup(dfd, name, LOOKUP_PARENT, &nd); if (error) goto exit; @@ -1948,6 +2004,11 @@ exit: return error; } +asmlinkage long sys_rmdir(const char __user *pathname) +{ + return do_rmdir(AT_FDCWD, pathname); +} + int vfs_unlink(struct inode *dir, struct dentry *dentry) { int error = may_delete(dir, dentry, 0); @@ -1984,7 +2045,7 @@ int vfs_unlink(struct inode *dir, struct * writeout happening, and we don't want to prevent access to the directory * while waiting on the I/O. */ -asmlinkage long sys_unlink(const char __user * pathname) +static long do_unlinkat(int dfd, const char __user *pathname) { int error = 0; char * name; @@ -1996,7 +2057,7 @@ asmlinkage long sys_unlink(const char __ if(IS_ERR(name)) return PTR_ERR(name); - error = path_lookup(name, LOOKUP_PARENT, &nd); + error = do_path_lookup(dfd, name, LOOKUP_PARENT, &nd); if (error) goto exit; error = -EISDIR; @@ -2031,6 +2092,22 @@ slashes: goto exit2; } +asmlinkage long sys_unlinkat(int dfd, const char __user *pathname, int flag) +{ + if ((flag & ~AT_REMOVEDIR) != 0) + return -EINVAL; + + if (flag & AT_REMOVEDIR) + return do_rmdir(dfd, pathname); + + return do_unlinkat(dfd, pathname); +} + +asmlinkage long sys_unlink(const char __user *pathname) +{ + return do_unlinkat(AT_FDCWD, pathname); +} + int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, int mode) { int error = may_create(dir, dentry, NULL); @@ -2052,7 +2129,8 @@ int vfs_symlink(struct inode *dir, struc return error; } -asmlinkage long sys_symlink(const char __user * oldname, const char __user * newname) +asmlinkage long sys_symlinkat(const char __user *oldname, + int newdfd, const char __user *newname) { int error = 0; char * from; @@ -2067,7 +2145,7 @@ asmlinkage long sys_symlink(const char _ struct dentry *dentry; struct nameidata nd; - error = path_lookup(to, LOOKUP_PARENT, &nd); + error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd); if (error) goto out; dentry = lookup_create(&nd, 0); @@ -2085,6 +2163,11 @@ out: return error; } +asmlinkage long sys_symlink(const char __user *oldname, const char __user *newname) +{ + return sys_symlinkat(oldname, AT_FDCWD, newname); +} + int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) { struct inode *inode = old_dentry->d_inode; @@ -2132,7 +2215,8 @@ int vfs_link(struct dentry *old_dentry, * with linux 2.0, and to avoid hard-linking to directories * and other special files. --ADM */ -asmlinkage long sys_link(const char __user * oldname, const char __user * newname) +asmlinkage long sys_linkat(int olddfd, const char __user *oldname, + int newdfd, const char __user *newname) { struct dentry *new_dentry; struct nameidata nd, old_nd; @@ -2143,10 +2227,10 @@ asmlinkage long sys_link(const char __us if (IS_ERR(to)) return PTR_ERR(to); - error = __user_walk(oldname, 0, &old_nd); + error = __user_walk_fd(olddfd, oldname, 0, &old_nd); if (error) goto exit; - error = path_lookup(to, LOOKUP_PARENT, &nd); + error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd); if (error) goto out; error = -EXDEV; @@ -2169,6 +2253,11 @@ exit: return error; } +asmlinkage long sys_link(const char __user *oldname, const char __user *newname) +{ + return sys_linkat(AT_FDCWD, oldname, AT_FDCWD, newname); +} + /* * The worst of all namespace operations - renaming directory. "Perverted" * doesn't even start to describe it. Somebody in UCB had a heck of a trip... @@ -2315,7 +2404,8 @@ int vfs_rename(struct inode *old_dir, st return error; } -static int do_rename(const char * oldname, const char * newname) +static int do_rename(int olddfd, const char *oldname, + int newdfd, const char *newname) { int error = 0; struct dentry * old_dir, * new_dir; @@ -2323,11 +2413,11 @@ static int do_rename(const char * oldnam struct dentry * trap; struct nameidata oldnd, newnd; - error = path_lookup(oldname, LOOKUP_PARENT, &oldnd); + error = do_path_lookup(olddfd, oldname, LOOKUP_PARENT, &oldnd); if (error) goto exit; - error = path_lookup(newname, LOOKUP_PARENT, &newnd); + error = do_path_lookup(newdfd, newname, LOOKUP_PARENT, &newnd); if (error) goto exit1; @@ -2391,7 +2481,8 @@ exit: return error; } -asmlinkage long sys_rename(const char __user * oldname, const char __user * newname) +asmlinkage long sys_renameat(int olddfd, const char __user *oldname, + int newdfd, const char __user *newname) { int error; char * from; @@ -2403,13 +2494,18 @@ asmlinkage long sys_rename(const char __ to = getname(newname); error = PTR_ERR(to); if (!IS_ERR(to)) { - error = do_rename(from,to); + error = do_rename(olddfd, from, newdfd, to); putname(to); } putname(from); return error; } +asmlinkage long sys_rename(const char __user *oldname, const char __user *newname) +{ + return sys_renameat(AT_FDCWD, oldname, AT_FDCWD, newname); +} + int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link) { int len; @@ -2553,6 +2649,7 @@ struct inode_operations page_symlink_ino }; EXPORT_SYMBOL(__user_walk); +EXPORT_SYMBOL(__user_walk_fd); EXPORT_SYMBOL(follow_down); EXPORT_SYMBOL(follow_up); EXPORT_SYMBOL(get_write_access); /* binfmt_aout */ diff -puN fs/open.c~vfa-at-functions-core fs/open.c --- 25/fs/open.c~vfa-at-functions-core Fri Jan 13 17:54:38 2006 +++ 25-akpm/fs/open.c Fri Jan 13 17:54:38 2006 @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -384,7 +385,7 @@ asmlinkage long sys_utime(char __user * error = get_user(newattrs.ia_atime.tv_sec, ×->actime); newattrs.ia_atime.tv_nsec = 0; - if (!error) + if (!error) error = get_user(newattrs.ia_mtime.tv_sec, ×->modtime); newattrs.ia_mtime.tv_nsec = 0; if (error) @@ -415,14 +416,14 @@ out: * must be owner or have write permission. * Else, update from *times, must be owner or super user. */ -long do_utimes(char __user * filename, struct timeval * times) +long do_utimes(int dfd, char __user *filename, struct timeval *times) { int error; struct nameidata nd; struct inode * inode; struct iattr newattrs; - error = user_path_walk(filename, &nd); + error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd); if (error) goto out; @@ -462,13 +463,18 @@ out: return error; } -asmlinkage long sys_utimes(char __user * filename, struct timeval __user * utimes) +asmlinkage long sys_futimesat(int dfd, char __user *filename, struct timeval __user *utimes) { struct timeval times[2]; if (utimes && copy_from_user(×, utimes, sizeof(times))) return -EFAULT; - return do_utimes(filename, utimes ? times : NULL); + return do_utimes(dfd, filename, utimes ? times : NULL); +} + +asmlinkage long sys_utimes(char __user *filename, struct timeval __user *utimes) +{ + return sys_futimesat(AT_FDCWD, filename, utimes); } @@ -477,7 +483,7 @@ asmlinkage long sys_utimes(char __user * * We do this by temporarily clearing all FS-related capabilities and * switching the fsuid/fsgid around to the real ones. */ -asmlinkage long sys_access(const char __user * filename, int mode) +asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) { struct nameidata nd; int old_fsuid, old_fsgid; @@ -507,7 +513,7 @@ asmlinkage long sys_access(const char __ else current->cap_effective = current->cap_permitted; - res = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd); + res = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd); if (!res) { res = vfs_permission(&nd, mode); /* SuS v2 requires we report a read only fs too */ @@ -524,6 +530,11 @@ asmlinkage long sys_access(const char __ return res; } +asmlinkage long sys_access(const char __user *filename, int mode) +{ + return sys_faccessat(AT_FDCWD, filename, mode); +} + asmlinkage long sys_chdir(const char __user * filename) { struct nameidata nd; @@ -638,14 +649,15 @@ out: return err; } -asmlinkage long sys_chmod(const char __user * filename, mode_t mode) +asmlinkage long sys_fchmodat(int dfd, const char __user *filename, + mode_t mode) { struct nameidata nd; struct inode * inode; int error; struct iattr newattrs; - error = user_path_walk(filename, &nd); + error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd); if (error) goto out; inode = nd.dentry->d_inode; @@ -672,6 +684,11 @@ out: return error; } +asmlinkage long sys_chmod(const char __user *filename, mode_t mode) +{ + return sys_fchmodat(AT_FDCWD, filename, mode); +} + static int chown_common(struct dentry * dentry, uid_t user, gid_t group) { struct inode * inode; @@ -720,6 +737,26 @@ asmlinkage long sys_chown(const char __u return error; } +asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user, + gid_t group, int flag) +{ + struct nameidata nd; + int error = -EINVAL; + int follow; + + if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) + goto out; + + follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW; + error = __user_walk_fd(dfd, filename, follow, &nd); + if (!error) { + error = chown_common(nd.dentry, user, group); + path_release(&nd); + } +out: + return error; +} + asmlinkage long sys_lchown(const char __user * filename, uid_t user, gid_t group) { struct nameidata nd; @@ -826,7 +863,8 @@ cleanup_file: * for the internal routines (ie open_namei()/follow_link() etc). 00 is * used by symlinks. */ -struct file *filp_open(const char * filename, int flags, int mode) +static struct file *do_filp_open(int dfd, const char *filename, int flags, + int mode) { int namei_flags, error; struct nameidata nd; @@ -835,12 +873,17 @@ struct file *filp_open(const char * file if ((namei_flags+1) & O_ACCMODE) namei_flags++; - error = open_namei(filename, namei_flags, mode, &nd); + error = open_namei(dfd, filename, namei_flags, mode, &nd); if (!error) return nameidata_to_filp(&nd, flags); return ERR_PTR(error); } + +struct file *filp_open(const char *filename, int flags, int mode) +{ + return do_filp_open(AT_FDCWD, filename, flags, mode); +} EXPORT_SYMBOL(filp_open); /** @@ -997,7 +1040,7 @@ void fastcall put_unused_fd(unsigned int EXPORT_SYMBOL(put_unused_fd); /* - * Install a file pointer in the fd array. + * Install a file pointer in the fd array. * * The VFS is full of places where we drop the files lock between * setting the open_fds bitmap and installing the file in the file @@ -1022,7 +1065,7 @@ void fastcall fd_install(unsigned int fd EXPORT_SYMBOL(fd_install); -long do_sys_open(const char __user *filename, int flags, int mode) +long do_sys_open(int dfd, const char __user *filename, int flags, int mode) { char *tmp = getname(filename); int fd = PTR_ERR(tmp); @@ -1030,7 +1073,7 @@ long do_sys_open(const char __user *file if (!IS_ERR(tmp)) { fd = get_unused_fd(); if (fd >= 0) { - struct file *f = filp_open(tmp, flags, mode); + struct file *f = do_filp_open(dfd, tmp, flags, mode); if (IS_ERR(f)) { put_unused_fd(fd); fd = PTR_ERR(f); @@ -1049,10 +1092,20 @@ asmlinkage long sys_open(const char __us if (force_o_largefile()) flags |= O_LARGEFILE; - return do_sys_open(filename, flags, mode); + return do_sys_open(AT_FDCWD, filename, flags, mode); } EXPORT_SYMBOL_GPL(sys_open); +asmlinkage long sys_openat(int dfd, const char __user *filename, int flags, + int mode) +{ + if (force_o_largefile()) + flags |= O_LARGEFILE; + + return do_sys_open(dfd, filename, flags, mode); +} +EXPORT_SYMBOL_GPL(sys_openat); + #ifndef __alpha__ /* diff -puN fs/stat.c~vfa-at-functions-core fs/stat.c --- 25/fs/stat.c~vfa-at-functions-core Fri Jan 13 17:54:38 2006 +++ 25-akpm/fs/stat.c Fri Jan 13 17:54:38 2006 @@ -63,12 +63,12 @@ int vfs_getattr(struct vfsmount *mnt, st EXPORT_SYMBOL(vfs_getattr); -int vfs_stat(char __user *name, struct kstat *stat) +int vfs_stat_fd(int dfd, char __user *name, struct kstat *stat) { struct nameidata nd; int error; - error = user_path_walk(name, &nd); + error = __user_walk_fd(dfd, name, LOOKUP_FOLLOW, &nd); if (!error) { error = vfs_getattr(nd.mnt, nd.dentry, stat); path_release(&nd); @@ -76,14 +76,19 @@ int vfs_stat(char __user *name, struct k return error; } +int vfs_stat(char __user *name, struct kstat *stat) +{ + return vfs_stat_fd(AT_FDCWD, name, stat); +} + EXPORT_SYMBOL(vfs_stat); -int vfs_lstat(char __user *name, struct kstat *stat) +int vfs_lstat_fd(int dfd, char __user *name, struct kstat *stat) { struct nameidata nd; int error; - error = user_path_walk_link(name, &nd); + error = __user_walk_fd(dfd, name, 0, &nd); if (!error) { error = vfs_getattr(nd.mnt, nd.dentry, stat); path_release(&nd); @@ -91,6 +96,11 @@ int vfs_lstat(char __user *name, struct return error; } +int vfs_lstat(char __user *name, struct kstat *stat) +{ + return vfs_lstat_fd(AT_FDCWD, name, stat); +} + EXPORT_SYMBOL(vfs_lstat); int vfs_fstat(unsigned int fd, struct kstat *stat) @@ -151,7 +161,7 @@ static int cp_old_stat(struct kstat *sta asmlinkage long sys_stat(char __user * filename, struct __old_kernel_stat __user * statbuf) { struct kstat stat; - int error = vfs_stat(filename, &stat); + int error = vfs_stat_fd(AT_FDCWD, filename, &stat); if (!error) error = cp_old_stat(&stat, statbuf); @@ -161,7 +171,7 @@ asmlinkage long sys_stat(char __user * f asmlinkage long sys_lstat(char __user * filename, struct __old_kernel_stat __user * statbuf) { struct kstat stat; - int error = vfs_lstat(filename, &stat); + int error = vfs_lstat_fd(AT_FDCWD, filename, &stat); if (!error) error = cp_old_stat(&stat, statbuf); @@ -232,7 +242,7 @@ static int cp_new_stat(struct kstat *sta asmlinkage long sys_newstat(char __user * filename, struct stat __user * statbuf) { struct kstat stat; - int error = vfs_stat(filename, &stat); + int error = vfs_stat_fd(AT_FDCWD, filename, &stat); if (!error) error = cp_new_stat(&stat, statbuf); @@ -242,13 +252,32 @@ asmlinkage long sys_newstat(char __user asmlinkage long sys_newlstat(char __user * filename, struct stat __user * statbuf) { struct kstat stat; - int error = vfs_lstat(filename, &stat); + int error = vfs_lstat_fd(AT_FDCWD, filename, &stat); if (!error) error = cp_new_stat(&stat, statbuf); return error; } +asmlinkage long sys_newfstatat(int dfd, char __user *filename, struct stat __user *statbuf, int flag) +{ + struct kstat stat; + int error = -EINVAL; + + if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) + goto out; + + if (flag & AT_SYMLINK_NOFOLLOW) + error = vfs_lstat_fd(dfd, filename, &stat); + else + error = vfs_stat_fd(dfd, filename, &stat); + + if (!error) + error = cp_new_stat(&stat, statbuf); + +out: + return error; +} asmlinkage long sys_newfstat(unsigned int fd, struct stat __user * statbuf) { struct kstat stat; @@ -260,7 +289,7 @@ asmlinkage long sys_newfstat(unsigned in return error; } -asmlinkage long sys_readlink(const char __user * path, char __user * buf, int bufsiz) +asmlinkage long sys_readlinkat(int dfd, const char __user *path, char __user *buf, int bufsiz) { struct nameidata nd; int error; @@ -268,7 +297,7 @@ asmlinkage long sys_readlink(const char if (bufsiz <= 0) return -EINVAL; - error = user_path_walk_link(path, &nd); + error = __user_walk_fd(dfd, path, 0, &nd); if (!error) { struct inode * inode = nd.dentry->d_inode; @@ -285,6 +314,11 @@ asmlinkage long sys_readlink(const char return error; } +asmlinkage long sys_readlink(const char __user *path, char __user *buf, int bufsiz) +{ + return sys_readlinkat(AT_FDCWD, path, buf, bufsiz); +} + /* ---------- LFS-64 ----------- */ #ifdef __ARCH_WANT_STAT64 diff -puN include/linux/fcntl.h~vfa-at-functions-core include/linux/fcntl.h --- 25/include/linux/fcntl.h~vfa-at-functions-core Fri Jan 13 17:54:38 2006 +++ 25-akpm/include/linux/fcntl.h Fri Jan 13 17:54:38 2006 @@ -23,6 +23,13 @@ #define DN_ATTRIB 0x00000020 /* File changed attibutes */ #define DN_MULTISHOT 0x80000000 /* Don't remove notifier */ +#define AT_FDCWD -100 /* Special value used to indicate + openat should use the current + working directory. */ +#define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */ +#define AT_REMOVEDIR 0x200 /* Remove directory instead of + unlinking file. */ + #ifdef __KERNEL__ #ifndef force_o_largefile diff -puN include/linux/fs.h~vfa-at-functions-core include/linux/fs.h --- 25/include/linux/fs.h~vfa-at-functions-core Fri Jan 13 17:54:38 2006 +++ 25-akpm/include/linux/fs.h Fri Jan 13 17:54:38 2006 @@ -1347,7 +1347,8 @@ static inline int break_lease(struct ino extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs, struct file *filp); -extern long do_sys_open(const char __user *filename, int flags, int mode); +extern long do_sys_open(int fdf, const char __user *filename, int flags, + int mode); extern struct file *filp_open(const char *, int, int); extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); extern int filp_close(struct file *, fl_owner_t id); @@ -1487,7 +1488,7 @@ static inline void allow_write_access(st } extern int do_pipe(int *); -extern int open_namei(const char *, int, int, struct nameidata *); +extern int open_namei(int dfd, const char *, int, int, struct nameidata *); extern int may_open(struct nameidata *, int, int); extern int kernel_read(struct file *, unsigned long, char *, unsigned long); @@ -1685,6 +1686,8 @@ extern int vfs_readdir(struct file *, fi extern int vfs_stat(char __user *, struct kstat *); extern int vfs_lstat(char __user *, struct kstat *); +extern int vfs_stat_fd(int dfd, char __user *, struct kstat *); +extern int vfs_lstat_fd(int dfd, char __user *, struct kstat *); extern int vfs_fstat(unsigned int, struct kstat *); extern int vfs_ioctl(struct file *, unsigned int, unsigned int, unsigned long); diff -puN include/linux/namei.h~vfa-at-functions-core include/linux/namei.h --- 25/include/linux/namei.h~vfa-at-functions-core Fri Jan 13 17:54:38 2006 +++ 25-akpm/include/linux/namei.h Fri Jan 13 17:54:38 2006 @@ -56,10 +56,11 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA #define LOOKUP_ACCESS (0x0400) extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *)); +extern int FASTCALL(__user_walk_fd(int dfd, const char __user *, unsigned, struct nameidata *)); #define user_path_walk(name,nd) \ - __user_walk(name, LOOKUP_FOLLOW, nd) + __user_walk_fd(AT_FDCWD, name, LOOKUP_FOLLOW, nd) #define user_path_walk_link(name,nd) \ - __user_walk(name, 0, nd) + __user_walk_fd(AT_FDCWD, name, 0, nd) extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *)); extern int FASTCALL(path_walk(const char *, struct nameidata *)); extern int FASTCALL(link_path_walk(const char *, struct nameidata *)); @@ -67,7 +68,7 @@ extern void path_release(struct nameidat extern void path_release_on_umount(struct nameidata *); extern int __user_path_lookup_open(const char __user *, unsigned lookup_flags, struct nameidata *nd, int open_flags); -extern int path_lookup_open(const char *, unsigned lookup_flags, struct nameidata *, int open_flags); +extern int path_lookup_open(int dfd, const char *name, unsigned lookup_flags, struct nameidata *, int open_flags); extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry, int (*open)(struct inode *, struct file *)); extern struct file *nameidata_to_filp(struct nameidata *nd, int flags); diff -puN include/linux/time.h~vfa-at-functions-core include/linux/time.h --- 25/include/linux/time.h~vfa-at-functions-core Fri Jan 13 17:54:38 2006 +++ 25-akpm/include/linux/time.h Fri Jan 13 17:54:38 2006 @@ -72,7 +72,7 @@ extern void do_gettimeofday(struct timev extern int do_settimeofday(struct timespec *tv); extern int do_sys_settimeofday(struct timespec *tv, struct timezone *tz); #define do_posix_clock_monotonic_gettime(ts) ktime_get_ts(ts) -extern long do_utimes(char __user *filename, struct timeval *times); +extern long do_utimes(int dfd, char __user *filename, struct timeval *times); struct itimerval; extern int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue); diff -puN arch/sparc64/kernel/sys_sparc32.c~vfa-at-functions-core arch/sparc64/kernel/sys_sparc32.c --- 25/arch/sparc64/kernel/sys_sparc32.c~vfa-at-functions-core Fri Jan 13 17:54:38 2006 +++ 25-akpm/arch/sparc64/kernel/sys_sparc32.c Fri Jan 13 17:54:38 2006 @@ -821,7 +821,7 @@ asmlinkage long sys32_utimes(char __user return -EFAULT; } - return do_utimes(filename, (tvs ? &ktvs[0] : NULL)); + return do_utimes(AT_FDCWD, filename, (tvs ? &ktvs[0] : NULL)); } /* These are here just in case some old sparc32 binary calls it. */ @@ -1003,7 +1003,7 @@ asmlinkage long sys32_adjtimex(struct ti asmlinkage long sparc32_open(const char __user *filename, int flags, int mode) { - return do_sys_open(filename, flags, mode); + return do_sys_open(AT_FDCWD, filename, flags, mode); } extern unsigned long do_mremap(unsigned long addr, diff -puN arch/alpha/kernel/osf_sys.c~vfa-at-functions-core arch/alpha/kernel/osf_sys.c --- 25/arch/alpha/kernel/osf_sys.c~vfa-at-functions-core Fri Jan 13 17:54:38 2006 +++ 25-akpm/arch/alpha/kernel/osf_sys.c Fri Jan 13 17:54:38 2006 @@ -960,7 +960,7 @@ osf_utimes(char __user *filename, struct return -EFAULT; } - return do_utimes(filename, tvs ? ktvs : NULL); + return do_utimes(AT_FDCWD, filename, tvs ? ktvs : NULL); } #define MAX_SELECT_SECONDS \ _