From 1a7bd2265fc57f29400d57f66275cc5918e30aa6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 12 Aug 2012 17:18:05 -0400 Subject: make get_unused_fd_flags() a function ... and get_unused_fd() a macro around it Signed-off-by: Al Viro --- fs/file.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/file.c b/fs/file.c index ba3f6053025c..5b46e9970a7a 100644 --- a/fs/file.c +++ b/fs/file.c @@ -477,8 +477,8 @@ out: return error; } -int get_unused_fd(void) +int get_unused_fd_flags(unsigned flags) { - return alloc_fd(0, 0); + return alloc_fd(0, flags); } -EXPORT_SYMBOL(get_unused_fd); +EXPORT_SYMBOL(get_unused_fd_flags); -- cgit v1.2.3-58-ga151 From c921b40d6201f7ec7b1edf7ea9a844f93e1a27f4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 12 Aug 2012 18:04:37 -0400 Subject: autofs4: don't open-code fd_install() The only difference between autofs_dev_ioctl_fd_install() and fd_install() is __set_close_on_exec() done by the latter. Just use get_unused_fd_flags(O_CLOEXEC) to allocate the descriptor and be done with that... Signed-off-by: Al Viro --- fs/autofs4/dev-ioctl.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index abf645c1703b..a16214109d31 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -221,20 +221,6 @@ static int test_by_type(struct path *path, void *p) return ino && ino->sbi->type & *(unsigned *)p; } -static void autofs_dev_ioctl_fd_install(unsigned int fd, struct file *file) -{ - struct files_struct *files = current->files; - struct fdtable *fdt; - - spin_lock(&files->file_lock); - fdt = files_fdtable(files); - BUG_ON(fdt->fd[fd] != NULL); - rcu_assign_pointer(fdt->fd[fd], file); - __set_close_on_exec(fd, fdt); - spin_unlock(&files->file_lock); -} - - /* * Open a file descriptor on the autofs mount point corresponding * to the given path and device number (aka. new_encode_dev(sb->s_dev)). @@ -243,7 +229,7 @@ static int autofs_dev_ioctl_open_mountpoint(const char *name, dev_t devid) { int err, fd; - fd = get_unused_fd(); + fd = get_unused_fd_flags(O_CLOEXEC); if (likely(fd >= 0)) { struct file *filp; struct path path; @@ -264,7 +250,7 @@ static int autofs_dev_ioctl_open_mountpoint(const char *name, dev_t devid) goto out; } - autofs_dev_ioctl_fd_install(fd, filp); + fd_install(fd, filp); } return fd; -- cgit v1.2.3-58-ga151 From 5b249b1b07c42c61d0c53b8ab4fad026e39a9be9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 19 Aug 2012 12:17:29 -0400 Subject: pipe(2) - race-free error recovery don't mess with sys_close() if copy_to_user() fails; just postpone fd_install() until we know it hasn't. Signed-off-by: Al Viro --- fs/pipe.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/pipe.c b/fs/pipe.c index 8d85d7068c1e..bd3479db4b62 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1064,9 +1064,8 @@ err_inode: return err; } -int do_pipe_flags(int *fd, int flags) +static int __do_pipe_flags(int *fd, struct file **files, int flags) { - struct file *files[2]; int error; int fdw, fdr; @@ -1088,11 +1087,8 @@ int do_pipe_flags(int *fd, int flags) fdw = error; audit_fd_pair(fdr, fdw); - fd_install(fdr, files[0]); - fd_install(fdw, files[1]); fd[0] = fdr; fd[1] = fdw; - return 0; err_fdr: @@ -1103,21 +1099,38 @@ int do_pipe_flags(int *fd, int flags) return error; } +int do_pipe_flags(int *fd, int flags) +{ + struct file *files[2]; + int error = __do_pipe_flags(fd, files, flags); + if (!error) { + fd_install(fd[0], files[0]); + fd_install(fd[1], files[1]); + } + return error; +} + /* * sys_pipe() is the normal C calling standard for creating * a pipe. It's not the way Unix traditionally does this, though. */ SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags) { + struct file *files[2]; int fd[2]; int error; - error = do_pipe_flags(fd, flags); + error = __do_pipe_flags(fd, files, flags); if (!error) { - if (copy_to_user(fildes, fd, sizeof(fd))) { - sys_close(fd[0]); - sys_close(fd[1]); + if (unlikely(copy_to_user(fildes, fd, sizeof(fd)))) { + fput(files[0]); + fput(files[1]); + put_unused_fd(fd[0]); + put_unused_fd(fd[1]); error = -EFAULT; + } else { + fd_install(fd[0], files[0]); + fd_install(fd[1], files[1]); } } return error; -- cgit v1.2.3-58-ga151 From 352e3b249284235e00745f3e71fc348b913e5deb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 19 Aug 2012 12:30:45 -0400 Subject: fanotify: sanitize failure exits in copy_event_to_user() * do copy_to_user() before prepare_for_access_response(); that kills the need in remove_access_response(). * don't do fd_install() until we are past the last possible failure exit. Don't use sys_close() on cleanup side - just put_unused_fd() and fput(). Less racy that way... Signed-off-by: Al Viro --- fs/notify/fanotify/fanotify_user.c | 59 +++++++++++++------------------------- 1 file changed, 20 insertions(+), 39 deletions(-) (limited to 'fs') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index d43803669739..ea48693940f1 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -58,7 +58,9 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group, return fsnotify_remove_notify_event(group); } -static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event) +static int create_fd(struct fsnotify_group *group, + struct fsnotify_event *event, + struct file **file) { int client_fd; struct file *new_file; @@ -98,7 +100,7 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event) put_unused_fd(client_fd); client_fd = PTR_ERR(new_file); } else { - fd_install(client_fd, new_file); + *file = new_file; } return client_fd; @@ -106,13 +108,15 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event) static int fill_event_metadata(struct fsnotify_group *group, struct fanotify_event_metadata *metadata, - struct fsnotify_event *event) + struct fsnotify_event *event, + struct file **file) { int ret = 0; pr_debug("%s: group=%p metadata=%p event=%p\n", __func__, group, metadata, event); + *file = NULL; metadata->event_len = FAN_EVENT_METADATA_LEN; metadata->metadata_len = FAN_EVENT_METADATA_LEN; metadata->vers = FANOTIFY_METADATA_VERSION; @@ -121,7 +125,7 @@ static int fill_event_metadata(struct fsnotify_group *group, if (unlikely(event->mask & FAN_Q_OVERFLOW)) metadata->fd = FAN_NOFD; else { - metadata->fd = create_fd(group, event); + metadata->fd = create_fd(group, event, file); if (metadata->fd < 0) ret = metadata->fd; } @@ -220,25 +224,6 @@ static int prepare_for_access_response(struct fsnotify_group *group, return 0; } -static void remove_access_response(struct fsnotify_group *group, - struct fsnotify_event *event, - __s32 fd) -{ - struct fanotify_response_event *re; - - if (!(event->mask & FAN_ALL_PERM_EVENTS)) - return; - - re = dequeue_re(group, fd); - if (!re) - return; - - BUG_ON(re->event != event); - - kmem_cache_free(fanotify_response_event_cache, re); - - return; -} #else static int prepare_for_access_response(struct fsnotify_group *group, struct fsnotify_event *event, @@ -247,12 +232,6 @@ static int prepare_for_access_response(struct fsnotify_group *group, return 0; } -static void remove_access_response(struct fsnotify_group *group, - struct fsnotify_event *event, - __s32 fd) -{ - return; -} #endif static ssize_t copy_event_to_user(struct fsnotify_group *group, @@ -260,31 +239,33 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, char __user *buf) { struct fanotify_event_metadata fanotify_event_metadata; + struct file *f; int fd, ret; pr_debug("%s: group=%p event=%p\n", __func__, group, event); - ret = fill_event_metadata(group, &fanotify_event_metadata, event); + ret = fill_event_metadata(group, &fanotify_event_metadata, event, &f); if (ret < 0) goto out; fd = fanotify_event_metadata.fd; - ret = prepare_for_access_response(group, event, fd); - if (ret) - goto out_close_fd; - ret = -EFAULT; if (copy_to_user(buf, &fanotify_event_metadata, fanotify_event_metadata.event_len)) - goto out_kill_access_response; + goto out_close_fd; + + ret = prepare_for_access_response(group, event, fd); + if (ret) + goto out_close_fd; + fd_install(fd, f); return fanotify_event_metadata.event_len; -out_kill_access_response: - remove_access_response(group, event, fd); out_close_fd: - if (fd != FAN_NOFD) - sys_close(fd); + if (fd != FAN_NOFD) { + put_unused_fd(fd); + fput(f); + } out: #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS if (event->mask & FAN_ALL_PERM_EVENTS) { -- cgit v1.2.3-58-ga151 From f33ff9927f42045116d738ee47ff7bc59f739bd7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 12 Aug 2012 16:17:59 -0400 Subject: take rlimit check to callers of expand_files() ... except for one in android, where the check is different and already done in caller. No need to recalculate rlimit many times in alloc_fd() either. Signed-off-by: Al Viro --- fs/fcntl.c | 3 +++ fs/file.c | 16 +++++++++------- 2 files changed, 12 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/fcntl.c b/fs/fcntl.c index 887b5ba8c9b5..08e6af5c1b1f 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -64,6 +64,9 @@ SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags) if (unlikely(oldfd == newfd)) return -EINVAL; + if (newfd >= rlimit(RLIMIT_NOFILE)) + return -EMFILE; + spin_lock(&files->file_lock); err = expand_files(files, newfd); file = fcheck(oldfd); diff --git a/fs/file.c b/fs/file.c index 5b46e9970a7a..08922af4a62c 100644 --- a/fs/file.c +++ b/fs/file.c @@ -251,13 +251,6 @@ int expand_files(struct files_struct *files, int nr) fdt = files_fdtable(files); - /* - * N.B. For clone tasks sharing a files structure, this test - * will limit the total number of files that can be opened. - */ - if (nr >= rlimit(RLIMIT_NOFILE)) - return -EMFILE; - /* Do we need to expand? */ if (nr < fdt->max_fds) return 0; @@ -431,6 +424,7 @@ int alloc_fd(unsigned start, unsigned flags) { struct files_struct *files = current->files; unsigned int fd; + unsigned end = rlimit(RLIMIT_NOFILE); int error; struct fdtable *fdt; @@ -444,6 +438,14 @@ repeat: if (fd < fdt->max_fds) fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd); + /* + * N.B. For clone tasks sharing a files structure, this test + * will limit the total number of files that can be opened. + */ + error = -EMFILE; + if (fd >= end) + goto out; + error = expand_files(files, fd); if (error < 0) goto out; -- cgit v1.2.3-58-ga151 From dcfadfa4ec5a12404a99ad6426871a6b03a62b37 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 12 Aug 2012 17:27:30 -0400 Subject: new helper: __alloc_fd() Essentially, alloc_fd() in a files_struct we own a reference to. Most of the time wanting to use it is a sign of lousy API design (such as android/binder). It's *not* a general-purpose interface; better that than open-coding its guts, but again, playing with other process' descriptor table is a sign of bad design. Signed-off-by: Al Viro --- drivers/staging/android/binder.c | 59 ++++------------------------------------ fs/file.c | 12 +++++--- include/linux/fdtable.h | 3 ++ 3 files changed, 16 insertions(+), 58 deletions(-) (limited to 'fs') diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c index b9a534c46aac..4946d282a35c 100644 --- a/drivers/staging/android/binder.c +++ b/drivers/staging/android/binder.c @@ -362,71 +362,22 @@ struct binder_transaction { static void binder_defer_work(struct binder_proc *proc, enum binder_deferred_state defer); -/* - * copied from get_unused_fd_flags - */ int task_get_unused_fd_flags(struct binder_proc *proc, int flags) { struct files_struct *files = proc->files; - int fd, error; - struct fdtable *fdt; unsigned long rlim_cur; unsigned long irqs; if (files == NULL) return -ESRCH; - error = -EMFILE; - spin_lock(&files->file_lock); + if (!lock_task_sighand(proc->tsk, &irqs)) + return -EMFILE; -repeat: - fdt = files_fdtable(files); - fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, files->next_fd); - - /* - * N.B. For clone tasks sharing a files structure, this test - * will limit the total number of files that can be opened. - */ - rlim_cur = 0; - if (lock_task_sighand(proc->tsk, &irqs)) { - rlim_cur = proc->tsk->signal->rlim[RLIMIT_NOFILE].rlim_cur; - unlock_task_sighand(proc->tsk, &irqs); - } - if (fd >= rlim_cur) - goto out; - - /* Do we need to expand the fd array or fd set? */ - error = expand_files(files, fd); - if (error < 0) - goto out; - - if (error) { - /* - * If we needed to expand the fs array we - * might have blocked - try again. - */ - error = -EMFILE; - goto repeat; - } - - __set_open_fd(fd, fdt); - if (flags & O_CLOEXEC) - __set_close_on_exec(fd, fdt); - else - __clear_close_on_exec(fd, fdt); - files->next_fd = fd + 1; -#if 1 - /* Sanity check */ - if (fdt->fd[fd] != NULL) { - pr_warn("get_unused_fd: slot %d not NULL!\n", fd); - fdt->fd[fd] = NULL; - } -#endif - error = fd; + rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE); + unlock_task_sighand(proc->tsk, &irqs); -out: - spin_unlock(&files->file_lock); - return error; + return __alloc_fd(files, 0, rlim_cur, flags); } /* diff --git a/fs/file.c b/fs/file.c index 08922af4a62c..a3a0705f8f51 100644 --- a/fs/file.c +++ b/fs/file.c @@ -420,11 +420,10 @@ struct files_struct init_files = { /* * allocate a file descriptor, mark it busy. */ -int alloc_fd(unsigned start, unsigned flags) +int __alloc_fd(struct files_struct *files, + unsigned start, unsigned end, unsigned flags) { - struct files_struct *files = current->files; unsigned int fd; - unsigned end = rlimit(RLIMIT_NOFILE); int error; struct fdtable *fdt; @@ -479,8 +478,13 @@ out: return error; } +int alloc_fd(unsigned start, unsigned flags) +{ + return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags); +} + int get_unused_fd_flags(unsigned flags) { - return alloc_fd(0, flags); + return __alloc_fd(current->files, 0, rlimit(RLIMIT_NOFILE), flags); } EXPORT_SYMBOL(get_unused_fd_flags); diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 158a41eed314..b84ca064f727 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -125,6 +125,9 @@ void reset_files_struct(struct files_struct *); int unshare_files(struct files_struct **); struct files_struct *dup_fd(struct files_struct *, int *); +extern int __alloc_fd(struct files_struct *files, + unsigned start, unsigned end, unsigned flags); + extern struct kmem_cache *files_cachep; #endif /* __LINUX_FDTABLE_H */ -- cgit v1.2.3-58-ga151 From 7cf4dc3c8dbfdfde163d4636f621cf99a1f63bfb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 15 Aug 2012 19:56:12 -0400 Subject: move files_struct-related bits from kernel/exit.c to fs/file.c Signed-off-by: Al Viro --- fs/file.c | 100 +++++++++++++++++++++++++++++++++++++++++++++++- include/linux/fdtable.h | 6 --- kernel/exit.c | 93 -------------------------------------------- 3 files changed, 99 insertions(+), 100 deletions(-) (limited to 'fs') diff --git a/fs/file.c b/fs/file.c index a3a0705f8f51..0be423cadb26 100644 --- a/fs/file.c +++ b/fs/file.c @@ -84,7 +84,7 @@ static void free_fdtable_work(struct work_struct *work) } } -void free_fdtable_rcu(struct rcu_head *rcu) +static void free_fdtable_rcu(struct rcu_head *rcu) { struct fdtable *fdt = container_of(rcu, struct fdtable, rcu); struct fdtable_defer *fddef; @@ -116,6 +116,11 @@ void free_fdtable_rcu(struct rcu_head *rcu) } } +static inline void free_fdtable(struct fdtable *fdt) +{ + call_rcu(&fdt->rcu, free_fdtable_rcu); +} + /* * Expand the fdset in the files_struct. Called with the files spinlock * held for write. @@ -388,6 +393,99 @@ out: return NULL; } +static void close_files(struct files_struct * files) +{ + int i, j; + struct fdtable *fdt; + + j = 0; + + /* + * It is safe to dereference the fd table without RCU or + * ->file_lock because this is the last reference to the + * files structure. But use RCU to shut RCU-lockdep up. + */ + rcu_read_lock(); + fdt = files_fdtable(files); + rcu_read_unlock(); + for (;;) { + unsigned long set; + i = j * BITS_PER_LONG; + if (i >= fdt->max_fds) + break; + set = fdt->open_fds[j++]; + while (set) { + if (set & 1) { + struct file * file = xchg(&fdt->fd[i], NULL); + if (file) { + filp_close(file, files); + cond_resched(); + } + } + i++; + set >>= 1; + } + } +} + +struct files_struct *get_files_struct(struct task_struct *task) +{ + struct files_struct *files; + + task_lock(task); + files = task->files; + if (files) + atomic_inc(&files->count); + task_unlock(task); + + return files; +} + +void put_files_struct(struct files_struct *files) +{ + struct fdtable *fdt; + + if (atomic_dec_and_test(&files->count)) { + close_files(files); + /* + * Free the fd and fdset arrays if we expanded them. + * If the fdtable was embedded, pass files for freeing + * at the end of the RCU grace period. Otherwise, + * you can free files immediately. + */ + rcu_read_lock(); + fdt = files_fdtable(files); + if (fdt != &files->fdtab) + kmem_cache_free(files_cachep, files); + free_fdtable(fdt); + rcu_read_unlock(); + } +} + +void reset_files_struct(struct files_struct *files) +{ + struct task_struct *tsk = current; + struct files_struct *old; + + old = tsk->files; + task_lock(tsk); + tsk->files = files; + task_unlock(tsk); + put_files_struct(old); +} + +void exit_files(struct task_struct *tsk) +{ + struct files_struct * files = tsk->files; + + if (files) { + task_lock(tsk); + tsk->files = NULL; + task_unlock(tsk); + put_files_struct(files); + } +} + static void __devinit fdtable_defer_list_init(int cpu) { struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu); diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index b84ca064f727..3855f4febe70 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -94,14 +94,8 @@ struct vfsmount; struct dentry; extern int expand_files(struct files_struct *, int nr); -extern void free_fdtable_rcu(struct rcu_head *rcu); extern void __init files_defer_init(void); -static inline void free_fdtable(struct fdtable *fdt) -{ - call_rcu(&fdt->rcu, free_fdtable_rcu); -} - static inline struct file * fcheck_files(struct files_struct *files, unsigned int fd) { struct file * file = NULL; diff --git a/kernel/exit.c b/kernel/exit.c index f65345f9e5bb..20dfc7617c2e 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -466,99 +466,6 @@ void daemonize(const char *name, ...) EXPORT_SYMBOL(daemonize); -static void close_files(struct files_struct * files) -{ - int i, j; - struct fdtable *fdt; - - j = 0; - - /* - * It is safe to dereference the fd table without RCU or - * ->file_lock because this is the last reference to the - * files structure. But use RCU to shut RCU-lockdep up. - */ - rcu_read_lock(); - fdt = files_fdtable(files); - rcu_read_unlock(); - for (;;) { - unsigned long set; - i = j * BITS_PER_LONG; - if (i >= fdt->max_fds) - break; - set = fdt->open_fds[j++]; - while (set) { - if (set & 1) { - struct file * file = xchg(&fdt->fd[i], NULL); - if (file) { - filp_close(file, files); - cond_resched(); - } - } - i++; - set >>= 1; - } - } -} - -struct files_struct *get_files_struct(struct task_struct *task) -{ - struct files_struct *files; - - task_lock(task); - files = task->files; - if (files) - atomic_inc(&files->count); - task_unlock(task); - - return files; -} - -void put_files_struct(struct files_struct *files) -{ - struct fdtable *fdt; - - if (atomic_dec_and_test(&files->count)) { - close_files(files); - /* - * Free the fd and fdset arrays if we expanded them. - * If the fdtable was embedded, pass files for freeing - * at the end of the RCU grace period. Otherwise, - * you can free files immediately. - */ - rcu_read_lock(); - fdt = files_fdtable(files); - if (fdt != &files->fdtab) - kmem_cache_free(files_cachep, files); - free_fdtable(fdt); - rcu_read_unlock(); - } -} - -void reset_files_struct(struct files_struct *files) -{ - struct task_struct *tsk = current; - struct files_struct *old; - - old = tsk->files; - task_lock(tsk); - tsk->files = files; - task_unlock(tsk); - put_files_struct(old); -} - -void exit_files(struct task_struct *tsk) -{ - struct files_struct * files = tsk->files; - - if (files) { - task_lock(tsk); - tsk->files = NULL; - task_unlock(tsk); - put_files_struct(files); - } -} - #ifdef CONFIG_MM_OWNER /* * A task is exiting. If it owned this mm, find a new owner for the mm. -- cgit v1.2.3-58-ga151 From b9e02af0ae0783894abb576fbab45ec29aa8e7fc Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 15 Aug 2012 20:00:58 -0400 Subject: don't bother with call_rcu() in put_files_struct() At that point nobody can see us anyway; everything that looks at files_fdtable(files) is separated from the guts of put_files_struct(files) - either since files is current->files or because we fetched it under task_lock() and hadn't dropped that yet, or because we'd bumped files->count while holding task_lock()... Signed-off-by: Al Viro --- fs/file.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/file.c b/fs/file.c index 0be423cadb26..533fa5d56a5f 100644 --- a/fs/file.c +++ b/fs/file.c @@ -447,18 +447,14 @@ void put_files_struct(struct files_struct *files) if (atomic_dec_and_test(&files->count)) { close_files(files); - /* - * Free the fd and fdset arrays if we expanded them. - * If the fdtable was embedded, pass files for freeing - * at the end of the RCU grace period. Otherwise, - * you can free files immediately. - */ + /* not really needed, since nobody can see us */ rcu_read_lock(); fdt = files_fdtable(files); - if (fdt != &files->fdtab) - kmem_cache_free(files_cachep, files); - free_fdtable(fdt); rcu_read_unlock(); + /* free the arrays if they are not embedded */ + if (fdt != &files->fdtab) + __free_fdtable(fdt); + kmem_cache_free(files_cachep, files); } } -- cgit v1.2.3-58-ga151 From 1983e781da2f7f77906f4ccc2c3dc279cd61d1ff Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 15 Aug 2012 20:06:36 -0400 Subject: trim free_fdtable_rcu() embedded case isn't hit anymore Signed-off-by: Al Viro --- fs/file.c | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/file.c b/fs/file.c index 533fa5d56a5f..4ce4a0fcf320 100644 --- a/fs/file.c +++ b/fs/file.c @@ -90,16 +90,8 @@ static void free_fdtable_rcu(struct rcu_head *rcu) struct fdtable_defer *fddef; BUG_ON(!fdt); + BUG_ON(fdt->max_fds <= NR_OPEN_DEFAULT); - if (fdt->max_fds <= NR_OPEN_DEFAULT) { - /* - * This fdtable is embedded in the files structure and that - * structure itself is getting destroyed. - */ - kmem_cache_free(files_cachep, - container_of(fdt, struct files_struct, fdtab)); - return; - } if (!is_vmalloc_addr(fdt->fd) && !is_vmalloc_addr(fdt->open_fds)) { kfree(fdt->fd); kfree(fdt->open_fds); @@ -116,11 +108,6 @@ static void free_fdtable_rcu(struct rcu_head *rcu) } } -static inline void free_fdtable(struct fdtable *fdt) -{ - call_rcu(&fdt->rcu, free_fdtable_rcu); -} - /* * Expand the fdset in the files_struct. Called with the files spinlock * held for write. @@ -234,7 +221,7 @@ static int expand_fdtable(struct files_struct *files, int nr) copy_fdtable(new_fdt, cur_fdt); rcu_assign_pointer(files->fdt, new_fdt); if (cur_fdt->max_fds > NR_OPEN_DEFAULT) - free_fdtable(cur_fdt); + call_rcu(&cur_fdt->rcu, free_fdtable_rcu); } else { /* Somebody else expanded, so undo our attempt */ __free_fdtable(new_fdt); -- cgit v1.2.3-58-ga151 From 56007cae94f349387c088e738c7dcb6bc513063b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 15 Aug 2012 21:03:26 -0400 Subject: move put_unused_fd() and fd_install() to fs/file.c Signed-off-by: Al Viro --- fs/file.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ fs/open.c | 44 -------------------------------------------- 2 files changed, 44 insertions(+), 44 deletions(-) (limited to 'fs') diff --git a/fs/file.c b/fs/file.c index 4ce4a0fcf320..78cf88f2a0e8 100644 --- a/fs/file.c +++ b/fs/file.c @@ -569,3 +569,47 @@ int get_unused_fd_flags(unsigned flags) return __alloc_fd(current->files, 0, rlimit(RLIMIT_NOFILE), flags); } EXPORT_SYMBOL(get_unused_fd_flags); + +static void __put_unused_fd(struct files_struct *files, unsigned int fd) +{ + struct fdtable *fdt = files_fdtable(files); + __clear_open_fd(fd, fdt); + if (fd < files->next_fd) + files->next_fd = fd; +} + +void put_unused_fd(unsigned int fd) +{ + struct files_struct *files = current->files; + spin_lock(&files->file_lock); + __put_unused_fd(files, fd); + spin_unlock(&files->file_lock); +} + +EXPORT_SYMBOL(put_unused_fd); + +/* + * Install a file pointer in the fd array. + * + * The VFS is full of places where we drop the files lock between + * setting the open_fds bitmap and installing the file in the file + * array. At any such point, we are vulnerable to a dup2() race + * installing a file in the array before us. We need to detect this and + * fput() the struct file we are about to overwrite in this case. + * + * It should never happen - if we allow dup2() do it, _really_ bad things + * will follow. + */ + +void fd_install(unsigned int fd, struct file *file) +{ + struct files_struct *files = current->files; + struct fdtable *fdt; + spin_lock(&files->file_lock); + fdt = files_fdtable(files); + BUG_ON(fdt->fd[fd] != NULL); + rcu_assign_pointer(fdt->fd[fd], file); + spin_unlock(&files->file_lock); +} + +EXPORT_SYMBOL(fd_install); diff --git a/fs/open.c b/fs/open.c index e1f2cdb91a4d..c525bd0e65b6 100644 --- a/fs/open.c +++ b/fs/open.c @@ -803,50 +803,6 @@ struct file *dentry_open(const struct path *path, int flags, } EXPORT_SYMBOL(dentry_open); -static void __put_unused_fd(struct files_struct *files, unsigned int fd) -{ - struct fdtable *fdt = files_fdtable(files); - __clear_open_fd(fd, fdt); - if (fd < files->next_fd) - files->next_fd = fd; -} - -void put_unused_fd(unsigned int fd) -{ - struct files_struct *files = current->files; - spin_lock(&files->file_lock); - __put_unused_fd(files, fd); - spin_unlock(&files->file_lock); -} - -EXPORT_SYMBOL(put_unused_fd); - -/* - * Install a file pointer in the fd array. - * - * The VFS is full of places where we drop the files lock between - * setting the open_fds bitmap and installing the file in the file - * array. At any such point, we are vulnerable to a dup2() race - * installing a file in the array before us. We need to detect this and - * fput() the struct file we are about to overwrite in this case. - * - * It should never happen - if we allow dup2() do it, _really_ bad things - * will follow. - */ - -void fd_install(unsigned int fd, struct file *file) -{ - struct files_struct *files = current->files; - struct fdtable *fdt; - spin_lock(&files->file_lock); - fdt = files_fdtable(files); - BUG_ON(fdt->fd[fd] != NULL); - rcu_assign_pointer(fdt->fd[fd], file); - spin_unlock(&files->file_lock); -} - -EXPORT_SYMBOL(fd_install); - static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op) { int lookup_flags = 0; -- cgit v1.2.3-58-ga151 From f869e8a7f753e3fd43d6483e796774776f645edb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 15 Aug 2012 21:06:33 -0400 Subject: expose a low-level variant of fd_install() for binder Similar situation to that of __alloc_fd(); do not use unless you really have to. You should not touch any descriptor table other than your own; it's a sure sign of a really bad API design. As with __alloc_fd(), you *must* use a first-class reference to struct files_struct; something obtained by get_files_struct(some task) (let alone direct task->files) will not do. It must be either current->files, or obtained by get_files_struct(current) by the owner of that sucker and given to you. Signed-off-by: Al Viro --- drivers/staging/android/binder.c | 13 ++----------- fs/file.c | 16 ++++++++++++++-- include/linux/fdtable.h | 2 ++ 3 files changed, 18 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c index 4946d282a35c..9e1a98a360d4 100644 --- a/drivers/staging/android/binder.c +++ b/drivers/staging/android/binder.c @@ -386,17 +386,8 @@ int task_get_unused_fd_flags(struct binder_proc *proc, int flags) static void task_fd_install( struct binder_proc *proc, unsigned int fd, struct file *file) { - struct files_struct *files = proc->files; - struct fdtable *fdt; - - if (files == NULL) - return; - - spin_lock(&files->file_lock); - fdt = files_fdtable(files); - BUG_ON(fdt->fd[fd] != NULL); - rcu_assign_pointer(fdt->fd[fd], file); - spin_unlock(&files->file_lock); + if (proc->files) + __fd_install(proc->files, fd, file); } /* diff --git a/fs/file.c b/fs/file.c index 78cf88f2a0e8..0d1bf0515111 100644 --- a/fs/file.c +++ b/fs/file.c @@ -599,11 +599,18 @@ EXPORT_SYMBOL(put_unused_fd); * * It should never happen - if we allow dup2() do it, _really_ bad things * will follow. + * + * NOTE: __fd_install() variant is really, really low-level; don't + * use it unless you are forced to by truly lousy API shoved down + * your throat. 'files' *MUST* be either current->files or obtained + * by get_files_struct(current) done by whoever had given it to you, + * or really bad things will happen. Normally you want to use + * fd_install() instead. */ -void fd_install(unsigned int fd, struct file *file) +void __fd_install(struct files_struct *files, unsigned int fd, + struct file *file) { - struct files_struct *files = current->files; struct fdtable *fdt; spin_lock(&files->file_lock); fdt = files_fdtable(files); @@ -612,4 +619,9 @@ void fd_install(unsigned int fd, struct file *file) spin_unlock(&files->file_lock); } +void fd_install(unsigned int fd, struct file *file) +{ + __fd_install(current->files, fd, file); +} + EXPORT_SYMBOL(fd_install); diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 3855f4febe70..59d4fc7f10c8 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -121,6 +121,8 @@ struct files_struct *dup_fd(struct files_struct *, int *); extern int __alloc_fd(struct files_struct *files, unsigned start, unsigned end, unsigned flags); +extern void __fd_install(struct files_struct *files, + unsigned int fd, struct file *file); extern struct kmem_cache *files_cachep; -- cgit v1.2.3-58-ga151 From 0ee8cdfe6af052deb56dccd54838a1eb32fb4ca2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 15 Aug 2012 21:12:10 -0400 Subject: take fget() and friends to fs/file.c Signed-off-by: Al Viro --- fs/file.c | 106 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/file_table.c | 106 -------------------------------------------------------- 2 files changed, 106 insertions(+), 106 deletions(-) (limited to 'fs') diff --git a/fs/file.c b/fs/file.c index 0d1bf0515111..6eef55ce30c9 100644 --- a/fs/file.c +++ b/fs/file.c @@ -625,3 +625,109 @@ void fd_install(unsigned int fd, struct file *file) } EXPORT_SYMBOL(fd_install); + +struct file *fget(unsigned int fd) +{ + struct file *file; + struct files_struct *files = current->files; + + rcu_read_lock(); + file = fcheck_files(files, fd); + if (file) { + /* File object ref couldn't be taken */ + if (file->f_mode & FMODE_PATH || + !atomic_long_inc_not_zero(&file->f_count)) + file = NULL; + } + rcu_read_unlock(); + + return file; +} + +EXPORT_SYMBOL(fget); + +struct file *fget_raw(unsigned int fd) +{ + struct file *file; + struct files_struct *files = current->files; + + rcu_read_lock(); + file = fcheck_files(files, fd); + if (file) { + /* File object ref couldn't be taken */ + if (!atomic_long_inc_not_zero(&file->f_count)) + file = NULL; + } + rcu_read_unlock(); + + return file; +} + +EXPORT_SYMBOL(fget_raw); + +/* + * Lightweight file lookup - no refcnt increment if fd table isn't shared. + * + * You can use this instead of fget if you satisfy all of the following + * conditions: + * 1) You must call fput_light before exiting the syscall and returning control + * to userspace (i.e. you cannot remember the returned struct file * after + * returning to userspace). + * 2) You must not call filp_close on the returned struct file * in between + * calls to fget_light and fput_light. + * 3) You must not clone the current task in between the calls to fget_light + * and fput_light. + * + * The fput_needed flag returned by fget_light should be passed to the + * corresponding fput_light. + */ +struct file *fget_light(unsigned int fd, int *fput_needed) +{ + struct file *file; + struct files_struct *files = current->files; + + *fput_needed = 0; + if (atomic_read(&files->count) == 1) { + file = fcheck_files(files, fd); + if (file && (file->f_mode & FMODE_PATH)) + file = NULL; + } else { + rcu_read_lock(); + file = fcheck_files(files, fd); + if (file) { + if (!(file->f_mode & FMODE_PATH) && + atomic_long_inc_not_zero(&file->f_count)) + *fput_needed = 1; + else + /* Didn't get the reference, someone's freed */ + file = NULL; + } + rcu_read_unlock(); + } + + return file; +} + +struct file *fget_raw_light(unsigned int fd, int *fput_needed) +{ + struct file *file; + struct files_struct *files = current->files; + + *fput_needed = 0; + if (atomic_read(&files->count) == 1) { + file = fcheck_files(files, fd); + } else { + rcu_read_lock(); + file = fcheck_files(files, fd); + if (file) { + if (atomic_long_inc_not_zero(&file->f_count)) + *fput_needed = 1; + else + /* Didn't get the reference, someone's freed */ + file = NULL; + } + rcu_read_unlock(); + } + + return file; +} diff --git a/fs/file_table.c b/fs/file_table.c index 701985e4ccda..c6780163bf3e 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -339,112 +339,6 @@ void __fput_sync(struct file *file) EXPORT_SYMBOL(fput); -struct file *fget(unsigned int fd) -{ - struct file *file; - struct files_struct *files = current->files; - - rcu_read_lock(); - file = fcheck_files(files, fd); - if (file) { - /* File object ref couldn't be taken */ - if (file->f_mode & FMODE_PATH || - !atomic_long_inc_not_zero(&file->f_count)) - file = NULL; - } - rcu_read_unlock(); - - return file; -} - -EXPORT_SYMBOL(fget); - -struct file *fget_raw(unsigned int fd) -{ - struct file *file; - struct files_struct *files = current->files; - - rcu_read_lock(); - file = fcheck_files(files, fd); - if (file) { - /* File object ref couldn't be taken */ - if (!atomic_long_inc_not_zero(&file->f_count)) - file = NULL; - } - rcu_read_unlock(); - - return file; -} - -EXPORT_SYMBOL(fget_raw); - -/* - * Lightweight file lookup - no refcnt increment if fd table isn't shared. - * - * You can use this instead of fget if you satisfy all of the following - * conditions: - * 1) You must call fput_light before exiting the syscall and returning control - * to userspace (i.e. you cannot remember the returned struct file * after - * returning to userspace). - * 2) You must not call filp_close on the returned struct file * in between - * calls to fget_light and fput_light. - * 3) You must not clone the current task in between the calls to fget_light - * and fput_light. - * - * The fput_needed flag returned by fget_light should be passed to the - * corresponding fput_light. - */ -struct file *fget_light(unsigned int fd, int *fput_needed) -{ - struct file *file; - struct files_struct *files = current->files; - - *fput_needed = 0; - if (atomic_read(&files->count) == 1) { - file = fcheck_files(files, fd); - if (file && (file->f_mode & FMODE_PATH)) - file = NULL; - } else { - rcu_read_lock(); - file = fcheck_files(files, fd); - if (file) { - if (!(file->f_mode & FMODE_PATH) && - atomic_long_inc_not_zero(&file->f_count)) - *fput_needed = 1; - else - /* Didn't get the reference, someone's freed */ - file = NULL; - } - rcu_read_unlock(); - } - - return file; -} - -struct file *fget_raw_light(unsigned int fd, int *fput_needed) -{ - struct file *file; - struct files_struct *files = current->files; - - *fput_needed = 0; - if (atomic_read(&files->count) == 1) { - file = fcheck_files(files, fd); - } else { - rcu_read_lock(); - file = fcheck_files(files, fd); - if (file) { - if (atomic_long_inc_not_zero(&file->f_count)) - *fput_needed = 1; - else - /* Didn't get the reference, someone's freed */ - file = NULL; - } - rcu_read_unlock(); - } - - return file; -} - void put_filp(struct file *file) { if (atomic_long_dec_and_test(&file->f_count)) { -- cgit v1.2.3-58-ga151 From 483ce1d4b8c3b82bc9c9a1dd9dbc44f50b3aaf5a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 19 Aug 2012 12:04:24 -0400 Subject: take descriptor-related part of close() to file.c Signed-off-by: Al Viro --- drivers/staging/android/binder.c | 34 ++-------------------------------- fs/file.c | 26 ++++++++++++++++++++++++++ fs/open.c | 22 +--------------------- include/linux/fdtable.h | 2 ++ 4 files changed, 31 insertions(+), 53 deletions(-) (limited to 'fs') diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c index 9e1a98a360d4..f71d624995ea 100644 --- a/drivers/staging/android/binder.c +++ b/drivers/staging/android/binder.c @@ -390,43 +390,17 @@ static void task_fd_install( __fd_install(proc->files, fd, file); } -/* - * copied from __put_unused_fd in open.c - */ -static void __put_unused_fd(struct files_struct *files, unsigned int fd) -{ - struct fdtable *fdt = files_fdtable(files); - __clear_open_fd(fd, fdt); - if (fd < files->next_fd) - files->next_fd = fd; -} - /* * copied from sys_close */ static long task_close_fd(struct binder_proc *proc, unsigned int fd) { - struct file *filp; - struct files_struct *files = proc->files; - struct fdtable *fdt; int retval; - if (files == NULL) + if (proc->files == NULL) return -ESRCH; - spin_lock(&files->file_lock); - fdt = files_fdtable(files); - if (fd >= fdt->max_fds) - goto out_unlock; - filp = fdt->fd[fd]; - if (!filp) - goto out_unlock; - rcu_assign_pointer(fdt->fd[fd], NULL); - __clear_close_on_exec(fd, fdt); - __put_unused_fd(files, fd); - spin_unlock(&files->file_lock); - retval = filp_close(filp, files); - + retval = __close_fd(proc->files, fd); /* can't restart close syscall because file table entry was cleared */ if (unlikely(retval == -ERESTARTSYS || retval == -ERESTARTNOINTR || @@ -435,10 +409,6 @@ static long task_close_fd(struct binder_proc *proc, unsigned int fd) retval = -EINTR; return retval; - -out_unlock: - spin_unlock(&files->file_lock); - return -EBADF; } static void binder_set_nice(long nice) diff --git a/fs/file.c b/fs/file.c index 6eef55ce30c9..fd4694e688ab 100644 --- a/fs/file.c +++ b/fs/file.c @@ -626,6 +626,32 @@ void fd_install(unsigned int fd, struct file *file) EXPORT_SYMBOL(fd_install); +/* + * The same warnings as for __alloc_fd()/__fd_install() apply here... + */ +int __close_fd(struct files_struct *files, unsigned fd) +{ + struct file *file; + struct fdtable *fdt; + + spin_lock(&files->file_lock); + fdt = files_fdtable(files); + if (fd >= fdt->max_fds) + goto out_unlock; + file = fdt->fd[fd]; + if (!file) + goto out_unlock; + rcu_assign_pointer(fdt->fd[fd], NULL); + __clear_close_on_exec(fd, fdt); + __put_unused_fd(files, fd); + spin_unlock(&files->file_lock); + return filp_close(file, files); + +out_unlock: + spin_unlock(&files->file_lock); + return -EBADF; +} + struct file *fget(unsigned int fd) { struct file *file; diff --git a/fs/open.c b/fs/open.c index c525bd0e65b6..30760017deed 100644 --- a/fs/open.c +++ b/fs/open.c @@ -994,23 +994,7 @@ EXPORT_SYMBOL(filp_close); */ SYSCALL_DEFINE1(close, unsigned int, fd) { - struct file * filp; - struct files_struct *files = current->files; - struct fdtable *fdt; - int retval; - - spin_lock(&files->file_lock); - fdt = files_fdtable(files); - if (fd >= fdt->max_fds) - goto out_unlock; - filp = fdt->fd[fd]; - if (!filp) - goto out_unlock; - rcu_assign_pointer(fdt->fd[fd], NULL); - __clear_close_on_exec(fd, fdt); - __put_unused_fd(files, fd); - spin_unlock(&files->file_lock); - retval = filp_close(filp, files); + int retval = __close_fd(current->files, fd); /* can't restart close syscall because file table entry was cleared */ if (unlikely(retval == -ERESTARTSYS || @@ -1020,10 +1004,6 @@ SYSCALL_DEFINE1(close, unsigned int, fd) retval = -EINTR; return retval; - -out_unlock: - spin_unlock(&files->file_lock); - return -EBADF; } EXPORT_SYMBOL(sys_close); diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 59d4fc7f10c8..59488f2392bc 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -123,6 +123,8 @@ extern int __alloc_fd(struct files_struct *files, unsigned start, unsigned end, unsigned flags); extern void __fd_install(struct files_struct *files, unsigned int fd, struct file *file); +extern int __close_fd(struct files_struct *files, + unsigned int fd); extern struct kmem_cache *files_cachep; -- cgit v1.2.3-58-ga151 From 6a6d27de340c89c5323565b49f7851362619925d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 21 Aug 2012 09:56:33 -0400 Subject: take close-on-exec logics to fs/file.c, clean it up a bit ... and add cond_resched() there, while we are at it. We can get large latencies as is... Signed-off-by: Al Viro --- fs/exec.c | 41 ++++++----------------------------------- fs/file.c | 37 +++++++++++++++++++++++++++++++++++++ include/linux/fdtable.h | 1 + 3 files changed, 44 insertions(+), 35 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 574cf4de4ec3..f2b6af585d4a 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1006,40 +1006,6 @@ no_thread_group: return 0; } -/* - * These functions flushes out all traces of the currently running executable - * so that a new one can be started - */ -static void flush_old_files(struct files_struct * files) -{ - long j = -1; - struct fdtable *fdt; - - spin_lock(&files->file_lock); - for (;;) { - unsigned long set, i; - - j++; - i = j * BITS_PER_LONG; - fdt = files_fdtable(files); - if (i >= fdt->max_fds) - break; - set = fdt->close_on_exec[j]; - if (!set) - continue; - fdt->close_on_exec[j] = 0; - spin_unlock(&files->file_lock); - for ( ; set ; i++,set >>= 1) { - if (set & 1) { - sys_close(i); - } - } - spin_lock(&files->file_lock); - - } - spin_unlock(&files->file_lock); -} - char *get_task_comm(char *buf, struct task_struct *tsk) { /* buf must be at least sizeof(tsk->comm) in size */ @@ -1050,6 +1016,11 @@ char *get_task_comm(char *buf, struct task_struct *tsk) } EXPORT_SYMBOL_GPL(get_task_comm); +/* + * These functions flushes out all traces of the currently running executable + * so that a new one can be started + */ + void set_task_comm(struct task_struct *tsk, char *buf) { task_lock(tsk); @@ -1171,7 +1142,7 @@ void setup_new_exec(struct linux_binprm * bprm) current->self_exec_id++; flush_signal_handlers(current, 0); - flush_old_files(current->files); + do_close_on_exec(current->files); } EXPORT_SYMBOL(setup_new_exec); diff --git a/fs/file.c b/fs/file.c index fd4694e688ab..92197dd9fdc8 100644 --- a/fs/file.c +++ b/fs/file.c @@ -652,6 +652,43 @@ out_unlock: return -EBADF; } +void do_close_on_exec(struct files_struct *files) +{ + unsigned i; + struct fdtable *fdt; + + /* exec unshares first */ + BUG_ON(atomic_read(&files->count) != 1); + spin_lock(&files->file_lock); + for (i = 0; ; i++) { + unsigned long set; + unsigned fd = i * BITS_PER_LONG; + fdt = files_fdtable(files); + if (fd >= fdt->max_fds) + break; + set = fdt->close_on_exec[i]; + if (!set) + continue; + fdt->close_on_exec[i] = 0; + for ( ; set ; fd++, set >>= 1) { + struct file *file; + if (!(set & 1)) + continue; + file = fdt->fd[fd]; + if (!file) + continue; + rcu_assign_pointer(fdt->fd[fd], NULL); + __put_unused_fd(files, fd); + spin_unlock(&files->file_lock); + filp_close(file, files); + cond_resched(); + spin_lock(&files->file_lock); + } + + } + spin_unlock(&files->file_lock); +} + struct file *fget(unsigned int fd) { struct file *file; diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 59488f2392bc..ef4b2137e6bc 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -118,6 +118,7 @@ void put_files_struct(struct files_struct *fs); void reset_files_struct(struct files_struct *); int unshare_files(struct files_struct **); struct files_struct *dup_fd(struct files_struct *, int *); +void do_close_on_exec(struct files_struct *); extern int __alloc_fd(struct files_struct *files, unsigned start, unsigned end, unsigned flags); -- cgit v1.2.3-58-ga151 From fe17f22d7fd0e344ef6447238f799bb49f670c6f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 21 Aug 2012 11:48:11 -0400 Subject: take purely descriptor-related stuff from fcntl.c to file.c Signed-off-by: Al Viro --- fs/fcntl.c | 131 ++------------------------------------------------ fs/file.c | 132 +++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/file.h | 2 + 3 files changed, 137 insertions(+), 128 deletions(-) (limited to 'fs') diff --git a/fs/fcntl.c b/fs/fcntl.c index 08e6af5c1b1f..40a5bfb72cca 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -26,127 +26,6 @@ #include #include -void set_close_on_exec(unsigned int fd, int flag) -{ - struct files_struct *files = current->files; - struct fdtable *fdt; - spin_lock(&files->file_lock); - fdt = files_fdtable(files); - if (flag) - __set_close_on_exec(fd, fdt); - else - __clear_close_on_exec(fd, fdt); - spin_unlock(&files->file_lock); -} - -static bool get_close_on_exec(unsigned int fd) -{ - struct files_struct *files = current->files; - struct fdtable *fdt; - bool res; - rcu_read_lock(); - fdt = files_fdtable(files); - res = close_on_exec(fd, fdt); - rcu_read_unlock(); - return res; -} - -SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags) -{ - int err = -EBADF; - struct file * file, *tofree; - struct files_struct * files = current->files; - struct fdtable *fdt; - - if ((flags & ~O_CLOEXEC) != 0) - return -EINVAL; - - if (unlikely(oldfd == newfd)) - return -EINVAL; - - if (newfd >= rlimit(RLIMIT_NOFILE)) - return -EMFILE; - - spin_lock(&files->file_lock); - err = expand_files(files, newfd); - file = fcheck(oldfd); - if (unlikely(!file)) - goto Ebadf; - if (unlikely(err < 0)) { - if (err == -EMFILE) - goto Ebadf; - goto out_unlock; - } - /* - * We need to detect attempts to do dup2() over allocated but still - * not finished descriptor. NB: OpenBSD avoids that at the price of - * extra work in their equivalent of fget() - they insert struct - * file immediately after grabbing descriptor, mark it larval if - * more work (e.g. actual opening) is needed and make sure that - * fget() treats larval files as absent. Potentially interesting, - * but while extra work in fget() is trivial, locking implications - * and amount of surgery on open()-related paths in VFS are not. - * FreeBSD fails with -EBADF in the same situation, NetBSD "solution" - * deadlocks in rather amusing ways, AFAICS. All of that is out of - * scope of POSIX or SUS, since neither considers shared descriptor - * tables and this condition does not arise without those. - */ - err = -EBUSY; - fdt = files_fdtable(files); - tofree = fdt->fd[newfd]; - if (!tofree && fd_is_open(newfd, fdt)) - goto out_unlock; - get_file(file); - rcu_assign_pointer(fdt->fd[newfd], file); - __set_open_fd(newfd, fdt); - if (flags & O_CLOEXEC) - __set_close_on_exec(newfd, fdt); - else - __clear_close_on_exec(newfd, fdt); - spin_unlock(&files->file_lock); - - if (tofree) - filp_close(tofree, files); - - return newfd; - -Ebadf: - err = -EBADF; -out_unlock: - spin_unlock(&files->file_lock); - return err; -} - -SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd) -{ - if (unlikely(newfd == oldfd)) { /* corner case */ - struct files_struct *files = current->files; - int retval = oldfd; - - rcu_read_lock(); - if (!fcheck_files(files, oldfd)) - retval = -EBADF; - rcu_read_unlock(); - return retval; - } - return sys_dup3(oldfd, newfd, 0); -} - -SYSCALL_DEFINE1(dup, unsigned int, fildes) -{ - int ret = -EBADF; - struct file *file = fget_raw(fildes); - - if (file) { - ret = get_unused_fd(); - if (ret >= 0) - fd_install(ret, file); - else - fput(file); - } - return ret; -} - #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME) static int setfl(int fd, struct file * filp, unsigned long arg) @@ -376,14 +255,10 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, switch (cmd) { case F_DUPFD: + err = f_dupfd(arg, filp, 0); + break; case F_DUPFD_CLOEXEC: - if (arg >= rlimit(RLIMIT_NOFILE)) - break; - err = alloc_fd(arg, cmd == F_DUPFD_CLOEXEC ? O_CLOEXEC : 0); - if (err >= 0) { - get_file(filp); - fd_install(err, filp); - } + err = f_dupfd(arg, filp, FD_CLOEXEC); break; case F_GETFD: err = get_close_on_exec(fd) ? FD_CLOEXEC : 0; diff --git a/fs/file.c b/fs/file.c index 92197dd9fdc8..7f29544755d0 100644 --- a/fs/file.c +++ b/fs/file.c @@ -6,6 +6,7 @@ * Manage the dynamic fd arrays in the process files_struct. */ +#include #include #include #include @@ -794,3 +795,134 @@ struct file *fget_raw_light(unsigned int fd, int *fput_needed) return file; } + +void set_close_on_exec(unsigned int fd, int flag) +{ + struct files_struct *files = current->files; + struct fdtable *fdt; + spin_lock(&files->file_lock); + fdt = files_fdtable(files); + if (flag) + __set_close_on_exec(fd, fdt); + else + __clear_close_on_exec(fd, fdt); + spin_unlock(&files->file_lock); +} + +bool get_close_on_exec(unsigned int fd) +{ + struct files_struct *files = current->files; + struct fdtable *fdt; + bool res; + rcu_read_lock(); + fdt = files_fdtable(files); + res = close_on_exec(fd, fdt); + rcu_read_unlock(); + return res; +} + +SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags) +{ + int err = -EBADF; + struct file * file, *tofree; + struct files_struct * files = current->files; + struct fdtable *fdt; + + if ((flags & ~O_CLOEXEC) != 0) + return -EINVAL; + + if (newfd >= rlimit(RLIMIT_NOFILE)) + return -EMFILE; + + spin_lock(&files->file_lock); + err = expand_files(files, newfd); + file = fcheck(oldfd); + if (unlikely(!file)) + goto Ebadf; + if (unlikely(err < 0)) { + if (err == -EMFILE) + goto Ebadf; + goto out_unlock; + } + /* + * We need to detect attempts to do dup2() over allocated but still + * not finished descriptor. NB: OpenBSD avoids that at the price of + * extra work in their equivalent of fget() - they insert struct + * file immediately after grabbing descriptor, mark it larval if + * more work (e.g. actual opening) is needed and make sure that + * fget() treats larval files as absent. Potentially interesting, + * but while extra work in fget() is trivial, locking implications + * and amount of surgery on open()-related paths in VFS are not. + * FreeBSD fails with -EBADF in the same situation, NetBSD "solution" + * deadlocks in rather amusing ways, AFAICS. All of that is out of + * scope of POSIX or SUS, since neither considers shared descriptor + * tables and this condition does not arise without those. + */ + err = -EBUSY; + fdt = files_fdtable(files); + tofree = fdt->fd[newfd]; + if (!tofree && fd_is_open(newfd, fdt)) + goto out_unlock; + get_file(file); + rcu_assign_pointer(fdt->fd[newfd], file); + __set_open_fd(newfd, fdt); + if (flags & O_CLOEXEC) + __set_close_on_exec(newfd, fdt); + else + __clear_close_on_exec(newfd, fdt); + spin_unlock(&files->file_lock); + + if (tofree) + filp_close(tofree, files); + + return newfd; + +Ebadf: + err = -EBADF; +out_unlock: + spin_unlock(&files->file_lock); + return err; +} + +SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd) +{ + if (unlikely(newfd == oldfd)) { /* corner case */ + struct files_struct *files = current->files; + int retval = oldfd; + + rcu_read_lock(); + if (!fcheck_files(files, oldfd)) + retval = -EBADF; + rcu_read_unlock(); + return retval; + } + return sys_dup3(oldfd, newfd, 0); +} + +SYSCALL_DEFINE1(dup, unsigned int, fildes) +{ + int ret = -EBADF; + struct file *file = fget_raw(fildes); + + if (file) { + ret = get_unused_fd(); + if (ret >= 0) + fd_install(ret, file); + else + fput(file); + } + return ret; +} + +int f_dupfd(unsigned int from, struct file *file, unsigned flags) +{ + int err; + if (from >= rlimit(RLIMIT_NOFILE)) + return -EINVAL; + err = alloc_fd(from, flags); + if (err >= 0) { + get_file(file); + fd_install(err, file); + } + return err; +} diff --git a/include/linux/file.h b/include/linux/file.h index 86795ec6027d..da84fa0f4579 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -30,7 +30,9 @@ extern struct file *fget(unsigned int fd); extern struct file *fget_light(unsigned int fd, int *fput_needed); extern struct file *fget_raw(unsigned int fd); extern struct file *fget_raw_light(unsigned int fd, int *fput_needed); +extern int f_dupfd(unsigned int from, struct file *file, unsigned flags); extern void set_close_on_exec(unsigned int fd, int flag); +extern bool get_close_on_exec(unsigned int fd); extern void put_filp(struct file *); extern int alloc_fd(unsigned start, unsigned flags); extern int get_unused_fd_flags(unsigned flags); -- cgit v1.2.3-58-ga151 From 8280d16172243702ed43432f826ca6130edb4086 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 21 Aug 2012 12:11:46 -0400 Subject: new helper: replace_fd() analog of dup2(), except that it takes struct file * as source. Signed-off-by: Al Viro --- fs/exec.c | 11 +------ fs/file.c | 91 +++++++++++++++++++++++++++++++++++----------------- include/linux/file.h | 1 + 3 files changed, 64 insertions(+), 39 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index f2b6af585d4a..3fc74681cc6c 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -2041,23 +2041,14 @@ static void wait_for_dump_helpers(struct file *file) static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) { struct file *files[2]; - struct fdtable *fdt; struct coredump_params *cp = (struct coredump_params *)info->data; - struct files_struct *cf = current->files; int err = create_pipe_files(files, 0); if (err) return err; cp->file = files[1]; - sys_close(0); - fd_install(0, files[0]); - spin_lock(&cf->file_lock); - fdt = files_fdtable(cf); - __set_open_fd(0, fdt); - __clear_close_on_exec(0, fdt); - spin_unlock(&cf->file_lock); - + replace_fd(0, files[0], 0); /* and disallow core files too */ current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1}; diff --git a/fs/file.c b/fs/file.c index 7f29544755d0..a7bbe0324478 100644 --- a/fs/file.c +++ b/fs/file.c @@ -821,29 +821,12 @@ bool get_close_on_exec(unsigned int fd) return res; } -SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags) +static int do_dup2(struct files_struct *files, + struct file *file, unsigned fd, unsigned flags) { - int err = -EBADF; - struct file * file, *tofree; - struct files_struct * files = current->files; + struct file *tofree; struct fdtable *fdt; - if ((flags & ~O_CLOEXEC) != 0) - return -EINVAL; - - if (newfd >= rlimit(RLIMIT_NOFILE)) - return -EMFILE; - - spin_lock(&files->file_lock); - err = expand_files(files, newfd); - file = fcheck(oldfd); - if (unlikely(!file)) - goto Ebadf; - if (unlikely(err < 0)) { - if (err == -EMFILE) - goto Ebadf; - goto out_unlock; - } /* * We need to detect attempts to do dup2() over allocated but still * not finished descriptor. NB: OpenBSD avoids that at the price of @@ -858,24 +841,74 @@ SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags) * scope of POSIX or SUS, since neither considers shared descriptor * tables and this condition does not arise without those. */ - err = -EBUSY; fdt = files_fdtable(files); - tofree = fdt->fd[newfd]; - if (!tofree && fd_is_open(newfd, fdt)) - goto out_unlock; + tofree = fdt->fd[fd]; + if (!tofree && fd_is_open(fd, fdt)) + goto Ebusy; get_file(file); - rcu_assign_pointer(fdt->fd[newfd], file); - __set_open_fd(newfd, fdt); + rcu_assign_pointer(fdt->fd[fd], file); + __set_open_fd(fd, fdt); if (flags & O_CLOEXEC) - __set_close_on_exec(newfd, fdt); + __set_close_on_exec(fd, fdt); else - __clear_close_on_exec(newfd, fdt); + __clear_close_on_exec(fd, fdt); spin_unlock(&files->file_lock); if (tofree) filp_close(tofree, files); - return newfd; + return fd; + +Ebusy: + spin_unlock(&files->file_lock); + return -EBUSY; +} + +int replace_fd(unsigned fd, struct file *file, unsigned flags) +{ + int err; + struct files_struct *files = current->files; + + if (!file) + return __close_fd(files, fd); + + if (fd >= rlimit(RLIMIT_NOFILE)) + return -EMFILE; + + spin_lock(&files->file_lock); + err = expand_files(files, fd); + if (unlikely(err < 0)) + goto out_unlock; + return do_dup2(files, file, fd, flags); + +out_unlock: + spin_unlock(&files->file_lock); + return err; +} + +SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags) +{ + int err = -EBADF; + struct file *file; + struct files_struct *files = current->files; + + if ((flags & ~O_CLOEXEC) != 0) + return -EINVAL; + + if (newfd >= rlimit(RLIMIT_NOFILE)) + return -EMFILE; + + spin_lock(&files->file_lock); + err = expand_files(files, newfd); + file = fcheck(oldfd); + if (unlikely(!file)) + goto Ebadf; + if (unlikely(err < 0)) { + if (err == -EMFILE) + goto Ebadf; + goto out_unlock; + } + return do_dup2(files, file, newfd, flags); Ebadf: err = -EBADF; diff --git a/include/linux/file.h b/include/linux/file.h index da84fa0f4579..6239591a6122 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -31,6 +31,7 @@ extern struct file *fget_light(unsigned int fd, int *fput_needed); extern struct file *fget_raw(unsigned int fd); extern struct file *fget_raw_light(unsigned int fd, int *fput_needed); extern int f_dupfd(unsigned int from, struct file *file, unsigned flags); +extern int replace_fd(unsigned fd, struct file *file, unsigned flags); extern void set_close_on_exec(unsigned int fd, int flag); extern bool get_close_on_exec(unsigned int fd); extern void put_filp(struct file *); -- cgit v1.2.3-58-ga151 From b8318b01a8f7f760ae3ecae052ccc7fc123d9508 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 21 Aug 2012 20:09:42 -0400 Subject: take __{set,clear}_{open_fd,close_on_exec}() into fs/file.c nobody uses those outside anymore. Signed-off-by: Al Viro --- fs/file.c | 20 ++++++++++++++++++++ include/linux/fdtable.h | 20 -------------------- 2 files changed, 20 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/file.c b/fs/file.c index a7bbe0324478..40ddef9fe041 100644 --- a/fs/file.c +++ b/fs/file.c @@ -256,6 +256,26 @@ int expand_files(struct files_struct *files, int nr) return expand_fdtable(files, nr); } +static inline void __set_close_on_exec(int fd, struct fdtable *fdt) +{ + __set_bit(fd, fdt->close_on_exec); +} + +static inline void __clear_close_on_exec(int fd, struct fdtable *fdt) +{ + __clear_bit(fd, fdt->close_on_exec); +} + +static inline void __set_open_fd(int fd, struct fdtable *fdt) +{ + __set_bit(fd, fdt->open_fds); +} + +static inline void __clear_open_fd(int fd, struct fdtable *fdt) +{ + __clear_bit(fd, fdt->open_fds); +} + static int count_open_files(struct fdtable *fdt) { int size = fdt->max_fds; diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index ef4b2137e6bc..9ff26319d44f 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -30,31 +30,11 @@ struct fdtable { struct fdtable *next; }; -static inline void __set_close_on_exec(int fd, struct fdtable *fdt) -{ - __set_bit(fd, fdt->close_on_exec); -} - -static inline void __clear_close_on_exec(int fd, struct fdtable *fdt) -{ - __clear_bit(fd, fdt->close_on_exec); -} - static inline bool close_on_exec(int fd, const struct fdtable *fdt) { return test_bit(fd, fdt->close_on_exec); } -static inline void __set_open_fd(int fd, struct fdtable *fdt) -{ - __set_bit(fd, fdt->open_fds); -} - -static inline void __clear_open_fd(int fd, struct fdtable *fdt) -{ - __clear_bit(fd, fdt->open_fds); -} - static inline bool fd_is_open(int fd, const struct fdtable *fdt) { return test_bit(fd, fdt->open_fds); -- cgit v1.2.3-58-ga151 From ad47bd7252bf402fe7dba92f5240b5ed16832ae7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 21 Aug 2012 20:11:34 -0400 Subject: make expand_files() and alloc_fd() static no callers outside of fs/file.c left Signed-off-by: Al Viro --- fs/file.c | 4 ++-- include/linux/fdtable.h | 1 - include/linux/file.h | 1 - 3 files changed, 2 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/file.c b/fs/file.c index 40ddef9fe041..967bd0dadbe5 100644 --- a/fs/file.c +++ b/fs/file.c @@ -238,7 +238,7 @@ static int expand_fdtable(struct files_struct *files, int nr) * expanded and execution may have blocked. * The files->file_lock should be held on entry, and will be held on exit. */ -int expand_files(struct files_struct *files, int nr) +static int expand_files(struct files_struct *files, int nr) { struct fdtable *fdt; @@ -580,7 +580,7 @@ out: return error; } -int alloc_fd(unsigned start, unsigned flags) +static int alloc_fd(unsigned start, unsigned flags) { return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags); } diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 9ff26319d44f..de2b71caa0f0 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -73,7 +73,6 @@ struct file_operations; struct vfsmount; struct dentry; -extern int expand_files(struct files_struct *, int nr); extern void __init files_defer_init(void); static inline struct file * fcheck_files(struct files_struct *files, unsigned int fd) diff --git a/include/linux/file.h b/include/linux/file.h index 6239591a6122..6eee54aea279 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -35,7 +35,6 @@ extern int replace_fd(unsigned fd, struct file *file, unsigned flags); extern void set_close_on_exec(unsigned int fd, int flag); extern bool get_close_on_exec(unsigned int fd); extern void put_filp(struct file *); -extern int alloc_fd(unsigned start, unsigned flags); extern int get_unused_fd_flags(unsigned flags); #define get_unused_fd() get_unused_fd_flags(0) extern void put_unused_fd(unsigned int fd); -- cgit v1.2.3-58-ga151 From c3c073f808b22dfae15ef8412b6f7b998644139a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 21 Aug 2012 22:32:06 -0400 Subject: new helper: iterate_fd() iterates through the opened files in given descriptor table, calling a supplied function; we stop once non-zero is returned. Callback gets struct file *, descriptor number and const void * argument passed to iterator. It is called with files->file_lock held, so it is not allowed to block. tty_io, netprio_cgroup and selinux flush_unauthorized_files() converted to its use. Signed-off-by: Al Viro --- drivers/tty/tty_io.c | 36 +++++++++++------------------- fs/file.c | 21 +++++++++++++++++ include/linux/fdtable.h | 3 +++ net/core/netprio_cgroup.c | 38 ++++++++++--------------------- security/selinux/hooks.c | 57 ++++++++++++++++++----------------------------- 5 files changed, 71 insertions(+), 84 deletions(-) (limited to 'fs') diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index b425c79675ad..71d95cfbabec 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -2791,6 +2791,13 @@ static long tty_compat_ioctl(struct file *file, unsigned int cmd, } #endif +static int this_tty(const void *t, struct file *file, unsigned fd) +{ + if (likely(file->f_op->read != tty_read)) + return 0; + return file_tty(file) != t ? 0 : fd + 1; +} + /* * This implements the "Secure Attention Key" --- the idea is to * prevent trojan horses by killing all processes associated with this @@ -2818,8 +2825,6 @@ void __do_SAK(struct tty_struct *tty) struct task_struct *g, *p; struct pid *session; int i; - struct file *filp; - struct fdtable *fdt; if (!tty) return; @@ -2849,27 +2854,12 @@ void __do_SAK(struct tty_struct *tty) continue; } task_lock(p); - if (p->files) { - /* - * We don't take a ref to the file, so we must - * hold ->file_lock instead. - */ - spin_lock(&p->files->file_lock); - fdt = files_fdtable(p->files); - for (i = 0; i < fdt->max_fds; i++) { - filp = fcheck_files(p->files, i); - if (!filp) - continue; - if (filp->f_op->read == tty_read && - file_tty(filp) == tty) { - printk(KERN_NOTICE "SAK: killed process %d" - " (%s): fd#%d opened to the tty\n", - task_pid_nr(p), p->comm, i); - force_sig(SIGKILL, p); - break; - } - } - spin_unlock(&p->files->file_lock); + i = iterate_fd(p->files, 0, this_tty, tty); + if (i != 0) { + printk(KERN_NOTICE "SAK: killed process %d" + " (%s): fd#%d opened to the tty\n", + task_pid_nr(p), p->comm, i - 1); + force_sig(SIGKILL, p); } task_unlock(p); } while_each_thread(g, p); diff --git a/fs/file.c b/fs/file.c index 967bd0dadbe5..e6e418122587 100644 --- a/fs/file.c +++ b/fs/file.c @@ -979,3 +979,24 @@ int f_dupfd(unsigned int from, struct file *file, unsigned flags) } return err; } + +int iterate_fd(struct files_struct *files, unsigned n, + int (*f)(const void *, struct file *, unsigned), + const void *p) +{ + struct fdtable *fdt; + struct file *file; + int res = 0; + if (!files) + return 0; + spin_lock(&files->file_lock); + fdt = files_fdtable(files); + while (!res && n < fdt->max_fds) { + file = rcu_dereference_check_fdtable(files, fdt->fd[n++]); + if (file) + res = f(p, file, n); + } + spin_unlock(&files->file_lock); + return res; +} +EXPORT_SYMBOL(iterate_fd); diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index de2b71caa0f0..fb7dacae0522 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -98,6 +98,9 @@ void reset_files_struct(struct files_struct *); int unshare_files(struct files_struct **); struct files_struct *dup_fd(struct files_struct *, int *); void do_close_on_exec(struct files_struct *); +int iterate_fd(struct files_struct *, unsigned, + int (*)(const void *, struct file *, unsigned), + const void *); extern int __alloc_fd(struct files_struct *files, unsigned start, unsigned end, unsigned flags); diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index c75e3f9d060f..5ffd084c6a83 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -272,38 +272,24 @@ out_free_devname: return ret; } +static int update_netprio(const void *v, struct file *file, unsigned n) +{ + int err; + struct socket *sock = sock_from_file(file, &err); + if (sock) + sock->sk->sk_cgrp_prioidx = (u32)(unsigned long)v; + return 0; +} + void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) { struct task_struct *p; + void *v; cgroup_taskset_for_each(p, cgrp, tset) { - unsigned int fd; - struct fdtable *fdt; - struct files_struct *files; - task_lock(p); - files = p->files; - if (!files) { - task_unlock(p); - continue; - } - - spin_lock(&files->file_lock); - fdt = files_fdtable(files); - for (fd = 0; fd < fdt->max_fds; fd++) { - struct file *file; - struct socket *sock; - int err; - - file = fcheck_files(files, fd); - if (!file) - continue; - - sock = sock_from_file(file, &err); - if (sock) - sock_update_netprioidx(sock->sk, p); - } - spin_unlock(&files->file_lock); + v = (void *)(unsigned long)task_netprioidx(p); + iterate_fd(p->files, 0, update_netprio, v); task_unlock(p); } } diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 00b50113642d..4dfbcea10eb7 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2088,15 +2088,19 @@ static int selinux_bprm_secureexec(struct linux_binprm *bprm) return (atsecure || cap_bprm_secureexec(bprm)); } +static int match_file(const void *p, struct file *file, unsigned fd) +{ + return file_has_perm(p, file, file_to_av(file)) ? fd + 1 : 0; +} + /* Derived from fs/exec.c:flush_old_files. */ static inline void flush_unauthorized_files(const struct cred *cred, struct files_struct *files) { struct file *file, *devnull = NULL; struct tty_struct *tty; - struct fdtable *fdt; - long j = -1; int drop_tty = 0; + unsigned n; tty = get_current_tty(); if (tty) { @@ -2123,41 +2127,24 @@ static inline void flush_unauthorized_files(const struct cred *cred, no_tty(); /* Revalidate access to inherited open files. */ - spin_lock(&files->file_lock); - for (;;) { - unsigned long set, i; - j++; - i = j * BITS_PER_LONG; - fdt = files_fdtable(files); - if (i >= fdt->max_fds) - break; - set = fdt->open_fds[j]; - if (!set) - continue; - spin_unlock(&files->file_lock); - for ( ; set ; i++, set >>= 1) { - if (!(set & 1)) - continue; - file = fget(i); - if (!file) - continue; - if (file_has_perm(cred, file, file_to_av(file))) { - if (devnull) { - get_file(devnull); - } else { - devnull = dentry_open(&selinux_null, - O_RDWR, cred); - if (IS_ERR(devnull)) - devnull = NULL; - } - replace_fd(i, devnull, 0); - } - fput(file); - } - spin_lock(&files->file_lock); + n = iterate_fd(files, 0, match_file, cred); + if (!n) /* none found? */ + return; + devnull = dentry_open(&selinux_null, O_RDWR, cred); + if (!IS_ERR(devnull)) { + /* replace all the matching ones with this */ + do { + get_file(devnull); + replace_fd(n - 1, devnull, 0); + } while ((n = iterate_fd(files, n, match_file, cred)) != 0); + fput(devnull); + } else { + /* just close all the matching ones */ + do { + replace_fd(n - 1, NULL, 0); + } while ((n = iterate_fd(files, n, match_file, cred)) != 0); } - spin_unlock(&files->file_lock); } /* -- cgit v1.2.3-58-ga151 From 179e037fc1370288188cb1f90b81156d75a3cb2d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 21 Aug 2012 22:43:47 -0400 Subject: do_coredump(): make sure that descriptor table isn't shared Signed-off-by: Al Viro --- fs/exec.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 3fc74681cc6c..beb05a95e4a3 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -2066,6 +2066,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) int retval = 0; int flag = 0; int ispipe; + struct files_struct *displaced; bool need_nonrelative = false; static atomic_t core_dump_count = ATOMIC_INIT(0); struct coredump_params cprm = { @@ -2219,6 +2220,12 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) goto close_fail; } + /* get us an unshared descriptor table; almost always a no-op */ + retval = unshare_files(&displaced); + if (retval) + goto close_fail; + if (displaced) + put_files_struct(displaced); retval = binfmt->core_dump(&cprm); if (retval) current->signal->group_exit_code |= 0x80; -- cgit v1.2.3-58-ga151 From 864bdb3b6cbd9911222543fef1cfe36f88183f44 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 22 Aug 2012 18:42:10 -0400 Subject: new helper: daemonize_descriptors() descriptor-related parts of daemonize, done right. As the result we simplify the locking rules for ->files - we hold task_lock in *all* cases when we modify ->files. Signed-off-by: Al Viro --- fs/file.c | 6 ++++++ include/linux/fdtable.h | 1 + kernel/exit.c | 4 +--- 3 files changed, 8 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/file.c b/fs/file.c index e6e418122587..15750b80e3ce 100644 --- a/fs/file.c +++ b/fs/file.c @@ -519,6 +519,12 @@ struct files_struct init_files = { .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), }; +void daemonize_descriptors(void) +{ + atomic_inc(&init_files.count); + reset_files_struct(&init_files); +} + /* * allocate a file descriptor, mark it busy. */ diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index fb7dacae0522..45052aa814c8 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -95,6 +95,7 @@ struct task_struct; struct files_struct *get_files_struct(struct task_struct *); void put_files_struct(struct files_struct *fs); void reset_files_struct(struct files_struct *); +void daemonize_descriptors(void); int unshare_files(struct files_struct **); struct files_struct *dup_fd(struct files_struct *, int *); void do_close_on_exec(struct files_struct *); diff --git a/kernel/exit.c b/kernel/exit.c index 20dfc7617c2e..095113321318 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -457,9 +457,7 @@ void daemonize(const char *name, ...) /* Become as one with the init task */ daemonize_fs_struct(); - exit_files(current); - current->files = init_task.files; - atomic_inc(¤t->files->count); + daemonize_descriptors(); reparent_to_kthreadd(); } -- cgit v1.2.3-58-ga151 From faf60af17f8da87e1c87a6be527344791025ce9e Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 23 Aug 2012 14:43:24 +0400 Subject: procfs: Move /proc/pid/fd[info] handling code to fd.[ch] This patch prepares the ground for further extension of /proc/pid/fd[info] handling code by moving fdinfo handling code into fs/proc/fd.c. I think such move makes both fs/proc/base.c and fs/proc/fd.c easier to read. Signed-off-by: Cyrill Gorcunov Acked-by: Pavel Emelyanov CC: Al Viro CC: Alexey Dobriyan CC: Andrew Morton CC: James Bottomley CC: "Aneesh Kumar K.V" CC: Alexey Dobriyan CC: Matthew Helsley CC: "J. Bruce Fields" CC: "Aneesh Kumar K.V" Signed-off-by: Al Viro --- fs/proc/Makefile | 2 +- fs/proc/base.c | 388 +---------------------------------------------------- fs/proc/fd.c | 351 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/proc/fd.h | 14 ++ fs/proc/internal.h | 48 +++++++ 5 files changed, 416 insertions(+), 387 deletions(-) create mode 100644 fs/proc/fd.c create mode 100644 fs/proc/fd.h (limited to 'fs') diff --git a/fs/proc/Makefile b/fs/proc/Makefile index c1c729335924..99349efbbc2b 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -8,7 +8,7 @@ proc-y := nommu.o task_nommu.o proc-$(CONFIG_MMU) := mmu.o task_mmu.o proc-y += inode.o root.o base.o generic.o array.o \ - proc_tty.o + proc_tty.o fd.o proc-y += cmdline.o proc-y += consoles.o proc-y += cpuinfo.o diff --git a/fs/proc/base.c b/fs/proc/base.c index 1b6c84cbdb73..b55c3bb298e3 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -90,6 +90,7 @@ #endif #include #include "internal.h" +#include "fd.h" /* NOTE: * Implementing inode permission operations in /proc is almost @@ -136,8 +137,6 @@ struct pid_entry { NULL, &proc_single_file_operations, \ { .proc_show = show } ) -static int proc_fd_permission(struct inode *inode, int mask); - /* * Count the number of hardlinks for the pid_entry table, excluding the . * and .. links. @@ -1492,7 +1491,7 @@ out: return error; } -static const struct inode_operations proc_pid_link_inode_operations = { +const struct inode_operations proc_pid_link_inode_operations = { .readlink = proc_pid_readlink, .follow_link = proc_pid_follow_link, .setattr = proc_setattr, @@ -1501,21 +1500,6 @@ static const struct inode_operations proc_pid_link_inode_operations = { /* building an inode */ -static int task_dumpable(struct task_struct *task) -{ - int dumpable = 0; - struct mm_struct *mm; - - task_lock(task); - mm = task->mm; - if (mm) - dumpable = get_dumpable(mm); - task_unlock(task); - if(dumpable == 1) - return 1; - return 0; -} - struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task) { struct inode * inode; @@ -1641,15 +1625,6 @@ int pid_revalidate(struct dentry *dentry, unsigned int flags) return 0; } -static int pid_delete_dentry(const struct dentry * dentry) -{ - /* Is the task we represent dead? - * If so, then don't put the dentry on the lru list, - * kill it immediately. - */ - return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; -} - const struct dentry_operations pid_dentry_operations = { .d_revalidate = pid_revalidate, @@ -1712,289 +1687,6 @@ end_instantiate: return filldir(dirent, name, len, filp->f_pos, ino, type); } -static unsigned name_to_int(struct dentry *dentry) -{ - const char *name = dentry->d_name.name; - int len = dentry->d_name.len; - unsigned n = 0; - - if (len > 1 && *name == '0') - goto out; - while (len-- > 0) { - unsigned c = *name++ - '0'; - if (c > 9) - goto out; - if (n >= (~0U-9)/10) - goto out; - n *= 10; - n += c; - } - return n; -out: - return ~0U; -} - -#define PROC_FDINFO_MAX 64 - -static int proc_fd_info(struct inode *inode, struct path *path, char *info) -{ - struct task_struct *task = get_proc_task(inode); - struct files_struct *files = NULL; - struct file *file; - int fd = proc_fd(inode); - - if (task) { - files = get_files_struct(task); - put_task_struct(task); - } - if (files) { - /* - * We are not taking a ref to the file structure, so we must - * hold ->file_lock. - */ - spin_lock(&files->file_lock); - file = fcheck_files(files, fd); - if (file) { - unsigned int f_flags; - struct fdtable *fdt; - - fdt = files_fdtable(files); - f_flags = file->f_flags & ~O_CLOEXEC; - if (close_on_exec(fd, fdt)) - f_flags |= O_CLOEXEC; - - if (path) { - *path = file->f_path; - path_get(&file->f_path); - } - if (info) - snprintf(info, PROC_FDINFO_MAX, - "pos:\t%lli\n" - "flags:\t0%o\n", - (long long) file->f_pos, - f_flags); - spin_unlock(&files->file_lock); - put_files_struct(files); - return 0; - } - spin_unlock(&files->file_lock); - put_files_struct(files); - } - return -ENOENT; -} - -static int proc_fd_link(struct dentry *dentry, struct path *path) -{ - return proc_fd_info(dentry->d_inode, path, NULL); -} - -static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags) -{ - struct inode *inode; - struct task_struct *task; - int fd; - struct files_struct *files; - const struct cred *cred; - - if (flags & LOOKUP_RCU) - return -ECHILD; - - inode = dentry->d_inode; - task = get_proc_task(inode); - fd = proc_fd(inode); - - if (task) { - files = get_files_struct(task); - if (files) { - struct file *file; - rcu_read_lock(); - file = fcheck_files(files, fd); - if (file) { - unsigned f_mode = file->f_mode; - - rcu_read_unlock(); - put_files_struct(files); - - if (task_dumpable(task)) { - rcu_read_lock(); - cred = __task_cred(task); - inode->i_uid = cred->euid; - inode->i_gid = cred->egid; - rcu_read_unlock(); - } else { - inode->i_uid = GLOBAL_ROOT_UID; - inode->i_gid = GLOBAL_ROOT_GID; - } - - if (S_ISLNK(inode->i_mode)) { - unsigned i_mode = S_IFLNK; - if (f_mode & FMODE_READ) - i_mode |= S_IRUSR | S_IXUSR; - if (f_mode & FMODE_WRITE) - i_mode |= S_IWUSR | S_IXUSR; - inode->i_mode = i_mode; - } - - security_task_to_inode(task, inode); - put_task_struct(task); - return 1; - } - rcu_read_unlock(); - put_files_struct(files); - } - put_task_struct(task); - } - d_drop(dentry); - return 0; -} - -static const struct dentry_operations tid_fd_dentry_operations = -{ - .d_revalidate = tid_fd_revalidate, - .d_delete = pid_delete_dentry, -}; - -static struct dentry *proc_fd_instantiate(struct inode *dir, - struct dentry *dentry, struct task_struct *task, const void *ptr) -{ - unsigned fd = (unsigned long)ptr; - struct inode *inode; - struct proc_inode *ei; - struct dentry *error = ERR_PTR(-ENOENT); - - inode = proc_pid_make_inode(dir->i_sb, task); - if (!inode) - goto out; - ei = PROC_I(inode); - ei->fd = fd; - - inode->i_mode = S_IFLNK; - inode->i_op = &proc_pid_link_inode_operations; - inode->i_size = 64; - ei->op.proc_get_link = proc_fd_link; - d_set_d_op(dentry, &tid_fd_dentry_operations); - d_add(dentry, inode); - /* Close the race of the process dying before we return the dentry */ - if (tid_fd_revalidate(dentry, 0)) - error = NULL; - - out: - return error; -} - -static struct dentry *proc_lookupfd_common(struct inode *dir, - struct dentry *dentry, - instantiate_t instantiate) -{ - struct task_struct *task = get_proc_task(dir); - unsigned fd = name_to_int(dentry); - struct dentry *result = ERR_PTR(-ENOENT); - - if (!task) - goto out_no_task; - if (fd == ~0U) - goto out; - - result = instantiate(dir, dentry, task, (void *)(unsigned long)fd); -out: - put_task_struct(task); -out_no_task: - return result; -} - -static int proc_readfd_common(struct file * filp, void * dirent, - filldir_t filldir, instantiate_t instantiate) -{ - struct dentry *dentry = filp->f_path.dentry; - struct inode *inode = dentry->d_inode; - struct task_struct *p = get_proc_task(inode); - unsigned int fd, ino; - int retval; - struct files_struct * files; - - retval = -ENOENT; - if (!p) - goto out_no_task; - retval = 0; - - fd = filp->f_pos; - switch (fd) { - case 0: - if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) - goto out; - filp->f_pos++; - case 1: - ino = parent_ino(dentry); - if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) - goto out; - filp->f_pos++; - default: - files = get_files_struct(p); - if (!files) - goto out; - rcu_read_lock(); - for (fd = filp->f_pos-2; - fd < files_fdtable(files)->max_fds; - fd++, filp->f_pos++) { - char name[PROC_NUMBUF]; - int len; - int rv; - - if (!fcheck_files(files, fd)) - continue; - rcu_read_unlock(); - - len = snprintf(name, sizeof(name), "%d", fd); - rv = proc_fill_cache(filp, dirent, filldir, - name, len, instantiate, p, - (void *)(unsigned long)fd); - if (rv < 0) - goto out_fd_loop; - rcu_read_lock(); - } - rcu_read_unlock(); -out_fd_loop: - put_files_struct(files); - } -out: - put_task_struct(p); -out_no_task: - return retval; -} - -static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry, - unsigned int flags) -{ - return proc_lookupfd_common(dir, dentry, proc_fd_instantiate); -} - -static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir) -{ - return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate); -} - -static ssize_t proc_fdinfo_read(struct file *file, char __user *buf, - size_t len, loff_t *ppos) -{ - char tmp[PROC_FDINFO_MAX]; - int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp); - if (!err) - err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp)); - return err; -} - -static const struct file_operations proc_fdinfo_file_operations = { - .open = nonseekable_open, - .read = proc_fdinfo_read, - .llseek = no_llseek, -}; - -static const struct file_operations proc_fd_operations = { - .read = generic_read_dir, - .readdir = proc_readfd, - .llseek = default_llseek, -}; - #ifdef CONFIG_CHECKPOINT_RESTORE /* @@ -2337,82 +2029,6 @@ static const struct file_operations proc_map_files_operations = { #endif /* CONFIG_CHECKPOINT_RESTORE */ -/* - * /proc/pid/fd needs a special permission handler so that a process can still - * access /proc/self/fd after it has executed a setuid(). - */ -static int proc_fd_permission(struct inode *inode, int mask) -{ - int rv = generic_permission(inode, mask); - if (rv == 0) - return 0; - if (task_pid(current) == proc_pid(inode)) - rv = 0; - return rv; -} - -/* - * proc directories can do almost nothing.. - */ -static const struct inode_operations proc_fd_inode_operations = { - .lookup = proc_lookupfd, - .permission = proc_fd_permission, - .setattr = proc_setattr, -}; - -static struct dentry *proc_fdinfo_instantiate(struct inode *dir, - struct dentry *dentry, struct task_struct *task, const void *ptr) -{ - unsigned fd = (unsigned long)ptr; - struct inode *inode; - struct proc_inode *ei; - struct dentry *error = ERR_PTR(-ENOENT); - - inode = proc_pid_make_inode(dir->i_sb, task); - if (!inode) - goto out; - ei = PROC_I(inode); - ei->fd = fd; - inode->i_mode = S_IFREG | S_IRUSR; - inode->i_fop = &proc_fdinfo_file_operations; - d_set_d_op(dentry, &tid_fd_dentry_operations); - d_add(dentry, inode); - /* Close the race of the process dying before we return the dentry */ - if (tid_fd_revalidate(dentry, 0)) - error = NULL; - - out: - return error; -} - -static struct dentry *proc_lookupfdinfo(struct inode *dir, - struct dentry *dentry, - unsigned int flags) -{ - return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate); -} - -static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir) -{ - return proc_readfd_common(filp, dirent, filldir, - proc_fdinfo_instantiate); -} - -static const struct file_operations proc_fdinfo_operations = { - .read = generic_read_dir, - .readdir = proc_readfdinfo, - .llseek = default_llseek, -}; - -/* - * proc directories can do almost nothing.. - */ -static const struct inode_operations proc_fdinfo_inode_operations = { - .lookup = proc_lookupfdinfo, - .setattr = proc_setattr, -}; - - static struct dentry *proc_pident_instantiate(struct inode *dir, struct dentry *dentry, struct task_struct *task, const void *ptr) { diff --git a/fs/proc/fd.c b/fs/proc/fd.c new file mode 100644 index 000000000000..1b0f932b4bd9 --- /dev/null +++ b/fs/proc/fd.c @@ -0,0 +1,351 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "internal.h" +#include "fd.h" + +#define PROC_FDINFO_MAX 64 + +static int proc_fd_info(struct inode *inode, struct path *path, char *info) +{ + struct task_struct *task = get_proc_task(inode); + struct files_struct *files = NULL; + int fd = proc_fd(inode); + struct file *file; + + if (task) { + files = get_files_struct(task); + put_task_struct(task); + } + if (files) { + /* + * We are not taking a ref to the file structure, so we must + * hold ->file_lock. + */ + spin_lock(&files->file_lock); + file = fcheck_files(files, fd); + if (file) { + unsigned int f_flags; + struct fdtable *fdt; + + fdt = files_fdtable(files); + f_flags = file->f_flags & ~O_CLOEXEC; + if (close_on_exec(fd, fdt)) + f_flags |= O_CLOEXEC; + + if (path) { + *path = file->f_path; + path_get(&file->f_path); + } + if (info) + snprintf(info, PROC_FDINFO_MAX, + "pos:\t%lli\n" + "flags:\t0%o\n", + (long long) file->f_pos, + f_flags); + spin_unlock(&files->file_lock); + put_files_struct(files); + return 0; + } + spin_unlock(&files->file_lock); + put_files_struct(files); + } + return -ENOENT; +} + +static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags) +{ + struct files_struct *files; + struct task_struct *task; + const struct cred *cred; + struct inode *inode; + int fd; + + if (flags & LOOKUP_RCU) + return -ECHILD; + + inode = dentry->d_inode; + task = get_proc_task(inode); + fd = proc_fd(inode); + + if (task) { + files = get_files_struct(task); + if (files) { + struct file *file; + + rcu_read_lock(); + file = fcheck_files(files, fd); + if (file) { + unsigned f_mode = file->f_mode; + + rcu_read_unlock(); + put_files_struct(files); + + if (task_dumpable(task)) { + rcu_read_lock(); + cred = __task_cred(task); + inode->i_uid = cred->euid; + inode->i_gid = cred->egid; + rcu_read_unlock(); + } else { + inode->i_uid = GLOBAL_ROOT_UID; + inode->i_gid = GLOBAL_ROOT_GID; + } + + if (S_ISLNK(inode->i_mode)) { + unsigned i_mode = S_IFLNK; + if (f_mode & FMODE_READ) + i_mode |= S_IRUSR | S_IXUSR; + if (f_mode & FMODE_WRITE) + i_mode |= S_IWUSR | S_IXUSR; + inode->i_mode = i_mode; + } + + security_task_to_inode(task, inode); + put_task_struct(task); + return 1; + } + rcu_read_unlock(); + put_files_struct(files); + } + put_task_struct(task); + } + + d_drop(dentry); + return 0; +} + +static const struct dentry_operations tid_fd_dentry_operations = { + .d_revalidate = tid_fd_revalidate, + .d_delete = pid_delete_dentry, +}; + +static int proc_fd_link(struct dentry *dentry, struct path *path) +{ + return proc_fd_info(dentry->d_inode, path, NULL); +} + +static struct dentry * +proc_fd_instantiate(struct inode *dir, struct dentry *dentry, + struct task_struct *task, const void *ptr) +{ + struct dentry *error = ERR_PTR(-ENOENT); + unsigned fd = (unsigned long)ptr; + struct proc_inode *ei; + struct inode *inode; + + inode = proc_pid_make_inode(dir->i_sb, task); + if (!inode) + goto out; + + ei = PROC_I(inode); + ei->fd = fd; + + inode->i_mode = S_IFLNK; + inode->i_op = &proc_pid_link_inode_operations; + inode->i_size = 64; + + ei->op.proc_get_link = proc_fd_link; + + d_set_d_op(dentry, &tid_fd_dentry_operations); + d_add(dentry, inode); + + /* Close the race of the process dying before we return the dentry */ + if (tid_fd_revalidate(dentry, 0)) + error = NULL; + out: + return error; +} + +static struct dentry *proc_lookupfd_common(struct inode *dir, + struct dentry *dentry, + instantiate_t instantiate) +{ + struct task_struct *task = get_proc_task(dir); + struct dentry *result = ERR_PTR(-ENOENT); + unsigned fd = name_to_int(dentry); + + if (!task) + goto out_no_task; + if (fd == ~0U) + goto out; + + result = instantiate(dir, dentry, task, (void *)(unsigned long)fd); +out: + put_task_struct(task); +out_no_task: + return result; +} + +static int proc_readfd_common(struct file * filp, void * dirent, + filldir_t filldir, instantiate_t instantiate) +{ + struct dentry *dentry = filp->f_path.dentry; + struct inode *inode = dentry->d_inode; + struct task_struct *p = get_proc_task(inode); + struct files_struct *files; + unsigned int fd, ino; + int retval; + + retval = -ENOENT; + if (!p) + goto out_no_task; + retval = 0; + + fd = filp->f_pos; + switch (fd) { + case 0: + if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) + goto out; + filp->f_pos++; + case 1: + ino = parent_ino(dentry); + if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) + goto out; + filp->f_pos++; + default: + files = get_files_struct(p); + if (!files) + goto out; + rcu_read_lock(); + for (fd = filp->f_pos - 2; + fd < files_fdtable(files)->max_fds; + fd++, filp->f_pos++) { + char name[PROC_NUMBUF]; + int len; + int rv; + + if (!fcheck_files(files, fd)) + continue; + rcu_read_unlock(); + + len = snprintf(name, sizeof(name), "%d", fd); + rv = proc_fill_cache(filp, dirent, filldir, + name, len, instantiate, p, + (void *)(unsigned long)fd); + if (rv < 0) + goto out_fd_loop; + rcu_read_lock(); + } + rcu_read_unlock(); +out_fd_loop: + put_files_struct(files); + } +out: + put_task_struct(p); +out_no_task: + return retval; +} + +static ssize_t proc_fdinfo_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + char tmp[PROC_FDINFO_MAX]; + int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp); + if (!err) + err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp)); + return err; +} + +static const struct file_operations proc_fdinfo_file_operations = { + .open = nonseekable_open, + .read = proc_fdinfo_read, + .llseek = no_llseek, +}; + +static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir) +{ + return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate); +} + +const struct file_operations proc_fd_operations = { + .read = generic_read_dir, + .readdir = proc_readfd, + .llseek = default_llseek, +}; + +static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry, + unsigned int flags) +{ + return proc_lookupfd_common(dir, dentry, proc_fd_instantiate); +} + +/* + * /proc/pid/fd needs a special permission handler so that a process can still + * access /proc/self/fd after it has executed a setuid(). + */ +int proc_fd_permission(struct inode *inode, int mask) +{ + int rv = generic_permission(inode, mask); + if (rv == 0) + return 0; + if (task_pid(current) == proc_pid(inode)) + rv = 0; + return rv; +} + +const struct inode_operations proc_fd_inode_operations = { + .lookup = proc_lookupfd, + .permission = proc_fd_permission, + .setattr = proc_setattr, +}; + +static struct dentry * +proc_fdinfo_instantiate(struct inode *dir, struct dentry *dentry, + struct task_struct *task, const void *ptr) +{ + struct dentry *error = ERR_PTR(-ENOENT); + unsigned fd = (unsigned long)ptr; + struct proc_inode *ei; + struct inode *inode; + + inode = proc_pid_make_inode(dir->i_sb, task); + if (!inode) + goto out; + + ei = PROC_I(inode); + ei->fd = fd; + + inode->i_mode = S_IFREG | S_IRUSR; + inode->i_fop = &proc_fdinfo_file_operations; + + d_set_d_op(dentry, &tid_fd_dentry_operations); + d_add(dentry, inode); + + /* Close the race of the process dying before we return the dentry */ + if (tid_fd_revalidate(dentry, 0)) + error = NULL; + out: + return error; +} + +static struct dentry * +proc_lookupfdinfo(struct inode *dir, struct dentry *dentry, unsigned int flags) +{ + return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate); +} + +static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir) +{ + return proc_readfd_common(filp, dirent, filldir, + proc_fdinfo_instantiate); +} + +const struct inode_operations proc_fdinfo_inode_operations = { + .lookup = proc_lookupfdinfo, + .setattr = proc_setattr, +}; + +const struct file_operations proc_fdinfo_operations = { + .read = generic_read_dir, + .readdir = proc_readfdinfo, + .llseek = default_llseek, +}; diff --git a/fs/proc/fd.h b/fs/proc/fd.h new file mode 100644 index 000000000000..cbb1d47deda8 --- /dev/null +++ b/fs/proc/fd.h @@ -0,0 +1,14 @@ +#ifndef __PROCFS_FD_H__ +#define __PROCFS_FD_H__ + +#include + +extern const struct file_operations proc_fd_operations; +extern const struct inode_operations proc_fd_inode_operations; + +extern const struct file_operations proc_fdinfo_operations; +extern const struct inode_operations proc_fdinfo_inode_operations; + +extern int proc_fd_permission(struct inode *inode, int mask); + +#endif /* __PROCFS_FD_H__ */ diff --git a/fs/proc/internal.h b/fs/proc/internal.h index e1167a1c9126..67925a7bd8cb 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -9,6 +9,7 @@ * 2 of the License, or (at your option) any later version. */ +#include #include struct ctl_table_header; @@ -65,6 +66,7 @@ extern const struct file_operations proc_clear_refs_operations; extern const struct file_operations proc_pagemap_operations; extern const struct file_operations proc_net_operations; extern const struct inode_operations proc_net_inode_operations; +extern const struct inode_operations proc_pid_link_inode_operations; struct proc_maps_private { struct pid *pid; @@ -91,6 +93,52 @@ static inline int proc_fd(struct inode *inode) return PROC_I(inode)->fd; } +static inline int task_dumpable(struct task_struct *task) +{ + int dumpable = 0; + struct mm_struct *mm; + + task_lock(task); + mm = task->mm; + if (mm) + dumpable = get_dumpable(mm); + task_unlock(task); + if(dumpable == 1) + return 1; + return 0; +} + +static inline int pid_delete_dentry(const struct dentry * dentry) +{ + /* Is the task we represent dead? + * If so, then don't put the dentry on the lru list, + * kill it immediately. + */ + return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; +} + +static inline unsigned name_to_int(struct dentry *dentry) +{ + const char *name = dentry->d_name.name; + int len = dentry->d_name.len; + unsigned n = 0; + + if (len > 1 && *name == '0') + goto out; + while (len-- > 0) { + unsigned c = *name++ - '0'; + if (c > 9) + goto out; + if (n >= (~0U-9)/10) + goto out; + n *= 10; + n += c; + } + return n; +out: + return ~0U; +} + struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino, struct dentry *dentry); int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, -- cgit v1.2.3-58-ga151 From ddd3e0771bc7b869c550687c204e21f0155d5496 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 26 Aug 2012 18:28:20 +0400 Subject: procfs: Convert /proc/pid/fdinfo/ handling routines to seq-file v2 This patch converts /proc/pid/fdinfo/ handling routines to seq-file which is needed to extend seq operations and plug in auxiliary fdinfo provides from subsystems like eventfd/eventpoll/fsnotify. Note the proc_fd_link no longer call for proc_fd_info, simply because the guts of proc_fd_info() got merged into ->show() of that seq_file Signed-off-by: Al Viro --- fs/proc/fd.c | 112 ++++++++++++++++++++++++++++++++++------------------------- 1 file changed, 64 insertions(+), 48 deletions(-) (limited to 'fs') diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 1b0f932b4bd9..9cef449c0f76 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -6,61 +6,68 @@ #include #include #include +#include +#include #include #include "internal.h" #include "fd.h" -#define PROC_FDINFO_MAX 64 - -static int proc_fd_info(struct inode *inode, struct path *path, char *info) +static int seq_show(struct seq_file *m, void *v) { - struct task_struct *task = get_proc_task(inode); struct files_struct *files = NULL; - int fd = proc_fd(inode); - struct file *file; + int f_flags = 0, ret = -ENOENT; + struct file *file = NULL; + struct task_struct *task; + + task = get_proc_task(m->private); + if (!task) + return -ENOENT; + + files = get_files_struct(task); + put_task_struct(task); - if (task) { - files = get_files_struct(task); - put_task_struct(task); - } if (files) { - /* - * We are not taking a ref to the file structure, so we must - * hold ->file_lock. - */ + int fd = proc_fd(m->private); + spin_lock(&files->file_lock); file = fcheck_files(files, fd); if (file) { - unsigned int f_flags; - struct fdtable *fdt; + struct fdtable *fdt = files_fdtable(files); - fdt = files_fdtable(files); f_flags = file->f_flags & ~O_CLOEXEC; if (close_on_exec(fd, fdt)) f_flags |= O_CLOEXEC; - if (path) { - *path = file->f_path; - path_get(&file->f_path); - } - if (info) - snprintf(info, PROC_FDINFO_MAX, - "pos:\t%lli\n" - "flags:\t0%o\n", - (long long) file->f_pos, - f_flags); - spin_unlock(&files->file_lock); - put_files_struct(files); - return 0; + get_file(file); + ret = 0; } spin_unlock(&files->file_lock); put_files_struct(files); } - return -ENOENT; + + if (!ret) { + seq_printf(m, "pos:\t%lli\nflags:\t0%o\n", + (long long)file->f_pos, f_flags); + fput(file); + } + + return ret; } +static int seq_fdinfo_open(struct inode *inode, struct file *file) +{ + return single_open(file, seq_show, inode); +} + +static const struct file_operations proc_fdinfo_file_operations = { + .open = seq_fdinfo_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags) { struct files_struct *files; @@ -130,7 +137,32 @@ static const struct dentry_operations tid_fd_dentry_operations = { static int proc_fd_link(struct dentry *dentry, struct path *path) { - return proc_fd_info(dentry->d_inode, path, NULL); + struct files_struct *files = NULL; + struct task_struct *task; + int ret = -ENOENT; + + task = get_proc_task(dentry->d_inode); + if (task) { + files = get_files_struct(task); + put_task_struct(task); + } + + if (files) { + int fd = proc_fd(dentry->d_inode); + struct file *fd_file; + + spin_lock(&files->file_lock); + fd_file = fcheck_files(files, fd); + if (fd_file) { + *path = fd_file->f_path; + path_get(&fd_file->f_path); + ret = 0; + } + spin_unlock(&files->file_lock); + put_files_struct(files); + } + + return ret; } static struct dentry * @@ -245,22 +277,6 @@ out_no_task: return retval; } -static ssize_t proc_fdinfo_read(struct file *file, char __user *buf, - size_t len, loff_t *ppos) -{ - char tmp[PROC_FDINFO_MAX]; - int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp); - if (!err) - err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp)); - return err; -} - -static const struct file_operations proc_fdinfo_file_operations = { - .open = nonseekable_open, - .read = proc_fdinfo_read, - .llseek = no_llseek, -}; - static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir) { return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate); -- cgit v1.2.3-58-ga151 From c6f3d81115989e274c42a852222b80d2e14ced6f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 26 Aug 2012 11:01:04 -0400 Subject: don't leak O_CLOEXEC into ->f_flags Signed-off-by: Al Viro --- fs/open.c | 2 +- fs/proc/fd.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/open.c b/fs/open.c index 30760017deed..03028d0e7487 100644 --- a/fs/open.c +++ b/fs/open.c @@ -814,7 +814,7 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o op->mode = 0; /* Must never be set by userspace */ - flags &= ~FMODE_NONOTIFY; + flags &= ~FMODE_NONOTIFY & ~O_CLOEXEC; /* * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 9cef449c0f76..f28a875f8779 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -36,7 +36,7 @@ static int seq_show(struct seq_file *m, void *v) if (file) { struct fdtable *fdt = files_fdtable(files); - f_flags = file->f_flags & ~O_CLOEXEC; + f_flags = file->f_flags; if (close_on_exec(fd, fdt)) f_flags |= O_CLOEXEC; -- cgit v1.2.3-58-ga151 From f6d2ac5ca79d1fd468dbc77e488aec06e86f3035 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 26 Aug 2012 12:55:54 -0400 Subject: namei.c: fix BS comment get_write_access() is needed for nfsd, not binfmt_aout (the latter has no business doing anything of that kind, of course) Signed-off-by: Al Viro --- fs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index dd1ed1b8e98e..c5b85b3523c0 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3971,7 +3971,7 @@ EXPORT_SYMBOL(user_path_at); EXPORT_SYMBOL(follow_down_one); EXPORT_SYMBOL(follow_down); EXPORT_SYMBOL(follow_up); -EXPORT_SYMBOL(get_write_access); /* binfmt_aout */ +EXPORT_SYMBOL(get_write_access); /* nfsd */ EXPORT_SYMBOL(getname); EXPORT_SYMBOL(lock_rename); EXPORT_SYMBOL(lookup_one_len); -- cgit v1.2.3-58-ga151 From bf2965d5b5950d09e934ea5d961d79d0ed1fae7e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 26 Aug 2012 20:13:36 -0400 Subject: switch ftruncate(2) to fget_light Signed-off-by: Al Viro --- fs/open.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/open.c b/fs/open.c index 03028d0e7487..9f61d7269d39 100644 --- a/fs/open.c +++ b/fs/open.c @@ -132,16 +132,16 @@ SYSCALL_DEFINE2(truncate, const char __user *, path, long, length) static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) { - struct inode * inode; + struct inode *inode; struct dentry *dentry; - struct file * file; - int error; + struct file *file; + int error, fput_needed; error = -EINVAL; if (length < 0) goto out; error = -EBADF; - file = fget(fd); + file = fget_light(fd, &fput_needed); if (!file) goto out; @@ -172,7 +172,7 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file); sb_end_write(inode->i_sb); out_putf: - fput(file); + fput_light(file, fput_needed); out: return error; } -- cgit v1.2.3-58-ga151 From 6b48c5b2079af1f81d8f249ae07a988d8c45b32f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 26 Aug 2012 20:15:40 -0400 Subject: switch fallocate(2) to fget_light() Signed-off-by: Al Viro --- fs/open.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/open.c b/fs/open.c index 9f61d7269d39..da6d3f1ac243 100644 --- a/fs/open.c +++ b/fs/open.c @@ -277,12 +277,12 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len) SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len) { struct file *file; - int error = -EBADF; + int error = -EBADF, fput_needed; - file = fget(fd); + file = fget_light(fd, &fput_needed); if (file) { error = do_fallocate(file, mode, offset, len); - fput(file); + fput_light(file, fput_needed); } return error; -- cgit v1.2.3-58-ga151 From d6483b7a78438bc333560d11b69e6a6a6cf55940 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 26 Aug 2012 20:22:10 -0400 Subject: switch fchmod(2) to fget_light() Signed-off-by: Al Viro --- fs/open.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/open.c b/fs/open.c index da6d3f1ac243..3c741eae6b99 100644 --- a/fs/open.c +++ b/fs/open.c @@ -582,23 +582,21 @@ SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) { - struct file * file; - int error = -EBADF; - struct dentry * dentry; + struct file *file; + int error = -EBADF, fput_needed; - file = fget(fd); + file = fget_light(fd, &fput_needed); if (!file) goto out; error = mnt_want_write_file(file); if (error) goto out_fput; - dentry = file->f_path.dentry; - audit_inode(NULL, dentry); + audit_inode(NULL, file->f_path.dentry); error = chown_common(&file->f_path, user, group); mnt_drop_write_file(file); out_fput: - fput(file); + fput_light(file, fput_needed); out: return error; } -- cgit v1.2.3-58-ga151 From 399c9b862f853f5c33e5a3b1f9a3c2507bdd526b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 26 Aug 2012 21:00:03 -0400 Subject: ext4: close struct file leak on EXT4_IOC_MOVE_EXT Signed-off-by: Al Viro --- fs/ext4/ioctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 7f7dad787603..a0ee682dc394 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -258,7 +258,8 @@ group_extend_out: EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { ext4_msg(sb, KERN_ERR, "Online defrag not supported with bigalloc"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto mext_out; } err = mnt_want_write_file(filp); -- cgit v1.2.3-58-ga151 From 4557c669ef9801d96cf663331cdd1dcb8fa9c2f1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 28 Aug 2012 10:19:41 -0400 Subject: export fget_light Signed-off-by: Al Viro --- fs/file.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/file.c b/fs/file.c index 15750b80e3ce..0f1bda4bebfa 100644 --- a/fs/file.c +++ b/fs/file.c @@ -797,6 +797,7 @@ struct file *fget_light(unsigned int fd, int *fput_needed) return file; } +EXPORT_SYMBOL(fget_light); struct file *fget_raw_light(unsigned int fd, int *fput_needed) { -- cgit v1.2.3-58-ga151 From 6bdf2954016ef7c1f4d4fa07a338ee197d9c3506 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 26 Aug 2012 21:01:46 -0400 Subject: switch EXT4_IOC_MOVE_EXT to fget_light() Signed-off-by: Al Viro --- fs/ext4/ioctl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index a0ee682dc394..39646f224514 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -234,7 +234,7 @@ group_extend_out: case EXT4_IOC_MOVE_EXT: { struct move_extent me; struct file *donor_filp; - int err; + int err, fput_needed; if (!(filp->f_mode & FMODE_READ) || !(filp->f_mode & FMODE_WRITE)) @@ -245,7 +245,7 @@ group_extend_out: return -EFAULT; me.moved_len = 0; - donor_filp = fget(me.donor_fd); + donor_filp = fget_light(me.donor_fd, &fput_needed); if (!donor_filp) return -EBADF; @@ -274,7 +274,7 @@ group_extend_out: &me, sizeof(me))) err = -EFAULT; mext_out: - fput(donor_filp); + fput_light(donor_filp, fput_needed); return err; } -- cgit v1.2.3-58-ga151 From ecd188159efa112770d5f8a4a62f8d3586784f48 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 26 Aug 2012 21:20:24 -0400 Subject: switch btrfs_ioctl_snap_create_transid() to fget_light() Signed-off-by: Al Viro --- fs/btrfs/ioctl.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 9df50fa8a078..3c88abb0e265 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1422,7 +1422,8 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, NULL, transid, readonly, inherit); } else { struct inode *src_inode; - src_file = fget(fd); + int fput_needed; + src_file = fget_light(fd, &fput_needed); if (!src_file) { ret = -EINVAL; goto out_drop_write; @@ -1433,13 +1434,12 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, printk(KERN_INFO "btrfs: Snapshot src from " "another FS\n"); ret = -EINVAL; - fput(src_file); - goto out_drop_write; + } else { + ret = btrfs_mksubvol(&file->f_path, name, namelen, + BTRFS_I(src_inode)->root, + transid, readonly, inherit); } - ret = btrfs_mksubvol(&file->f_path, name, namelen, - BTRFS_I(src_inode)->root, - transid, readonly, inherit); - fput(src_file); + fput_light(src_file, fput_needed); } out_drop_write: mnt_drop_write_file(file); -- cgit v1.2.3-58-ga151 From 5e196a9cf557dbc2bf808824c6c4998150e18d06 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 26 Aug 2012 21:27:40 -0400 Subject: switch epoll_wait(2) to fget_light() Signed-off-by: Al Viro --- fs/eventpoll.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/eventpoll.c b/fs/eventpoll.c index eedec84c1809..567ae72e155d 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1809,7 +1809,7 @@ error_return: SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, int, maxevents, int, timeout) { - int error; + int error, fput_needed; struct file *file; struct eventpoll *ep; @@ -1825,7 +1825,7 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, /* Get the "struct file *" for the eventpoll file */ error = -EBADF; - file = fget(epfd); + file = fget_light(epfd, &fput_needed); if (!file) goto error_return; @@ -1847,7 +1847,7 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, error = ep_poll(ep, events, maxevents, timeout); error_fput: - fput(file); + fput_light(file, fput_needed); error_return: return error; -- cgit v1.2.3-58-ga151 From 4109633f4c4dcdaedf0d85ae74dba334760c577b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 26 Aug 2012 21:32:02 -0400 Subject: switch timerfd_[sg]ettime(2) to fget_light() Signed-off-by: Al Viro --- fs/timerfd.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/timerfd.c b/fs/timerfd.c index dffeb3795af1..dd91e9420c9e 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -234,15 +234,15 @@ static const struct file_operations timerfd_fops = { .llseek = noop_llseek, }; -static struct file *timerfd_fget(int fd) +static struct file *timerfd_fget(int fd, int *fput_needed) { struct file *file; - file = fget(fd); + file = fget_light(fd, fput_needed); if (!file) return ERR_PTR(-EBADF); if (file->f_op != &timerfd_fops) { - fput(file); + fput_light(file, *fput_needed); return ERR_PTR(-EINVAL); } @@ -287,7 +287,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, struct file *file; struct timerfd_ctx *ctx; struct itimerspec ktmr, kotmr; - int ret; + int ret, fput_needed; if (copy_from_user(&ktmr, utmr, sizeof(ktmr))) return -EFAULT; @@ -297,7 +297,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, !timespec_valid(&ktmr.it_interval)) return -EINVAL; - file = timerfd_fget(ufd); + file = timerfd_fget(ufd, &fput_needed); if (IS_ERR(file)) return PTR_ERR(file); ctx = file->private_data; @@ -334,7 +334,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, ret = timerfd_setup(ctx, flags, &ktmr); spin_unlock_irq(&ctx->wqh.lock); - fput(file); + fput_light(file, fput_needed); if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr))) return -EFAULT; @@ -346,8 +346,9 @@ SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) struct file *file; struct timerfd_ctx *ctx; struct itimerspec kotmr; + int fput_needed; - file = timerfd_fget(ufd); + file = timerfd_fget(ufd, &fput_needed); if (IS_ERR(file)) return PTR_ERR(file); ctx = file->private_data; @@ -362,7 +363,7 @@ SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); kotmr.it_interval = ktime_to_timespec(ctx->tintv); spin_unlock_irq(&ctx->wqh.lock); - fput(file); + fput_light(file, fput_needed); return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0; } -- cgit v1.2.3-58-ga151 From 8319aa9127a1282b24c3ece473a058d246f35b0d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 27 Aug 2012 03:18:55 -0400 Subject: switch btrfs_ioctl_clone() to fget_light() Signed-off-by: Al Viro --- fs/btrfs/ioctl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 3c88abb0e265..3494f2f44167 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2350,7 +2350,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, struct btrfs_key key; u32 nritems; int slot; - int ret; + int ret, fput_needed; u64 len = olen; u64 bs = root->fs_info->sb->s_blocksize; u64 hint_byte; @@ -2376,7 +2376,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, if (ret) return ret; - src_file = fget(srcfd); + src_file = fget_light(srcfd, &fput_needed); if (!src_file) { ret = -EBADF; goto out_drop_write; @@ -2724,7 +2724,7 @@ out_unlock: vfree(buf); btrfs_free_path(path); out_fput: - fput(src_file); + fput_light(src_file, fput_needed); out_drop_write: mnt_drop_write_file(file); return ret; -- cgit v1.2.3-58-ga151 From 78f7d75e5dd9aa1027e90d0b71d394603933c2ed Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 27 Aug 2012 12:54:13 -0400 Subject: switch coda get_device_index() to fget_light() Signed-off-by: Al Viro --- fs/coda/inode.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/coda/inode.c b/fs/coda/inode.c index f1813120d753..bd2313d106e5 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -109,41 +109,39 @@ static int get_device_index(struct coda_mount_data *data) { struct file *file; struct inode *inode; - int idx; + int idx, fput_needed; - if(data == NULL) { + if (data == NULL) { printk("coda_read_super: Bad mount data\n"); return -1; } - if(data->version != CODA_MOUNT_VERSION) { + if (data->version != CODA_MOUNT_VERSION) { printk("coda_read_super: Bad mount version\n"); return -1; } - file = fget(data->fd); - inode = NULL; - if(file) - inode = file->f_path.dentry->d_inode; - - if(!inode || !S_ISCHR(inode->i_mode) || - imajor(inode) != CODA_PSDEV_MAJOR) { - if(file) - fput(file); - - printk("coda_read_super: Bad file\n"); - return -1; + file = fget_light(data->fd, &fput_needed); + if (!file) + goto Ebadf; + inode = file->f_path.dentry->d_inode; + if (!S_ISCHR(inode->i_mode) || imajor(inode) != CODA_PSDEV_MAJOR) { + fput_light(file, fput_needed); + goto Ebadf; } idx = iminor(inode); - fput(file); + fput_light(file, fput_needed); - if(idx < 0 || idx >= MAX_CODADEVS) { + if (idx < 0 || idx >= MAX_CODADEVS) { printk("coda_read_super: Bad minor number\n"); return -1; } return idx; +Ebadf: + printk("coda_read_super: Bad file\n"); + return -1; } static int coda_fill_super(struct super_block *sb, void *data, int silent) -- cgit v1.2.3-58-ga151 From 1ea65c96077f9bb5c0e5e224a4da751d269c5f94 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 27 Aug 2012 12:57:12 -0400 Subject: switch xfs_swapext() to fget_light() Signed-off-by: Al Viro --- fs/xfs/xfs_dfrag.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index e00de08dc8ac..e6cdf224d7ea 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c @@ -49,10 +49,10 @@ xfs_swapext( { xfs_inode_t *ip, *tip; struct file *file, *tmp_file; - int error = 0; + int error = 0, fput_needed, fput_needed_tmp; /* Pull information for the target fd */ - file = fget((int)sxp->sx_fdtarget); + file = fget_light((int)sxp->sx_fdtarget, &fput_needed); if (!file) { error = XFS_ERROR(EINVAL); goto out; @@ -65,7 +65,7 @@ xfs_swapext( goto out_put_file; } - tmp_file = fget((int)sxp->sx_fdtmp); + tmp_file = fget_light((int)sxp->sx_fdtmp, &fput_needed_tmp); if (!tmp_file) { error = XFS_ERROR(EINVAL); goto out_put_file; @@ -105,9 +105,9 @@ xfs_swapext( error = xfs_swap_extents(ip, tip, sxp); out_put_tmp_file: - fput(tmp_file); + fput_light(tmp_file, fput_needed_tmp); out_put_file: - fput(file); + fput_light(file, fput_needed); out: return error; } -- cgit v1.2.3-58-ga151 From 64e09fa2e1fef1696a8685c7aad7e0d3dd24ce71 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 27 Aug 2012 12:59:52 -0400 Subject: switch xfs_find_handle() to fget_light() Signed-off-by: Al Viro --- fs/xfs/xfs_ioctl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 0e0232c3b6d9..21483eac402d 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -72,11 +72,11 @@ xfs_find_handle( struct inode *inode; struct file *file = NULL; struct path path; - int error; + int error, fput_needed; struct xfs_inode *ip; if (cmd == XFS_IOC_FD_TO_HANDLE) { - file = fget(hreq->fd); + file = fget_light(hreq->fd, &fput_needed); if (!file) return -EBADF; inode = file->f_path.dentry->d_inode; @@ -134,7 +134,7 @@ xfs_find_handle( out_put: if (cmd == XFS_IOC_FD_TO_HANDLE) - fput(file); + fput_light(file, fput_needed); else path_put(&path); return error; -- cgit v1.2.3-58-ga151 From cb0942b81249798e15c3f04eee2946ef543e8115 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 27 Aug 2012 14:48:26 -0400 Subject: make get_file() return its argument simplifies a bunch of callers... Signed-off-by: Al Viro --- arch/ia64/kernel/perfmon.c | 4 +--- drivers/base/dma-buf.c | 3 +-- drivers/staging/omapdrm/omap_gem.c | 3 +-- drivers/tty/tty_io.c | 9 +++------ fs/autofs4/waitq.c | 3 +-- fs/fuse/dev.c | 3 +-- fs/nfsd/nfs4state.c | 3 +-- fs/proc/base.c | 3 +-- fs/select.c | 3 +-- include/linux/fs.h | 6 +++++- mm/fremap.c | 3 +-- mm/mmap.c | 3 +-- mm/nommu.c | 6 ++---- net/compat.c | 3 +-- net/core/scm.c | 3 +-- security/selinux/hooks.c | 3 +-- 16 files changed, 23 insertions(+), 38 deletions(-) (limited to 'fs') diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 79826c13b8b6..ff5d4e4c3733 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -2306,7 +2306,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t * partially initialize the vma for the sampling buffer */ vma->vm_mm = mm; - vma->vm_file = filp; + vma->vm_file = get_file(filp); vma->vm_flags = VM_READ| VM_MAYREAD |VM_RESERVED; vma->vm_page_prot = PAGE_READONLY; /* XXX may need to change */ @@ -2345,8 +2345,6 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t goto error; } - get_file(filp); - /* * now insert the vma in the vm list for the process, must be * done with mmap lock held diff --git a/drivers/base/dma-buf.c b/drivers/base/dma-buf.c index c30f3e1d0efc..460e22dee36d 100644 --- a/drivers/base/dma-buf.c +++ b/drivers/base/dma-buf.c @@ -460,8 +460,7 @@ int dma_buf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma, if (vma->vm_file) fput(vma->vm_file); - vma->vm_file = dmabuf->file; - get_file(vma->vm_file); + vma->vm_file = get_file(dmabuf->file); vma->vm_pgoff = pgoff; diff --git a/drivers/staging/omapdrm/omap_gem.c b/drivers/staging/omapdrm/omap_gem.c index 3a0d035a9e03..2a6bb7f9ee68 100644 --- a/drivers/staging/omapdrm/omap_gem.c +++ b/drivers/staging/omapdrm/omap_gem.c @@ -566,9 +566,8 @@ int omap_gem_mmap_obj(struct drm_gem_object *obj, * in particular in the case of mmap'd dmabufs) */ fput(vma->vm_file); - get_file(obj->filp); vma->vm_pgoff = 0; - vma->vm_file = obj->filp; + vma->vm_file = get_file(obj->filp); vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); } diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 71d95cfbabec..c7561f29d894 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -1163,10 +1163,8 @@ ssize_t redirected_tty_write(struct file *file, const char __user *buf, struct file *p = NULL; spin_lock(&redirect_lock); - if (redirect) { - get_file(redirect); - p = redirect; - } + if (redirect) + p = get_file(redirect); spin_unlock(&redirect_lock); if (p) { @@ -2246,8 +2244,7 @@ static int tioccons(struct file *file) spin_unlock(&redirect_lock); return -EBUSY; } - get_file(file); - redirect = file; + redirect = get_file(file); spin_unlock(&redirect_lock); return 0; } diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index da8876d38a7b..dce436e595c1 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -175,8 +175,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, return; } - pipe = sbi->pipe; - get_file(pipe); + pipe = get_file(sbi->pipe); mutex_unlock(&sbi->wq_mutex); diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index f4246cfc8d87..8c23fa7a91e6 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -148,8 +148,7 @@ static struct fuse_req *get_reserved_req(struct fuse_conn *fc, if (ff->reserved_req) { req = ff->reserved_req; ff->reserved_req = NULL; - get_file(file); - req->stolen_file = file; + req->stolen_file = get_file(file); } spin_unlock(&fc->lock); } while (!req); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index cc894eda385a..48a1bad37334 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2837,8 +2837,7 @@ static int nfs4_setlease(struct nfs4_delegation *dp, int flag) return -ENOMEM; } fp->fi_lease = fl; - fp->fi_deleg_file = fl->fl_file; - get_file(fp->fi_deleg_file); + fp->fi_deleg_file = get_file(fl->fl_file); atomic_set(&fp->fi_delegees, 1); list_add(&dp->dl_perfile, &fp->fi_delegations); return 0; diff --git a/fs/proc/base.c b/fs/proc/base.c index b55c3bb298e3..f1e8438d21b5 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1979,8 +1979,7 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) if (++pos <= filp->f_pos) continue; - get_file(vma->vm_file); - info.file = vma->vm_file; + info.file = get_file(vma->vm_file); info.len = snprintf(info.name, sizeof(info.name), "%lx-%lx", vma->vm_start, vma->vm_end); diff --git a/fs/select.c b/fs/select.c index db14c781335e..ffdd16d6e691 100644 --- a/fs/select.c +++ b/fs/select.c @@ -220,8 +220,7 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, struct poll_table_entry *entry = poll_get_entry(pwq); if (!entry) return; - get_file(filp); - entry->filp = filp; + entry->filp = get_file(filp); entry->wait_address = wait_address; entry->key = p->_key; init_waitqueue_func_entry(&entry->wait, pollwake); diff --git a/include/linux/fs.h b/include/linux/fs.h index aa110476a95b..de1db1c12080 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1074,7 +1074,11 @@ struct file_handle { unsigned char f_handle[0]; }; -#define get_file(x) atomic_long_inc(&(x)->f_count) +static inline struct file *get_file(struct file *f) +{ + atomic_long_inc(&f->f_count); + return f; +} #define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1) #define file_count(x) atomic_long_read(&(x)->f_count) diff --git a/mm/fremap.c b/mm/fremap.c index 9ed4fd432467..048659c0c03d 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -195,10 +195,9 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, */ if (mapping_cap_account_dirty(mapping)) { unsigned long addr; - struct file *file = vma->vm_file; + struct file *file = get_file(vma->vm_file); flags &= MAP_NONBLOCK; - get_file(file); addr = mmap_region(file, start, size, flags, vma->vm_flags, pgoff); fput(file); diff --git a/mm/mmap.c b/mm/mmap.c index ae18a48e7e4e..872441e81914 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1301,8 +1301,7 @@ munmap_back: goto free_vma; correct_wcount = 1; } - vma->vm_file = file; - get_file(file); + vma->vm_file = get_file(file); error = file->f_op->mmap(file, vma); if (error) goto unmap_and_free_vma; diff --git a/mm/nommu.c b/mm/nommu.c index d4b0c10872de..dee2ff89fd58 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1282,10 +1282,8 @@ unsigned long do_mmap_pgoff(struct file *file, vma->vm_pgoff = pgoff; if (file) { - region->vm_file = file; - get_file(file); - vma->vm_file = file; - get_file(file); + region->vm_file = get_file(file); + vma->vm_file = get_file(file); if (vm_flags & VM_EXECUTABLE) { added_exe_file_vma(current->mm); vma->vm_mm = current->mm; diff --git a/net/compat.c b/net/compat.c index 74ed1d7a84a2..79ae88485001 100644 --- a/net/compat.c +++ b/net/compat.c @@ -301,8 +301,7 @@ void scm_detach_fds_compat(struct msghdr *kmsg, struct scm_cookie *scm) break; } /* Bump the usage count and install the file. */ - get_file(fp[i]); - fd_install(new_fd, fp[i]); + fd_install(new_fd, get_file(fp[i])); } if (i > 0) { diff --git a/net/core/scm.c b/net/core/scm.c index 040cebeed45b..b0098d259233 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -281,11 +281,10 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) break; } /* Bump the usage count and install the file. */ - get_file(fp[i]); sock = sock_from_file(fp[i], &err); if (sock) sock_update_netprioidx(sock->sk, current); - fd_install(new_fd, fp[i]); + fd_install(new_fd, get_file(fp[i])); } if (i > 0) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 4dfbcea10eb7..651d8456611a 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2135,8 +2135,7 @@ static inline void flush_unauthorized_files(const struct cred *cred, if (!IS_ERR(devnull)) { /* replace all the matching ones with this */ do { - get_file(devnull); - replace_fd(n - 1, devnull, 0); + replace_fd(n - 1, get_file(devnull), 0); } while ((n = iterate_fd(files, n, match_file, cred)) != 0); fput(devnull); } else { -- cgit v1.2.3-58-ga151 From 7b540d0646ce122f0ba4520412be91e530719742 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 27 Aug 2012 14:55:26 -0400 Subject: proc_map_files_readdir(): don't bother with grabbing files all we need is their ->f_mode, so just collect _that_ Signed-off-by: Al Viro --- fs/proc/base.c | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/proc/base.c b/fs/proc/base.c index f1e8438d21b5..df18db61d6d8 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1805,7 +1805,7 @@ out: } struct map_files_info { - struct file *file; + fmode_t mode; unsigned long len; unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */ }; @@ -1814,13 +1814,10 @@ static struct dentry * proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, struct task_struct *task, const void *ptr) { - const struct file *file = ptr; + fmode_t mode = (fmode_t)(unsigned long)ptr; struct proc_inode *ei; struct inode *inode; - if (!file) - return ERR_PTR(-ENOENT); - inode = proc_pid_make_inode(dir->i_sb, task); if (!inode) return ERR_PTR(-ENOENT); @@ -1832,9 +1829,9 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, inode->i_size = 64; inode->i_mode = S_IFLNK; - if (file->f_mode & FMODE_READ) + if (mode & FMODE_READ) inode->i_mode |= S_IRUSR; - if (file->f_mode & FMODE_WRITE) + if (mode & FMODE_WRITE) inode->i_mode |= S_IWUSR; d_set_d_op(dentry, &tid_map_files_dentry_operations); @@ -1878,7 +1875,8 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, if (!vma) goto out_no_vma; - result = proc_map_files_instantiate(dir, dentry, task, vma->vm_file); + result = proc_map_files_instantiate(dir, dentry, task, + (void *)(unsigned long)vma->vm_file->f_mode); out_no_vma: up_read(&mm->mmap_sem); @@ -1979,7 +1977,7 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) if (++pos <= filp->f_pos) continue; - info.file = get_file(vma->vm_file); + info.mode = vma->vm_file->f_mode; info.len = snprintf(info.name, sizeof(info.name), "%lx-%lx", vma->vm_start, vma->vm_end); @@ -1994,19 +1992,11 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) ret = proc_fill_cache(filp, dirent, filldir, p->name, p->len, proc_map_files_instantiate, - task, p->file); + task, + (void *)(unsigned long)p->mode); if (ret) break; filp->f_pos++; - fput(p->file); - } - for (; i < nr_files; i++) { - /* - * In case of error don't forget - * to put rest of file refs. - */ - p = flex_array_get(fa, i); - fput(p->file); } if (fa) flex_array_free(fa); -- cgit v1.2.3-58-ga151 From 2a117354b7bdfe13750a64307755e75436fb157a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 27 Aug 2012 17:55:17 -0400 Subject: switch o2hb_region_dev_write() to fget_light() Signed-off-by: Al Viro --- fs/ocfs2/cluster/heartbeat.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index a4e855e3690e..61c28ae266f5 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -1750,6 +1750,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, struct inode *inode = NULL; ssize_t ret = -EINVAL; int live_threshold; + int fput_needed; if (reg->hr_bdev) goto out; @@ -1766,7 +1767,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, if (fd < 0 || fd >= INT_MAX) goto out; - filp = fget(fd); + filp = fget_light(fd, &fput_needed); if (filp == NULL) goto out; @@ -1884,7 +1885,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, out: if (filp) - fput(filp); + fput_light(filp, fput_needed); if (inode) iput(inode); if (ret < 0) { -- cgit v1.2.3-58-ga151 From 2903ff019b346ab8d36ebbf54853c3aaf6590608 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 28 Aug 2012 12:52:22 -0400 Subject: switch simple cases of fget_light to fdget Signed-off-by: Al Viro --- arch/alpha/kernel/osf_sys.c | 15 +-- arch/ia64/kernel/perfmon.c | 15 ++- arch/parisc/hpux/fs.c | 17 ++- arch/powerpc/platforms/cell/spu_syscalls.c | 21 ++-- drivers/infiniband/core/ucma.c | 12 +- drivers/infiniband/core/uverbs_cmd.c | 18 +-- drivers/infiniband/core/uverbs_main.c | 12 +- drivers/vfio/vfio.c | 17 ++- drivers/video/msm/mdp.c | 12 +- fs/btrfs/ioctl.c | 26 ++--- fs/coda/inode.c | 14 +-- fs/compat.c | 90 +++++++-------- fs/compat_ioctl.c | 27 ++--- fs/eventpoll.c | 25 ++-- fs/ext4/ioctl.c | 14 +-- fs/fcntl.c | 32 +++--- fs/fhandle.c | 17 ++- fs/ioctl.c | 25 ++-- fs/locks.c | 20 ++-- fs/namei.c | 39 +++---- fs/notify/fanotify/fanotify_user.c | 28 +++-- fs/notify/inotify/inotify_user.c | 28 ++--- fs/ocfs2/cluster/heartbeat.c | 39 ++++--- fs/open.c | 64 +++++------ fs/read_write.c | 176 +++++++++++++---------------- fs/readdir.c | 36 +++--- fs/select.c | 28 ++--- fs/signalfd.c | 13 +-- fs/splice.c | 69 ++++++----- fs/stat.c | 10 +- fs/statfs.c | 9 +- fs/sync.c | 33 +++--- fs/timerfd.c | 48 ++++---- fs/utimes.c | 11 +- fs/xattr.c | 52 ++++----- fs/xfs/xfs_dfrag.c | 36 +++--- fs/xfs/xfs_ioctl.c | 12 +- include/linux/file.h | 5 +- ipc/mqueue.c | 84 +++++++------- kernel/events/core.c | 70 +++++------- kernel/sys.c | 16 +-- kernel/taskstats.c | 11 +- mm/fadvise.c | 35 +++--- mm/readahead.c | 15 ++- 44 files changed, 633 insertions(+), 763 deletions(-) (limited to 'fs') diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index d6c49e67d3fc..f1daf7ae42e9 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -144,28 +144,25 @@ SYSCALL_DEFINE4(osf_getdirentries, unsigned int, fd, struct osf_dirent __user *, dirent, unsigned int, count, long __user *, basep) { - int error, fput_needed; - struct file *file; + int error; + struct fd arg = fdget(fd); struct osf_dirent_callback buf; - error = -EBADF; - file = fget_light(fd, &fput_needed); - if (!file) - goto out; + if (!arg.file) + return -EBADF; buf.dirent = dirent; buf.basep = basep; buf.count = count; buf.error = 0; - error = vfs_readdir(file, osf_filldir, &buf); + error = vfs_readdir(arg.file, osf_filldir, &buf); if (error >= 0) error = buf.error; if (count != buf.count) error = count - buf.count; - fput_light(file, fput_needed); - out: + fdput(arg); return error; } diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index ff5d4e4c3733..e3bd7b8aceab 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -4780,7 +4780,7 @@ recheck: asmlinkage long sys_perfmonctl (int fd, int cmd, void __user *arg, int count) { - struct file *file = NULL; + struct fd f = {NULL, 0}; pfm_context_t *ctx = NULL; unsigned long flags = 0UL; void *args_k = NULL; @@ -4789,7 +4789,6 @@ sys_perfmonctl (int fd, int cmd, void __user *arg, int count) int narg, completed_args = 0, call_made = 0, cmd_flags; int (*func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); int (*getsize)(void *arg, size_t *sz); - int fput_needed; #define PFM_MAX_ARGSIZE 4096 /* @@ -4878,17 +4877,17 @@ restart_args: ret = -EBADF; - file = fget_light(fd, &fput_needed); - if (unlikely(file == NULL)) { + f = fdget(fd); + if (unlikely(f.file == NULL)) { DPRINT(("invalid fd %d\n", fd)); goto error_args; } - if (unlikely(PFM_IS_FILE(file) == 0)) { + if (unlikely(PFM_IS_FILE(f.file) == 0)) { DPRINT(("fd %d not related to perfmon\n", fd)); goto error_args; } - ctx = file->private_data; + ctx = f.file->private_data; if (unlikely(ctx == NULL)) { DPRINT(("no context for fd %d\n", fd)); goto error_args; @@ -4918,8 +4917,8 @@ abort_locked: if (call_made && PFM_CMD_RW_ARG(cmd) && copy_to_user(arg, args_k, base_sz*count)) ret = -EFAULT; error_args: - if (file) - fput_light(file, fput_needed); + if (f.file) + fdput(f); kfree(args_k); diff --git a/arch/parisc/hpux/fs.c b/arch/parisc/hpux/fs.c index 41e01832cb21..6785de7bd2a0 100644 --- a/arch/parisc/hpux/fs.c +++ b/arch/parisc/hpux/fs.c @@ -109,33 +109,32 @@ Efault: int hpux_getdents(unsigned int fd, struct hpux_dirent __user *dirent, unsigned int count) { - struct file * file; + struct fd arg; struct hpux_dirent __user * lastdirent; struct getdents_callback buf; - int error = -EBADF, fput_needed; + int error; - file = fget_light(fd, &fput_needed); - if (!file) - goto out; + arg = fdget(fd); + if (!arg.file) + return -EBADF; buf.current_dir = dirent; buf.previous = NULL; buf.count = count; buf.error = 0; - error = vfs_readdir(file, filldir, &buf); + error = vfs_readdir(arg.file, filldir, &buf); if (error >= 0) error = buf.error; lastdirent = buf.previous; if (lastdirent) { - if (put_user(file->f_pos, &lastdirent->d_off)) + if (put_user(arg.file->f_pos, &lastdirent->d_off)) error = -EFAULT; else error = count - buf.count; } - fput_light(file, fput_needed); -out: + fdput(arg); return error; } diff --git a/arch/powerpc/platforms/cell/spu_syscalls.c b/arch/powerpc/platforms/cell/spu_syscalls.c index 714bbfc3162c..db4e638cf408 100644 --- a/arch/powerpc/platforms/cell/spu_syscalls.c +++ b/arch/powerpc/platforms/cell/spu_syscalls.c @@ -69,8 +69,6 @@ SYSCALL_DEFINE4(spu_create, const char __user *, name, unsigned int, flags, umode_t, mode, int, neighbor_fd) { long ret; - struct file *neighbor; - int fput_needed; struct spufs_calls *calls; calls = spufs_calls_get(); @@ -78,11 +76,11 @@ SYSCALL_DEFINE4(spu_create, const char __user *, name, unsigned int, flags, return -ENOSYS; if (flags & SPU_CREATE_AFFINITY_SPU) { + struct fd neighbor = fdget(neighbor_fd); ret = -EBADF; - neighbor = fget_light(neighbor_fd, &fput_needed); - if (neighbor) { - ret = calls->create_thread(name, flags, mode, neighbor); - fput_light(neighbor, fput_needed); + if (neighbor.file) { + ret = calls->create_thread(name, flags, mode, neighbor.file); + fdput(neighbor); } } else ret = calls->create_thread(name, flags, mode, NULL); @@ -94,8 +92,7 @@ SYSCALL_DEFINE4(spu_create, const char __user *, name, unsigned int, flags, asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus) { long ret; - struct file *filp; - int fput_needed; + struct fd arg; struct spufs_calls *calls; calls = spufs_calls_get(); @@ -103,10 +100,10 @@ asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus) return -ENOSYS; ret = -EBADF; - filp = fget_light(fd, &fput_needed); - if (filp) { - ret = calls->spu_run(filp, unpc, ustatus); - fput_light(filp, fput_needed); + arg = fdget(fd); + if (arg.file) { + ret = calls->spu_run(arg.file, unpc, ustatus); + fdput(arg); } spufs_calls_put(calls); diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 6b2ae729de92..6f28da9f4cad 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1184,20 +1184,20 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file, struct rdma_ucm_migrate_id cmd; struct rdma_ucm_migrate_resp resp; struct ucma_context *ctx; - struct file *filp; + struct fd f; struct ucma_file *cur_file; - int ret = 0, fput_needed; + int ret = 0; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; /* Get current fd to protect against it being closed */ - filp = fget_light(cmd.fd, &fput_needed); - if (!filp) + f = fdget(cmd.fd); + if (!f.file) return -ENOENT; /* Validate current fd and prevent destruction of id. */ - ctx = ucma_get_ctx(filp->private_data, cmd.id); + ctx = ucma_get_ctx(f.file->private_data, cmd.id); if (IS_ERR(ctx)) { ret = PTR_ERR(ctx); goto file_put; @@ -1231,7 +1231,7 @@ response: ucma_put_ctx(ctx); file_put: - fput_light(filp, fput_needed); + fdput(f); return ret; } diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 402679bd30a3..0cb0007724a2 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -705,9 +705,9 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, struct ib_udata udata; struct ib_uxrcd_object *obj; struct ib_xrcd *xrcd = NULL; - struct file *f = NULL; + struct fd f = {NULL, 0}; struct inode *inode = NULL; - int ret = 0, fput_needed; + int ret = 0; int new_xrcd = 0; if (out_len < sizeof resp) @@ -724,13 +724,13 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, if (cmd.fd != -1) { /* search for file descriptor */ - f = fget_light(cmd.fd, &fput_needed); - if (!f) { + f = fdget(cmd.fd); + if (!f.file) { ret = -EBADF; goto err_tree_mutex_unlock; } - inode = f->f_dentry->d_inode; + inode = f.file->f_path.dentry->d_inode; xrcd = find_xrcd(file->device, inode); if (!xrcd && !(cmd.oflags & O_CREAT)) { /* no file descriptor. Need CREATE flag */ @@ -795,8 +795,8 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, goto err_copy; } - if (f) - fput_light(f, fput_needed); + if (f.file) + fdput(f); mutex_lock(&file->mutex); list_add_tail(&obj->uobject.list, &file->ucontext->xrcd_list); @@ -825,8 +825,8 @@ err: put_uobj_write(&obj->uobject); err_tree_mutex_unlock: - if (f) - fput_light(f, fput_needed); + if (f.file) + fdput(f); mutex_unlock(&file->device->xrcd_tree_mutex); diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index acf75c2cf7ef..6f2ce6fa98f8 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -541,17 +541,15 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd) { struct ib_uverbs_event_file *ev_file = NULL; - struct file *filp; - int fput_needed; + struct fd f = fdget(fd); - filp = fget_light(fd, &fput_needed); - if (!filp) + if (!f.file) return NULL; - if (filp->f_op != &uverbs_event_fops) + if (f.file->f_op != &uverbs_event_fops) goto out; - ev_file = filp->private_data; + ev_file = f.file->private_data; if (ev_file->is_async) { ev_file = NULL; goto out; @@ -560,7 +558,7 @@ struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd) kref_get(&ev_file->ref); out: - fput_light(filp, fput_needed); + fdput(f); return ev_file; } diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 91bcd97d3061..56097c6d072d 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1014,25 +1014,25 @@ static void vfio_group_try_dissolve_container(struct vfio_group *group) static int vfio_group_set_container(struct vfio_group *group, int container_fd) { - struct file *filep; + struct fd f; struct vfio_container *container; struct vfio_iommu_driver *driver; - int ret = 0, fput_needed; + int ret = 0; if (atomic_read(&group->container_users)) return -EINVAL; - filep = fget_light(container_fd, &fput_needed); - if (!filep) + f = fdget(container_fd); + if (!f.file) return -EBADF; /* Sanity check, is this really our fd? */ - if (filep->f_op != &vfio_fops) { - fput_light(filep, fput_needed); + if (f.file->f_op != &vfio_fops) { + fdput(f); return -EINVAL; } - container = filep->private_data; + container = f.file->private_data; WARN_ON(!container); /* fget ensures we don't race vfio_release */ mutex_lock(&container->group_lock); @@ -1054,8 +1054,7 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd) unlock_out: mutex_unlock(&container->group_lock); - fput_light(filep, fput_needed); - + fdput(f); return ret; } diff --git a/drivers/video/msm/mdp.c b/drivers/video/msm/mdp.c index cb2ddf164c98..07c9d8ab2c3b 100644 --- a/drivers/video/msm/mdp.c +++ b/drivers/video/msm/mdp.c @@ -257,19 +257,17 @@ int get_img(struct mdp_img *img, struct fb_info *info, unsigned long *start, unsigned long *len, struct file **filep) { - int put_needed, ret = 0; - struct file *file; - - file = fget_light(img->memory_id, &put_needed); - if (file == NULL) + int ret = 0; + struct fd f = fdget(img->memory_id); + if (f.file == NULL) return -1; - if (MAJOR(file->f_dentry->d_inode->i_rdev) == FB_MAJOR) { + if (MAJOR(f.file->f_dentry->d_inode->i_rdev) == FB_MAJOR) { *start = info->fix.smem_start; *len = info->fix.smem_len; } else ret = -1; - fput_light(file, put_needed); + fdput(f); return ret; } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 3494f2f44167..0a4f0c8bc58f 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1397,7 +1397,6 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, u64 *transid, bool readonly, struct btrfs_qgroup_inherit **inherit) { - struct file *src_file; int namelen; int ret = 0; @@ -1421,15 +1420,14 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, ret = btrfs_mksubvol(&file->f_path, name, namelen, NULL, transid, readonly, inherit); } else { + struct fd src = fdget(fd); struct inode *src_inode; - int fput_needed; - src_file = fget_light(fd, &fput_needed); - if (!src_file) { + if (!src.file) { ret = -EINVAL; goto out_drop_write; } - src_inode = src_file->f_path.dentry->d_inode; + src_inode = src.file->f_path.dentry->d_inode; if (src_inode->i_sb != file->f_path.dentry->d_inode->i_sb) { printk(KERN_INFO "btrfs: Snapshot src from " "another FS\n"); @@ -1439,7 +1437,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, BTRFS_I(src_inode)->root, transid, readonly, inherit); } - fput_light(src_file, fput_needed); + fdput(src); } out_drop_write: mnt_drop_write_file(file); @@ -2341,7 +2339,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, { struct inode *inode = fdentry(file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; - struct file *src_file; + struct fd src_file; struct inode *src; struct btrfs_trans_handle *trans; struct btrfs_path *path; @@ -2350,7 +2348,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, struct btrfs_key key; u32 nritems; int slot; - int ret, fput_needed; + int ret; u64 len = olen; u64 bs = root->fs_info->sb->s_blocksize; u64 hint_byte; @@ -2376,24 +2374,24 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, if (ret) return ret; - src_file = fget_light(srcfd, &fput_needed); - if (!src_file) { + src_file = fdget(srcfd); + if (!src_file.file) { ret = -EBADF; goto out_drop_write; } ret = -EXDEV; - if (src_file->f_path.mnt != file->f_path.mnt) + if (src_file.file->f_path.mnt != file->f_path.mnt) goto out_fput; - src = src_file->f_dentry->d_inode; + src = src_file.file->f_dentry->d_inode; ret = -EINVAL; if (src == inode) goto out_fput; /* the src must be open for reading */ - if (!(src_file->f_mode & FMODE_READ)) + if (!(src_file.file->f_mode & FMODE_READ)) goto out_fput; /* don't make the dst file partly checksummed */ @@ -2724,7 +2722,7 @@ out_unlock: vfree(buf); btrfs_free_path(path); out_fput: - fput_light(src_file, fput_needed); + fdput(src_file); out_drop_write: mnt_drop_write_file(file); return ret; diff --git a/fs/coda/inode.c b/fs/coda/inode.c index bd2313d106e5..d315c6c5891a 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -107,9 +107,9 @@ static const struct super_operations coda_super_operations = static int get_device_index(struct coda_mount_data *data) { - struct file *file; + struct fd f; struct inode *inode; - int idx, fput_needed; + int idx; if (data == NULL) { printk("coda_read_super: Bad mount data\n"); @@ -121,17 +121,17 @@ static int get_device_index(struct coda_mount_data *data) return -1; } - file = fget_light(data->fd, &fput_needed); - if (!file) + f = fdget(data->fd); + if (!f.file) goto Ebadf; - inode = file->f_path.dentry->d_inode; + inode = f.file->f_path.dentry->d_inode; if (!S_ISCHR(inode->i_mode) || imajor(inode) != CODA_PSDEV_MAJOR) { - fput_light(file, fput_needed); + fdput(f); goto Ebadf; } idx = iminor(inode); - fput_light(file, fput_needed); + fdput(f); if (idx < 0 || idx >= MAX_CODADEVS) { printk("coda_read_super: Bad minor number\n"); diff --git a/fs/compat.c b/fs/compat.c index 1bdb350ea5d3..d72d51e19025 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -870,22 +870,20 @@ asmlinkage long compat_sys_old_readdir(unsigned int fd, struct compat_old_linux_dirent __user *dirent, unsigned int count) { int error; - struct file *file; - int fput_needed; + struct fd f = fdget(fd); struct compat_readdir_callback buf; - file = fget_light(fd, &fput_needed); - if (!file) + if (!f.file) return -EBADF; buf.result = 0; buf.dirent = dirent; - error = vfs_readdir(file, compat_fillonedir, &buf); + error = vfs_readdir(f.file, compat_fillonedir, &buf); if (buf.result) error = buf.result; - fput_light(file, fput_needed); + fdput(f); return error; } @@ -949,17 +947,16 @@ efault: asmlinkage long compat_sys_getdents(unsigned int fd, struct compat_linux_dirent __user *dirent, unsigned int count) { - struct file * file; + struct fd f; struct compat_linux_dirent __user * lastdirent; struct compat_getdents_callback buf; - int fput_needed; int error; if (!access_ok(VERIFY_WRITE, dirent, count)) return -EFAULT; - file = fget_light(fd, &fput_needed); - if (!file) + f = fdget(fd); + if (!f.file) return -EBADF; buf.current_dir = dirent; @@ -967,17 +964,17 @@ asmlinkage long compat_sys_getdents(unsigned int fd, buf.count = count; buf.error = 0; - error = vfs_readdir(file, compat_filldir, &buf); + error = vfs_readdir(f.file, compat_filldir, &buf); if (error >= 0) error = buf.error; lastdirent = buf.previous; if (lastdirent) { - if (put_user(file->f_pos, &lastdirent->d_off)) + if (put_user(f.file->f_pos, &lastdirent->d_off)) error = -EFAULT; else error = count - buf.count; } - fput_light(file, fput_needed); + fdput(f); return error; } @@ -1035,17 +1032,16 @@ efault: asmlinkage long compat_sys_getdents64(unsigned int fd, struct linux_dirent64 __user * dirent, unsigned int count) { - struct file * file; + struct fd f; struct linux_dirent64 __user * lastdirent; struct compat_getdents_callback64 buf; - int fput_needed; int error; if (!access_ok(VERIFY_WRITE, dirent, count)) return -EFAULT; - file = fget_light(fd, &fput_needed); - if (!file) + f = fdget(fd); + if (!f.file) return -EBADF; buf.current_dir = dirent; @@ -1053,18 +1049,18 @@ asmlinkage long compat_sys_getdents64(unsigned int fd, buf.count = count; buf.error = 0; - error = vfs_readdir(file, compat_filldir64, &buf); + error = vfs_readdir(f.file, compat_filldir64, &buf); if (error >= 0) error = buf.error; lastdirent = buf.previous; if (lastdirent) { - typeof(lastdirent->d_off) d_off = file->f_pos; + typeof(lastdirent->d_off) d_off = f.file->f_pos; if (__put_user_unaligned(d_off, &lastdirent->d_off)) error = -EFAULT; else error = count - buf.count; } - fput_light(file, fput_needed); + fdput(f); return error; } #endif /* ! __ARCH_OMIT_COMPAT_SYS_GETDENTS64 */ @@ -1152,18 +1148,16 @@ asmlinkage ssize_t compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec, unsigned long vlen) { - struct file *file; - int fput_needed; + struct fd f = fdget(fd); ssize_t ret; loff_t pos; - file = fget_light(fd, &fput_needed); - if (!file) + if (!f.file) return -EBADF; - pos = file->f_pos; - ret = compat_readv(file, vec, vlen, &pos); - file->f_pos = pos; - fput_light(file, fput_needed); + pos = f.file->f_pos; + ret = compat_readv(f.file, vec, vlen, &pos); + f.file->f_pos = pos; + fdput(f); return ret; } @@ -1171,19 +1165,18 @@ asmlinkage ssize_t compat_sys_preadv64(unsigned long fd, const struct compat_iovec __user *vec, unsigned long vlen, loff_t pos) { - struct file *file; - int fput_needed; + struct fd f; ssize_t ret; if (pos < 0) return -EINVAL; - file = fget_light(fd, &fput_needed); - if (!file) + f = fdget(fd); + if (!f.file) return -EBADF; ret = -ESPIPE; - if (file->f_mode & FMODE_PREAD) - ret = compat_readv(file, vec, vlen, &pos); - fput_light(file, fput_needed); + if (f.file->f_mode & FMODE_PREAD) + ret = compat_readv(f.file, vec, vlen, &pos); + fdput(f); return ret; } @@ -1221,18 +1214,16 @@ asmlinkage ssize_t compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec, unsigned long vlen) { - struct file *file; - int fput_needed; + struct fd f = fdget(fd); ssize_t ret; loff_t pos; - file = fget_light(fd, &fput_needed); - if (!file) + if (!f.file) return -EBADF; - pos = file->f_pos; - ret = compat_writev(file, vec, vlen, &pos); - file->f_pos = pos; - fput_light(file, fput_needed); + pos = f.file->f_pos; + ret = compat_writev(f.file, vec, vlen, &pos); + f.file->f_pos = pos; + fdput(f); return ret; } @@ -1240,19 +1231,18 @@ asmlinkage ssize_t compat_sys_pwritev64(unsigned long fd, const struct compat_iovec __user *vec, unsigned long vlen, loff_t pos) { - struct file *file; - int fput_needed; + struct fd f; ssize_t ret; if (pos < 0) return -EINVAL; - file = fget_light(fd, &fput_needed); - if (!file) + f = fdget(fd); + if (!f.file) return -EBADF; ret = -ESPIPE; - if (file->f_mode & FMODE_PWRITE) - ret = compat_writev(file, vec, vlen, &pos); - fput_light(file, fput_needed); + if (f.file->f_mode & FMODE_PWRITE) + ret = compat_writev(f.file, vec, vlen, &pos); + fdput(f); return ret; } diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index debdfe0fc809..48f1987bec34 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -1531,16 +1531,13 @@ static int compat_ioctl_check_table(unsigned int xcmd) asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) { - struct file *filp; + struct fd f = fdget(fd); int error = -EBADF; - int fput_needed; - - filp = fget_light(fd, &fput_needed); - if (!filp) + if (!f.file) goto out; /* RED-PEN how should LSM module know it's handling 32bit? */ - error = security_file_ioctl(filp, cmd, arg); + error = security_file_ioctl(f.file, cmd, arg); if (error) goto out_fput; @@ -1560,30 +1557,30 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, #if defined(CONFIG_IA64) || defined(CONFIG_X86_64) case FS_IOC_RESVSP_32: case FS_IOC_RESVSP64_32: - error = compat_ioctl_preallocate(filp, compat_ptr(arg)); + error = compat_ioctl_preallocate(f.file, compat_ptr(arg)); goto out_fput; #else case FS_IOC_RESVSP: case FS_IOC_RESVSP64: - error = ioctl_preallocate(filp, compat_ptr(arg)); + error = ioctl_preallocate(f.file, compat_ptr(arg)); goto out_fput; #endif case FIBMAP: case FIGETBSZ: case FIONREAD: - if (S_ISREG(filp->f_path.dentry->d_inode->i_mode)) + if (S_ISREG(f.file->f_path.dentry->d_inode->i_mode)) break; /*FALL THROUGH*/ default: - if (filp->f_op && filp->f_op->compat_ioctl) { - error = filp->f_op->compat_ioctl(filp, cmd, arg); + if (f.file->f_op && f.file->f_op->compat_ioctl) { + error = f.file->f_op->compat_ioctl(f.file, cmd, arg); if (error != -ENOIOCTLCMD) goto out_fput; } - if (!filp->f_op || !filp->f_op->unlocked_ioctl) + if (!f.file->f_op || !f.file->f_op->unlocked_ioctl) goto do_ioctl; break; } @@ -1591,7 +1588,7 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, if (compat_ioctl_check_table(XFORM(cmd))) goto found_handler; - error = do_ioctl_trans(fd, cmd, arg, filp); + error = do_ioctl_trans(fd, cmd, arg, f.file); if (error == -ENOIOCTLCMD) error = -ENOTTY; @@ -1600,9 +1597,9 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, found_handler: arg = (unsigned long)compat_ptr(arg); do_ioctl: - error = do_vfs_ioctl(filp, fd, cmd, arg); + error = do_vfs_ioctl(f.file, fd, cmd, arg); out_fput: - fput_light(filp, fput_needed); + fdput(f); out: return error; } diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 567ae72e155d..cd96649bfe62 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1809,8 +1809,8 @@ error_return: SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, int, maxevents, int, timeout) { - int error, fput_needed; - struct file *file; + int error; + struct fd f; struct eventpoll *ep; /* The maximum number of event must be greater than zero */ @@ -1818,38 +1818,33 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, return -EINVAL; /* Verify that the area passed by the user is writeable */ - if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event))) { - error = -EFAULT; - goto error_return; - } + if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event))) + return -EFAULT; /* Get the "struct file *" for the eventpoll file */ - error = -EBADF; - file = fget_light(epfd, &fput_needed); - if (!file) - goto error_return; + f = fdget(epfd); + if (!f.file) + return -EBADF; /* * We have to check that the file structure underneath the fd * the user passed to us _is_ an eventpoll file. */ error = -EINVAL; - if (!is_file_epoll(file)) + if (!is_file_epoll(f.file)) goto error_fput; /* * At this point it is safe to assume that the "private_data" contains * our own data structure. */ - ep = file->private_data; + ep = f.file->private_data; /* Time to fish for events ... */ error = ep_poll(ep, events, maxevents, timeout); error_fput: - fput_light(file, fput_needed); -error_return: - + fdput(f); return error; } diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 39646f224514..5439d6a56e99 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -233,8 +233,8 @@ group_extend_out: case EXT4_IOC_MOVE_EXT: { struct move_extent me; - struct file *donor_filp; - int err, fput_needed; + struct fd donor; + int err; if (!(filp->f_mode & FMODE_READ) || !(filp->f_mode & FMODE_WRITE)) @@ -245,11 +245,11 @@ group_extend_out: return -EFAULT; me.moved_len = 0; - donor_filp = fget_light(me.donor_fd, &fput_needed); - if (!donor_filp) + donor = fdget(me.donor_fd); + if (!donor.file) return -EBADF; - if (!(donor_filp->f_mode & FMODE_WRITE)) { + if (!(donor.file->f_mode & FMODE_WRITE)) { err = -EBADF; goto mext_out; } @@ -266,7 +266,7 @@ group_extend_out: if (err) goto mext_out; - err = ext4_move_extents(filp, donor_filp, me.orig_start, + err = ext4_move_extents(filp, donor.file, me.orig_start, me.donor_start, me.len, &me.moved_len); mnt_drop_write_file(filp); @@ -274,7 +274,7 @@ group_extend_out: &me, sizeof(me))) err = -EFAULT; mext_out: - fput_light(donor_filp, fput_needed); + fdput(donor); return err; } diff --git a/fs/fcntl.c b/fs/fcntl.c index 40a5bfb72cca..91af39a33af7 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -348,25 +348,23 @@ static int check_fcntl_cmd(unsigned cmd) SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) { - struct file *filp; - int fput_needed; + struct fd f = fdget_raw(fd); long err = -EBADF; - filp = fget_raw_light(fd, &fput_needed); - if (!filp) + if (!f.file) goto out; - if (unlikely(filp->f_mode & FMODE_PATH)) { + if (unlikely(f.file->f_mode & FMODE_PATH)) { if (!check_fcntl_cmd(cmd)) goto out1; } - err = security_file_fcntl(filp, cmd, arg); + err = security_file_fcntl(f.file, cmd, arg); if (!err) - err = do_fcntl(fd, cmd, arg, filp); + err = do_fcntl(fd, cmd, arg, f.file); out1: - fput_light(filp, fput_needed); + fdput(f); out: return err; } @@ -375,38 +373,36 @@ out: SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, unsigned long, arg) { - struct file * filp; + struct fd f = fdget_raw(fd); long err = -EBADF; - int fput_needed; - filp = fget_raw_light(fd, &fput_needed); - if (!filp) + if (!f.file) goto out; - if (unlikely(filp->f_mode & FMODE_PATH)) { + if (unlikely(f.file->f_mode & FMODE_PATH)) { if (!check_fcntl_cmd(cmd)) goto out1; } - err = security_file_fcntl(filp, cmd, arg); + err = security_file_fcntl(f.file, cmd, arg); if (err) goto out1; switch (cmd) { case F_GETLK64: - err = fcntl_getlk64(filp, (struct flock64 __user *) arg); + err = fcntl_getlk64(f.file, (struct flock64 __user *) arg); break; case F_SETLK64: case F_SETLKW64: - err = fcntl_setlk64(fd, filp, cmd, + err = fcntl_setlk64(fd, f.file, cmd, (struct flock64 __user *) arg); break; default: - err = do_fcntl(fd, cmd, arg, filp); + err = do_fcntl(fd, cmd, arg, f.file); break; } out1: - fput_light(filp, fput_needed); + fdput(f); out: return err; } diff --git a/fs/fhandle.c b/fs/fhandle.c index a48e4a139be1..f775bfdd6e4a 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -113,24 +113,21 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name, static struct vfsmount *get_vfsmount_from_fd(int fd) { - struct path path; + struct vfsmount *mnt; if (fd == AT_FDCWD) { struct fs_struct *fs = current->fs; spin_lock(&fs->lock); - path = fs->pwd; - mntget(path.mnt); + mnt = mntget(fs->pwd.mnt); spin_unlock(&fs->lock); } else { - int fput_needed; - struct file *file = fget_light(fd, &fput_needed); - if (!file) + struct fd f = fdget(fd); + if (!f.file) return ERR_PTR(-EBADF); - path = file->f_path; - mntget(path.mnt); - fput_light(file, fput_needed); + mnt = mntget(f.file->f_path.mnt); + fdput(f); } - return path.mnt; + return mnt; } static int vfs_dentry_acceptable(void *context, struct dentry *dentry) diff --git a/fs/ioctl.c b/fs/ioctl.c index 29167bebe874..3bdad6d1f268 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -603,21 +603,14 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) { - struct file *filp; - int error = -EBADF; - int fput_needed; - - filp = fget_light(fd, &fput_needed); - if (!filp) - goto out; - - error = security_file_ioctl(filp, cmd, arg); - if (error) - goto out_fput; - - error = do_vfs_ioctl(filp, fd, cmd, arg); - out_fput: - fput_light(filp, fput_needed); - out: + int error; + struct fd f = fdget(fd); + + if (!f.file) + return -EBADF; + error = security_file_ioctl(f.file, cmd, arg); + if (!error) + error = do_vfs_ioctl(f.file, fd, cmd, arg); + fdput(f); return error; } diff --git a/fs/locks.c b/fs/locks.c index 7e81bfc75164..abc7dc6c490b 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1625,15 +1625,13 @@ EXPORT_SYMBOL(flock_lock_file_wait); */ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) { - struct file *filp; - int fput_needed; + struct fd f = fdget(fd); struct file_lock *lock; int can_sleep, unlock; int error; error = -EBADF; - filp = fget_light(fd, &fput_needed); - if (!filp) + if (!f.file) goto out; can_sleep = !(cmd & LOCK_NB); @@ -1641,31 +1639,31 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) unlock = (cmd == LOCK_UN); if (!unlock && !(cmd & LOCK_MAND) && - !(filp->f_mode & (FMODE_READ|FMODE_WRITE))) + !(f.file->f_mode & (FMODE_READ|FMODE_WRITE))) goto out_putf; - error = flock_make_lock(filp, &lock, cmd); + error = flock_make_lock(f.file, &lock, cmd); if (error) goto out_putf; if (can_sleep) lock->fl_flags |= FL_SLEEP; - error = security_file_lock(filp, lock->fl_type); + error = security_file_lock(f.file, lock->fl_type); if (error) goto out_free; - if (filp->f_op && filp->f_op->flock) - error = filp->f_op->flock(filp, + if (f.file->f_op && f.file->f_op->flock) + error = f.file->f_op->flock(f.file, (can_sleep) ? F_SETLKW : F_SETLK, lock); else - error = flock_lock_file_wait(filp, lock); + error = flock_lock_file_wait(f.file, lock); out_free: locks_free_lock(lock); out_putf: - fput_light(filp, fput_needed); + fdput(f); out: return error; } diff --git a/fs/namei.c b/fs/namei.c index c5b85b3523c0..e1c7072c7afa 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1797,8 +1797,6 @@ static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd, struct file **fp) { int retval = 0; - int fput_needed; - struct file *file; nd->last_type = LAST_ROOT; /* if there are only slashes... */ nd->flags = flags | LOOKUP_JUMPED; @@ -1850,44 +1848,41 @@ static int path_init(int dfd, const char *name, unsigned int flags, get_fs_pwd(current->fs, &nd->path); } } else { + struct fd f = fdget_raw(dfd); struct dentry *dentry; - file = fget_raw_light(dfd, &fput_needed); - retval = -EBADF; - if (!file) - goto out_fail; + if (!f.file) + return -EBADF; - dentry = file->f_path.dentry; + dentry = f.file->f_path.dentry; if (*name) { - retval = -ENOTDIR; - if (!S_ISDIR(dentry->d_inode->i_mode)) - goto fput_fail; + if (!S_ISDIR(dentry->d_inode->i_mode)) { + fdput(f); + return -ENOTDIR; + } retval = inode_permission(dentry->d_inode, MAY_EXEC); - if (retval) - goto fput_fail; + if (retval) { + fdput(f); + return retval; + } } - nd->path = file->f_path; + nd->path = f.file->f_path; if (flags & LOOKUP_RCU) { - if (fput_needed) - *fp = file; + if (f.need_put) + *fp = f.file; nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); lock_rcu_walk(); } else { - path_get(&file->f_path); - fput_light(file, fput_needed); + path_get(&nd->path); + fdput(f); } } nd->inode = nd->path.dentry->d_inode; return 0; - -fput_fail: - fput_light(file, fput_needed); -out_fail: - return retval; } static inline int lookup_last(struct nameidata *nd, struct path *path) diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index ea48693940f1..721d692fa8d4 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -451,24 +451,22 @@ static int fanotify_find_path(int dfd, const char __user *filename, dfd, filename, flags); if (filename == NULL) { - struct file *file; - int fput_needed; + struct fd f = fdget(dfd); ret = -EBADF; - file = fget_light(dfd, &fput_needed); - if (!file) + if (!f.file) goto out; ret = -ENOTDIR; if ((flags & FAN_MARK_ONLYDIR) && - !(S_ISDIR(file->f_path.dentry->d_inode->i_mode))) { - fput_light(file, fput_needed); + !(S_ISDIR(f.file->f_path.dentry->d_inode->i_mode))) { + fdput(f); goto out; } - *path = file->f_path; + *path = f.file->f_path; path_get(path); - fput_light(file, fput_needed); + fdput(f); } else { unsigned int lookup_flags = 0; @@ -748,9 +746,9 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags, struct inode *inode = NULL; struct vfsmount *mnt = NULL; struct fsnotify_group *group; - struct file *filp; + struct fd f; struct path path; - int ret, fput_needed; + int ret; pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n", __func__, fanotify_fd, flags, dfd, pathname, mask); @@ -784,15 +782,15 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags, #endif return -EINVAL; - filp = fget_light(fanotify_fd, &fput_needed); - if (unlikely(!filp)) + f = fdget(fanotify_fd); + if (unlikely(!f.file)) return -EBADF; /* verify that this is indeed an fanotify instance */ ret = -EINVAL; - if (unlikely(filp->f_op != &fanotify_fops)) + if (unlikely(f.file->f_op != &fanotify_fops)) goto fput_and_out; - group = filp->private_data; + group = f.file->private_data; /* * group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF. These are not @@ -839,7 +837,7 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags, path_put(&path); fput_and_out: - fput_light(filp, fput_needed); + fdput(f); return ret; } diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 8445fbc8985c..c311dda054a3 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -757,16 +757,16 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, struct fsnotify_group *group; struct inode *inode; struct path path; - struct file *filp; - int ret, fput_needed; + struct fd f; + int ret; unsigned flags = 0; - filp = fget_light(fd, &fput_needed); - if (unlikely(!filp)) + f = fdget(fd); + if (unlikely(!f.file)) return -EBADF; /* verify that this is indeed an inotify instance */ - if (unlikely(filp->f_op != &inotify_fops)) { + if (unlikely(f.file->f_op != &inotify_fops)) { ret = -EINVAL; goto fput_and_out; } @@ -782,13 +782,13 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, /* inode held in place by reference to path; group by fget on fd */ inode = path.dentry->d_inode; - group = filp->private_data; + group = f.file->private_data; /* create/update an inode mark */ ret = inotify_update_watch(group, inode, mask); path_put(&path); fput_and_out: - fput_light(filp, fput_needed); + fdput(f); return ret; } @@ -796,19 +796,19 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd) { struct fsnotify_group *group; struct inotify_inode_mark *i_mark; - struct file *filp; - int ret = 0, fput_needed; + struct fd f; + int ret = 0; - filp = fget_light(fd, &fput_needed); - if (unlikely(!filp)) + f = fdget(fd); + if (unlikely(!f.file)) return -EBADF; /* verify that this is indeed an inotify instance */ ret = -EINVAL; - if (unlikely(filp->f_op != &inotify_fops)) + if (unlikely(f.file->f_op != &inotify_fops)) goto out; - group = filp->private_data; + group = f.file->private_data; ret = -EINVAL; i_mark = inotify_idr_find(group, wd); @@ -823,7 +823,7 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd) fsnotify_put_mark(&i_mark->fsn_mark); out: - fput_light(filp, fput_needed); + fdput(f); return ret; } diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 61c28ae266f5..f7c648d7d6bf 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -1746,11 +1746,10 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, long fd; int sectsize; char *p = (char *)page; - struct file *filp = NULL; - struct inode *inode = NULL; + struct fd f; + struct inode *inode; ssize_t ret = -EINVAL; int live_threshold; - int fput_needed; if (reg->hr_bdev) goto out; @@ -1767,26 +1766,26 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, if (fd < 0 || fd >= INT_MAX) goto out; - filp = fget_light(fd, &fput_needed); - if (filp == NULL) + f = fdget(fd); + if (f.file == NULL) goto out; if (reg->hr_blocks == 0 || reg->hr_start_block == 0 || reg->hr_block_bytes == 0) - goto out; + goto out2; - inode = igrab(filp->f_mapping->host); + inode = igrab(f.file->f_mapping->host); if (inode == NULL) - goto out; + goto out2; if (!S_ISBLK(inode->i_mode)) - goto out; + goto out3; - reg->hr_bdev = I_BDEV(filp->f_mapping->host); + reg->hr_bdev = I_BDEV(f.file->f_mapping->host); ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ, NULL); if (ret) { reg->hr_bdev = NULL; - goto out; + goto out3; } inode = NULL; @@ -1798,7 +1797,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, "blocksize %u incorrect for device, expected %d", reg->hr_block_bytes, sectsize); ret = -EINVAL; - goto out; + goto out3; } o2hb_init_region_params(reg); @@ -1812,13 +1811,13 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, ret = o2hb_map_slot_data(reg); if (ret) { mlog_errno(ret); - goto out; + goto out3; } ret = o2hb_populate_slot_data(reg); if (ret) { mlog_errno(ret); - goto out; + goto out3; } INIT_DELAYED_WORK(®->hr_write_timeout_work, o2hb_write_timeout); @@ -1848,7 +1847,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, if (IS_ERR(hb_task)) { ret = PTR_ERR(hb_task); mlog_errno(ret); - goto out; + goto out3; } spin_lock(&o2hb_live_lock); @@ -1864,7 +1863,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, if (reg->hr_aborted_start) { ret = -EIO; - goto out; + goto out3; } /* Ok, we were woken. Make sure it wasn't by drop_item() */ @@ -1883,11 +1882,11 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, printk(KERN_NOTICE "o2hb: Heartbeat started on region %s (%s)\n", config_item_name(®->hr_item), reg->hr_dev_name); +out3: + iput(inode); +out2: + fdput(f); out: - if (filp) - fput_light(filp, fput_needed); - if (inode) - iput(inode); if (ret < 0) { if (reg->hr_bdev) { blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE); diff --git a/fs/open.c b/fs/open.c index 3c741eae6b99..85603262d8db 100644 --- a/fs/open.c +++ b/fs/open.c @@ -134,25 +134,25 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) { struct inode *inode; struct dentry *dentry; - struct file *file; - int error, fput_needed; + struct fd f; + int error; error = -EINVAL; if (length < 0) goto out; error = -EBADF; - file = fget_light(fd, &fput_needed); - if (!file) + f = fdget(fd); + if (!f.file) goto out; /* explicitly opened as large or we are on 64-bit box */ - if (file->f_flags & O_LARGEFILE) + if (f.file->f_flags & O_LARGEFILE) small = 0; - dentry = file->f_path.dentry; + dentry = f.file->f_path.dentry; inode = dentry->d_inode; error = -EINVAL; - if (!S_ISREG(inode->i_mode) || !(file->f_mode & FMODE_WRITE)) + if (!S_ISREG(inode->i_mode) || !(f.file->f_mode & FMODE_WRITE)) goto out_putf; error = -EINVAL; @@ -165,14 +165,14 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) goto out_putf; sb_start_write(inode->i_sb); - error = locks_verify_truncate(inode, file, length); + error = locks_verify_truncate(inode, f.file, length); if (!error) - error = security_path_truncate(&file->f_path); + error = security_path_truncate(&f.file->f_path); if (!error) - error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file); + error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, f.file); sb_end_write(inode->i_sb); out_putf: - fput_light(file, fput_needed); + fdput(f); out: return error; } @@ -276,15 +276,13 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len) SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len) { - struct file *file; - int error = -EBADF, fput_needed; + struct fd f = fdget(fd); + int error = -EBADF; - file = fget_light(fd, &fput_needed); - if (file) { - error = do_fallocate(file, mode, offset, len); - fput_light(file, fput_needed); + if (f.file) { + error = do_fallocate(f.file, mode, offset, len); + fdput(f); } - return error; } @@ -400,16 +398,15 @@ out: SYSCALL_DEFINE1(fchdir, unsigned int, fd) { - struct file *file; + struct fd f = fdget_raw(fd); struct inode *inode; - int error, fput_needed; + int error = -EBADF; error = -EBADF; - file = fget_raw_light(fd, &fput_needed); - if (!file) + if (!f.file) goto out; - inode = file->f_path.dentry->d_inode; + inode = f.file->f_path.dentry->d_inode; error = -ENOTDIR; if (!S_ISDIR(inode->i_mode)) @@ -417,9 +414,9 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd) error = inode_permission(inode, MAY_EXEC | MAY_CHDIR); if (!error) - set_fs_pwd(current->fs, &file->f_path); + set_fs_pwd(current->fs, &f.file->f_path); out_putf: - fput_light(file, fput_needed); + fdput(f); out: return error; } @@ -582,21 +579,20 @@ SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) { - struct file *file; - int error = -EBADF, fput_needed; + struct fd f = fdget(fd); + int error = -EBADF; - file = fget_light(fd, &fput_needed); - if (!file) + if (!f.file) goto out; - error = mnt_want_write_file(file); + error = mnt_want_write_file(f.file); if (error) goto out_fput; - audit_inode(NULL, file->f_path.dentry); - error = chown_common(&file->f_path, user, group); - mnt_drop_write_file(file); + audit_inode(NULL, f.file->f_path.dentry); + error = chown_common(&f.file->f_path, user, group); + mnt_drop_write_file(f.file); out_fput: - fput_light(file, fput_needed); + fdput(f); out: return error; } diff --git a/fs/read_write.c b/fs/read_write.c index 1adfb691e4f1..28b38279a219 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -232,23 +232,18 @@ EXPORT_SYMBOL(vfs_llseek); SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin) { off_t retval; - struct file * file; - int fput_needed; - - retval = -EBADF; - file = fget_light(fd, &fput_needed); - if (!file) - goto bad; + struct fd f = fdget(fd); + if (!f.file) + return -EBADF; retval = -EINVAL; if (origin <= SEEK_MAX) { - loff_t res = vfs_llseek(file, offset, origin); + loff_t res = vfs_llseek(f.file, offset, origin); retval = res; if (res != (loff_t)retval) retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ } - fput_light(file, fput_needed); -bad: + fdput(f); return retval; } @@ -258,20 +253,17 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, unsigned int, origin) { int retval; - struct file * file; + struct fd f = fdget(fd); loff_t offset; - int fput_needed; - retval = -EBADF; - file = fget_light(fd, &fput_needed); - if (!file) - goto bad; + if (!f.file) + return -EBADF; retval = -EINVAL; if (origin > SEEK_MAX) goto out_putf; - offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low, + offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low, origin); retval = (int)offset; @@ -281,8 +273,7 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, retval = 0; } out_putf: - fput_light(file, fput_needed); -bad: + fdput(f); return retval; } #endif @@ -461,34 +452,29 @@ static inline void file_pos_write(struct file *file, loff_t pos) SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) { - struct file *file; + struct fd f = fdget(fd); ssize_t ret = -EBADF; - int fput_needed; - file = fget_light(fd, &fput_needed); - if (file) { - loff_t pos = file_pos_read(file); - ret = vfs_read(file, buf, count, &pos); - file_pos_write(file, pos); - fput_light(file, fput_needed); + if (f.file) { + loff_t pos = file_pos_read(f.file); + ret = vfs_read(f.file, buf, count, &pos); + file_pos_write(f.file, pos); + fdput(f); } - return ret; } SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, size_t, count) { - struct file *file; + struct fd f = fdget(fd); ssize_t ret = -EBADF; - int fput_needed; - file = fget_light(fd, &fput_needed); - if (file) { - loff_t pos = file_pos_read(file); - ret = vfs_write(file, buf, count, &pos); - file_pos_write(file, pos); - fput_light(file, fput_needed); + if (f.file) { + loff_t pos = file_pos_read(f.file); + ret = vfs_write(f.file, buf, count, &pos); + file_pos_write(f.file, pos); + fdput(f); } return ret; @@ -497,19 +483,18 @@ SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf, size_t count, loff_t pos) { - struct file *file; + struct fd f; ssize_t ret = -EBADF; - int fput_needed; if (pos < 0) return -EINVAL; - file = fget_light(fd, &fput_needed); - if (file) { + f = fdget(fd); + if (f.file) { ret = -ESPIPE; - if (file->f_mode & FMODE_PREAD) - ret = vfs_read(file, buf, count, &pos); - fput_light(file, fput_needed); + if (f.file->f_mode & FMODE_PREAD) + ret = vfs_read(f.file, buf, count, &pos); + fdput(f); } return ret; @@ -526,19 +511,18 @@ SYSCALL_ALIAS(sys_pread64, SyS_pread64); SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf, size_t count, loff_t pos) { - struct file *file; + struct fd f; ssize_t ret = -EBADF; - int fput_needed; if (pos < 0) return -EINVAL; - file = fget_light(fd, &fput_needed); - if (file) { + f = fdget(fd); + if (f.file) { ret = -ESPIPE; - if (file->f_mode & FMODE_PWRITE) - ret = vfs_write(file, buf, count, &pos); - fput_light(file, fput_needed); + if (f.file->f_mode & FMODE_PWRITE) + ret = vfs_write(f.file, buf, count, &pos); + fdput(f); } return ret; @@ -789,16 +773,14 @@ EXPORT_SYMBOL(vfs_writev); SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, unsigned long, vlen) { - struct file *file; + struct fd f = fdget(fd); ssize_t ret = -EBADF; - int fput_needed; - file = fget_light(fd, &fput_needed); - if (file) { - loff_t pos = file_pos_read(file); - ret = vfs_readv(file, vec, vlen, &pos); - file_pos_write(file, pos); - fput_light(file, fput_needed); + if (f.file) { + loff_t pos = file_pos_read(f.file); + ret = vfs_readv(f.file, vec, vlen, &pos); + file_pos_write(f.file, pos); + fdput(f); } if (ret > 0) @@ -810,16 +792,14 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, unsigned long, vlen) { - struct file *file; + struct fd f = fdget(fd); ssize_t ret = -EBADF; - int fput_needed; - file = fget_light(fd, &fput_needed); - if (file) { - loff_t pos = file_pos_read(file); - ret = vfs_writev(file, vec, vlen, &pos); - file_pos_write(file, pos); - fput_light(file, fput_needed); + if (f.file) { + loff_t pos = file_pos_read(f.file); + ret = vfs_writev(f.file, vec, vlen, &pos); + file_pos_write(f.file, pos); + fdput(f); } if (ret > 0) @@ -838,19 +818,18 @@ SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec, unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) { loff_t pos = pos_from_hilo(pos_h, pos_l); - struct file *file; + struct fd f; ssize_t ret = -EBADF; - int fput_needed; if (pos < 0) return -EINVAL; - file = fget_light(fd, &fput_needed); - if (file) { + f = fdget(fd); + if (f.file) { ret = -ESPIPE; - if (file->f_mode & FMODE_PREAD) - ret = vfs_readv(file, vec, vlen, &pos); - fput_light(file, fput_needed); + if (f.file->f_mode & FMODE_PREAD) + ret = vfs_readv(f.file, vec, vlen, &pos); + fdput(f); } if (ret > 0) @@ -863,19 +842,18 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) { loff_t pos = pos_from_hilo(pos_h, pos_l); - struct file *file; + struct fd f; ssize_t ret = -EBADF; - int fput_needed; if (pos < 0) return -EINVAL; - file = fget_light(fd, &fput_needed); - if (file) { + f = fdget(fd); + if (f.file) { ret = -ESPIPE; - if (file->f_mode & FMODE_PWRITE) - ret = vfs_writev(file, vec, vlen, &pos); - fput_light(file, fput_needed); + if (f.file->f_mode & FMODE_PWRITE) + ret = vfs_writev(f.file, vec, vlen, &pos); + fdput(f); } if (ret > 0) @@ -887,28 +865,28 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count, loff_t max) { - struct file * in_file, * out_file; - struct inode * in_inode, * out_inode; + struct fd in, out; + struct inode *in_inode, *out_inode; loff_t pos; ssize_t retval; - int fput_needed_in, fput_needed_out, fl; + int fl; /* * Get input file, and verify that it is ok.. */ retval = -EBADF; - in_file = fget_light(in_fd, &fput_needed_in); - if (!in_file) + in = fdget(in_fd); + if (!in.file) goto out; - if (!(in_file->f_mode & FMODE_READ)) + if (!(in.file->f_mode & FMODE_READ)) goto fput_in; retval = -ESPIPE; if (!ppos) - ppos = &in_file->f_pos; + ppos = &in.file->f_pos; else - if (!(in_file->f_mode & FMODE_PREAD)) + if (!(in.file->f_mode & FMODE_PREAD)) goto fput_in; - retval = rw_verify_area(READ, in_file, ppos, count); + retval = rw_verify_area(READ, in.file, ppos, count); if (retval < 0) goto fput_in; count = retval; @@ -917,15 +895,15 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, * Get output file, and verify that it is ok.. */ retval = -EBADF; - out_file = fget_light(out_fd, &fput_needed_out); - if (!out_file) + out = fdget(out_fd); + if (!out.file) goto fput_in; - if (!(out_file->f_mode & FMODE_WRITE)) + if (!(out.file->f_mode & FMODE_WRITE)) goto fput_out; retval = -EINVAL; - in_inode = in_file->f_path.dentry->d_inode; - out_inode = out_file->f_path.dentry->d_inode; - retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count); + in_inode = in.file->f_path.dentry->d_inode; + out_inode = out.file->f_path.dentry->d_inode; + retval = rw_verify_area(WRITE, out.file, &out.file->f_pos, count); if (retval < 0) goto fput_out; count = retval; @@ -949,10 +927,10 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, * and the application is arguably buggy if it doesn't expect * EAGAIN on a non-blocking file descriptor. */ - if (in_file->f_flags & O_NONBLOCK) + if (in.file->f_flags & O_NONBLOCK) fl = SPLICE_F_NONBLOCK; #endif - retval = do_splice_direct(in_file, ppos, out_file, count, fl); + retval = do_splice_direct(in.file, ppos, out.file, count, fl); if (retval > 0) { add_rchar(current, retval); @@ -965,9 +943,9 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, retval = -EOVERFLOW; fput_out: - fput_light(out_file, fput_needed_out); + fdput(out); fput_in: - fput_light(in_file, fput_needed_in); + fdput(in); out: return retval; } diff --git a/fs/readdir.c b/fs/readdir.c index 39e3370d79cf..5e69ef533b77 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -106,22 +106,20 @@ SYSCALL_DEFINE3(old_readdir, unsigned int, fd, struct old_linux_dirent __user *, dirent, unsigned int, count) { int error; - struct file * file; + struct fd f = fdget(fd); struct readdir_callback buf; - int fput_needed; - file = fget_light(fd, &fput_needed); - if (!file) + if (!f.file) return -EBADF; buf.result = 0; buf.dirent = dirent; - error = vfs_readdir(file, fillonedir, &buf); + error = vfs_readdir(f.file, fillonedir, &buf); if (buf.result) error = buf.result; - fput_light(file, fput_needed); + fdput(f); return error; } @@ -191,17 +189,16 @@ efault: SYSCALL_DEFINE3(getdents, unsigned int, fd, struct linux_dirent __user *, dirent, unsigned int, count) { - struct file * file; + struct fd f; struct linux_dirent __user * lastdirent; struct getdents_callback buf; - int fput_needed; int error; if (!access_ok(VERIFY_WRITE, dirent, count)) return -EFAULT; - file = fget_light(fd, &fput_needed); - if (!file) + f = fdget(fd); + if (!f.file) return -EBADF; buf.current_dir = dirent; @@ -209,17 +206,17 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd, buf.count = count; buf.error = 0; - error = vfs_readdir(file, filldir, &buf); + error = vfs_readdir(f.file, filldir, &buf); if (error >= 0) error = buf.error; lastdirent = buf.previous; if (lastdirent) { - if (put_user(file->f_pos, &lastdirent->d_off)) + if (put_user(f.file->f_pos, &lastdirent->d_off)) error = -EFAULT; else error = count - buf.count; } - fput_light(file, fput_needed); + fdput(f); return error; } @@ -272,17 +269,16 @@ efault: SYSCALL_DEFINE3(getdents64, unsigned int, fd, struct linux_dirent64 __user *, dirent, unsigned int, count) { - struct file * file; + struct fd f; struct linux_dirent64 __user * lastdirent; struct getdents_callback64 buf; - int fput_needed; int error; if (!access_ok(VERIFY_WRITE, dirent, count)) return -EFAULT; - file = fget_light(fd, &fput_needed); - if (!file) + f = fdget(fd); + if (!f.file) return -EBADF; buf.current_dir = dirent; @@ -290,17 +286,17 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd, buf.count = count; buf.error = 0; - error = vfs_readdir(file, filldir64, &buf); + error = vfs_readdir(f.file, filldir64, &buf); if (error >= 0) error = buf.error; lastdirent = buf.previous; if (lastdirent) { - typeof(lastdirent->d_off) d_off = file->f_pos; + typeof(lastdirent->d_off) d_off = f.file->f_pos; if (__put_user(d_off, &lastdirent->d_off)) error = -EFAULT; else error = count - buf.count; } - fput_light(file, fput_needed); + fdput(f); return error; } diff --git a/fs/select.c b/fs/select.c index ffdd16d6e691..2ef72d965036 100644 --- a/fs/select.c +++ b/fs/select.c @@ -428,8 +428,6 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) for (i = 0; i < n; ++rinp, ++routp, ++rexp) { unsigned long in, out, ex, all_bits, bit = 1, mask, j; unsigned long res_in = 0, res_out = 0, res_ex = 0; - const struct file_operations *f_op = NULL; - struct file *file = NULL; in = *inp++; out = *outp++; ex = *exp++; all_bits = in | out | ex; @@ -439,20 +437,21 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) } for (j = 0; j < BITS_PER_LONG; ++j, ++i, bit <<= 1) { - int fput_needed; + struct fd f; if (i >= n) break; if (!(bit & all_bits)) continue; - file = fget_light(i, &fput_needed); - if (file) { - f_op = file->f_op; + f = fdget(i); + if (f.file) { + const struct file_operations *f_op; + f_op = f.file->f_op; mask = DEFAULT_POLLMASK; if (f_op && f_op->poll) { wait_key_set(wait, in, out, bit); - mask = (*f_op->poll)(file, wait); + mask = (*f_op->poll)(f.file, wait); } - fput_light(file, fput_needed); + fdput(f); if ((mask & POLLIN_SET) && (in & bit)) { res_in |= bit; retval++; @@ -725,20 +724,17 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait) mask = 0; fd = pollfd->fd; if (fd >= 0) { - int fput_needed; - struct file * file; - - file = fget_light(fd, &fput_needed); + struct fd f = fdget(fd); mask = POLLNVAL; - if (file != NULL) { + if (f.file) { mask = DEFAULT_POLLMASK; - if (file->f_op && file->f_op->poll) { + if (f.file->f_op && f.file->f_op->poll) { pwait->_key = pollfd->events|POLLERR|POLLHUP; - mask = file->f_op->poll(file, pwait); + mask = f.file->f_op->poll(f.file, pwait); } /* Mask out unneeded events. */ mask &= pollfd->events | POLLERR | POLLHUP; - fput_light(file, fput_needed); + fdput(f); } } pollfd->revents = mask; diff --git a/fs/signalfd.c b/fs/signalfd.c index 9f35a37173de..8bee4e570911 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -269,13 +269,12 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask, if (ufd < 0) kfree(ctx); } else { - int fput_needed; - struct file *file = fget_light(ufd, &fput_needed); - if (!file) + struct fd f = fdget(ufd); + if (!f.file) return -EBADF; - ctx = file->private_data; - if (file->f_op != &signalfd_fops) { - fput_light(file, fput_needed); + ctx = f.file->private_data; + if (f.file->f_op != &signalfd_fops) { + fdput(f); return -EINVAL; } spin_lock_irq(¤t->sighand->siglock); @@ -283,7 +282,7 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask, spin_unlock_irq(¤t->sighand->siglock); wake_up(¤t->sighand->signalfd_wqh); - fput_light(file, fput_needed); + fdput(f); } return ufd; diff --git a/fs/splice.c b/fs/splice.c index 41514dd89462..13e5b4776e7a 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1666,9 +1666,8 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov, SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov, unsigned long, nr_segs, unsigned int, flags) { - struct file *file; + struct fd f; long error; - int fput; if (unlikely(nr_segs > UIO_MAXIOV)) return -EINVAL; @@ -1676,14 +1675,14 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov, return 0; error = -EBADF; - file = fget_light(fd, &fput); - if (file) { - if (file->f_mode & FMODE_WRITE) - error = vmsplice_to_pipe(file, iov, nr_segs, flags); - else if (file->f_mode & FMODE_READ) - error = vmsplice_to_user(file, iov, nr_segs, flags); - - fput_light(file, fput); + f = fdget(fd); + if (f.file) { + if (f.file->f_mode & FMODE_WRITE) + error = vmsplice_to_pipe(f.file, iov, nr_segs, flags); + else if (f.file->f_mode & FMODE_READ) + error = vmsplice_to_user(f.file, iov, nr_segs, flags); + + fdput(f); } return error; @@ -1693,30 +1692,27 @@ SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags) { + struct fd in, out; long error; - struct file *in, *out; - int fput_in, fput_out; if (unlikely(!len)) return 0; error = -EBADF; - in = fget_light(fd_in, &fput_in); - if (in) { - if (in->f_mode & FMODE_READ) { - out = fget_light(fd_out, &fput_out); - if (out) { - if (out->f_mode & FMODE_WRITE) - error = do_splice(in, off_in, - out, off_out, + in = fdget(fd_in); + if (in.file) { + if (in.file->f_mode & FMODE_READ) { + out = fdget(fd_out); + if (out.file) { + if (out.file->f_mode & FMODE_WRITE) + error = do_splice(in.file, off_in, + out.file, off_out, len, flags); - fput_light(out, fput_out); + fdput(out); } } - - fput_light(in, fput_in); + fdput(in); } - return error; } @@ -2027,26 +2023,25 @@ static long do_tee(struct file *in, struct file *out, size_t len, SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags) { - struct file *in; - int error, fput_in; + struct fd in; + int error; if (unlikely(!len)) return 0; error = -EBADF; - in = fget_light(fdin, &fput_in); - if (in) { - if (in->f_mode & FMODE_READ) { - int fput_out; - struct file *out = fget_light(fdout, &fput_out); - - if (out) { - if (out->f_mode & FMODE_WRITE) - error = do_tee(in, out, len, flags); - fput_light(out, fput_out); + in = fdget(fdin); + if (in.file) { + if (in.file->f_mode & FMODE_READ) { + struct fd out = fdget(fdout); + if (out.file) { + if (out.file->f_mode & FMODE_WRITE) + error = do_tee(in.file, out.file, + len, flags); + fdput(out); } } - fput_light(in, fput_in); + fdput(in); } return error; diff --git a/fs/stat.c b/fs/stat.c index 40780229a032..ee18fa122ae0 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -57,13 +57,13 @@ EXPORT_SYMBOL(vfs_getattr); int vfs_fstat(unsigned int fd, struct kstat *stat) { - int fput_needed; - struct file *f = fget_raw_light(fd, &fput_needed); + struct fd f = fdget_raw(fd); int error = -EBADF; - if (f) { - error = vfs_getattr(f->f_path.mnt, f->f_path.dentry, stat); - fput_light(f, fput_needed); + if (f.file) { + error = vfs_getattr(f.file->f_path.mnt, f.file->f_path.dentry, + stat); + fdput(f); } return error; } diff --git a/fs/statfs.c b/fs/statfs.c index 95ad5c0e586c..f8e832e6f0a2 100644 --- a/fs/statfs.c +++ b/fs/statfs.c @@ -87,12 +87,11 @@ int user_statfs(const char __user *pathname, struct kstatfs *st) int fd_statfs(int fd, struct kstatfs *st) { - int fput_needed; - struct file *file = fget_light(fd, &fput_needed); + struct fd f = fdget(fd); int error = -EBADF; - if (file) { - error = vfs_statfs(&file->f_path, st); - fput_light(file, fput_needed); + if (f.file) { + error = vfs_statfs(&f.file->f_path, st); + fdput(f); } return error; } diff --git a/fs/sync.c b/fs/sync.c index eb8722dc556f..14eefeb44636 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -148,21 +148,19 @@ void emergency_sync(void) */ SYSCALL_DEFINE1(syncfs, int, fd) { - struct file *file; + struct fd f = fdget(fd); struct super_block *sb; int ret; - int fput_needed; - file = fget_light(fd, &fput_needed); - if (!file) + if (!f.file) return -EBADF; - sb = file->f_dentry->d_sb; + sb = f.file->f_dentry->d_sb; down_read(&sb->s_umount); ret = sync_filesystem(sb); up_read(&sb->s_umount); - fput_light(file, fput_needed); + fdput(f); return ret; } @@ -201,14 +199,12 @@ EXPORT_SYMBOL(vfs_fsync); static int do_fsync(unsigned int fd, int datasync) { - struct file *file; + struct fd f = fdget(fd); int ret = -EBADF; - int fput_needed; - file = fget_light(fd, &fput_needed); - if (file) { - ret = vfs_fsync(file, datasync); - fput_light(file, fput_needed); + if (f.file) { + ret = vfs_fsync(f.file, datasync); + fdput(f); } return ret; } @@ -291,10 +287,9 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes, unsigned int flags) { int ret; - struct file *file; + struct fd f; struct address_space *mapping; loff_t endbyte; /* inclusive */ - int fput_needed; umode_t i_mode; ret = -EINVAL; @@ -333,17 +328,17 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes, endbyte--; /* inclusive */ ret = -EBADF; - file = fget_light(fd, &fput_needed); - if (!file) + f = fdget(fd); + if (!f.file) goto out; - i_mode = file->f_path.dentry->d_inode->i_mode; + i_mode = f.file->f_path.dentry->d_inode->i_mode; ret = -ESPIPE; if (!S_ISREG(i_mode) && !S_ISBLK(i_mode) && !S_ISDIR(i_mode) && !S_ISLNK(i_mode)) goto out_put; - mapping = file->f_mapping; + mapping = f.file->f_mapping; if (!mapping) { ret = -EINVAL; goto out_put; @@ -366,7 +361,7 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes, ret = filemap_fdatawait_range(mapping, offset, endbyte); out_put: - fput_light(file, fput_needed); + fdput(f); out: return ret; } diff --git a/fs/timerfd.c b/fs/timerfd.c index dd91e9420c9e..d03822bbf190 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -234,19 +234,17 @@ static const struct file_operations timerfd_fops = { .llseek = noop_llseek, }; -static struct file *timerfd_fget(int fd, int *fput_needed) +static int timerfd_fget(int fd, struct fd *p) { - struct file *file; - - file = fget_light(fd, fput_needed); - if (!file) - return ERR_PTR(-EBADF); - if (file->f_op != &timerfd_fops) { - fput_light(file, *fput_needed); - return ERR_PTR(-EINVAL); + struct fd f = fdget(fd); + if (!f.file) + return -EBADF; + if (f.file->f_op != &timerfd_fops) { + fdput(f); + return -EINVAL; } - - return file; + *p = f; + return 0; } SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) @@ -284,10 +282,10 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, const struct itimerspec __user *, utmr, struct itimerspec __user *, otmr) { - struct file *file; + struct fd f; struct timerfd_ctx *ctx; struct itimerspec ktmr, kotmr; - int ret, fput_needed; + int ret; if (copy_from_user(&ktmr, utmr, sizeof(ktmr))) return -EFAULT; @@ -297,10 +295,10 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, !timespec_valid(&ktmr.it_interval)) return -EINVAL; - file = timerfd_fget(ufd, &fput_needed); - if (IS_ERR(file)) - return PTR_ERR(file); - ctx = file->private_data; + ret = timerfd_fget(ufd, &f); + if (ret) + return ret; + ctx = f.file->private_data; timerfd_setup_cancel(ctx, flags); @@ -334,7 +332,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, ret = timerfd_setup(ctx, flags, &ktmr); spin_unlock_irq(&ctx->wqh.lock); - fput_light(file, fput_needed); + fdput(f); if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr))) return -EFAULT; @@ -343,15 +341,13 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) { - struct file *file; + struct fd f; struct timerfd_ctx *ctx; struct itimerspec kotmr; - int fput_needed; - - file = timerfd_fget(ufd, &fput_needed); - if (IS_ERR(file)) - return PTR_ERR(file); - ctx = file->private_data; + int ret = timerfd_fget(ufd, &f); + if (ret) + return ret; + ctx = f.file->private_data; spin_lock_irq(&ctx->wqh.lock); if (ctx->expired && ctx->tintv.tv64) { @@ -363,7 +359,7 @@ SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); kotmr.it_interval = ktime_to_timespec(ctx->tintv); spin_unlock_irq(&ctx->wqh.lock); - fput_light(file, fput_needed); + fdput(f); return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0; } diff --git a/fs/utimes.c b/fs/utimes.c index fa4dbe451e27..bb0696a41735 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -140,19 +140,18 @@ long do_utimes(int dfd, const char __user *filename, struct timespec *times, goto out; if (filename == NULL && dfd != AT_FDCWD) { - int fput_needed; - struct file *file; + struct fd f; if (flags & AT_SYMLINK_NOFOLLOW) goto out; - file = fget_light(dfd, &fput_needed); + f = fdget(dfd); error = -EBADF; - if (!file) + if (!f.file) goto out; - error = utimes_common(&file->f_path, times); - fput_light(file, fput_needed); + error = utimes_common(&f.file->f_path, times); + fdput(f); } else { struct path path; int lookup_flags = 0; diff --git a/fs/xattr.c b/fs/xattr.c index 4d45b7189e7e..14a7e2544fe3 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -399,22 +399,20 @@ SYSCALL_DEFINE5(lsetxattr, const char __user *, pathname, SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name, const void __user *,value, size_t, size, int, flags) { - int fput_needed; - struct file *f; + struct fd f = fdget(fd); struct dentry *dentry; int error = -EBADF; - f = fget_light(fd, &fput_needed); - if (!f) + if (!f.file) return error; - dentry = f->f_path.dentry; + dentry = f.file->f_path.dentry; audit_inode(NULL, dentry); - error = mnt_want_write_file(f); + error = mnt_want_write_file(f.file); if (!error) { error = setxattr(dentry, name, value, size, flags); - mnt_drop_write_file(f); + mnt_drop_write_file(f.file); } - fput_light(f, fput_needed); + fdput(f); return error; } @@ -495,16 +493,14 @@ SYSCALL_DEFINE4(lgetxattr, const char __user *, pathname, SYSCALL_DEFINE4(fgetxattr, int, fd, const char __user *, name, void __user *, value, size_t, size) { - int fput_needed; - struct file *f; + struct fd f = fdget(fd); ssize_t error = -EBADF; - f = fget_light(fd, &fput_needed); - if (!f) + if (!f.file) return error; - audit_inode(NULL, f->f_path.dentry); - error = getxattr(f->f_path.dentry, name, value, size); - fput_light(f, fput_needed); + audit_inode(NULL, f.file->f_path.dentry); + error = getxattr(f.file->f_path.dentry, name, value, size); + fdput(f); return error; } @@ -576,16 +572,14 @@ SYSCALL_DEFINE3(llistxattr, const char __user *, pathname, char __user *, list, SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size) { - int fput_needed; - struct file *f; + struct fd f = fdget(fd); ssize_t error = -EBADF; - f = fget_light(fd, &fput_needed); - if (!f) + if (!f.file) return error; - audit_inode(NULL, f->f_path.dentry); - error = listxattr(f->f_path.dentry, list, size); - fput_light(f, fput_needed); + audit_inode(NULL, f.file->f_path.dentry); + error = listxattr(f.file->f_path.dentry, list, size); + fdput(f); return error; } @@ -645,22 +639,20 @@ SYSCALL_DEFINE2(lremovexattr, const char __user *, pathname, SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name) { - int fput_needed; - struct file *f; + struct fd f = fdget(fd); struct dentry *dentry; int error = -EBADF; - f = fget_light(fd, &fput_needed); - if (!f) + if (!f.file) return error; - dentry = f->f_path.dentry; + dentry = f.file->f_path.dentry; audit_inode(NULL, dentry); - error = mnt_want_write_file(f); + error = mnt_want_write_file(f.file); if (!error) { error = removexattr(dentry, name); - mnt_drop_write_file(f); + mnt_drop_write_file(f.file); } - fput_light(f, fput_needed); + fdput(f); return error; } diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index e6cdf224d7ea..b9b8646e62db 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c @@ -48,44 +48,44 @@ xfs_swapext( xfs_swapext_t *sxp) { xfs_inode_t *ip, *tip; - struct file *file, *tmp_file; - int error = 0, fput_needed, fput_needed_tmp; + struct fd f, tmp; + int error = 0; /* Pull information for the target fd */ - file = fget_light((int)sxp->sx_fdtarget, &fput_needed); - if (!file) { + f = fdget((int)sxp->sx_fdtarget); + if (!f.file) { error = XFS_ERROR(EINVAL); goto out; } - if (!(file->f_mode & FMODE_WRITE) || - !(file->f_mode & FMODE_READ) || - (file->f_flags & O_APPEND)) { + if (!(f.file->f_mode & FMODE_WRITE) || + !(f.file->f_mode & FMODE_READ) || + (f.file->f_flags & O_APPEND)) { error = XFS_ERROR(EBADF); goto out_put_file; } - tmp_file = fget_light((int)sxp->sx_fdtmp, &fput_needed_tmp); - if (!tmp_file) { + tmp = fdget((int)sxp->sx_fdtmp); + if (!tmp.file) { error = XFS_ERROR(EINVAL); goto out_put_file; } - if (!(tmp_file->f_mode & FMODE_WRITE) || - !(tmp_file->f_mode & FMODE_READ) || - (tmp_file->f_flags & O_APPEND)) { + if (!(tmp.file->f_mode & FMODE_WRITE) || + !(tmp.file->f_mode & FMODE_READ) || + (tmp.file->f_flags & O_APPEND)) { error = XFS_ERROR(EBADF); goto out_put_tmp_file; } - if (IS_SWAPFILE(file->f_path.dentry->d_inode) || - IS_SWAPFILE(tmp_file->f_path.dentry->d_inode)) { + if (IS_SWAPFILE(f.file->f_path.dentry->d_inode) || + IS_SWAPFILE(tmp.file->f_path.dentry->d_inode)) { error = XFS_ERROR(EINVAL); goto out_put_tmp_file; } - ip = XFS_I(file->f_path.dentry->d_inode); - tip = XFS_I(tmp_file->f_path.dentry->d_inode); + ip = XFS_I(f.file->f_path.dentry->d_inode); + tip = XFS_I(tmp.file->f_path.dentry->d_inode); if (ip->i_mount != tip->i_mount) { error = XFS_ERROR(EINVAL); @@ -105,9 +105,9 @@ xfs_swapext( error = xfs_swap_extents(ip, tip, sxp); out_put_tmp_file: - fput_light(tmp_file, fput_needed_tmp); + fdput(tmp); out_put_file: - fput_light(file, fput_needed); + fdput(f); out: return error; } diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 21483eac402d..8305f2ac6773 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -70,16 +70,16 @@ xfs_find_handle( int hsize; xfs_handle_t handle; struct inode *inode; - struct file *file = NULL; + struct fd f; struct path path; - int error, fput_needed; + int error; struct xfs_inode *ip; if (cmd == XFS_IOC_FD_TO_HANDLE) { - file = fget_light(hreq->fd, &fput_needed); - if (!file) + f = fdget(hreq->fd); + if (!f.file) return -EBADF; - inode = file->f_path.dentry->d_inode; + inode = f.file->f_path.dentry->d_inode; } else { error = user_lpath((const char __user *)hreq->path, &path); if (error) @@ -134,7 +134,7 @@ xfs_find_handle( out_put: if (cmd == XFS_IOC_FD_TO_HANDLE) - fput_light(file, fput_needed); + fdput(f); else path_put(&path); return error; diff --git a/include/linux/file.h b/include/linux/file.h index c38bfbff4647..cbacf4faf447 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -47,6 +47,9 @@ static inline struct fd fdget(unsigned int fd) return (struct fd){f,b}; } +extern struct file *fget_raw(unsigned int fd); +extern struct file *fget_raw_light(unsigned int fd, int *fput_needed); + static inline struct fd fdget_raw(unsigned int fd) { int b; @@ -54,8 +57,6 @@ static inline struct fd fdget_raw(unsigned int fd) return (struct fd){f,b}; } -extern struct file *fget_raw(unsigned int fd); -extern struct file *fget_raw_light(unsigned int fd, int *fput_needed); extern int f_dupfd(unsigned int from, struct file *file, unsigned flags); extern int replace_fd(unsigned fd, struct file *file, unsigned flags); extern void set_close_on_exec(unsigned int fd, int flag); diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 5db1b69500fd..6d255e535d03 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -944,7 +944,7 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr, size_t, msg_len, unsigned int, msg_prio, const struct timespec __user *, u_abs_timeout) { - struct file *filp; + struct fd f; struct inode *inode; struct ext_wait_queue wait; struct ext_wait_queue *receiver; @@ -953,7 +953,7 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr, ktime_t expires, *timeout = NULL; struct timespec ts; struct posix_msg_tree_node *new_leaf = NULL; - int ret = 0, fput_needed; + int ret = 0; if (u_abs_timeout) { int res = prepare_timeout(u_abs_timeout, &expires, &ts); @@ -967,21 +967,21 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr, audit_mq_sendrecv(mqdes, msg_len, msg_prio, timeout ? &ts : NULL); - filp = fget_light(mqdes, &fput_needed); - if (unlikely(!filp)) { + f = fdget(mqdes); + if (unlikely(!f.file)) { ret = -EBADF; goto out; } - inode = filp->f_path.dentry->d_inode; - if (unlikely(filp->f_op != &mqueue_file_operations)) { + inode = f.file->f_path.dentry->d_inode; + if (unlikely(f.file->f_op != &mqueue_file_operations)) { ret = -EBADF; goto out_fput; } info = MQUEUE_I(inode); - audit_inode(NULL, filp->f_path.dentry); + audit_inode(NULL, f.file->f_path.dentry); - if (unlikely(!(filp->f_mode & FMODE_WRITE))) { + if (unlikely(!(f.file->f_mode & FMODE_WRITE))) { ret = -EBADF; goto out_fput; } @@ -1023,7 +1023,7 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr, } if (info->attr.mq_curmsgs == info->attr.mq_maxmsg) { - if (filp->f_flags & O_NONBLOCK) { + if (f.file->f_flags & O_NONBLOCK) { ret = -EAGAIN; } else { wait.task = current; @@ -1056,7 +1056,7 @@ out_free: if (ret) free_msg(msg_ptr); out_fput: - fput_light(filp, fput_needed); + fdput(f); out: return ret; } @@ -1067,14 +1067,13 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr, { ssize_t ret; struct msg_msg *msg_ptr; - struct file *filp; + struct fd f; struct inode *inode; struct mqueue_inode_info *info; struct ext_wait_queue wait; ktime_t expires, *timeout = NULL; struct timespec ts; struct posix_msg_tree_node *new_leaf = NULL; - int fput_needed; if (u_abs_timeout) { int res = prepare_timeout(u_abs_timeout, &expires, &ts); @@ -1085,21 +1084,21 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr, audit_mq_sendrecv(mqdes, msg_len, 0, timeout ? &ts : NULL); - filp = fget_light(mqdes, &fput_needed); - if (unlikely(!filp)) { + f = fdget(mqdes); + if (unlikely(!f.file)) { ret = -EBADF; goto out; } - inode = filp->f_path.dentry->d_inode; - if (unlikely(filp->f_op != &mqueue_file_operations)) { + inode = f.file->f_path.dentry->d_inode; + if (unlikely(f.file->f_op != &mqueue_file_operations)) { ret = -EBADF; goto out_fput; } info = MQUEUE_I(inode); - audit_inode(NULL, filp->f_path.dentry); + audit_inode(NULL, f.file->f_path.dentry); - if (unlikely(!(filp->f_mode & FMODE_READ))) { + if (unlikely(!(f.file->f_mode & FMODE_READ))) { ret = -EBADF; goto out_fput; } @@ -1131,7 +1130,7 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr, } if (info->attr.mq_curmsgs == 0) { - if (filp->f_flags & O_NONBLOCK) { + if (f.file->f_flags & O_NONBLOCK) { spin_unlock(&info->lock); ret = -EAGAIN; } else { @@ -1161,7 +1160,7 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr, free_msg(msg_ptr); } out_fput: - fput_light(filp, fput_needed); + fdput(f); out: return ret; } @@ -1174,8 +1173,8 @@ out: SYSCALL_DEFINE2(mq_notify, mqd_t, mqdes, const struct sigevent __user *, u_notification) { - int ret, fput_needed; - struct file *filp; + int ret; + struct fd f; struct sock *sock; struct inode *inode; struct sigevent notification; @@ -1221,13 +1220,13 @@ SYSCALL_DEFINE2(mq_notify, mqd_t, mqdes, skb_put(nc, NOTIFY_COOKIE_LEN); /* and attach it to the socket */ retry: - filp = fget_light(notification.sigev_signo, &fput_needed); - if (!filp) { + f = fdget(notification.sigev_signo); + if (!f.file) { ret = -EBADF; goto out; } - sock = netlink_getsockbyfilp(filp); - fput_light(filp, fput_needed); + sock = netlink_getsockbyfilp(f.file); + fdput(f); if (IS_ERR(sock)) { ret = PTR_ERR(sock); sock = NULL; @@ -1246,14 +1245,14 @@ retry: } } - filp = fget_light(mqdes, &fput_needed); - if (!filp) { + f = fdget(mqdes); + if (!f.file) { ret = -EBADF; goto out; } - inode = filp->f_path.dentry->d_inode; - if (unlikely(filp->f_op != &mqueue_file_operations)) { + inode = f.file->f_path.dentry->d_inode; + if (unlikely(f.file->f_op != &mqueue_file_operations)) { ret = -EBADF; goto out_fput; } @@ -1293,7 +1292,7 @@ retry: } spin_unlock(&info->lock); out_fput: - fput_light(filp, fput_needed); + fdput(f); out: if (sock) { netlink_detachskb(sock, nc); @@ -1309,8 +1308,7 @@ SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes, { int ret; struct mq_attr mqstat, omqstat; - int fput_needed; - struct file *filp; + struct fd f; struct inode *inode; struct mqueue_inode_info *info; @@ -1321,14 +1319,14 @@ SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes, return -EINVAL; } - filp = fget_light(mqdes, &fput_needed); - if (!filp) { + f = fdget(mqdes); + if (!f.file) { ret = -EBADF; goto out; } - inode = filp->f_path.dentry->d_inode; - if (unlikely(filp->f_op != &mqueue_file_operations)) { + inode = f.file->f_path.dentry->d_inode; + if (unlikely(f.file->f_op != &mqueue_file_operations)) { ret = -EBADF; goto out_fput; } @@ -1337,15 +1335,15 @@ SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes, spin_lock(&info->lock); omqstat = info->attr; - omqstat.mq_flags = filp->f_flags & O_NONBLOCK; + omqstat.mq_flags = f.file->f_flags & O_NONBLOCK; if (u_mqstat) { audit_mq_getsetattr(mqdes, &mqstat); - spin_lock(&filp->f_lock); + spin_lock(&f.file->f_lock); if (mqstat.mq_flags & O_NONBLOCK) - filp->f_flags |= O_NONBLOCK; + f.file->f_flags |= O_NONBLOCK; else - filp->f_flags &= ~O_NONBLOCK; - spin_unlock(&filp->f_lock); + f.file->f_flags &= ~O_NONBLOCK; + spin_unlock(&f.file->f_lock); inode->i_atime = inode->i_ctime = CURRENT_TIME; } @@ -1358,7 +1356,7 @@ SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes, ret = -EFAULT; out_fput: - fput_light(filp, fput_needed); + fdput(f); out: return ret; } diff --git a/kernel/events/core.c b/kernel/events/core.c index 2bd199bfaef8..bd9c5bca42ae 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -467,14 +467,13 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event, { struct perf_cgroup *cgrp; struct cgroup_subsys_state *css; - struct file *file; - int ret = 0, fput_needed; + struct fd f = fdget(fd); + int ret = 0; - file = fget_light(fd, &fput_needed); - if (!file) + if (!f.file) return -EBADF; - css = cgroup_css_from_dir(file, perf_subsys_id); + css = cgroup_css_from_dir(f.file, perf_subsys_id); if (IS_ERR(css)) { ret = PTR_ERR(css); goto out; @@ -500,7 +499,7 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event, ret = -EINVAL; } out: - fput_light(file, fput_needed); + fdput(f); return ret; } @@ -3233,21 +3232,18 @@ unlock: static const struct file_operations perf_fops; -static struct file *perf_fget_light(int fd, int *fput_needed) +static inline int perf_fget_light(int fd, struct fd *p) { - struct file *file; - - file = fget_light(fd, fput_needed); - if (!file) - return ERR_PTR(-EBADF); + struct fd f = fdget(fd); + if (!f.file) + return -EBADF; - if (file->f_op != &perf_fops) { - fput_light(file, *fput_needed); - *fput_needed = 0; - return ERR_PTR(-EBADF); + if (f.file->f_op != &perf_fops) { + fdput(f); + return -EBADF; } - - return file; + *p = f; + return 0; } static int perf_event_set_output(struct perf_event *event, @@ -3279,22 +3275,19 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case PERF_EVENT_IOC_SET_OUTPUT: { - struct file *output_file = NULL; - struct perf_event *output_event = NULL; - int fput_needed = 0; int ret; - if (arg != -1) { - output_file = perf_fget_light(arg, &fput_needed); - if (IS_ERR(output_file)) - return PTR_ERR(output_file); - output_event = output_file->private_data; + struct perf_event *output_event; + struct fd output; + ret = perf_fget_light(arg, &output); + if (ret) + return ret; + output_event = output.file->private_data; + ret = perf_event_set_output(event, output_event); + fdput(output); + } else { + ret = perf_event_set_output(event, NULL); } - - ret = perf_event_set_output(event, output_event); - if (output_event) - fput_light(output_file, fput_needed); - return ret; } @@ -6229,12 +6222,11 @@ SYSCALL_DEFINE5(perf_event_open, struct perf_event_attr attr; struct perf_event_context *ctx; struct file *event_file = NULL; - struct file *group_file = NULL; + struct fd group = {NULL, 0}; struct task_struct *task = NULL; struct pmu *pmu; int event_fd; int move_group = 0; - int fput_needed = 0; int err; /* for future expandability... */ @@ -6269,12 +6261,10 @@ SYSCALL_DEFINE5(perf_event_open, return event_fd; if (group_fd != -1) { - group_file = perf_fget_light(group_fd, &fput_needed); - if (IS_ERR(group_file)) { - err = PTR_ERR(group_file); + err = perf_fget_light(group_fd, &group); + if (err) goto err_fd; - } - group_leader = group_file->private_data; + group_leader = group.file->private_data; if (flags & PERF_FLAG_FD_OUTPUT) output_event = group_leader; if (flags & PERF_FLAG_FD_NO_GROUP) @@ -6450,7 +6440,7 @@ SYSCALL_DEFINE5(perf_event_open, * of the group leader will find the pointer to itself in * perf_group_detach(). */ - fput_light(group_file, fput_needed); + fdput(group); fd_install(event_fd, event_file); return event_fd; @@ -6464,7 +6454,7 @@ err_task: if (task) put_task_struct(task); err_group_fd: - fput_light(group_file, fput_needed); + fdput(group); err_fd: put_unused_fd(event_fd); return err; diff --git a/kernel/sys.c b/kernel/sys.c index 0cb4283df884..f9492284e5d2 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1788,15 +1788,15 @@ SYSCALL_DEFINE1(umask, int, mask) #ifdef CONFIG_CHECKPOINT_RESTORE static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) { - struct file *exe_file; + struct fd exe; struct dentry *dentry; - int err, fput_needed; + int err; - exe_file = fget_light(fd, &fput_needed); - if (!exe_file) + exe = fdget(fd); + if (!exe.file) return -EBADF; - dentry = exe_file->f_path.dentry; + dentry = exe.file->f_path.dentry; /* * Because the original mm->exe_file points to executable file, make @@ -1805,7 +1805,7 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) */ err = -EACCES; if (!S_ISREG(dentry->d_inode->i_mode) || - exe_file->f_path.mnt->mnt_flags & MNT_NOEXEC) + exe.file->f_path.mnt->mnt_flags & MNT_NOEXEC) goto exit; err = inode_permission(dentry->d_inode, MAY_EXEC); @@ -1839,12 +1839,12 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) goto exit_unlock; err = 0; - set_mm_exe_file(mm, exe_file); /* this grabs a reference to exe_file */ + set_mm_exe_file(mm, exe.file); /* this grabs a reference to exe.file */ exit_unlock: up_write(&mm->mmap_sem); exit: - fput_light(exe_file, fput_needed); + fdput(exe); return err; } diff --git a/kernel/taskstats.c b/kernel/taskstats.c index d0a32796550f..5116b7e5962e 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -415,16 +415,15 @@ static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info) struct nlattr *na; size_t size; u32 fd; - struct file *file; - int fput_needed; + struct fd f; na = info->attrs[CGROUPSTATS_CMD_ATTR_FD]; if (!na) return -EINVAL; fd = nla_get_u32(info->attrs[CGROUPSTATS_CMD_ATTR_FD]); - file = fget_light(fd, &fput_needed); - if (!file) + f = fdget(fd); + if (!f.file) return 0; size = nla_total_size(sizeof(struct cgroupstats)); @@ -444,7 +443,7 @@ static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info) stats = nla_data(na); memset(stats, 0, sizeof(*stats)); - rc = cgroupstats_build(stats, file->f_dentry); + rc = cgroupstats_build(stats, f.file->f_dentry); if (rc < 0) { nlmsg_free(rep_skb); goto err; @@ -453,7 +452,7 @@ static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info) rc = send_reply(rep_skb, info); err: - fput_light(file, fput_needed); + fdput(f); return rc; } diff --git a/mm/fadvise.c b/mm/fadvise.c index a83245763cf8..a47f0f50c89f 100644 --- a/mm/fadvise.c +++ b/mm/fadvise.c @@ -26,8 +26,7 @@ */ SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice) { - int fput_needed; - struct file *file = fget_light(fd, &fput_needed); + struct fd f = fdget(fd); struct address_space *mapping; struct backing_dev_info *bdi; loff_t endbyte; /* inclusive */ @@ -36,15 +35,15 @@ SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice) unsigned long nrpages; int ret = 0; - if (!file) + if (!f.file) return -EBADF; - if (S_ISFIFO(file->f_path.dentry->d_inode->i_mode)) { + if (S_ISFIFO(f.file->f_path.dentry->d_inode->i_mode)) { ret = -ESPIPE; goto out; } - mapping = file->f_mapping; + mapping = f.file->f_mapping; if (!mapping || len < 0) { ret = -EINVAL; goto out; @@ -77,21 +76,21 @@ SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice) switch (advice) { case POSIX_FADV_NORMAL: - file->f_ra.ra_pages = bdi->ra_pages; - spin_lock(&file->f_lock); - file->f_mode &= ~FMODE_RANDOM; - spin_unlock(&file->f_lock); + f.file->f_ra.ra_pages = bdi->ra_pages; + spin_lock(&f.file->f_lock); + f.file->f_mode &= ~FMODE_RANDOM; + spin_unlock(&f.file->f_lock); break; case POSIX_FADV_RANDOM: - spin_lock(&file->f_lock); - file->f_mode |= FMODE_RANDOM; - spin_unlock(&file->f_lock); + spin_lock(&f.file->f_lock); + f.file->f_mode |= FMODE_RANDOM; + spin_unlock(&f.file->f_lock); break; case POSIX_FADV_SEQUENTIAL: - file->f_ra.ra_pages = bdi->ra_pages * 2; - spin_lock(&file->f_lock); - file->f_mode &= ~FMODE_RANDOM; - spin_unlock(&file->f_lock); + f.file->f_ra.ra_pages = bdi->ra_pages * 2; + spin_lock(&f.file->f_lock); + f.file->f_mode &= ~FMODE_RANDOM; + spin_unlock(&f.file->f_lock); break; case POSIX_FADV_WILLNEED: /* First and last PARTIAL page! */ @@ -107,7 +106,7 @@ SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice) * Ignore return value because fadvise() shall return * success even if filesystem can't retrieve a hint, */ - force_page_cache_readahead(mapping, file, start_index, + force_page_cache_readahead(mapping, f.file, start_index, nrpages); break; case POSIX_FADV_NOREUSE: @@ -129,7 +128,7 @@ SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice) ret = -EINVAL; } out: - fput_light(file, fput_needed); + fdput(f); return ret; } #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS diff --git a/mm/readahead.c b/mm/readahead.c index 1011111e2bf4..7963f2391236 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -579,20 +579,19 @@ do_readahead(struct address_space *mapping, struct file *filp, SYSCALL_DEFINE(readahead)(int fd, loff_t offset, size_t count) { ssize_t ret; - struct file *file; - int fput_needed; + struct fd f; ret = -EBADF; - file = fget_light(fd, &fput_needed); - if (file) { - if (file->f_mode & FMODE_READ) { - struct address_space *mapping = file->f_mapping; + f = fdget(fd); + if (f.file) { + if (f.file->f_mode & FMODE_READ) { + struct address_space *mapping = f.file->f_mapping; pgoff_t start = offset >> PAGE_CACHE_SHIFT; pgoff_t end = (offset + count - 1) >> PAGE_CACHE_SHIFT; unsigned long len = end - start + 1; - ret = do_readahead(mapping, file, start, len); + ret = do_readahead(mapping, f.file, start, len); } - fput_light(file, fput_needed); + fdput(f); } return ret; } -- cgit v1.2.3-58-ga151 From 1fe0c0230a7c2d5f4061e681a3f3be9512446d23 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 19 Sep 2012 15:49:51 +0100 Subject: vfs: delete surplus inode NULL check Each iteration of d_delete we reload inode from dentry->d_inode and then call S_ISDIR(inode-i_mode), so inode cannot possibly be NULL shortly afterwards unless something went horribly wrong. Signed-off-by: Alan Cox Signed-off-by: Al Viro --- fs/dcache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/dcache.c b/fs/dcache.c index 16521a9f2038..fbee67b92651 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2109,7 +2109,7 @@ again: inode = dentry->d_inode; isdir = S_ISDIR(inode->i_mode); if (dentry->d_count == 1) { - if (inode && !spin_trylock(&inode->i_lock)) { + if (!spin_trylock(&inode->i_lock)) { spin_unlock(&dentry->d_lock); cpu_relax(); goto again; -- cgit v1.2.3-58-ga151 From 2744c171dbaa0b1ec7639e7d0ff817fba9461a38 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 26 Sep 2012 21:41:05 -0400 Subject: ceph: don't abuse d_delete() on failure exits Signed-off-by: Al Viro --- fs/ceph/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 4b5762ef7c2b..ba95eea201bf 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1104,7 +1104,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, pr_err("fill_trace bad get_inode " "%llx.%llx\n", vino.ino, vino.snap); err = PTR_ERR(in); - d_delete(dn); + d_drop(dn); goto done; } dn = splice_dentry(dn, in, &have_lease, true); @@ -1277,7 +1277,7 @@ retry_lookup: in = ceph_get_inode(parent->d_sb, vino); if (IS_ERR(in)) { dout("new_inode badness\n"); - d_delete(dn); + d_drop(dn); dput(dn); err = PTR_ERR(in); goto out; -- cgit v1.2.3-58-ga151 From 63784dd02b2ac85e867be9d6a6f5536c4ff738be Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 26 Sep 2012 21:43:05 -0400 Subject: fcntl: fix misannotations __user * != * __user... Signed-off-by: Al Viro --- fs/fcntl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/fcntl.c b/fs/fcntl.c index 91af39a33af7..8f704291d4ed 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -149,7 +149,7 @@ pid_t f_getown(struct file *filp) static int f_setown_ex(struct file *filp, unsigned long arg) { - struct f_owner_ex * __user owner_p = (void * __user)arg; + struct f_owner_ex __user *owner_p = (void __user *)arg; struct f_owner_ex owner; struct pid *pid; int type; @@ -189,7 +189,7 @@ static int f_setown_ex(struct file *filp, unsigned long arg) static int f_getown_ex(struct file *filp, unsigned long arg) { - struct f_owner_ex * __user owner_p = (void * __user)arg; + struct f_owner_ex __user *owner_p = (void __user *)arg; struct f_owner_ex owner; int ret = 0; @@ -227,7 +227,7 @@ static int f_getown_ex(struct file *filp, unsigned long arg) static int f_getowner_uids(struct file *filp, unsigned long arg) { struct user_namespace *user_ns = current_user_ns(); - uid_t * __user dst = (void * __user)arg; + uid_t __user *dst = (void __user *)arg; uid_t src[2]; int err; -- cgit v1.2.3-58-ga151 From f34f9d186df35e5c39163444c43b4fc6255e39c5 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Wed, 26 Sep 2012 11:34:50 +1000 Subject: coredump: prevent double-free on an error path in core dumper In !CORE_DUMP_USE_REGSET case, if elf_note_info_init fails to allocate memory for info->fields, it frees already allocated stuff and returns error to its caller, fill_note_info. Which in turn returns error to its caller, elf_core_dump. Which jumps to cleanup label and calls free_note_info, which will happily try to free all info->fields again. BOOM. This is the fix. Signed-off-by: Oleg Nesterov Signed-off-by: Denys Vlasenko Cc: Venu Byravarasu Cc: Signed-off-by: Andrew Morton --- fs/binfmt_elf.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 1b52956afe33..0225fddf49b7 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1696,30 +1696,19 @@ static int elf_note_info_init(struct elf_note_info *info) return 0; info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL); if (!info->psinfo) - goto notes_free; + return 0; info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL); if (!info->prstatus) - goto psinfo_free; + return 0; info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL); if (!info->fpu) - goto prstatus_free; + return 0; #ifdef ELF_CORE_COPY_XFPREGS info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL); if (!info->xfpu) - goto fpu_free; + return 0; #endif return 1; -#ifdef ELF_CORE_COPY_XFPREGS - fpu_free: - kfree(info->fpu); -#endif - prstatus_free: - kfree(info->prstatus); - psinfo_free: - kfree(info->psinfo); - notes_free: - kfree(info->notes); - return 0; } static int fill_note_info(struct elfhdr *elf, int phdrs, -- cgit v1.2.3-58-ga151 From 10c28d937e2cca577c2d804106b50dd0562fb062 Mon Sep 17 00:00:00 2001 From: Alex Kelly Date: Wed, 26 Sep 2012 21:52:08 -0400 Subject: coredump: move core dump functionality into its own file This prepares for making core dump functionality optional. The variable "suid_dumpable" and associated functions are left in fs/exec.c because they're used elsewhere, such as in ptrace. Signed-off-by: Alex Kelly Reviewed-by: Josh Triplett Acked-by: Serge Hallyn Acked-by: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/Makefile | 2 +- fs/coredump.c | 686 ++++++++++++++++++++++++++++++++++++++++++++++++++ fs/exec.c | 645 +---------------------------------------------- include/linux/sched.h | 1 + 4 files changed, 689 insertions(+), 645 deletions(-) create mode 100644 fs/coredump.c (limited to 'fs') diff --git a/fs/Makefile b/fs/Makefile index 2fb977934673..8938f8250320 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \ attr.o bad_inode.o file.o filesystems.o namespace.o \ seq_file.o xattr.o libfs.o fs-writeback.o \ pnode.o drop_caches.o splice.o sync.o utimes.o \ - stack.o fs_struct.o statfs.o + stack.o fs_struct.o statfs.o coredump.o ifeq ($(CONFIG_BLOCK),y) obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o diff --git a/fs/coredump.c b/fs/coredump.c new file mode 100644 index 000000000000..f045bbad6822 --- /dev/null +++ b/fs/coredump.c @@ -0,0 +1,686 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include "internal.h" + +#include + +int core_uses_pid; +char core_pattern[CORENAME_MAX_SIZE] = "core"; +unsigned int core_pipe_limit; + +struct core_name { + char *corename; + int used, size; +}; +static atomic_t call_count = ATOMIC_INIT(1); + +/* The maximal length of core_pattern is also specified in sysctl.c */ + +static int expand_corename(struct core_name *cn) +{ + char *old_corename = cn->corename; + + cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count); + cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL); + + if (!cn->corename) { + kfree(old_corename); + return -ENOMEM; + } + + return 0; +} + +static int cn_printf(struct core_name *cn, const char *fmt, ...) +{ + char *cur; + int need; + int ret; + va_list arg; + + va_start(arg, fmt); + need = vsnprintf(NULL, 0, fmt, arg); + va_end(arg); + + if (likely(need < cn->size - cn->used - 1)) + goto out_printf; + + ret = expand_corename(cn); + if (ret) + goto expand_fail; + +out_printf: + cur = cn->corename + cn->used; + va_start(arg, fmt); + vsnprintf(cur, need + 1, fmt, arg); + va_end(arg); + cn->used += need; + return 0; + +expand_fail: + return ret; +} + +static void cn_escape(char *str) +{ + for (; *str; str++) + if (*str == '/') + *str = '!'; +} + +static int cn_print_exe_file(struct core_name *cn) +{ + struct file *exe_file; + char *pathbuf, *path; + int ret; + + exe_file = get_mm_exe_file(current->mm); + if (!exe_file) { + char *commstart = cn->corename + cn->used; + ret = cn_printf(cn, "%s (path unknown)", current->comm); + cn_escape(commstart); + return ret; + } + + pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY); + if (!pathbuf) { + ret = -ENOMEM; + goto put_exe_file; + } + + path = d_path(&exe_file->f_path, pathbuf, PATH_MAX); + if (IS_ERR(path)) { + ret = PTR_ERR(path); + goto free_buf; + } + + cn_escape(path); + + ret = cn_printf(cn, "%s", path); + +free_buf: + kfree(pathbuf); +put_exe_file: + fput(exe_file); + return ret; +} + +/* format_corename will inspect the pattern parameter, and output a + * name into corename, which must have space for at least + * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. + */ +static int format_corename(struct core_name *cn, long signr) +{ + const struct cred *cred = current_cred(); + const char *pat_ptr = core_pattern; + int ispipe = (*pat_ptr == '|'); + int pid_in_pattern = 0; + int err = 0; + + cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count); + cn->corename = kmalloc(cn->size, GFP_KERNEL); + cn->used = 0; + + if (!cn->corename) + return -ENOMEM; + + /* Repeat as long as we have more pattern to process and more output + space */ + while (*pat_ptr) { + if (*pat_ptr != '%') { + if (*pat_ptr == 0) + goto out; + err = cn_printf(cn, "%c", *pat_ptr++); + } else { + switch (*++pat_ptr) { + /* single % at the end, drop that */ + case 0: + goto out; + /* Double percent, output one percent */ + case '%': + err = cn_printf(cn, "%c", '%'); + break; + /* pid */ + case 'p': + pid_in_pattern = 1; + err = cn_printf(cn, "%d", + task_tgid_vnr(current)); + break; + /* uid */ + case 'u': + err = cn_printf(cn, "%d", cred->uid); + break; + /* gid */ + case 'g': + err = cn_printf(cn, "%d", cred->gid); + break; + /* signal that caused the coredump */ + case 's': + err = cn_printf(cn, "%ld", signr); + break; + /* UNIX time of coredump */ + case 't': { + struct timeval tv; + do_gettimeofday(&tv); + err = cn_printf(cn, "%lu", tv.tv_sec); + break; + } + /* hostname */ + case 'h': { + char *namestart = cn->corename + cn->used; + down_read(&uts_sem); + err = cn_printf(cn, "%s", + utsname()->nodename); + up_read(&uts_sem); + cn_escape(namestart); + break; + } + /* executable */ + case 'e': { + char *commstart = cn->corename + cn->used; + err = cn_printf(cn, "%s", current->comm); + cn_escape(commstart); + break; + } + case 'E': + err = cn_print_exe_file(cn); + break; + /* core limit size */ + case 'c': + err = cn_printf(cn, "%lu", + rlimit(RLIMIT_CORE)); + break; + default: + break; + } + ++pat_ptr; + } + + if (err) + return err; + } + + /* Backward compatibility with core_uses_pid: + * + * If core_pattern does not include a %p (as is the default) + * and core_uses_pid is set, then .%pid will be appended to + * the filename. Do not do this for piped commands. */ + if (!ispipe && !pid_in_pattern && core_uses_pid) { + err = cn_printf(cn, ".%d", task_tgid_vnr(current)); + if (err) + return err; + } +out: + return ispipe; +} + +static int zap_process(struct task_struct *start, int exit_code) +{ + struct task_struct *t; + int nr = 0; + + start->signal->flags = SIGNAL_GROUP_EXIT; + start->signal->group_exit_code = exit_code; + start->signal->group_stop_count = 0; + + t = start; + do { + task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); + if (t != current && t->mm) { + sigaddset(&t->pending.signal, SIGKILL); + signal_wake_up(t, 1); + nr++; + } + } while_each_thread(start, t); + + return nr; +} + +static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, + struct core_state *core_state, int exit_code) +{ + struct task_struct *g, *p; + unsigned long flags; + int nr = -EAGAIN; + + spin_lock_irq(&tsk->sighand->siglock); + if (!signal_group_exit(tsk->signal)) { + mm->core_state = core_state; + nr = zap_process(tsk, exit_code); + } + spin_unlock_irq(&tsk->sighand->siglock); + if (unlikely(nr < 0)) + return nr; + + if (atomic_read(&mm->mm_users) == nr + 1) + goto done; + /* + * We should find and kill all tasks which use this mm, and we should + * count them correctly into ->nr_threads. We don't take tasklist + * lock, but this is safe wrt: + * + * fork: + * None of sub-threads can fork after zap_process(leader). All + * processes which were created before this point should be + * visible to zap_threads() because copy_process() adds the new + * process to the tail of init_task.tasks list, and lock/unlock + * of ->siglock provides a memory barrier. + * + * do_exit: + * The caller holds mm->mmap_sem. This means that the task which + * uses this mm can't pass exit_mm(), so it can't exit or clear + * its ->mm. + * + * de_thread: + * It does list_replace_rcu(&leader->tasks, ¤t->tasks), + * we must see either old or new leader, this does not matter. + * However, it can change p->sighand, so lock_task_sighand(p) + * must be used. Since p->mm != NULL and we hold ->mmap_sem + * it can't fail. + * + * Note also that "g" can be the old leader with ->mm == NULL + * and already unhashed and thus removed from ->thread_group. + * This is OK, __unhash_process()->list_del_rcu() does not + * clear the ->next pointer, we will find the new leader via + * next_thread(). + */ + rcu_read_lock(); + for_each_process(g) { + if (g == tsk->group_leader) + continue; + if (g->flags & PF_KTHREAD) + continue; + p = g; + do { + if (p->mm) { + if (unlikely(p->mm == mm)) { + lock_task_sighand(p, &flags); + nr += zap_process(p, exit_code); + unlock_task_sighand(p, &flags); + } + break; + } + } while_each_thread(g, p); + } + rcu_read_unlock(); +done: + atomic_set(&core_state->nr_threads, nr); + return nr; +} + +static int coredump_wait(int exit_code, struct core_state *core_state) +{ + struct task_struct *tsk = current; + struct mm_struct *mm = tsk->mm; + int core_waiters = -EBUSY; + + init_completion(&core_state->startup); + core_state->dumper.task = tsk; + core_state->dumper.next = NULL; + + down_write(&mm->mmap_sem); + if (!mm->core_state) + core_waiters = zap_threads(tsk, mm, core_state, exit_code); + up_write(&mm->mmap_sem); + + if (core_waiters > 0) { + struct core_thread *ptr; + + wait_for_completion(&core_state->startup); + /* + * Wait for all the threads to become inactive, so that + * all the thread context (extended register state, like + * fpu etc) gets copied to the memory. + */ + ptr = core_state->dumper.next; + while (ptr != NULL) { + wait_task_inactive(ptr->task, 0); + ptr = ptr->next; + } + } + + return core_waiters; +} + +static void coredump_finish(struct mm_struct *mm) +{ + struct core_thread *curr, *next; + struct task_struct *task; + + next = mm->core_state->dumper.next; + while ((curr = next) != NULL) { + next = curr->next; + task = curr->task; + /* + * see exit_mm(), curr->task must not see + * ->task == NULL before we read ->next. + */ + smp_mb(); + curr->task = NULL; + wake_up_process(task); + } + + mm->core_state = NULL; +} + +static void wait_for_dump_helpers(struct file *file) +{ + struct pipe_inode_info *pipe; + + pipe = file->f_path.dentry->d_inode->i_pipe; + + pipe_lock(pipe); + pipe->readers++; + pipe->writers--; + + while ((pipe->readers > 1) && (!signal_pending(current))) { + wake_up_interruptible_sync(&pipe->wait); + kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); + pipe_wait(pipe); + } + + pipe->readers--; + pipe->writers++; + pipe_unlock(pipe); + +} + +/* + * umh_pipe_setup + * helper function to customize the process used + * to collect the core in userspace. Specifically + * it sets up a pipe and installs it as fd 0 (stdin) + * for the process. Returns 0 on success, or + * PTR_ERR on failure. + * Note that it also sets the core limit to 1. This + * is a special value that we use to trap recursive + * core dumps + */ +static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) +{ + struct file *files[2]; + struct coredump_params *cp = (struct coredump_params *)info->data; + int err = create_pipe_files(files, 0); + if (err) + return err; + + cp->file = files[1]; + + replace_fd(0, files[0], 0); + /* and disallow core files too */ + current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1}; + + return 0; +} + +void do_coredump(long signr, int exit_code, struct pt_regs *regs) +{ + struct core_state core_state; + struct core_name cn; + struct mm_struct *mm = current->mm; + struct linux_binfmt * binfmt; + const struct cred *old_cred; + struct cred *cred; + int retval = 0; + int flag = 0; + int ispipe; + struct files_struct *displaced; + bool need_nonrelative = false; + static atomic_t core_dump_count = ATOMIC_INIT(0); + struct coredump_params cprm = { + .signr = signr, + .regs = regs, + .limit = rlimit(RLIMIT_CORE), + /* + * We must use the same mm->flags while dumping core to avoid + * inconsistency of bit flags, since this flag is not protected + * by any locks. + */ + .mm_flags = mm->flags, + }; + + audit_core_dumps(signr); + + binfmt = mm->binfmt; + if (!binfmt || !binfmt->core_dump) + goto fail; + if (!__get_dumpable(cprm.mm_flags)) + goto fail; + + cred = prepare_creds(); + if (!cred) + goto fail; + /* + * We cannot trust fsuid as being the "true" uid of the process + * nor do we know its entire history. We only know it was tainted + * so we dump it as root in mode 2, and only into a controlled + * environment (pipe handler or fully qualified path). + */ + if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) { + /* Setuid core dump mode */ + flag = O_EXCL; /* Stop rewrite attacks */ + cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ + need_nonrelative = true; + } + + retval = coredump_wait(exit_code, &core_state); + if (retval < 0) + goto fail_creds; + + old_cred = override_creds(cred); + + /* + * Clear any false indication of pending signals that might + * be seen by the filesystem code called to write the core file. + */ + clear_thread_flag(TIF_SIGPENDING); + + ispipe = format_corename(&cn, signr); + + if (ispipe) { + int dump_count; + char **helper_argv; + + if (ispipe < 0) { + printk(KERN_WARNING "format_corename failed\n"); + printk(KERN_WARNING "Aborting core\n"); + goto fail_corename; + } + + if (cprm.limit == 1) { + /* See umh_pipe_setup() which sets RLIMIT_CORE = 1. + * + * Normally core limits are irrelevant to pipes, since + * we're not writing to the file system, but we use + * cprm.limit of 1 here as a speacial value, this is a + * consistent way to catch recursive crashes. + * We can still crash if the core_pattern binary sets + * RLIM_CORE = !1, but it runs as root, and can do + * lots of stupid things. + * + * Note that we use task_tgid_vnr here to grab the pid + * of the process group leader. That way we get the + * right pid if a thread in a multi-threaded + * core_pattern process dies. + */ + printk(KERN_WARNING + "Process %d(%s) has RLIMIT_CORE set to 1\n", + task_tgid_vnr(current), current->comm); + printk(KERN_WARNING "Aborting core\n"); + goto fail_unlock; + } + cprm.limit = RLIM_INFINITY; + + dump_count = atomic_inc_return(&core_dump_count); + if (core_pipe_limit && (core_pipe_limit < dump_count)) { + printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n", + task_tgid_vnr(current), current->comm); + printk(KERN_WARNING "Skipping core dump\n"); + goto fail_dropcount; + } + + helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL); + if (!helper_argv) { + printk(KERN_WARNING "%s failed to allocate memory\n", + __func__); + goto fail_dropcount; + } + + retval = call_usermodehelper_fns(helper_argv[0], helper_argv, + NULL, UMH_WAIT_EXEC, umh_pipe_setup, + NULL, &cprm); + argv_free(helper_argv); + if (retval) { + printk(KERN_INFO "Core dump to %s pipe failed\n", + cn.corename); + goto close_fail; + } + } else { + struct inode *inode; + + if (cprm.limit < binfmt->min_coredump) + goto fail_unlock; + + if (need_nonrelative && cn.corename[0] != '/') { + printk(KERN_WARNING "Pid %d(%s) can only dump core "\ + "to fully qualified path!\n", + task_tgid_vnr(current), current->comm); + printk(KERN_WARNING "Skipping core dump\n"); + goto fail_unlock; + } + + cprm.file = filp_open(cn.corename, + O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, + 0600); + if (IS_ERR(cprm.file)) + goto fail_unlock; + + inode = cprm.file->f_path.dentry->d_inode; + if (inode->i_nlink > 1) + goto close_fail; + if (d_unhashed(cprm.file->f_path.dentry)) + goto close_fail; + /* + * AK: actually i see no reason to not allow this for named + * pipes etc, but keep the previous behaviour for now. + */ + if (!S_ISREG(inode->i_mode)) + goto close_fail; + /* + * Dont allow local users get cute and trick others to coredump + * into their pre-created files. + */ + if (!uid_eq(inode->i_uid, current_fsuid())) + goto close_fail; + if (!cprm.file->f_op || !cprm.file->f_op->write) + goto close_fail; + if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file)) + goto close_fail; + } + + /* get us an unshared descriptor table; almost always a no-op */ + retval = unshare_files(&displaced); + if (retval) + goto close_fail; + if (displaced) + put_files_struct(displaced); + retval = binfmt->core_dump(&cprm); + if (retval) + current->signal->group_exit_code |= 0x80; + + if (ispipe && core_pipe_limit) + wait_for_dump_helpers(cprm.file); +close_fail: + if (cprm.file) + filp_close(cprm.file, NULL); +fail_dropcount: + if (ispipe) + atomic_dec(&core_dump_count); +fail_unlock: + kfree(cn.corename); +fail_corename: + coredump_finish(mm); + revert_creds(old_cred); +fail_creds: + put_cred(cred); +fail: + return; +} + +/* + * Core dumping helper functions. These are the only things you should + * do on a core-file: use only these functions to write out all the + * necessary info. + */ +int dump_write(struct file *file, const void *addr, int nr) +{ + return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr; +} +EXPORT_SYMBOL(dump_write); + +int dump_seek(struct file *file, loff_t off) +{ + int ret = 1; + + if (file->f_op->llseek && file->f_op->llseek != no_llseek) { + if (file->f_op->llseek(file, off, SEEK_CUR) < 0) + return 0; + } else { + char *buf = (char *)get_zeroed_page(GFP_KERNEL); + + if (!buf) + return 0; + while (off > 0) { + unsigned long n = off; + + if (n > PAGE_SIZE) + n = PAGE_SIZE; + if (!dump_write(file, buf, n)) { + ret = 0; + break; + } + off -= n; + } + free_page((unsigned long)buf); + } + return ret; +} +EXPORT_SYMBOL(dump_seek); diff --git a/fs/exec.c b/fs/exec.c index beb05a95e4a3..48fb26ef8a1b 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -66,19 +66,8 @@ #include -int core_uses_pid; -char core_pattern[CORENAME_MAX_SIZE] = "core"; -unsigned int core_pipe_limit; int suid_dumpable = 0; -struct core_name { - char *corename; - int used, size; -}; -static atomic_t call_count = ATOMIC_INIT(1); - -/* The maximal length of core_pattern is also specified in sysctl.c */ - static LIST_HEAD(formats); static DEFINE_RWLOCK(binfmt_lock); @@ -1603,353 +1592,6 @@ void set_binfmt(struct linux_binfmt *new) EXPORT_SYMBOL(set_binfmt); -static int expand_corename(struct core_name *cn) -{ - char *old_corename = cn->corename; - - cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count); - cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL); - - if (!cn->corename) { - kfree(old_corename); - return -ENOMEM; - } - - return 0; -} - -static int cn_printf(struct core_name *cn, const char *fmt, ...) -{ - char *cur; - int need; - int ret; - va_list arg; - - va_start(arg, fmt); - need = vsnprintf(NULL, 0, fmt, arg); - va_end(arg); - - if (likely(need < cn->size - cn->used - 1)) - goto out_printf; - - ret = expand_corename(cn); - if (ret) - goto expand_fail; - -out_printf: - cur = cn->corename + cn->used; - va_start(arg, fmt); - vsnprintf(cur, need + 1, fmt, arg); - va_end(arg); - cn->used += need; - return 0; - -expand_fail: - return ret; -} - -static void cn_escape(char *str) -{ - for (; *str; str++) - if (*str == '/') - *str = '!'; -} - -static int cn_print_exe_file(struct core_name *cn) -{ - struct file *exe_file; - char *pathbuf, *path; - int ret; - - exe_file = get_mm_exe_file(current->mm); - if (!exe_file) { - char *commstart = cn->corename + cn->used; - ret = cn_printf(cn, "%s (path unknown)", current->comm); - cn_escape(commstart); - return ret; - } - - pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY); - if (!pathbuf) { - ret = -ENOMEM; - goto put_exe_file; - } - - path = d_path(&exe_file->f_path, pathbuf, PATH_MAX); - if (IS_ERR(path)) { - ret = PTR_ERR(path); - goto free_buf; - } - - cn_escape(path); - - ret = cn_printf(cn, "%s", path); - -free_buf: - kfree(pathbuf); -put_exe_file: - fput(exe_file); - return ret; -} - -/* format_corename will inspect the pattern parameter, and output a - * name into corename, which must have space for at least - * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. - */ -static int format_corename(struct core_name *cn, long signr) -{ - const struct cred *cred = current_cred(); - const char *pat_ptr = core_pattern; - int ispipe = (*pat_ptr == '|'); - int pid_in_pattern = 0; - int err = 0; - - cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count); - cn->corename = kmalloc(cn->size, GFP_KERNEL); - cn->used = 0; - - if (!cn->corename) - return -ENOMEM; - - /* Repeat as long as we have more pattern to process and more output - space */ - while (*pat_ptr) { - if (*pat_ptr != '%') { - if (*pat_ptr == 0) - goto out; - err = cn_printf(cn, "%c", *pat_ptr++); - } else { - switch (*++pat_ptr) { - /* single % at the end, drop that */ - case 0: - goto out; - /* Double percent, output one percent */ - case '%': - err = cn_printf(cn, "%c", '%'); - break; - /* pid */ - case 'p': - pid_in_pattern = 1; - err = cn_printf(cn, "%d", - task_tgid_vnr(current)); - break; - /* uid */ - case 'u': - err = cn_printf(cn, "%d", cred->uid); - break; - /* gid */ - case 'g': - err = cn_printf(cn, "%d", cred->gid); - break; - /* signal that caused the coredump */ - case 's': - err = cn_printf(cn, "%ld", signr); - break; - /* UNIX time of coredump */ - case 't': { - struct timeval tv; - do_gettimeofday(&tv); - err = cn_printf(cn, "%lu", tv.tv_sec); - break; - } - /* hostname */ - case 'h': { - char *namestart = cn->corename + cn->used; - down_read(&uts_sem); - err = cn_printf(cn, "%s", - utsname()->nodename); - up_read(&uts_sem); - cn_escape(namestart); - break; - } - /* executable */ - case 'e': { - char *commstart = cn->corename + cn->used; - err = cn_printf(cn, "%s", current->comm); - cn_escape(commstart); - break; - } - case 'E': - err = cn_print_exe_file(cn); - break; - /* core limit size */ - case 'c': - err = cn_printf(cn, "%lu", - rlimit(RLIMIT_CORE)); - break; - default: - break; - } - ++pat_ptr; - } - - if (err) - return err; - } - - /* Backward compatibility with core_uses_pid: - * - * If core_pattern does not include a %p (as is the default) - * and core_uses_pid is set, then .%pid will be appended to - * the filename. Do not do this for piped commands. */ - if (!ispipe && !pid_in_pattern && core_uses_pid) { - err = cn_printf(cn, ".%d", task_tgid_vnr(current)); - if (err) - return err; - } -out: - return ispipe; -} - -static int zap_process(struct task_struct *start, int exit_code) -{ - struct task_struct *t; - int nr = 0; - - start->signal->flags = SIGNAL_GROUP_EXIT; - start->signal->group_exit_code = exit_code; - start->signal->group_stop_count = 0; - - t = start; - do { - task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); - if (t != current && t->mm) { - sigaddset(&t->pending.signal, SIGKILL); - signal_wake_up(t, 1); - nr++; - } - } while_each_thread(start, t); - - return nr; -} - -static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, - struct core_state *core_state, int exit_code) -{ - struct task_struct *g, *p; - unsigned long flags; - int nr = -EAGAIN; - - spin_lock_irq(&tsk->sighand->siglock); - if (!signal_group_exit(tsk->signal)) { - mm->core_state = core_state; - nr = zap_process(tsk, exit_code); - } - spin_unlock_irq(&tsk->sighand->siglock); - if (unlikely(nr < 0)) - return nr; - - if (atomic_read(&mm->mm_users) == nr + 1) - goto done; - /* - * We should find and kill all tasks which use this mm, and we should - * count them correctly into ->nr_threads. We don't take tasklist - * lock, but this is safe wrt: - * - * fork: - * None of sub-threads can fork after zap_process(leader). All - * processes which were created before this point should be - * visible to zap_threads() because copy_process() adds the new - * process to the tail of init_task.tasks list, and lock/unlock - * of ->siglock provides a memory barrier. - * - * do_exit: - * The caller holds mm->mmap_sem. This means that the task which - * uses this mm can't pass exit_mm(), so it can't exit or clear - * its ->mm. - * - * de_thread: - * It does list_replace_rcu(&leader->tasks, ¤t->tasks), - * we must see either old or new leader, this does not matter. - * However, it can change p->sighand, so lock_task_sighand(p) - * must be used. Since p->mm != NULL and we hold ->mmap_sem - * it can't fail. - * - * Note also that "g" can be the old leader with ->mm == NULL - * and already unhashed and thus removed from ->thread_group. - * This is OK, __unhash_process()->list_del_rcu() does not - * clear the ->next pointer, we will find the new leader via - * next_thread(). - */ - rcu_read_lock(); - for_each_process(g) { - if (g == tsk->group_leader) - continue; - if (g->flags & PF_KTHREAD) - continue; - p = g; - do { - if (p->mm) { - if (unlikely(p->mm == mm)) { - lock_task_sighand(p, &flags); - nr += zap_process(p, exit_code); - unlock_task_sighand(p, &flags); - } - break; - } - } while_each_thread(g, p); - } - rcu_read_unlock(); -done: - atomic_set(&core_state->nr_threads, nr); - return nr; -} - -static int coredump_wait(int exit_code, struct core_state *core_state) -{ - struct task_struct *tsk = current; - struct mm_struct *mm = tsk->mm; - int core_waiters = -EBUSY; - - init_completion(&core_state->startup); - core_state->dumper.task = tsk; - core_state->dumper.next = NULL; - - down_write(&mm->mmap_sem); - if (!mm->core_state) - core_waiters = zap_threads(tsk, mm, core_state, exit_code); - up_write(&mm->mmap_sem); - - if (core_waiters > 0) { - struct core_thread *ptr; - - wait_for_completion(&core_state->startup); - /* - * Wait for all the threads to become inactive, so that - * all the thread context (extended register state, like - * fpu etc) gets copied to the memory. - */ - ptr = core_state->dumper.next; - while (ptr != NULL) { - wait_task_inactive(ptr->task, 0); - ptr = ptr->next; - } - } - - return core_waiters; -} - -static void coredump_finish(struct mm_struct *mm) -{ - struct core_thread *curr, *next; - struct task_struct *task; - - next = mm->core_state->dumper.next; - while ((curr = next) != NULL) { - next = curr->next; - task = curr->task; - /* - * see exit_mm(), curr->task must not see - * ->task == NULL before we read ->next. - */ - smp_mb(); - curr->task = NULL; - wake_up_process(task); - } - - mm->core_state = NULL; -} - /* * set_dumpable converts traditional three-value dumpable to two flags and * stores them into mm->flags. It modifies lower two bits of mm->flags, but @@ -1991,7 +1633,7 @@ void set_dumpable(struct mm_struct *mm, int value) } } -static int __get_dumpable(unsigned long mm_flags) +int __get_dumpable(unsigned long mm_flags) { int ret; @@ -2003,288 +1645,3 @@ int get_dumpable(struct mm_struct *mm) { return __get_dumpable(mm->flags); } - -static void wait_for_dump_helpers(struct file *file) -{ - struct pipe_inode_info *pipe; - - pipe = file->f_path.dentry->d_inode->i_pipe; - - pipe_lock(pipe); - pipe->readers++; - pipe->writers--; - - while ((pipe->readers > 1) && (!signal_pending(current))) { - wake_up_interruptible_sync(&pipe->wait); - kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); - pipe_wait(pipe); - } - - pipe->readers--; - pipe->writers++; - pipe_unlock(pipe); - -} - - -/* - * umh_pipe_setup - * helper function to customize the process used - * to collect the core in userspace. Specifically - * it sets up a pipe and installs it as fd 0 (stdin) - * for the process. Returns 0 on success, or - * PTR_ERR on failure. - * Note that it also sets the core limit to 1. This - * is a special value that we use to trap recursive - * core dumps - */ -static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) -{ - struct file *files[2]; - struct coredump_params *cp = (struct coredump_params *)info->data; - int err = create_pipe_files(files, 0); - if (err) - return err; - - cp->file = files[1]; - - replace_fd(0, files[0], 0); - /* and disallow core files too */ - current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1}; - - return 0; -} - -void do_coredump(long signr, int exit_code, struct pt_regs *regs) -{ - struct core_state core_state; - struct core_name cn; - struct mm_struct *mm = current->mm; - struct linux_binfmt * binfmt; - const struct cred *old_cred; - struct cred *cred; - int retval = 0; - int flag = 0; - int ispipe; - struct files_struct *displaced; - bool need_nonrelative = false; - static atomic_t core_dump_count = ATOMIC_INIT(0); - struct coredump_params cprm = { - .signr = signr, - .regs = regs, - .limit = rlimit(RLIMIT_CORE), - /* - * We must use the same mm->flags while dumping core to avoid - * inconsistency of bit flags, since this flag is not protected - * by any locks. - */ - .mm_flags = mm->flags, - }; - - audit_core_dumps(signr); - - binfmt = mm->binfmt; - if (!binfmt || !binfmt->core_dump) - goto fail; - if (!__get_dumpable(cprm.mm_flags)) - goto fail; - - cred = prepare_creds(); - if (!cred) - goto fail; - /* - * We cannot trust fsuid as being the "true" uid of the process - * nor do we know its entire history. We only know it was tainted - * so we dump it as root in mode 2, and only into a controlled - * environment (pipe handler or fully qualified path). - */ - if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) { - /* Setuid core dump mode */ - flag = O_EXCL; /* Stop rewrite attacks */ - cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ - need_nonrelative = true; - } - - retval = coredump_wait(exit_code, &core_state); - if (retval < 0) - goto fail_creds; - - old_cred = override_creds(cred); - - /* - * Clear any false indication of pending signals that might - * be seen by the filesystem code called to write the core file. - */ - clear_thread_flag(TIF_SIGPENDING); - - ispipe = format_corename(&cn, signr); - - if (ispipe) { - int dump_count; - char **helper_argv; - - if (ispipe < 0) { - printk(KERN_WARNING "format_corename failed\n"); - printk(KERN_WARNING "Aborting core\n"); - goto fail_corename; - } - - if (cprm.limit == 1) { - /* See umh_pipe_setup() which sets RLIMIT_CORE = 1. - * - * Normally core limits are irrelevant to pipes, since - * we're not writing to the file system, but we use - * cprm.limit of 1 here as a speacial value, this is a - * consistent way to catch recursive crashes. - * We can still crash if the core_pattern binary sets - * RLIM_CORE = !1, but it runs as root, and can do - * lots of stupid things. - * - * Note that we use task_tgid_vnr here to grab the pid - * of the process group leader. That way we get the - * right pid if a thread in a multi-threaded - * core_pattern process dies. - */ - printk(KERN_WARNING - "Process %d(%s) has RLIMIT_CORE set to 1\n", - task_tgid_vnr(current), current->comm); - printk(KERN_WARNING "Aborting core\n"); - goto fail_unlock; - } - cprm.limit = RLIM_INFINITY; - - dump_count = atomic_inc_return(&core_dump_count); - if (core_pipe_limit && (core_pipe_limit < dump_count)) { - printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n", - task_tgid_vnr(current), current->comm); - printk(KERN_WARNING "Skipping core dump\n"); - goto fail_dropcount; - } - - helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL); - if (!helper_argv) { - printk(KERN_WARNING "%s failed to allocate memory\n", - __func__); - goto fail_dropcount; - } - - retval = call_usermodehelper_fns(helper_argv[0], helper_argv, - NULL, UMH_WAIT_EXEC, umh_pipe_setup, - NULL, &cprm); - argv_free(helper_argv); - if (retval) { - printk(KERN_INFO "Core dump to %s pipe failed\n", - cn.corename); - goto close_fail; - } - } else { - struct inode *inode; - - if (cprm.limit < binfmt->min_coredump) - goto fail_unlock; - - if (need_nonrelative && cn.corename[0] != '/') { - printk(KERN_WARNING "Pid %d(%s) can only dump core "\ - "to fully qualified path!\n", - task_tgid_vnr(current), current->comm); - printk(KERN_WARNING "Skipping core dump\n"); - goto fail_unlock; - } - - cprm.file = filp_open(cn.corename, - O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, - 0600); - if (IS_ERR(cprm.file)) - goto fail_unlock; - - inode = cprm.file->f_path.dentry->d_inode; - if (inode->i_nlink > 1) - goto close_fail; - if (d_unhashed(cprm.file->f_path.dentry)) - goto close_fail; - /* - * AK: actually i see no reason to not allow this for named - * pipes etc, but keep the previous behaviour for now. - */ - if (!S_ISREG(inode->i_mode)) - goto close_fail; - /* - * Dont allow local users get cute and trick others to coredump - * into their pre-created files. - */ - if (!uid_eq(inode->i_uid, current_fsuid())) - goto close_fail; - if (!cprm.file->f_op || !cprm.file->f_op->write) - goto close_fail; - if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file)) - goto close_fail; - } - - /* get us an unshared descriptor table; almost always a no-op */ - retval = unshare_files(&displaced); - if (retval) - goto close_fail; - if (displaced) - put_files_struct(displaced); - retval = binfmt->core_dump(&cprm); - if (retval) - current->signal->group_exit_code |= 0x80; - - if (ispipe && core_pipe_limit) - wait_for_dump_helpers(cprm.file); -close_fail: - if (cprm.file) - filp_close(cprm.file, NULL); -fail_dropcount: - if (ispipe) - atomic_dec(&core_dump_count); -fail_unlock: - kfree(cn.corename); -fail_corename: - coredump_finish(mm); - revert_creds(old_cred); -fail_creds: - put_cred(cred); -fail: - return; -} - -/* - * Core dumping helper functions. These are the only things you should - * do on a core-file: use only these functions to write out all the - * necessary info. - */ -int dump_write(struct file *file, const void *addr, int nr) -{ - return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr; -} -EXPORT_SYMBOL(dump_write); - -int dump_seek(struct file *file, loff_t off) -{ - int ret = 1; - - if (file->f_op->llseek && file->f_op->llseek != no_llseek) { - if (file->f_op->llseek(file, off, SEEK_CUR) < 0) - return 0; - } else { - char *buf = (char *)get_zeroed_page(GFP_KERNEL); - - if (!buf) - return 0; - while (off > 0) { - unsigned long n = off; - - if (n > PAGE_SIZE) - n = PAGE_SIZE; - if (!dump_write(file, buf, n)) { - ret = 0; - break; - } - off -= n; - } - free_page((unsigned long)buf); - } - return ret; -} -EXPORT_SYMBOL(dump_seek); diff --git a/include/linux/sched.h b/include/linux/sched.h index 23bddac4bad8..78041f4c7584 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -405,6 +405,7 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} extern void set_dumpable(struct mm_struct *mm, int value); extern int get_dumpable(struct mm_struct *mm); +extern int __get_dumpable(unsigned long mm_flags); /* get/set_dumpable() values */ #define SUID_DUMPABLE_DISABLED 0 -- cgit v1.2.3-58-ga151 From 99621b44aa194eab594e1f17217231c02b519211 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 29 Aug 2012 16:31:33 -0400 Subject: btrfs: reada_extent doesn't need kref for refcount All increments and decrements are under the same spinlock - have to be, since they need to protect the radix_tree it's found in. Just use int, no need to wank with kref... Signed-off-by: Al Viro --- fs/btrfs/reada.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 48a4882d8ad5..a955669519a2 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -68,7 +68,7 @@ struct reada_extent { u32 blocksize; int err; struct list_head extctl; - struct kref refcnt; + int refcnt; spinlock_t lock; struct reada_zone *zones[BTRFS_MAX_MIRRORS]; int nzones; @@ -126,7 +126,7 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, spin_lock(&fs_info->reada_lock); re = radix_tree_lookup(&fs_info->reada_tree, index); if (re) - kref_get(&re->refcnt); + re->refcnt++; spin_unlock(&fs_info->reada_lock); if (!re) @@ -336,7 +336,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, spin_lock(&fs_info->reada_lock); re = radix_tree_lookup(&fs_info->reada_tree, index); if (re) - kref_get(&re->refcnt); + re->refcnt++; spin_unlock(&fs_info->reada_lock); if (re) @@ -352,7 +352,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, re->top = *top; INIT_LIST_HEAD(&re->extctl); spin_lock_init(&re->lock); - kref_init(&re->refcnt); + re->refcnt = 1; /* * map block @@ -398,7 +398,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, if (ret == -EEXIST) { re_exist = radix_tree_lookup(&fs_info->reada_tree, index); BUG_ON(!re_exist); - kref_get(&re_exist->refcnt); + re_exist->refcnt++; spin_unlock(&fs_info->reada_lock); goto error; } @@ -465,10 +465,6 @@ error: return re_exist; } -static void reada_kref_dummy(struct kref *kr) -{ -} - static void reada_extent_put(struct btrfs_fs_info *fs_info, struct reada_extent *re) { @@ -476,7 +472,7 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info, unsigned long index = re->logical >> PAGE_CACHE_SHIFT; spin_lock(&fs_info->reada_lock); - if (!kref_put(&re->refcnt, reada_kref_dummy)) { + if (--re->refcnt) { spin_unlock(&fs_info->reada_lock); return; } @@ -671,7 +667,7 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info, return 0; } dev->reada_next = re->logical + re->blocksize; - kref_get(&re->refcnt); + re->refcnt++; spin_unlock(&fs_info->reada_lock); -- cgit v1.2.3-58-ga151 From 8c0a85377048b64c880e76ec7368904fe46d0b94 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 26 Sep 2012 11:33:07 +1000 Subject: fs: push rcu_barrier() from deactivate_locked_super() to filesystems There's no reason to call rcu_barrier() on every deactivate_locked_super(). We only need to make sure that all delayed rcu free inodes are flushed before we destroy related cache. Removing rcu_barrier() from deactivate_locked_super() affects some fast paths. E.g. on my machine exit_group() of a last process in IPC namespace takes 0.07538s. rcu_barrier() takes 0.05188s of that time. Signed-off-by: Kirill A. Shutemov Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/9p/v9fs.c | 5 +++++ fs/adfs/super.c | 5 +++++ fs/affs/super.c | 5 +++++ fs/afs/super.c | 5 +++++ fs/befs/linuxvfs.c | 5 +++++ fs/bfs/inode.c | 5 +++++ fs/btrfs/extent_io.c | 6 ++++++ fs/btrfs/inode.c | 5 +++++ fs/ceph/super.c | 5 +++++ fs/cifs/cifsfs.c | 5 +++++ fs/coda/inode.c | 5 +++++ fs/ecryptfs/main.c | 6 ++++++ fs/efs/super.c | 5 +++++ fs/exofs/super.c | 5 +++++ fs/ext2/super.c | 5 +++++ fs/ext3/super.c | 5 +++++ fs/ext4/super.c | 5 +++++ fs/fat/inode.c | 5 +++++ fs/freevxfs/vxfs_super.c | 5 +++++ fs/fuse/inode.c | 6 ++++++ fs/hfs/super.c | 6 ++++++ fs/hfsplus/super.c | 6 ++++++ fs/hpfs/super.c | 5 +++++ fs/hugetlbfs/inode.c | 5 +++++ fs/isofs/inode.c | 5 +++++ fs/jffs2/super.c | 6 ++++++ fs/jfs/super.c | 6 ++++++ fs/logfs/inode.c | 5 +++++ fs/minix/inode.c | 5 +++++ fs/ncpfs/inode.c | 5 +++++ fs/nfs/inode.c | 5 +++++ fs/nilfs2/super.c | 6 ++++++ fs/ntfs/super.c | 6 ++++++ fs/ocfs2/dlmfs/dlmfs.c | 5 +++++ fs/ocfs2/super.c | 5 +++++ fs/openpromfs/inode.c | 5 +++++ fs/qnx4/inode.c | 5 +++++ fs/qnx6/inode.c | 5 +++++ fs/reiserfs/super.c | 5 +++++ fs/romfs/super.c | 5 +++++ fs/squashfs/super.c | 5 +++++ fs/super.c | 6 ------ fs/sysv/inode.c | 5 +++++ fs/ubifs/super.c | 6 ++++++ fs/udf/super.c | 5 +++++ fs/ufs/super.c | 5 +++++ fs/xfs/xfs_super.c | 5 +++++ 47 files changed, 240 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index b85efa773949..392c5dac1981 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -560,6 +560,11 @@ static int v9fs_init_inode_cache(void) */ static void v9fs_destroy_inode_cache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(v9fs_inode_cache); } diff --git a/fs/adfs/super.c b/fs/adfs/super.c index bdaec92353c2..c830c857c663 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -275,6 +275,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(adfs_inode_cachep); } diff --git a/fs/affs/super.c b/fs/affs/super.c index c70f1e5fc024..2f57053bf26c 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -147,6 +147,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(affs_inode_cachep); } diff --git a/fs/afs/super.c b/fs/afs/super.c index df8c6047c2a1..43165009428d 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -123,6 +123,11 @@ void __exit afs_fs_exit(void) BUG(); } + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(afs_inode_cachep); _leave(""); } diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index cf7f3c67c8b7..962b4f8f7994 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -454,6 +454,11 @@ befs_init_inodecache(void) static void befs_destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(befs_inode_cachep); } diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 9870417c26e7..d5fc598d6e4a 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -280,6 +280,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(bfs_inode_cachep); } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 4c878476bb91..b08ea4717e9d 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -107,6 +107,12 @@ void extent_io_exit(void) list_del(&eb->leak_list); kmem_cache_free(extent_buffer_cache, eb); } + + /* + * Make sure all delayed rcu free are flushed before we + * destroy caches. + */ + rcu_barrier(); if (extent_state_cache) kmem_cache_destroy(extent_state_cache); if (extent_buffer_cache) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ec154f954646..cf03a91d806f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7076,6 +7076,11 @@ static void init_once(void *foo) void btrfs_destroy_cachep(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); if (btrfs_inode_cachep) kmem_cache_destroy(btrfs_inode_cachep); if (btrfs_trans_handle_cachep) diff --git a/fs/ceph/super.c b/fs/ceph/super.c index b982239f38f9..3a42d9326378 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -603,6 +603,11 @@ bad_cap: static void destroy_caches(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(ceph_inode_cachep); kmem_cache_destroy(ceph_cap_cachep); kmem_cache_destroy(ceph_dentry_cachep); diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index db8a404a51dd..d4ce77a02327 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -977,6 +977,11 @@ cifs_init_inodecache(void) static void cifs_destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(cifs_inode_cachep); } diff --git a/fs/coda/inode.c b/fs/coda/inode.c index d315c6c5891a..be2aa4909487 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -85,6 +85,11 @@ int coda_init_inodecache(void) void coda_destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(coda_inode_cachep); } diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 9b627c15010a..34fcde765d24 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -710,6 +710,12 @@ static void ecryptfs_free_kmem_caches(void) { int i; + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); + for (i = 0; i < ARRAY_SIZE(ecryptfs_cache_infos); i++) { struct ecryptfs_cache_info *info; diff --git a/fs/efs/super.c b/fs/efs/super.c index e755ec746c69..2002431ef9a0 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -96,6 +96,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(efs_inode_cachep); } diff --git a/fs/exofs/super.c b/fs/exofs/super.c index dde41a75c7c8..59e3bbfac0b1 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -206,6 +206,11 @@ static int init_inodecache(void) */ static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(exofs_inode_cachep); } diff --git a/fs/ext2/super.c b/fs/ext2/super.c index af74d9e27b71..6c205d0c565b 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -206,6 +206,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(ext2_inode_cachep); } diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 8c892e93d8e7..8d41c8889eee 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -532,6 +532,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(ext3_inode_cachep); } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c6e0cb3d1f4a..455b7d8c6d62 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1019,6 +1019,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(ext4_inode_cachep); } diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 05e897fe9866..fd8e47cd898b 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -521,6 +521,11 @@ static int __init fat_init_inodecache(void) static void __exit fat_destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(fat_inode_cachep); } diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index d4fabd26084e..fed2c8afb3a9 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -279,6 +279,11 @@ static void __exit vxfs_cleanup(void) { unregister_filesystem(&vxfs_fs_type); + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(vxfs_inode_cachep); } diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index fca222dabe3c..f0eda124cffb 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1197,6 +1197,12 @@ static void fuse_fs_cleanup(void) { unregister_filesystem(&fuse_fs_type); unregister_fuseblk(); + + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(fuse_inode_cachep); } diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 4eb873e0c07b..941d7a8c2197 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -482,6 +482,12 @@ static int __init init_hfs_fs(void) static void __exit exit_hfs_fs(void) { unregister_filesystem(&hfs_fs_type); + + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(hfs_inode_cachep); } diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index fdafb2d71654..811a84d2d964 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -635,6 +635,12 @@ static int __init init_hfsplus_fs(void) static void __exit exit_hfsplus_fs(void) { unregister_filesystem(&hfsplus_fs_type); + + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(hfsplus_inode_cachep); } diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 706a12c083ea..3cb1da56eb73 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -210,6 +210,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(hpfs_inode_cachep); } diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 8349a899912e..c4b85d064e6b 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -1042,6 +1042,11 @@ static int __init init_hugetlbfs_fs(void) static void __exit exit_hugetlbfs_fs(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(hugetlbfs_inode_cachep); kern_unmount(hugetlbfs_vfsmount); unregister_filesystem(&hugetlbfs_fs_type); diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 29037c365ba4..f94cde4527e8 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -114,6 +114,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(isofs_inode_cachep); } diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 61ea41389f90..ff487954cd96 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -418,6 +418,12 @@ static void __exit exit_jffs2_fs(void) unregister_filesystem(&jffs2_fs_type); jffs2_destroy_slab_caches(); jffs2_compressors_exit(); + + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(jffs2_inode_cachep); } diff --git a/fs/jfs/super.c b/fs/jfs/super.c index c55c7452d285..3735347fd5f6 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -903,6 +903,12 @@ static void __exit exit_jfs_fs(void) jfs_proc_clean(); #endif unregister_filesystem(&jfs_fs_type); + + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(jfs_inode_cachep); } diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c index 6984562738d3..121bba2cf6f2 100644 --- a/fs/logfs/inode.c +++ b/fs/logfs/inode.c @@ -417,5 +417,10 @@ int logfs_init_inode_cache(void) void logfs_destroy_inode_cache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(logfs_inode_cache); } diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 2a503ad020d5..dc8d3629c20a 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -100,6 +100,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(minix_inode_cachep); } diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 333df07ae3bd..0c62c55b25d7 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -89,6 +89,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(ncp_inode_cachep); } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 9b47610338f5..e4c716d374a8 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1571,6 +1571,11 @@ static int __init nfs_init_inodecache(void) static void nfs_destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(nfs_inode_cachep); } diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 6a10812711c1..3c991dc84f2f 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1382,6 +1382,12 @@ static void nilfs_segbuf_init_once(void *obj) static void nilfs_destroy_cachep(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); + if (nilfs_inode_cachep) kmem_cache_destroy(nilfs_inode_cachep); if (nilfs_transaction_cachep) diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 2bc149d6a784..fe08d4afa106 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -3168,6 +3168,12 @@ static void __exit exit_ntfs_fs(void) ntfs_debug("Unregistering NTFS driver."); unregister_filesystem(&ntfs_fs_type); + + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(ntfs_big_inode_cache); kmem_cache_destroy(ntfs_inode_cache); kmem_cache_destroy(ntfs_name_cache); diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index 83b6f98e0665..16b712d260d4 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -691,6 +691,11 @@ static void __exit exit_dlmfs_fs(void) flush_workqueue(user_dlm_worker); destroy_workqueue(user_dlm_worker); + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(dlmfs_inode_cache); bdi_destroy(&dlmfs_backing_dev_info); diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 68f4541c2db9..0e91ec22a940 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1818,6 +1818,11 @@ static int ocfs2_initialize_mem_caches(void) static void ocfs2_free_mem_caches(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); if (ocfs2_inode_cachep) kmem_cache_destroy(ocfs2_inode_cachep); ocfs2_inode_cachep = NULL; diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 4a3477949bca..2ad080faca34 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -463,6 +463,11 @@ static int __init init_openprom_fs(void) static void __exit exit_openprom_fs(void) { unregister_filesystem(&openprom_fs_type); + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(op_inode_cachep); } diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 552e994e3aa1..9534b4f76579 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -391,6 +391,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(qnx4_inode_cachep); } diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c index 2049c814bda4..1b37fff7b5ff 100644 --- a/fs/qnx6/inode.c +++ b/fs/qnx6/inode.c @@ -651,6 +651,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(qnx6_inode_cachep); } diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 7a37dabf5a96..1078ae179993 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -608,6 +608,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(reiserfs_inode_cachep); } diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 77c5f2173983..fd7c5f60b46b 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -648,6 +648,11 @@ error_register: static void __exit exit_romfs_fs(void) { unregister_filesystem(&romfs_fs_type); + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(romfs_inode_cachep); } diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 29cd014ed3a1..260e3928d4f5 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -425,6 +425,11 @@ static int __init init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(squashfs_inode_cachep); } diff --git a/fs/super.c b/fs/super.c index 0902cfa6a12e..5fdf7ff32c4e 100644 --- a/fs/super.c +++ b/fs/super.c @@ -307,12 +307,6 @@ void deactivate_locked_super(struct super_block *s) /* caches are now gone, we can safely kill the shrinker now */ unregister_shrinker(&s->s_shrink); - - /* - * We need to call rcu_barrier so all the delayed rcu free - * inodes are flushed before we release the fs module. - */ - rcu_barrier(); put_filesystem(fs); put_super(s); } else { diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 80e1e2b18df1..0d0c50bd3321 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -360,5 +360,10 @@ int __init sysv_init_icache(void) void sysv_destroy_icache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(sysv_inode_cachep); } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 71a197f0f93d..36e09ca9130b 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -2298,6 +2298,12 @@ static void __exit ubifs_exit(void) dbg_debugfs_exit(); ubifs_compressors_exit(); unregister_shrinker(&ubifs_shrinker_info); + + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(ubifs_inode_slab); unregister_filesystem(&ubifs_fs_type); } diff --git a/fs/udf/super.c b/fs/udf/super.c index 18fc038a438d..b8d27642ab06 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -171,6 +171,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(udf_inode_cachep); } diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 444927e5706b..f7cfecfe1cab 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1466,6 +1466,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(ufs_inode_cachep); } diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 19e2380fb867..83d36e473d2f 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1506,6 +1506,11 @@ xfs_init_zones(void) STATIC void xfs_destroy_zones(void) { + /* + * Make sure all delayed rcu free are flushed before we + * destroy caches. + */ + rcu_barrier(); kmem_zone_destroy(xfs_ili_zone); kmem_zone_destroy(xfs_inode_zone); kmem_zone_destroy(xfs_efi_zone); -- cgit v1.2.3-58-ga151 From 8f9c0119d7ba94c3ad13876acc240d7f12b6d8e1 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 19 Sep 2012 12:01:52 +0100 Subject: compat: fs: Generic compat_sys_sendfile implementation This function is used by sparc, powerpc and arm64 for compat support. The patch adds a generic implementation which calls do_sendfile() directly and avoids set_fs(). The sparc architecture has wrappers for the sign extensions while powerpc relies on the compiler to do the this. The patch adds wrappers for powerpc to handle the u32->int type conversion. compat_sys_sendfile64() can be replaced by a sys_sendfile() call since compat_loff_t has the same size as off_t on a 64-bit system. On powerpc, the patch also changes the 64-bit sendfile call from sys_sendile64 to sys_sendfile. Signed-off-by: Catalin Marinas Acked-by: David S. Miller Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Alexander Viro Cc: Andrew Morton Signed-off-by: Al Viro --- arch/powerpc/include/asm/systbl.h | 4 ++-- arch/powerpc/include/asm/unistd.h | 1 + arch/powerpc/kernel/sys_ppc32.c | 45 ++++++-------------------------------- arch/sparc/include/asm/unistd.h | 1 + arch/sparc/kernel/sys32.S | 2 +- arch/sparc/kernel/sys_sparc32.c | 46 --------------------------------------- fs/compat.c | 22 +++++++++++++++++++ fs/read_write.c | 4 ++-- fs/read_write.h | 2 ++ include/linux/compat.h | 3 +++ 10 files changed, 41 insertions(+), 89 deletions(-) (limited to 'fs') diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 559ae1ee6706..840838769853 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -189,7 +189,7 @@ SYSCALL_SPU(getcwd) SYSCALL_SPU(capget) SYSCALL_SPU(capset) COMPAT_SYS(sigaltstack) -SYSX_SPU(sys_sendfile64,compat_sys_sendfile,sys_sendfile) +SYSX_SPU(sys_sendfile,compat_sys_sendfile_wrapper,sys_sendfile) SYSCALL(ni_syscall) SYSCALL(ni_syscall) PPC_SYS(vfork) @@ -229,7 +229,7 @@ COMPAT_SYS_SPU(sched_setaffinity) COMPAT_SYS_SPU(sched_getaffinity) SYSCALL(ni_syscall) SYSCALL(ni_syscall) -SYS32ONLY(sendfile64) +SYSX(sys_ni_syscall,compat_sys_sendfile64_wrapper,sys_sendfile64) COMPAT_SYS_SPU(io_setup) SYSCALL_SPU(io_destroy) COMPAT_SYS_SPU(io_getevents) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index bd377a368611..c683fa350add 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -419,6 +419,7 @@ #define __ARCH_WANT_COMPAT_SYS_TIME #define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND #define __ARCH_WANT_SYS_NEWFSTATAT +#define __ARCH_WANT_COMPAT_SYS_SENDFILE #endif /* diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index 81c570633ead..abd1112da54f 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -143,48 +143,17 @@ long compat_sys_ipc(u32 call, u32 first, u32 second, u32 third, compat_uptr_t pt * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) * and the register representation of a signed int (msr in 64-bit mode) is performed. */ -asmlinkage long compat_sys_sendfile(u32 out_fd, u32 in_fd, compat_off_t __user * offset, u32 count) +asmlinkage long compat_sys_sendfile_wrapper(u32 out_fd, u32 in_fd, + compat_off_t __user *offset, u32 count) { - mm_segment_t old_fs = get_fs(); - int ret; - off_t of; - off_t __user *up; - - if (offset && get_user(of, offset)) - return -EFAULT; - - /* The __user pointer cast is valid because of the set_fs() */ - set_fs(KERNEL_DS); - up = offset ? (off_t __user *) &of : NULL; - ret = sys_sendfile((int)out_fd, (int)in_fd, up, count); - set_fs(old_fs); - - if (offset && put_user(of, offset)) - return -EFAULT; - - return ret; + return compat_sys_sendfile((int)out_fd, (int)in_fd, offset, count); } -asmlinkage int compat_sys_sendfile64(int out_fd, int in_fd, compat_loff_t __user *offset, s32 count) +asmlinkage long compat_sys_sendfile64_wrapper(u32 out_fd, u32 in_fd, + compat_loff_t __user *offset, u32 count) { - mm_segment_t old_fs = get_fs(); - int ret; - loff_t lof; - loff_t __user *up; - - if (offset && get_user(lof, offset)) - return -EFAULT; - - /* The __user pointer cast is valid because of the set_fs() */ - set_fs(KERNEL_DS); - up = offset ? (loff_t __user *) &lof : NULL; - ret = sys_sendfile64(out_fd, in_fd, up, count); - set_fs(old_fs); - - if (offset && put_user(lof, offset)) - return -EFAULT; - - return ret; + return sys_sendfile((int)out_fd, (int)in_fd, + (off_t __user *)offset, count); } long compat_sys_execve(unsigned long a0, unsigned long a1, unsigned long a2, diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h index fb2693464807..d9a677c51926 100644 --- a/arch/sparc/include/asm/unistd.h +++ b/arch/sparc/include/asm/unistd.h @@ -447,6 +447,7 @@ #else #define __ARCH_WANT_COMPAT_SYS_TIME #define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND +#define __ARCH_WANT_COMPAT_SYS_SENDFILE #endif /* diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S index d97f3eb72e06..44025f4ba41f 100644 --- a/arch/sparc/kernel/sys32.S +++ b/arch/sparc/kernel/sys32.S @@ -90,7 +90,7 @@ SIGN1(sys32_mkdir, sys_mkdir, %o1) SIGN3(sys32_futex, compat_sys_futex, %o1, %o2, %o5) SIGN1(sys32_sysfs, compat_sys_sysfs, %o0) SIGN2(sys32_sendfile, compat_sys_sendfile, %o0, %o1) -SIGN2(sys32_sendfile64, compat_sys_sendfile64, %o0, %o1) +SIGN2(sys32_sendfile64, sys_sendfile, %o0, %o1) SIGN1(sys32_prctl, sys_prctl, %o0) SIGN1(sys32_sched_rr_get_interval, compat_sys_sched_rr_get_interval, %o0) SIGN2(sys32_waitpid, sys_waitpid, %o0, %o2) diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c index f7392336961f..d862499eb01c 100644 --- a/arch/sparc/kernel/sys_sparc32.c +++ b/arch/sparc/kernel/sys_sparc32.c @@ -506,52 +506,6 @@ long compat_sys_fadvise64_64(int fd, advice); } -asmlinkage long compat_sys_sendfile(int out_fd, int in_fd, - compat_off_t __user *offset, - compat_size_t count) -{ - mm_segment_t old_fs = get_fs(); - int ret; - off_t of; - - if (offset && get_user(of, offset)) - return -EFAULT; - - set_fs(KERNEL_DS); - ret = sys_sendfile(out_fd, in_fd, - offset ? (off_t __user *) &of : NULL, - count); - set_fs(old_fs); - - if (offset && put_user(of, offset)) - return -EFAULT; - - return ret; -} - -asmlinkage long compat_sys_sendfile64(int out_fd, int in_fd, - compat_loff_t __user *offset, - compat_size_t count) -{ - mm_segment_t old_fs = get_fs(); - int ret; - loff_t lof; - - if (offset && get_user(lof, offset)) - return -EFAULT; - - set_fs(KERNEL_DS); - ret = sys_sendfile64(out_fd, in_fd, - offset ? (loff_t __user *) &lof : NULL, - count); - set_fs(old_fs); - - if (offset && put_user(lof, offset)) - return -EFAULT; - - return ret; -} - /* This is just a version for 32-bit applications which does * not force O_LARGEFILE on. */ diff --git a/fs/compat.c b/fs/compat.c index d72d51e19025..b7a24d0ca30d 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1792,3 +1792,25 @@ compat_sys_open_by_handle_at(int mountdirfd, return do_handle_open(mountdirfd, handle, flags); } #endif + +#ifdef __ARCH_WANT_COMPAT_SYS_SENDFILE +asmlinkage long compat_sys_sendfile(int out_fd, int in_fd, + compat_off_t __user *offset, compat_size_t count) +{ + loff_t pos; + off_t off; + ssize_t ret; + + if (offset) { + if (unlikely(get_user(off, offset))) + return -EFAULT; + pos = off; + ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS); + if (unlikely(put_user(pos, offset))) + return -EFAULT; + return ret; + } + + return do_sendfile(out_fd, in_fd, NULL, count, 0); +} +#endif /* __ARCH_WANT_COMPAT_SYS_SENDFILE */ diff --git a/fs/read_write.c b/fs/read_write.c index 28b38279a219..d06534857e9e 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -862,8 +862,8 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, return ret; } -static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, - size_t count, loff_t max) +ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count, + loff_t max) { struct fd in, out; struct inode *in_inode, *out_inode; diff --git a/fs/read_write.h b/fs/read_write.h index d07b954c6e0c..d3e00ef67420 100644 --- a/fs/read_write.h +++ b/fs/read_write.h @@ -12,3 +12,5 @@ ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn); ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, unsigned long nr_segs, loff_t *ppos, io_fn_t fn); +ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count, + loff_t max); diff --git a/include/linux/compat.h b/include/linux/compat.h index 09b28b7369d7..fd4e29956d1c 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -590,6 +590,9 @@ asmlinkage ssize_t compat_sys_process_vm_writev(compat_pid_t pid, unsigned long liovcnt, const struct compat_iovec __user *rvec, unsigned long riovcnt, unsigned long flags); +asmlinkage long compat_sys_sendfile(int out_fd, int in_fd, + compat_off_t __user *offset, compat_size_t count); + #else #define is_compat_task() (0) -- cgit v1.2.3-58-ga151