From dd20908a8a06b22c171f6c3fcdbdbd65bed07505 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 14 Mar 2014 10:56:20 -0400 Subject: don't bother with {get,put}_write_access() on non-regular files it's pointless and actually leads to wrong behaviour in at least one moderately convoluted case (pipe(), close one end, try to get to another via /proc/*/fd and run into ETXTBUSY). Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- fs/open.c | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index b9ed8b25c108..2ed7325f713e 100644 --- a/fs/open.c +++ b/fs/open.c @@ -641,23 +641,12 @@ out: static inline int __get_file_write_access(struct inode *inode, struct vfsmount *mnt) { - int error; - error = get_write_access(inode); + int error = get_write_access(inode); if (error) return error; - /* - * Do not take mount writer counts on - * special files since no writes to - * the mount itself will occur. - */ - if (!special_file(inode->i_mode)) { - /* - * Balanced in __fput() - */ - error = __mnt_want_write(mnt); - if (error) - put_write_access(inode); - } + error = __mnt_want_write(mnt); + if (error) + put_write_access(inode); return error; } @@ -690,12 +679,11 @@ static int do_dentry_open(struct file *f, path_get(&f->f_path); inode = f->f_inode = f->f_path.dentry->d_inode; - if (f->f_mode & FMODE_WRITE) { + if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) { error = __get_file_write_access(inode, f->f_path.mnt); if (error) goto cleanup_file; - if (!special_file(inode->i_mode)) - file_take_write(f); + file_take_write(f); } f->f_mapping = inode->i_mapping; @@ -742,7 +730,6 @@ static int do_dentry_open(struct file *f, cleanup_all: fops_put(f->f_op); if (f->f_mode & FMODE_WRITE) { - put_write_access(inode); if (!special_file(inode->i_mode)) { /* * We don't consider this a real @@ -750,6 +737,7 @@ cleanup_all: * because it all happenend right * here, so just reset the state. */ + put_write_access(inode); file_reset_write(f); __mnt_drop_write(f->f_path.mnt); } -- cgit v1.2.3-58-ga151 From 4597e695b8baa3e2620da89c7593be70cf20566b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 14 Mar 2014 10:06:32 -0400 Subject: get rid of DEBUG_WRITECOUNT it only makes control flow in __fput() and friends more convoluted. Signed-off-by: Al Viro --- arch/powerpc/configs/ppc6xx_defconfig | 1 - arch/powerpc/configs/ps3_defconfig | 1 - arch/s390/configs/default_defconfig | 1 - arch/sh/configs/rsk7203_defconfig | 1 - arch/xtensa/configs/iss_defconfig | 1 - arch/xtensa/configs/s6105_defconfig | 1 - fs/file_table.c | 5 ---- fs/open.c | 8 ------ include/linux/fs.h | 49 ----------------------------------- lib/Kconfig.debug | 10 ------- 10 files changed, 78 deletions(-) (limited to 'fs/open.c') diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index c2353bf059fd..175a8b99c196 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -1244,7 +1244,6 @@ CONFIG_DEBUG_SPINLOCK_SLEEP=y CONFIG_DEBUG_HIGHMEM=y CONFIG_DEBUG_INFO=y CONFIG_DEBUG_VM=y -CONFIG_DEBUG_WRITECOUNT=y CONFIG_DEBUG_LIST=y CONFIG_DEBUG_SG=y # CONFIG_RCU_CPU_STALL_DETECTOR is not set diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index 139a8308070c..fdee37fab81c 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -174,7 +174,6 @@ CONFIG_DETECT_HUNG_TASK=y CONFIG_PROVE_LOCKING=y CONFIG_DEBUG_LOCKDEP=y CONFIG_DEBUG_INFO=y -CONFIG_DEBUG_WRITECOUNT=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_DEBUG_LIST=y CONFIG_RCU_CPU_STALL_TIMEOUT=60 diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig index e0af2ee58751..3f538468a86e 100644 --- a/arch/s390/configs/default_defconfig +++ b/arch/s390/configs/default_defconfig @@ -550,7 +550,6 @@ CONFIG_LOCK_STAT=y CONFIG_DEBUG_LOCKDEP=y CONFIG_DEBUG_ATOMIC_SLEEP=y CONFIG_DEBUG_LOCKING_API_SELFTESTS=y -CONFIG_DEBUG_WRITECOUNT=y CONFIG_DEBUG_LIST=y CONFIG_DEBUG_SG=y CONFIG_DEBUG_NOTIFIERS=y diff --git a/arch/sh/configs/rsk7203_defconfig b/arch/sh/configs/rsk7203_defconfig index 4e5229b0c5bb..47236573db83 100644 --- a/arch/sh/configs/rsk7203_defconfig +++ b/arch/sh/configs/rsk7203_defconfig @@ -128,7 +128,6 @@ CONFIG_DEBUG_MUTEXES=y CONFIG_DEBUG_SPINLOCK_SLEEP=y CONFIG_DEBUG_INFO=y CONFIG_DEBUG_VM=y -CONFIG_DEBUG_WRITECOUNT=y CONFIG_DEBUG_LIST=y CONFIG_DEBUG_SG=y CONFIG_FRAME_POINTER=y diff --git a/arch/xtensa/configs/iss_defconfig b/arch/xtensa/configs/iss_defconfig index 4f233204faf9..711f8aa14743 100644 --- a/arch/xtensa/configs/iss_defconfig +++ b/arch/xtensa/configs/iss_defconfig @@ -627,7 +627,6 @@ CONFIG_SCHED_DEBUG=y # CONFIG_DEBUG_KOBJECT is not set # CONFIG_DEBUG_INFO is not set # CONFIG_DEBUG_VM is not set -# CONFIG_DEBUG_WRITECOUNT is not set # CONFIG_DEBUG_MEMORY_INIT is not set # CONFIG_DEBUG_LIST is not set # CONFIG_DEBUG_SG is not set diff --git a/arch/xtensa/configs/s6105_defconfig b/arch/xtensa/configs/s6105_defconfig index d929f77a0360..78318a76fa16 100644 --- a/arch/xtensa/configs/s6105_defconfig +++ b/arch/xtensa/configs/s6105_defconfig @@ -569,7 +569,6 @@ CONFIG_DEBUG_SPINLOCK_SLEEP=y # CONFIG_DEBUG_INFO is not set # CONFIG_DEBUG_VM is not set CONFIG_DEBUG_NOMMU_REGIONS=y -# CONFIG_DEBUG_WRITECOUNT is not set # CONFIG_DEBUG_MEMORY_INIT is not set # CONFIG_DEBUG_LIST is not set # CONFIG_DEBUG_SG is not set diff --git a/fs/file_table.c b/fs/file_table.c index 79ecae62209a..ee20658a0647 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -52,7 +52,6 @@ static void file_free_rcu(struct rcu_head *head) static inline void file_free(struct file *f) { percpu_counter_dec(&nr_files); - file_check_state(f); call_rcu(&f->f_u.fu_rcuhead, file_free_rcu); } @@ -186,7 +185,6 @@ struct file *alloc_file(struct path *path, fmode_t mode, * that we can do debugging checks at __fput() */ if ((mode & FMODE_WRITE) && !special_file(path->dentry->d_inode->i_mode)) { - file_take_write(file); WARN_ON(mnt_clone_write(path->mnt)); } if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) @@ -213,10 +211,7 @@ static void drop_file_write_access(struct file *file) return; put_write_access(inode); - if (file_check_writeable(file) != 0) - return; __mnt_drop_write(mnt); - file_release_write(file); } /* the real guts of fput() - releasing the last reference to file diff --git a/fs/open.c b/fs/open.c index 2ed7325f713e..8d0b6adfe7b8 100644 --- a/fs/open.c +++ b/fs/open.c @@ -683,7 +683,6 @@ static int do_dentry_open(struct file *f, error = __get_file_write_access(inode, f->f_path.mnt); if (error) goto cleanup_file; - file_take_write(f); } f->f_mapping = inode->i_mapping; @@ -731,14 +730,7 @@ cleanup_all: fops_put(f->f_op); if (f->f_mode & FMODE_WRITE) { if (!special_file(inode->i_mode)) { - /* - * We don't consider this a real - * mnt_want/drop_write() pair - * because it all happenend right - * here, so just reset the state. - */ put_write_access(inode); - file_reset_write(f); __mnt_drop_write(f->f_path.mnt); } } diff --git a/include/linux/fs.h b/include/linux/fs.h index 23b2a35d712e..e80659ed78fc 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -769,9 +769,6 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) index < ra->start + ra->size); } -#define FILE_MNT_WRITE_TAKEN 1 -#define FILE_MNT_WRITE_RELEASED 2 - struct file { union { struct llist_node fu_llist; @@ -809,9 +806,6 @@ struct file { struct list_head f_tfile_llink; #endif /* #ifdef CONFIG_EPOLL */ struct address_space *f_mapping; -#ifdef CONFIG_DEBUG_WRITECOUNT - unsigned long f_mnt_write_state; -#endif } __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */ struct file_handle { @@ -829,49 +823,6 @@ static inline struct file *get_file(struct file *f) #define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1) #define file_count(x) atomic_long_read(&(x)->f_count) -#ifdef CONFIG_DEBUG_WRITECOUNT -static inline void file_take_write(struct file *f) -{ - WARN_ON(f->f_mnt_write_state != 0); - f->f_mnt_write_state = FILE_MNT_WRITE_TAKEN; -} -static inline void file_release_write(struct file *f) -{ - f->f_mnt_write_state |= FILE_MNT_WRITE_RELEASED; -} -static inline void file_reset_write(struct file *f) -{ - f->f_mnt_write_state = 0; -} -static inline void file_check_state(struct file *f) -{ - /* - * At this point, either both or neither of these bits - * should be set. - */ - WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN); - WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_RELEASED); -} -static inline int file_check_writeable(struct file *f) -{ - if (f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN) - return 0; - printk(KERN_WARNING "writeable file with no " - "mnt_want_write()\n"); - WARN_ON(1); - return -EINVAL; -} -#else /* !CONFIG_DEBUG_WRITECOUNT */ -static inline void file_take_write(struct file *filp) {} -static inline void file_release_write(struct file *filp) {} -static inline void file_reset_write(struct file *filp) {} -static inline void file_check_state(struct file *filp) {} -static inline int file_check_writeable(struct file *filp) -{ - return 0; -} -#endif /* CONFIG_DEBUG_WRITECOUNT */ - #define MAX_NON_LFS ((1UL<<31) - 1) /* Page cache limit. The filesystems should put that into their s_maxbytes diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index a48abeac753f..7a0859314bdf 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1030,16 +1030,6 @@ config DEBUG_BUGVERBOSE of the BUG call as well as the EIP and oops trace. This aids debugging but costs about 70-100K of memory. -config DEBUG_WRITECOUNT - bool "Debug filesystem writers count" - depends on DEBUG_KERNEL - help - Enable this to catch wrong use of the writers count in struct - vfsmount. This will increase the size of each file struct by - 32 bits. - - If unsure, say N. - config DEBUG_LIST bool "Debug linked list manipulation" depends on DEBUG_KERNEL -- cgit v1.2.3-58-ga151 From 0ccb286346c4c0644be17f04a9eb23ad99262882 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 14 Mar 2014 10:40:46 -0400 Subject: fold __get_file_write_access() into its only caller Signed-off-by: Al Viro --- fs/open.c | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index 8d0b6adfe7b8..ebef0c5fa10c 100644 --- a/fs/open.c +++ b/fs/open.c @@ -632,24 +632,6 @@ out: return error; } -/* - * You have to be very careful that these write - * counts get cleaned up in error cases and - * upon __fput(). This should probably never - * be called outside of __dentry_open(). - */ -static inline int __get_file_write_access(struct inode *inode, - struct vfsmount *mnt) -{ - int error = get_write_access(inode); - if (error) - return error; - error = __mnt_want_write(mnt); - if (error) - put_write_access(inode); - return error; -} - int open_check_o_direct(struct file *f) { /* NB: we're sure to have correct a_ops only after f_op->open */ @@ -680,9 +662,14 @@ static int do_dentry_open(struct file *f, path_get(&f->f_path); inode = f->f_inode = f->f_path.dentry->d_inode; if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) { - error = __get_file_write_access(inode, f->f_path.mnt); + error = get_write_access(inode); if (error) goto cleanup_file; + error = __mnt_want_write(f->f_path.mnt); + if (error) { + put_write_access(inode); + goto cleanup_file; + } } f->f_mapping = inode->i_mapping; -- cgit v1.2.3-58-ga151 From 83f936c75e3689a63253d89c47a4d239c56d7410 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 14 Mar 2014 12:02:47 -0400 Subject: mark struct file that had write access grabbed by open() new flag in ->f_mode - FMODE_WRITER. Set by do_dentry_open() in case when it has grabbed write access, checked by __fput() to decide whether it wants to drop the sucker. Allows to stop bothering with mnt_clone_write() in alloc_file(), along with fewer special_file() checks. Signed-off-by: Al Viro --- fs/file_table.c | 37 ++++--------------------------------- fs/namespace.c | 4 +--- fs/open.c | 9 ++++----- include/linux/fs.h | 2 ++ 4 files changed, 11 insertions(+), 41 deletions(-) (limited to 'fs/open.c') diff --git a/fs/file_table.c b/fs/file_table.c index ee20658a0647..ce1504fec5a1 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -177,43 +177,12 @@ struct file *alloc_file(struct path *path, fmode_t mode, file->f_mapping = path->dentry->d_inode->i_mapping; file->f_mode = mode; file->f_op = fop; - - /* - * These mounts don't really matter in practice - * for r/o bind mounts. They aren't userspace- - * visible. We do this for consistency, and so - * that we can do debugging checks at __fput() - */ - if ((mode & FMODE_WRITE) && !special_file(path->dentry->d_inode->i_mode)) { - WARN_ON(mnt_clone_write(path->mnt)); - } if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) i_readcount_inc(path->dentry->d_inode); return file; } EXPORT_SYMBOL(alloc_file); -/** - * drop_file_write_access - give up ability to write to a file - * @file: the file to which we will stop writing - * - * This is a central place which will give up the ability - * to write to @file, along with access to write through - * its vfsmount. - */ -static void drop_file_write_access(struct file *file) -{ - struct vfsmount *mnt = file->f_path.mnt; - struct dentry *dentry = file->f_path.dentry; - struct inode *inode = dentry->d_inode; - - if (special_file(inode->i_mode)) - return; - - put_write_access(inode); - __mnt_drop_write(mnt); -} - /* the real guts of fput() - releasing the last reference to file */ static void __fput(struct file *file) @@ -248,8 +217,10 @@ static void __fput(struct file *file) put_pid(file->f_owner.pid); if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) i_readcount_dec(inode); - if (file->f_mode & FMODE_WRITE) - drop_file_write_access(file); + if (file->f_mode & FMODE_WRITER) { + put_write_access(inode); + __mnt_drop_write(mnt); + } file->f_path.dentry = NULL; file->f_path.mnt = NULL; file->f_inode = NULL; diff --git a/fs/namespace.c b/fs/namespace.c index a66aff5bd3fe..20e8696c31a7 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -414,9 +414,7 @@ EXPORT_SYMBOL_GPL(mnt_clone_write); */ int __mnt_want_write_file(struct file *file) { - struct inode *inode = file_inode(file); - - if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode)) + if (!(file->f_mode & FMODE_WRITER)) return __mnt_want_write(file->f_path.mnt); else return mnt_clone_write(file->f_path.mnt); diff --git a/fs/open.c b/fs/open.c index ebef0c5fa10c..dcefb2f02d10 100644 --- a/fs/open.c +++ b/fs/open.c @@ -670,6 +670,7 @@ static int do_dentry_open(struct file *f, put_write_access(inode); goto cleanup_file; } + f->f_mode |= FMODE_WRITER; } f->f_mapping = inode->i_mapping; @@ -715,11 +716,9 @@ static int do_dentry_open(struct file *f, cleanup_all: fops_put(f->f_op); - if (f->f_mode & FMODE_WRITE) { - if (!special_file(inode->i_mode)) { - put_write_access(inode); - __mnt_drop_write(f->f_path.mnt); - } + if (f->f_mode & FMODE_WRITER) { + put_write_access(inode); + __mnt_drop_write(f->f_path.mnt); } cleanup_file: path_put(&f->f_path); diff --git a/include/linux/fs.h b/include/linux/fs.h index e80659ed78fc..d9d88a0b456e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -125,6 +125,8 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* File needs atomic accesses to f_pos */ #define FMODE_ATOMIC_POS ((__force fmode_t)0x8000) +/* Write access to underlying fs */ +#define FMODE_WRITER ((__force fmode_t)0x10000) /* File was opened by fanotify and shouldn't generate fanotify events */ #define FMODE_NONOTIFY ((__force fmode_t)0x1000000) -- cgit v1.2.3-58-ga151 From 3f4d5a00076b7e340625a2014cb83e10bf0d6dd1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 14 Mar 2014 09:43:29 -0400 Subject: tidy do_dentry_open() up a bit Signed-off-by: Al Viro --- fs/open.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index dcefb2f02d10..37f65fa44dbf 100644 --- a/fs/open.c +++ b/fs/open.c @@ -656,30 +656,28 @@ static int do_dentry_open(struct file *f, f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE; - if (unlikely(f->f_flags & O_PATH)) - f->f_mode = FMODE_PATH; - path_get(&f->f_path); inode = f->f_inode = f->f_path.dentry->d_inode; + f->f_mapping = inode->i_mapping; + + if (unlikely(f->f_flags & O_PATH)) { + f->f_mode = FMODE_PATH; + f->f_op = &empty_fops; + return 0; + } + if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) { error = get_write_access(inode); - if (error) + if (unlikely(error)) goto cleanup_file; error = __mnt_want_write(f->f_path.mnt); - if (error) { + if (unlikely(error)) { put_write_access(inode); goto cleanup_file; } f->f_mode |= FMODE_WRITER; } - f->f_mapping = inode->i_mapping; - - if (unlikely(f->f_mode & FMODE_PATH)) { - f->f_op = &empty_fops; - return 0; - } - /* POSIX.1-2008/SUSv4 Section XSI 2.9.7 */ if (S_ISREG(inode->i_mode)) f->f_mode |= FMODE_ATOMIC_POS; -- cgit v1.2.3-58-ga151