diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-03-02 15:20:00 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-03-02 15:20:00 -0800 |
commit | 94e877d0fb43bec0540d6a37d49cb4f7f05a5348 (patch) | |
tree | dc301912dff9c390cba59df00cb76b62ac862d6a | |
parent | 69fd110eb650ea7baa82158f3b89a7d86da1d056 (diff) | |
parent | 653a7746fa2f5369985f5368ffc162b6510db6c8 (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull vfs pile two from Al Viro:
- orangefs fix
- series of fs/namei.c cleanups from me
- VFS stuff coming from overlayfs tree
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
orangefs: Use RCU for destroy_inode
vfs: use helper for calling f_op->fsync()
mm: use helper for calling f_op->mmap()
vfs: use helpers for calling f_op->{read,write}_iter()
vfs: pass type instead of fn to do_{loop,iter}_readv_writev()
vfs: extract common parts of {compat_,}do_readv_writev()
vfs: wrap write f_ops with file_{start,end}_write()
vfs: deny copy_file_range() for non regular files
vfs: deny fallocate() on directory
vfs: create vfs helper vfs_tmpfile()
namei.c: split unlazy_walk()
namei.c: fold the check for DCACHE_OP_REVALIDATE into d_revalidate()
lookup_fast(): clean up the logics around the fallback to non-rcu mode
namei: fold unlazy_link() into its sole caller
-rw-r--r-- | drivers/block/loop.c | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem_dmabuf.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/vgem/vgem_drv.c | 2 | ||||
-rw-r--r-- | fs/aio.c | 4 | ||||
-rw-r--r-- | fs/coda/file.c | 2 | ||||
-rw-r--r-- | fs/namei.c | 251 | ||||
-rw-r--r-- | fs/open.c | 14 | ||||
-rw-r--r-- | fs/orangefs/super.c | 9 | ||||
-rw-r--r-- | fs/read_write.c | 130 | ||||
-rw-r--r-- | fs/splice.c | 2 | ||||
-rw-r--r-- | fs/sync.c | 2 | ||||
-rw-r--r-- | include/linux/fs.h | 52 | ||||
-rw-r--r-- | ipc/shm.c | 4 | ||||
-rw-r--r-- | mm/mmap.c | 2 | ||||
-rw-r--r-- | mm/nommu.c | 4 |
15 files changed, 261 insertions, 223 deletions
diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 4b52a1690329..eeb1db73f44e 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -501,9 +501,9 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, cmd->iocb.ki_flags = IOCB_DIRECT; if (rw == WRITE) - ret = file->f_op->write_iter(&cmd->iocb, &iter); + ret = call_write_iter(file, &cmd->iocb, &iter); else - ret = file->f_op->read_iter(&cmd->iocb, &iter); + ret = call_read_iter(file, &cmd->iocb, &iter); if (ret != -EIOCBQUEUED) cmd->iocb.ki_complete(&cmd->iocb, ret, 0); diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index d037adcda6f2..29bb8011dbc4 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -141,7 +141,7 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct * if (!obj->base.filp) return -ENODEV; - ret = obj->base.filp->f_op->mmap(obj->base.filp, vma); + ret = call_mmap(obj->base.filp, vma); if (ret) return ret; diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c index 7ccbb03e98de..a1f42d125e6e 100644 --- a/drivers/gpu/drm/vgem/vgem_drv.c +++ b/drivers/gpu/drm/vgem/vgem_drv.c @@ -288,7 +288,7 @@ static int vgem_prime_mmap(struct drm_gem_object *obj, if (!obj->filp) return -ENODEV; - ret = obj->filp->f_op->mmap(obj->filp, vma); + ret = call_mmap(obj->filp, vma); if (ret) return ret; diff --git a/fs/aio.c b/fs/aio.c --- a/fs/aio.c +++ b/fs/aio.c @@ -1495,7 +1495,7 @@ static ssize_t aio_read(struct kiocb *req, struct iocb *iocb, bool vectored, return ret; ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter)); if (!ret) - ret = aio_ret(req, file->f_op->read_iter(req, &iter)); + ret = aio_ret(req, call_read_iter(file, req, &iter)); kfree(iovec); return ret; } @@ -1520,7 +1520,7 @@ static ssize_t aio_write(struct kiocb *req, struct iocb *iocb, bool vectored, if (!ret) { req->ki_flags |= IOCB_WRITE; file_start_write(file); - ret = aio_ret(req, file->f_op->write_iter(req, 
&iter)); + ret = aio_ret(req, call_write_iter(file, req, &iter)); /* * We release freeze protection in aio_complete(). Fool lockdep * by telling it the lock got released so that it doesn't diff --git a/fs/coda/file.c b/fs/coda/file.c index 6e0154eb6fcc..9d956cd6d46f 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -96,7 +96,7 @@ coda_file_mmap(struct file *coda_file, struct vm_area_struct *vma) cfi->cfi_mapcount++; spin_unlock(&cii->c_lock); - return host_file->f_op->mmap(host_file, vma); + return call_mmap(host_file, vma); } int coda_open(struct inode *coda_inode, struct file *coda_file) diff --git a/fs/namei.c b/fs/namei.c index da689c9c005e..d41fab78798b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -672,17 +672,15 @@ static bool legitimize_links(struct nameidata *nd) /** * unlazy_walk - try to switch to ref-walk mode. * @nd: nameidata pathwalk data - * @dentry: child of nd->path.dentry or NULL - * @seq: seq number to check dentry against * Returns: 0 on success, -ECHILD on failure * - * unlazy_walk attempts to legitimize the current nd->path, nd->root and dentry - * for ref-walk mode. @dentry must be a path found by a do_lookup call on - * @nd or NULL. Must be called from rcu-walk context. + * unlazy_walk attempts to legitimize the current nd->path and nd->root + * for ref-walk mode. + * Must be called from rcu-walk context. * Nothing should touch nameidata between unlazy_walk() failure and * terminate_walk(). 
*/ -static int unlazy_walk(struct nameidata *nd, struct dentry *dentry, unsigned seq) +static int unlazy_walk(struct nameidata *nd) { struct dentry *parent = nd->path.dentry; @@ -691,33 +689,66 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry, unsigned seq nd->flags &= ~LOOKUP_RCU; if (unlikely(!legitimize_links(nd))) goto out2; + if (unlikely(!legitimize_path(nd, &nd->path, nd->seq))) + goto out1; + if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) { + if (unlikely(!legitimize_path(nd, &nd->root, nd->root_seq))) + goto out; + } + rcu_read_unlock(); + BUG_ON(nd->inode != parent->d_inode); + return 0; + +out2: + nd->path.mnt = NULL; + nd->path.dentry = NULL; +out1: + if (!(nd->flags & LOOKUP_ROOT)) + nd->root.mnt = NULL; +out: + rcu_read_unlock(); + return -ECHILD; +} + +/** + * unlazy_child - try to switch to ref-walk mode. + * @nd: nameidata pathwalk data + * @dentry: child of nd->path.dentry + * @seq: seq number to check dentry against + * Returns: 0 on success, -ECHILD on failure + * + * unlazy_child attempts to legitimize the current nd->path, nd->root and dentry + * for ref-walk mode. @dentry must be a path found by a do_lookup call on + * @nd. Must be called from rcu-walk context. + * Nothing should touch nameidata between unlazy_child() failure and + * terminate_walk(). + */ +static int unlazy_child(struct nameidata *nd, struct dentry *dentry, unsigned seq) +{ + BUG_ON(!(nd->flags & LOOKUP_RCU)); + + nd->flags &= ~LOOKUP_RCU; + if (unlikely(!legitimize_links(nd))) + goto out2; if (unlikely(!legitimize_mnt(nd->path.mnt, nd->m_seq))) goto out2; - if (unlikely(!lockref_get_not_dead(&parent->d_lockref))) + if (unlikely(!lockref_get_not_dead(&nd->path.dentry->d_lockref))) goto out1; /* - * For a negative lookup, the lookup sequence point is the parents - * sequence point, and it only needs to revalidate the parent dentry. 
- * - * For a positive lookup, we need to move both the parent and the - * dentry from the RCU domain to be properly refcounted. And the - * sequence number in the dentry validates *both* dentry counters, - * since we checked the sequence number of the parent after we got - * the child sequence number. So we know the parent must still - * be valid if the child sequence number is still valid. + * We need to move both the parent and the dentry from the RCU domain + * to be properly refcounted. And the sequence number in the dentry + * validates *both* dentry counters, since we checked the sequence + * number of the parent after we got the child sequence number. So we + * know the parent must still be valid if the child sequence number is */ - if (!dentry) { - if (read_seqcount_retry(&parent->d_seq, nd->seq)) - goto out; - BUG_ON(nd->inode != parent->d_inode); - } else { - if (!lockref_get_not_dead(&dentry->d_lockref)) - goto out; - if (read_seqcount_retry(&dentry->d_seq, seq)) - goto drop_dentry; + if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) + goto out; + if (unlikely(read_seqcount_retry(&dentry->d_seq, seq))) { + rcu_read_unlock(); + dput(dentry); + goto drop_root_mnt; } - /* * Sequence counts matched. Now make sure that the root is * still valid and get it if required. 
@@ -733,10 +764,6 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry, unsigned seq rcu_read_unlock(); return 0; -drop_dentry: - rcu_read_unlock(); - dput(dentry); - goto drop_root_mnt; out2: nd->path.mnt = NULL; out1: @@ -749,27 +776,12 @@ drop_root_mnt: return -ECHILD; } -static int unlazy_link(struct nameidata *nd, struct path *link, unsigned seq) -{ - if (unlikely(!legitimize_path(nd, link, seq))) { - drop_links(nd); - nd->depth = 0; - nd->flags &= ~LOOKUP_RCU; - nd->path.mnt = NULL; - nd->path.dentry = NULL; - if (!(nd->flags & LOOKUP_ROOT)) - nd->root.mnt = NULL; - rcu_read_unlock(); - } else if (likely(unlazy_walk(nd, NULL, 0)) == 0) { - return 0; - } - path_put(link); - return -ECHILD; -} - static inline int d_revalidate(struct dentry *dentry, unsigned int flags) { - return dentry->d_op->d_revalidate(dentry, flags); + if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) + return dentry->d_op->d_revalidate(dentry, flags); + else + return 1; } /** @@ -790,7 +802,7 @@ static int complete_walk(struct nameidata *nd) if (nd->flags & LOOKUP_RCU) { if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; - if (unlikely(unlazy_walk(nd, NULL, 0))) + if (unlikely(unlazy_walk(nd))) return -ECHILD; } @@ -1016,7 +1028,7 @@ const char *get_link(struct nameidata *nd) touch_atime(&last->link); cond_resched(); } else if (atime_needs_update_rcu(&last->link, inode)) { - if (unlikely(unlazy_walk(nd, NULL, 0))) + if (unlikely(unlazy_walk(nd))) return ERR_PTR(-ECHILD); touch_atime(&last->link); } @@ -1035,7 +1047,7 @@ const char *get_link(struct nameidata *nd) if (nd->flags & LOOKUP_RCU) { res = get(NULL, inode, &last->done); if (res == ERR_PTR(-ECHILD)) { - if (unlikely(unlazy_walk(nd, NULL, 0))) + if (unlikely(unlazy_walk(nd))) return ERR_PTR(-ECHILD); res = get(dentry, inode, &last->done); } @@ -1469,19 +1481,14 @@ static struct dentry *lookup_dcache(const struct qstr *name, struct dentry *dir, unsigned int flags) { - struct dentry *dentry; - int error; - - 
dentry = d_lookup(dir, name); + struct dentry *dentry = d_lookup(dir, name); if (dentry) { - if (dentry->d_flags & DCACHE_OP_REVALIDATE) { - error = d_revalidate(dentry, flags); - if (unlikely(error <= 0)) { - if (!error) - d_invalidate(dentry); - dput(dentry); - return ERR_PTR(error); - } + int error = d_revalidate(dentry, flags); + if (unlikely(error <= 0)) { + if (!error) + d_invalidate(dentry); + dput(dentry); + return ERR_PTR(error); } } return dentry; @@ -1546,7 +1553,7 @@ static int lookup_fast(struct nameidata *nd, bool negative; dentry = __d_lookup_rcu(parent, &nd->last, &seq); if (unlikely(!dentry)) { - if (unlazy_walk(nd, NULL, 0)) + if (unlazy_walk(nd)) return -ECHILD; return 0; } @@ -1571,14 +1578,8 @@ static int lookup_fast(struct nameidata *nd, return -ECHILD; *seqp = seq; - if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) - status = d_revalidate(dentry, nd->flags); - if (unlikely(status <= 0)) { - if (unlazy_walk(nd, dentry, seq)) - return -ECHILD; - if (status == -ECHILD) - status = d_revalidate(dentry, nd->flags); - } else { + status = d_revalidate(dentry, nd->flags); + if (likely(status > 0)) { /* * Note: do negative dentry check after revalidation in * case that drops it. 
@@ -1589,15 +1590,17 @@ static int lookup_fast(struct nameidata *nd, path->dentry = dentry; if (likely(__follow_mount_rcu(nd, path, inode, seqp))) return 1; - if (unlazy_walk(nd, dentry, seq)) - return -ECHILD; } + if (unlazy_child(nd, dentry, seq)) + return -ECHILD; + if (unlikely(status == -ECHILD)) + /* we'd been told to redo it in non-rcu mode */ + status = d_revalidate(dentry, nd->flags); } else { dentry = __d_lookup(parent, &nd->last); if (unlikely(!dentry)) return 0; - if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) - status = d_revalidate(dentry, nd->flags); + status = d_revalidate(dentry, nd->flags); } if (unlikely(status <= 0)) { if (!status) @@ -1636,8 +1639,7 @@ again: if (IS_ERR(dentry)) goto out; if (unlikely(!d_in_lookup(dentry))) { - if ((dentry->d_flags & DCACHE_OP_REVALIDATE) && - !(flags & LOOKUP_NO_REVAL)) { + if (!(flags & LOOKUP_NO_REVAL)) { int error = d_revalidate(dentry, flags); if (unlikely(error <= 0)) { if (!error) { @@ -1668,7 +1670,7 @@ static inline int may_lookup(struct nameidata *nd) int err = inode_permission(nd->inode, MAY_EXEC|MAY_NOT_BLOCK); if (err != -ECHILD) return err; - if (unlazy_walk(nd, NULL, 0)) + if (unlazy_walk(nd)) return -ECHILD; } return inode_permission(nd->inode, MAY_EXEC); @@ -1703,9 +1705,17 @@ static int pick_link(struct nameidata *nd, struct path *link, error = nd_alloc_stack(nd); if (unlikely(error)) { if (error == -ECHILD) { - if (unlikely(unlazy_link(nd, link, seq))) - return -ECHILD; - error = nd_alloc_stack(nd); + if (unlikely(!legitimize_path(nd, link, seq))) { + drop_links(nd); + nd->depth = 0; + nd->flags &= ~LOOKUP_RCU; + nd->path.mnt = NULL; + nd->path.dentry = NULL; + if (!(nd->flags & LOOKUP_ROOT)) + nd->root.mnt = NULL; + rcu_read_unlock(); + } else if (likely(unlazy_walk(nd)) == 0) + error = nd_alloc_stack(nd); } if (error) { path_put(link); @@ -2122,7 +2132,7 @@ OK: } if (unlikely(!d_can_lookup(nd->path.dentry))) { if (nd->flags & LOOKUP_RCU) { - if (unlazy_walk(nd, NULL, 0)) + if 
(unlazy_walk(nd)) return -ECHILD; } return -ENOTDIR; @@ -2579,7 +2589,7 @@ mountpoint_last(struct nameidata *nd) /* If we're in rcuwalk, drop out of it to handle last component */ if (nd->flags & LOOKUP_RCU) { - if (unlazy_walk(nd, NULL, 0)) + if (unlazy_walk(nd)) return -ECHILD; } @@ -3072,9 +3082,6 @@ static int lookup_open(struct nameidata *nd, struct path *path, if (d_in_lookup(dentry)) break; - if (!(dentry->d_flags & DCACHE_OP_REVALIDATE)) - break; - error = d_revalidate(dentry, nd->flags); if (likely(error > 0)) break; @@ -3356,13 +3363,50 @@ out: return error; } +struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, int open_flag) +{ + static const struct qstr name = QSTR_INIT("/", 1); + struct dentry *child = NULL; + struct inode *dir = dentry->d_inode; + struct inode *inode; + int error; + + /* we want directory to be writable */ + error = inode_permission(dir, MAY_WRITE | MAY_EXEC); + if (error) + goto out_err; + error = -EOPNOTSUPP; + if (!dir->i_op->tmpfile) + goto out_err; + error = -ENOMEM; + child = d_alloc(dentry, &name); + if (unlikely(!child)) + goto out_err; + error = dir->i_op->tmpfile(dir, child, mode); + if (error) + goto out_err; + error = -ENOENT; + inode = child->d_inode; + if (unlikely(!inode)) + goto out_err; + if (!(open_flag & O_EXCL)) { + spin_lock(&inode->i_lock); + inode->i_state |= I_LINKABLE; + spin_unlock(&inode->i_lock); + } + return child; + +out_err: + dput(child); + return ERR_PTR(error); +} +EXPORT_SYMBOL(vfs_tmpfile); + static int do_tmpfile(struct nameidata *nd, unsigned flags, const struct open_flags *op, struct file *file, int *opened) { - static const struct qstr name = QSTR_INIT("/", 1); struct dentry *child; - struct inode *dir; struct path path; int error = path_lookupat(nd, flags | LOOKUP_DIRECTORY, &path); if (unlikely(error)) @@ -3370,25 +3414,12 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags, error = mnt_want_write(path.mnt); if (unlikely(error)) goto out; - dir = path.dentry->d_inode; - 
/* we want directory to be writable */ - error = inode_permission(dir, MAY_WRITE | MAY_EXEC); - if (error) + child = vfs_tmpfile(path.dentry, op->mode, op->open_flag); + error = PTR_ERR(child); + if (unlikely(IS_ERR(child))) goto out2; - if (!dir->i_op->tmpfile) { - error = -EOPNOTSUPP; - goto out2; - } - child = d_alloc(path.dentry, &name); - if (unlikely(!child)) { - error = -ENOMEM; - goto out2; - } dput(path.dentry); path.dentry = child; - error = dir->i_op->tmpfile(dir, child, op->mode); - if (error) - goto out2; audit_inode(nd->name, child, 0); /* Don't check for other permissions, the inode was just created */ error = may_open(&path, 0, op->open_flag); @@ -3399,14 +3430,8 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags, if (error) goto out2; error = open_check_o_direct(file); - if (error) { + if (error) fput(file); - } else if (!(op->open_flag & O_EXCL)) { - struct inode *inode = file_inode(file); - spin_lock(&inode->i_lock); - inode->i_state |= I_LINKABLE; - spin_unlock(&inode->i_lock); - } out2: mnt_drop_write(path.mnt); out: diff --git a/fs/open.c b/fs/open.c index 9921f70bc5ca..949cef29c3bb 100644 --- a/fs/open.c +++ b/fs/open.c @@ -301,12 +301,10 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) if (S_ISFIFO(inode->i_mode)) return -ESPIPE; - /* - * Let individual file system decide if it supports preallocation - * for directories or not. 
- */ - if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) && - !S_ISBLK(inode->i_mode)) + if (S_ISDIR(inode->i_mode)) + return -EISDIR; + + if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) return -ENODEV; /* Check for wrap through zero too */ @@ -316,7 +314,7 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) if (!file->f_op->fallocate) return -EOPNOTSUPP; - sb_start_write(inode->i_sb); + file_start_write(file); ret = file->f_op->fallocate(file, mode, offset, len); /* @@ -329,7 +327,7 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) if (ret == 0) fsnotify_modify(file); - sb_end_write(inode->i_sb); + file_end_write(file); return ret; } EXPORT_SYMBOL_GPL(vfs_fallocate); diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index c48859f16e7b..67c24351a67f 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -115,6 +115,13 @@ static struct inode *orangefs_alloc_inode(struct super_block *sb) return &orangefs_inode->vfs_inode; } +static void orangefs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); + kmem_cache_free(orangefs_inode_cache, orangefs_inode); +} + static void orangefs_destroy_inode(struct inode *inode) { struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); @@ -123,7 +130,7 @@ static void orangefs_destroy_inode(struct inode *inode) "%s: deallocated %p destroying inode %pU\n", __func__, orangefs_inode, get_khandle_from_ino(inode)); - kmem_cache_free(orangefs_inode_cache, orangefs_inode); + call_rcu(&inode->i_rcu, orangefs_i_callback); } /* diff --git a/fs/read_write.c b/fs/read_write.c index 5816d4c4cab0..f2ed9fdc98fd 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -23,9 +23,6 @@ #include <linux/uaccess.h> #include <asm/unistd.h> -typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *); -typedef ssize_t (*iter_fn_t)(struct kiocb *, struct 
iov_iter *); - const struct file_operations generic_ro_fops = { .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, @@ -370,7 +367,7 @@ ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos) kiocb.ki_pos = *ppos; iter->type |= READ; - ret = file->f_op->read_iter(&kiocb, iter); + ret = call_read_iter(file, &kiocb, iter); BUG_ON(ret == -EIOCBQUEUED); if (ret > 0) *ppos = kiocb.ki_pos; @@ -390,7 +387,7 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos) kiocb.ki_pos = *ppos; iter->type |= WRITE; - ret = file->f_op->write_iter(&kiocb, iter); + ret = call_write_iter(file, &kiocb, iter); BUG_ON(ret == -EIOCBQUEUED); if (ret > 0) *ppos = kiocb.ki_pos; @@ -439,7 +436,7 @@ static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, lo kiocb.ki_pos = *ppos; iov_iter_init(&iter, READ, &iov, 1, len); - ret = filp->f_op->read_iter(&kiocb, &iter); + ret = call_read_iter(filp, &kiocb, &iter); BUG_ON(ret == -EIOCBQUEUED); *ppos = kiocb.ki_pos; return ret; @@ -496,7 +493,7 @@ static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t kiocb.ki_pos = *ppos; iov_iter_init(&iter, WRITE, &iov, 1, len); - ret = filp->f_op->write_iter(&kiocb, &iter); + ret = call_write_iter(filp, &kiocb, &iter); BUG_ON(ret == -EIOCBQUEUED); if (ret > 0) *ppos = kiocb.ki_pos; @@ -675,7 +672,7 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) EXPORT_SYMBOL(iov_shorten); static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, - loff_t *ppos, iter_fn_t fn, int flags) + loff_t *ppos, int type, int flags) { struct kiocb kiocb; ssize_t ret; @@ -692,7 +689,10 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, kiocb.ki_flags |= (IOCB_DSYNC | IOCB_SYNC); kiocb.ki_pos = *ppos; - ret = fn(&kiocb, iter); + if (type == READ) + ret = call_read_iter(filp, &kiocb, iter); + else + ret = call_write_iter(filp, &kiocb, iter); BUG_ON(ret 
== -EIOCBQUEUED); *ppos = kiocb.ki_pos; return ret; @@ -700,7 +700,7 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, /* Do it by hand, with file-ops */ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter, - loff_t *ppos, io_fn_t fn, int flags) + loff_t *ppos, int type, int flags) { ssize_t ret = 0; @@ -711,7 +711,13 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter, struct iovec iovec = iov_iter_iovec(iter); ssize_t nr; - nr = fn(filp, iovec.iov_base, iovec.iov_len, ppos); + if (type == READ) { + nr = filp->f_op->read(filp, iovec.iov_base, + iovec.iov_len, ppos); + } else { + nr = filp->f_op->write(filp, iovec.iov_base, + iovec.iov_len, ppos); + } if (nr < 0) { if (!ret) @@ -834,50 +840,32 @@ out: return ret; } -static ssize_t do_readv_writev(int type, struct file *file, - const struct iovec __user * uvector, - unsigned long nr_segs, loff_t *pos, - int flags) +static ssize_t __do_readv_writev(int type, struct file *file, + struct iov_iter *iter, loff_t *pos, int flags) { size_t tot_len; - struct iovec iovstack[UIO_FASTIOV]; - struct iovec *iov = iovstack; - struct iov_iter iter; - ssize_t ret; - io_fn_t fn; - iter_fn_t iter_fn; - - ret = import_iovec(type, uvector, nr_segs, - ARRAY_SIZE(iovstack), &iov, &iter); - if (ret < 0) - return ret; + ssize_t ret = 0; - tot_len = iov_iter_count(&iter); + tot_len = iov_iter_count(iter); if (!tot_len) goto out; ret = rw_verify_area(type, file, pos, tot_len); if (ret < 0) goto out; - if (type == READ) { - fn = file->f_op->read; - iter_fn = file->f_op->read_iter; - } else { - fn = (io_fn_t)file->f_op->write; - iter_fn = file->f_op->write_iter; + if (type != READ) file_start_write(file); - } - if (iter_fn) - ret = do_iter_readv_writev(file, &iter, pos, iter_fn, flags); + if ((type == READ && file->f_op->read_iter) || + (type == WRITE && file->f_op->write_iter)) + ret = do_iter_readv_writev(file, iter, pos, type, flags); else - ret = 
do_loop_readv_writev(file, &iter, pos, fn, flags); + ret = do_loop_readv_writev(file, iter, pos, type, flags); if (type != READ) file_end_write(file); out: - kfree(iov); if ((ret + (type == READ)) > 0) { if (type == READ) fsnotify_access(file); @@ -887,6 +875,27 @@ out: return ret; } +static ssize_t do_readv_writev(int type, struct file *file, + const struct iovec __user *uvector, + unsigned long nr_segs, loff_t *pos, + int flags) +{ + struct iovec iovstack[UIO_FASTIOV]; + struct iovec *iov = iovstack; + struct iov_iter iter; + ssize_t ret; + + ret = import_iovec(type, uvector, nr_segs, + ARRAY_SIZE(iovstack), &iov, &iter); + if (ret < 0) + return ret; + + ret = __do_readv_writev(type, file, &iter, pos, flags); + kfree(iov); + + return ret; +} + ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, unsigned long vlen, loff_t *pos, int flags) { @@ -1064,51 +1073,19 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, unsigned long nr_segs, loff_t *pos, int flags) { - compat_ssize_t tot_len; struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; struct iov_iter iter; ssize_t ret; - io_fn_t fn; - iter_fn_t iter_fn; ret = compat_import_iovec(type, uvector, nr_segs, UIO_FASTIOV, &iov, &iter); if (ret < 0) return ret; - tot_len = iov_iter_count(&iter); - if (!tot_len) - goto out; - ret = rw_verify_area(type, file, pos, tot_len); - if (ret < 0) - goto out; - - if (type == READ) { - fn = file->f_op->read; - iter_fn = file->f_op->read_iter; - } else { - fn = (io_fn_t)file->f_op->write; - iter_fn = file->f_op->write_iter; - file_start_write(file); - } - - if (iter_fn) - ret = do_iter_readv_writev(file, &iter, pos, iter_fn, flags); - else - ret = do_loop_readv_writev(file, &iter, pos, fn, flags); - - if (type != READ) - file_end_write(file); - -out: + ret = __do_readv_writev(type, file, &iter, pos, flags); kfree(iov); - if ((ret + (type == READ)) > 0) { - if (type == READ) - fsnotify_access(file); - else - fsnotify_modify(file); - } 
+ return ret; } @@ -1518,6 +1495,11 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, if (flags != 0) return -EINVAL; + if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) + return -EISDIR; + if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) + return -EINVAL; + ret = rw_verify_area(READ, file_in, &pos_in, len); if (unlikely(ret)) return ret; @@ -1538,7 +1520,7 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, if (len == 0) return 0; - sb_start_write(inode_out->i_sb); + file_start_write(file_out); /* * Try cloning first, this is supported by more file systems, and @@ -1574,7 +1556,7 @@ done: inc_syscr(current); inc_syscw(current); - sb_end_write(inode_out->i_sb); + file_end_write(file_out); return ret; } diff --git a/fs/splice.c b/fs/splice.c index 4ef78aa8ef61..eaafa3d8869a 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -307,7 +307,7 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, idx = to.idx; init_sync_kiocb(&kiocb, in); kiocb.ki_pos = *ppos; - ret = in->f_op->read_iter(&kiocb, &to); + ret = call_read_iter(in, &kiocb, &to); if (ret > 0) { *ppos = kiocb.ki_pos; file_accessed(in); diff --git a/fs/sync.c b/fs/sync.c index 2a54c1f22035..11ba023434b1 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -192,7 +192,7 @@ int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync) spin_unlock(&inode->i_lock); mark_inode_dirty_sync(inode); } - return file->f_op->fsync(file, start, end, datasync); + return call_fsync(file, start, end, datasync); } EXPORT_SYMBOL(vfs_fsync_range); diff --git a/include/linux/fs.h b/include/linux/fs.h index c64f2cb7d364..52350947c670 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1567,6 +1567,9 @@ extern int vfs_unlink(struct inode *, struct dentry *, struct inode **); extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int); extern int vfs_whiteout(struct inode *, struct dentry *); 
+extern struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, + int open_flag); + /* * VFS file helper functions. */ @@ -1718,6 +1721,29 @@ struct inode_operations { int (*set_acl)(struct inode *, struct posix_acl *, int); } ____cacheline_aligned; +static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio, + struct iov_iter *iter) +{ + return file->f_op->read_iter(kio, iter); +} + +static inline ssize_t call_write_iter(struct file *file, struct kiocb *kio, + struct iov_iter *iter) +{ + return file->f_op->write_iter(kio, iter); +} + +static inline int call_mmap(struct file *file, struct vm_area_struct *vma) +{ + return file->f_op->mmap(file, vma); +} + +static inline int call_fsync(struct file *file, loff_t start, loff_t end, + int datasync) +{ + return file->f_op->fsync(file, start, end, datasync); +} + ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, unsigned long nr_segs, unsigned long fast_segs, struct iovec *fast_pointer, @@ -1744,19 +1770,6 @@ extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, extern int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same); -static inline int do_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - u64 len) -{ - int ret; - - sb_start_write(file_inode(file_out)->i_sb); - ret = vfs_clone_file_range(file_in, pos_in, file_out, pos_out, len); - sb_end_write(file_inode(file_out)->i_sb); - - return ret; -} - struct super_operations { struct inode *(*alloc_inode)(struct super_block *sb); void (*destroy_inode)(struct inode *); @@ -2568,6 +2581,19 @@ static inline void file_end_write(struct file *file) __sb_end_write(file_inode(file)->i_sb, SB_FREEZE_WRITE); } +static inline int do_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + u64 len) +{ + int ret; + + file_start_write(file_out); + ret = vfs_clone_file_range(file_in, pos_in, file_out, pos_out, 
len); + file_end_write(file_out); + + return ret; +} + /* * get_write_access() gets write permission for a file. * put_write_access() releases this write permission. diff --git a/ipc/shm.c b/ipc/shm.c index 06ea9ef7f54a..481d2a9c298a 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -423,7 +423,7 @@ static int shm_mmap(struct file *file, struct vm_area_struct *vma) if (ret) return ret; - ret = sfd->file->f_op->mmap(sfd->file, vma); + ret = call_mmap(sfd->file, vma); if (ret) { shm_close(vma); return ret; @@ -452,7 +452,7 @@ static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync) if (!sfd->file->f_op->fsync) return -EINVAL; - return sfd->file->f_op->fsync(sfd->file, start, end, datasync); + return call_fsync(sfd->file, start, end, datasync); } static long shm_fallocate(struct file *file, int mode, loff_t offset, diff --git a/mm/mmap.c b/mm/mmap.c index 499b988b1639..bfbe8856d134 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1672,7 +1672,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, * new file must not have been exposed to user-space, yet. */ vma->vm_file = get_file(file); - error = file->f_op->mmap(file, vma); + error = call_mmap(file, vma); if (error) goto unmap_and_free_vma; diff --git a/mm/nommu.c b/mm/nommu.c index fe9f4fa4a7a7..5bbef9cb89eb 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1084,7 +1084,7 @@ static int do_mmap_shared_file(struct vm_area_struct *vma) { int ret; - ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); + ret = call_mmap(vma->vm_file, vma); if (ret == 0) { vma->vm_region->vm_top = vma->vm_region->vm_end; return 0; @@ -1115,7 +1115,7 @@ static int do_mmap_private(struct vm_area_struct *vma, * - VM_MAYSHARE will be set if it may attempt to share */ if (capabilities & NOMMU_MAP_DIRECT) { - ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); + ret = call_mmap(vma->vm_file, vma); if (ret == 0) { /* shouldn't return success if we're not sharing */ BUG_ON(!(vma->vm_flags & VM_MAYSHARE)); |