summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-10-22 09:59:21 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-10-22 09:59:21 -0700
commitf56e65dff6ad52395ef45738799b4fb70ff43376 (patch)
tree230e0ac39888f219a6859fb15ef548fb2bd6511c /fs
parent24717cfbbbbfa415d1e3dca0f21c417e5faf8208 (diff)
parent7b84b665c874f60d84547635341e418f20cbbab2 (diff)
Merge branch 'work.set_fs' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull initial set_fs() removal from Al Viro: "Christoph's set_fs base series + fixups" * 'work.set_fs' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: fs: Allow a NULL pos pointer to __kernel_read fs: Allow a NULL pos pointer to __kernel_write powerpc: remove address space overrides using set_fs() powerpc: use non-set_fs based maccess routines x86: remove address space overrides using set_fs() x86: make TASK_SIZE_MAX usable from assembly code x86: move PAGE_OFFSET, TASK_SIZE & friends to page_{32,64}_types.h lkdtm: remove set_fs-based tests test_bitmap: remove user bitmap tests uaccess: add infrastructure for kernel builds with set_fs() fs: don't allow splice read/write without explicit ops fs: don't allow kernel reads and writes without iter ops sysctl: Convert to iter interfaces proc: add a read_iter method to proc proc_ops proc: cleanup the compat vs no compat file ops proc: remove a level of indentation in proc_get_inode
Diffstat (limited to 'fs')
-rw-r--r--fs/proc/inode.c119
-rw-r--r--fs/proc/proc_sysctl.c48
-rw-r--r--fs/read_write.c71
-rw-r--r--fs/splice.c130
4 files changed, 166 insertions, 202 deletions
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 28d6105e908e..58c075e2a452 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -297,6 +297,21 @@ static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence)
return rv;
}
+static ssize_t proc_reg_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+ struct proc_dir_entry *pde = PDE(file_inode(iocb->ki_filp));
+ ssize_t ret;
+
+ if (pde_is_permanent(pde))
+ return pde->proc_ops->proc_read_iter(iocb, iter);
+
+ if (!use_pde(pde))
+ return -EIO;
+ ret = pde->proc_ops->proc_read_iter(iocb, iter);
+ unuse_pde(pde);
+ return ret;
+}
+
static ssize_t pde_read(struct proc_dir_entry *pde, struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
typeof_member(struct proc_ops, proc_read) read;
@@ -572,9 +587,18 @@ static const struct file_operations proc_reg_file_ops = {
.write = proc_reg_write,
.poll = proc_reg_poll,
.unlocked_ioctl = proc_reg_unlocked_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = proc_reg_compat_ioctl,
-#endif
+ .mmap = proc_reg_mmap,
+ .get_unmapped_area = proc_reg_get_unmapped_area,
+ .open = proc_reg_open,
+ .release = proc_reg_release,
+};
+
+static const struct file_operations proc_iter_file_ops = {
+ .llseek = proc_reg_llseek,
+ .read_iter = proc_reg_read_iter,
+ .write = proc_reg_write,
+ .poll = proc_reg_poll,
+ .unlocked_ioctl = proc_reg_unlocked_ioctl,
.mmap = proc_reg_mmap,
.get_unmapped_area = proc_reg_get_unmapped_area,
.open = proc_reg_open,
@@ -582,12 +606,26 @@ static const struct file_operations proc_reg_file_ops = {
};
#ifdef CONFIG_COMPAT
-static const struct file_operations proc_reg_file_ops_no_compat = {
+static const struct file_operations proc_reg_file_ops_compat = {
.llseek = proc_reg_llseek,
.read = proc_reg_read,
.write = proc_reg_write,
.poll = proc_reg_poll,
.unlocked_ioctl = proc_reg_unlocked_ioctl,
+ .compat_ioctl = proc_reg_compat_ioctl,
+ .mmap = proc_reg_mmap,
+ .get_unmapped_area = proc_reg_get_unmapped_area,
+ .open = proc_reg_open,
+ .release = proc_reg_release,
+};
+
+static const struct file_operations proc_iter_file_ops_compat = {
+ .llseek = proc_reg_llseek,
+ .read_iter = proc_reg_read_iter,
+ .write = proc_reg_write,
+ .poll = proc_reg_poll,
+ .unlocked_ioctl = proc_reg_unlocked_ioctl,
+ .compat_ioctl = proc_reg_compat_ioctl,
.mmap = proc_reg_mmap,
.get_unmapped_area = proc_reg_get_unmapped_area,
.open = proc_reg_open,
@@ -619,42 +657,51 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
{
struct inode *inode = new_inode(sb);
- if (inode) {
- inode->i_ino = de->low_ino;
- inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
- PROC_I(inode)->pde = de;
+ if (!inode) {
+ pde_put(de);
+ return NULL;
+ }
- if (is_empty_pde(de)) {
- make_empty_dir_inode(inode);
- return inode;
- }
- if (de->mode) {
- inode->i_mode = de->mode;
- inode->i_uid = de->uid;
- inode->i_gid = de->gid;
- }
- if (de->size)
- inode->i_size = de->size;
- if (de->nlink)
- set_nlink(inode, de->nlink);
+ inode->i_ino = de->low_ino;
+ inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+ PROC_I(inode)->pde = de;
+ if (is_empty_pde(de)) {
+ make_empty_dir_inode(inode);
+ return inode;
+ }
- if (S_ISREG(inode->i_mode)) {
- inode->i_op = de->proc_iops;
+ if (de->mode) {
+ inode->i_mode = de->mode;
+ inode->i_uid = de->uid;
+ inode->i_gid = de->gid;
+ }
+ if (de->size)
+ inode->i_size = de->size;
+ if (de->nlink)
+ set_nlink(inode, de->nlink);
+
+ if (S_ISREG(inode->i_mode)) {
+ inode->i_op = de->proc_iops;
+ if (de->proc_ops->proc_read_iter)
+ inode->i_fop = &proc_iter_file_ops;
+ else
inode->i_fop = &proc_reg_file_ops;
#ifdef CONFIG_COMPAT
- if (!de->proc_ops->proc_compat_ioctl) {
- inode->i_fop = &proc_reg_file_ops_no_compat;
- }
+ if (de->proc_ops->proc_compat_ioctl) {
+ if (de->proc_ops->proc_read_iter)
+ inode->i_fop = &proc_iter_file_ops_compat;
+ else
+ inode->i_fop = &proc_reg_file_ops_compat;
+ }
#endif
- } else if (S_ISDIR(inode->i_mode)) {
- inode->i_op = de->proc_iops;
- inode->i_fop = de->proc_dir_ops;
- } else if (S_ISLNK(inode->i_mode)) {
- inode->i_op = de->proc_iops;
- inode->i_fop = NULL;
- } else
- BUG();
- } else
- pde_put(de);
+ } else if (S_ISDIR(inode->i_mode)) {
+ inode->i_op = de->proc_iops;
+ inode->i_fop = de->proc_dir_ops;
+ } else if (S_ISLNK(inode->i_mode)) {
+ inode->i_op = de->proc_iops;
+ inode->i_fop = NULL;
+ } else {
+ BUG();
+ }
return inode;
}
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 6c1166ccdaea..317899222d7f 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -12,6 +12,7 @@
#include <linux/cred.h>
#include <linux/namei.h>
#include <linux/mm.h>
+#include <linux/uio.h>
#include <linux/module.h>
#include <linux/bpf-cgroup.h>
#include <linux/mount.h>
@@ -540,13 +541,14 @@ out:
return err;
}
-static ssize_t proc_sys_call_handler(struct file *filp, void __user *ubuf,
- size_t count, loff_t *ppos, int write)
+static ssize_t proc_sys_call_handler(struct kiocb *iocb, struct iov_iter *iter,
+ int write)
{
- struct inode *inode = file_inode(filp);
+ struct inode *inode = file_inode(iocb->ki_filp);
struct ctl_table_header *head = grab_header(inode);
struct ctl_table *table = PROC_I(inode)->sysctl_entry;
- void *kbuf;
+ size_t count = iov_iter_count(iter);
+ char *kbuf;
ssize_t error;
if (IS_ERR(head))
@@ -569,32 +571,30 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *ubuf,
error = -ENOMEM;
if (count >= KMALLOC_MAX_SIZE)
goto out;
+ kbuf = kzalloc(count + 1, GFP_KERNEL);
+ if (!kbuf)
+ goto out;
if (write) {
- kbuf = memdup_user_nul(ubuf, count);
- if (IS_ERR(kbuf)) {
- error = PTR_ERR(kbuf);
- goto out;
- }
- } else {
- kbuf = kzalloc(count, GFP_KERNEL);
- if (!kbuf)
- goto out;
+ error = -EFAULT;
+ if (!copy_from_iter_full(kbuf, count, iter))
+ goto out_free_buf;
+ kbuf[count] = '\0';
}
error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, &kbuf, &count,
- ppos);
+ &iocb->ki_pos);
if (error)
goto out_free_buf;
/* careful: calling conventions are nasty here */
- error = table->proc_handler(table, write, kbuf, &count, ppos);
+ error = table->proc_handler(table, write, kbuf, &count, &iocb->ki_pos);
if (error)
goto out_free_buf;
if (!write) {
error = -EFAULT;
- if (copy_to_user(ubuf, kbuf, count))
+ if (copy_to_iter(kbuf, count, iter) < count)
goto out_free_buf;
}
@@ -607,16 +607,14 @@ out:
return error;
}
-static ssize_t proc_sys_read(struct file *filp, char __user *buf,
- size_t count, loff_t *ppos)
+static ssize_t proc_sys_read(struct kiocb *iocb, struct iov_iter *iter)
{
- return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 0);
+ return proc_sys_call_handler(iocb, iter, 0);
}
-static ssize_t proc_sys_write(struct file *filp, const char __user *buf,
- size_t count, loff_t *ppos)
+static ssize_t proc_sys_write(struct kiocb *iocb, struct iov_iter *iter)
{
- return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 1);
+ return proc_sys_call_handler(iocb, iter, 1);
}
static int proc_sys_open(struct inode *inode, struct file *filp)
@@ -853,8 +851,10 @@ static int proc_sys_getattr(const struct path *path, struct kstat *stat,
static const struct file_operations proc_sys_file_operations = {
.open = proc_sys_open,
.poll = proc_sys_poll,
- .read = proc_sys_read,
- .write = proc_sys_write,
+ .read_iter = proc_sys_read,
+ .write_iter = proc_sys_write,
+ .splice_read = generic_file_splice_read,
+ .splice_write = iter_file_splice_write,
.llseek = default_llseek,
};
diff --git a/fs/read_write.c b/fs/read_write.c
index 19f5c4bf75aa..a669fb049b84 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -419,27 +419,42 @@ static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, lo
return ret;
}
+static int warn_unsupported(struct file *file, const char *op)
+{
+ pr_warn_ratelimited(
+ "kernel %s not supported for file %pD4 (pid: %d comm: %.20s)\n",
+ op, file, current->pid, current->comm);
+ return -EINVAL;
+}
+
ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos)
{
- mm_segment_t old_fs = get_fs();
+ struct kvec iov = {
+ .iov_base = buf,
+ .iov_len = min_t(size_t, count, MAX_RW_COUNT),
+ };
+ struct kiocb kiocb;
+ struct iov_iter iter;
ssize_t ret;
if (WARN_ON_ONCE(!(file->f_mode & FMODE_READ)))
return -EINVAL;
if (!(file->f_mode & FMODE_CAN_READ))
return -EINVAL;
+ /*
+ * Also fail if ->read_iter and ->read are both wired up as that
+ * implies very convoluted semantics.
+ */
+ if (unlikely(!file->f_op->read_iter || file->f_op->read))
+ return warn_unsupported(file, "read");
- if (count > MAX_RW_COUNT)
- count = MAX_RW_COUNT;
- set_fs(KERNEL_DS);
- if (file->f_op->read)
- ret = file->f_op->read(file, (void __user *)buf, count, pos);
- else if (file->f_op->read_iter)
- ret = new_sync_read(file, (void __user *)buf, count, pos);
- else
- ret = -EINVAL;
- set_fs(old_fs);
+ init_sync_kiocb(&kiocb, file);
+ kiocb.ki_pos = pos ? *pos : 0;
+ iov_iter_kvec(&iter, READ, &iov, 1, iov.iov_len);
+ ret = file->f_op->read_iter(&kiocb, &iter);
if (ret > 0) {
+ if (pos)
+ *pos = kiocb.ki_pos;
fsnotify_access(file);
add_rchar(current, ret);
}
@@ -510,28 +525,32 @@ static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t
/* caller is responsible for file_start_write/file_end_write */
ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos)
{
- mm_segment_t old_fs;
- const char __user *p;
+ struct kvec iov = {
+ .iov_base = (void *)buf,
+ .iov_len = min_t(size_t, count, MAX_RW_COUNT),
+ };
+ struct kiocb kiocb;
+ struct iov_iter iter;
ssize_t ret;
if (WARN_ON_ONCE(!(file->f_mode & FMODE_WRITE)))
return -EBADF;
if (!(file->f_mode & FMODE_CAN_WRITE))
return -EINVAL;
+ /*
+ * Also fail if ->write_iter and ->write are both wired up as that
+ * implies very convoluted semantics.
+ */
+ if (unlikely(!file->f_op->write_iter || file->f_op->write))
+ return warn_unsupported(file, "write");
- old_fs = get_fs();
- set_fs(KERNEL_DS);
- p = (__force const char __user *)buf;
- if (count > MAX_RW_COUNT)
- count = MAX_RW_COUNT;
- if (file->f_op->write)
- ret = file->f_op->write(file, p, count, pos);
- else if (file->f_op->write_iter)
- ret = new_sync_write(file, p, count, pos);
- else
- ret = -EINVAL;
- set_fs(old_fs);
+ init_sync_kiocb(&kiocb, file);
+ kiocb.ki_pos = pos ? *pos : 0;
+ iov_iter_kvec(&iter, WRITE, &iov, 1, iov.iov_len);
+ ret = file->f_op->write_iter(&kiocb, &iter);
if (ret > 0) {
+ if (pos)
+ *pos = kiocb.ki_pos;
fsnotify_modify(file);
add_wchar(current, ret);
}
@@ -889,7 +908,7 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
}
EXPORT_SYMBOL(vfs_iter_write);
-ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
+static ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
unsigned long vlen, loff_t *pos, rwf_t flags)
{
struct iovec iovstack[UIO_FASTIOV];
diff --git a/fs/splice.c b/fs/splice.c
index 70cc52af780b..599b740f1098 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -341,89 +341,6 @@ const struct pipe_buf_operations nosteal_pipe_buf_ops = {
};
EXPORT_SYMBOL(nosteal_pipe_buf_ops);
-static ssize_t kernel_readv(struct file *file, const struct kvec *vec,
- unsigned long vlen, loff_t offset)
-{
- mm_segment_t old_fs;
- loff_t pos = offset;
- ssize_t res;
-
- old_fs = get_fs();
- set_fs(KERNEL_DS);
- /* The cast to a user pointer is valid due to the set_fs() */
- res = vfs_readv(file, (const struct iovec __user *)vec, vlen, &pos, 0);
- set_fs(old_fs);
-
- return res;
-}
-
-static ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
- struct pipe_inode_info *pipe, size_t len,
- unsigned int flags)
-{
- struct kvec *vec, __vec[PIPE_DEF_BUFFERS];
- struct iov_iter to;
- struct page **pages;
- unsigned int nr_pages;
- unsigned int mask;
- size_t offset, base, copied = 0;
- ssize_t res;
- int i;
-
- if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
- return -EAGAIN;
-
- /*
- * Try to keep page boundaries matching to source pagecache ones -
- * it probably won't be much help, but...
- */
- offset = *ppos & ~PAGE_MASK;
-
- iov_iter_pipe(&to, READ, pipe, len + offset);
-
- res = iov_iter_get_pages_alloc(&to, &pages, len + offset, &base);
- if (res <= 0)
- return -ENOMEM;
-
- nr_pages = DIV_ROUND_UP(res + base, PAGE_SIZE);
-
- vec = __vec;
- if (nr_pages > PIPE_DEF_BUFFERS) {
- vec = kmalloc_array(nr_pages, sizeof(struct kvec), GFP_KERNEL);
- if (unlikely(!vec)) {
- res = -ENOMEM;
- goto out;
- }
- }
-
- mask = pipe->ring_size - 1;
- pipe->bufs[to.head & mask].offset = offset;
- pipe->bufs[to.head & mask].len -= offset;
-
- for (i = 0; i < nr_pages; i++) {
- size_t this_len = min_t(size_t, len, PAGE_SIZE - offset);
- vec[i].iov_base = page_address(pages[i]) + offset;
- vec[i].iov_len = this_len;
- len -= this_len;
- offset = 0;
- }
-
- res = kernel_readv(in, vec, nr_pages, *ppos);
- if (res > 0) {
- copied = res;
- *ppos += res;
- }
-
- if (vec != __vec)
- kfree(vec);
-out:
- for (i = 0; i < nr_pages; i++)
- put_page(pages[i]);
- kvfree(pages);
- iov_iter_advance(&to, copied); /* truncates and discards */
- return res;
-}
-
/*
* Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos'
* using sendpage(). Return the number of bytes sent.
@@ -807,33 +724,6 @@ done:
EXPORT_SYMBOL(iter_file_splice_write);
-static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
- struct splice_desc *sd)
-{
- int ret;
- void *data;
- loff_t tmp = sd->pos;
-
- data = kmap(buf->page);
- ret = __kernel_write(sd->u.file, data + buf->offset, sd->len, &tmp);
- kunmap(buf->page);
-
- return ret;
-}
-
-static ssize_t default_file_splice_write(struct pipe_inode_info *pipe,
- struct file *out, loff_t *ppos,
- size_t len, unsigned int flags)
-{
- ssize_t ret;
-
- ret = splice_from_pipe(pipe, out, ppos, len, flags, write_pipe_buf);
- if (ret > 0)
- *ppos += ret;
-
- return ret;
-}
-
/**
* generic_splice_sendpage - splice data from a pipe to a socket
* @pipe: pipe to splice from
@@ -855,15 +745,23 @@ ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out,
EXPORT_SYMBOL(generic_splice_sendpage);
+static int warn_unsupported(struct file *file, const char *op)
+{
+ pr_debug_ratelimited(
+ "splice %s not supported for file %pD4 (pid: %d comm: %.20s)\n",
+ op, file, current->pid, current->comm);
+ return -EINVAL;
+}
+
/*
* Attempt to initiate a splice from pipe to file.
*/
static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
loff_t *ppos, size_t len, unsigned int flags)
{
- if (out->f_op->splice_write)
- return out->f_op->splice_write(pipe, out, ppos, len, flags);
- return default_file_splice_write(pipe, out, ppos, len, flags);
+ if (unlikely(!out->f_op->splice_write))
+ return warn_unsupported(out, "write");
+ return out->f_op->splice_write(pipe, out, ppos, len, flags);
}
/*
@@ -885,9 +783,9 @@ static long do_splice_to(struct file *in, loff_t *ppos,
if (unlikely(len > MAX_RW_COUNT))
len = MAX_RW_COUNT;
- if (in->f_op->splice_read)
- return in->f_op->splice_read(in, ppos, pipe, len, flags);
- return default_file_splice_read(in, ppos, pipe, len, flags);
+ if (unlikely(!in->f_op->splice_read))
+ return warn_unsupported(in, "read");
+ return in->f_op->splice_read(in, ppos, pipe, len, flags);
}
/**