diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-10-13 12:36:21 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-10-13 12:36:21 -0700 |
commit | 6ad4bf6ea1609fb539a62f10fca87ddbd53a0315 (patch) | |
tree | 3e507c82cb2b6fdfd8473c394058207203e37cff /include | |
parent | 3ad11d7ac8872b1c8da54494721fad8907ee41f7 (diff) | |
parent | b2e9685283127f30e7f2b466af0046ff9bd27a86 (diff) |
Merge tag 'io_uring-5.10-2020-10-12' of git://git.kernel.dk/linux-block
Pull io_uring updates from Jens Axboe:
- Add blkcg accounting for io-wq offload (Dennis)
- A use-after-free fix for io-wq (Hillf)
- Cancelation fixes and improvements
- Use proper files_struct references for offload
- Cleanup of io_uring_get_socket() since that can now go into our own
header
- SQPOLL fixes and cleanups, and support for sharing the thread
- Improvement to how page accounting is done for registered buffers and
huge pages, accounting the real pinned state
- Series cleaning up the xarray code (Willy)
- Various cleanups, refactoring, and improvements (Pavel)
- Use raw spinlock for io-wq (Sebastian)
- Add support for ring restrictions (Stefano)
* tag 'io_uring-5.10-2020-10-12' of git://git.kernel.dk/linux-block: (62 commits)
io_uring: keep a pointer ref_node in file_data
io_uring: refactor *files_register()'s error paths
io_uring: clean file_data access in files_register
io_uring: don't delay io_init_req() error check
io_uring: clean leftovers after splitting issue
io_uring: remove timeout.list after hrtimer cancel
io_uring: use a separate struct for timeout_remove
io_uring: improve submit_state.ios_left accounting
io_uring: simplify io_file_get()
io_uring: kill extra check in fixed io_file_get()
io_uring: clean up ->files grabbing
io_uring: don't io_prep_async_work() linked reqs
io_uring: Convert advanced XArray uses to the normal API
io_uring: Fix XArray usage in io_uring_add_task_file
io_uring: Fix use of XArray in __io_uring_files_cancel
io_uring: fix break condition for __io_uring_register() waiting
io_uring: no need to call xa_destroy() on empty xarray
io_uring: batch account ->req_issue and task struct references
io_uring: kill callback_head argument for io_req_task_work_add()
io_uring: move req preps out of io_issue_sqe()
...
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/fs.h | 46 | ||||
-rw-r--r-- | include/linux/io_uring.h | 58 | ||||
-rw-r--r-- | include/linux/sched.h | 5 | ||||
-rw-r--r-- | include/uapi/linux/io_uring.h | 61 |
4 files changed, 132 insertions, 38 deletions
diff --git a/include/linux/fs.h b/include/linux/fs.h index 0b1e2f1f388b..2e621d28cd65 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -302,17 +302,20 @@ enum rw_hint { WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME, }; -#define IOCB_EVENTFD (1 << 0) -#define IOCB_APPEND (1 << 1) -#define IOCB_DIRECT (1 << 2) -#define IOCB_HIPRI (1 << 3) -#define IOCB_DSYNC (1 << 4) -#define IOCB_SYNC (1 << 5) -#define IOCB_WRITE (1 << 6) -#define IOCB_NOWAIT (1 << 7) +/* Match RWF_* bits to IOCB bits */ +#define IOCB_HIPRI (__force int) RWF_HIPRI +#define IOCB_DSYNC (__force int) RWF_DSYNC +#define IOCB_SYNC (__force int) RWF_SYNC +#define IOCB_NOWAIT (__force int) RWF_NOWAIT +#define IOCB_APPEND (__force int) RWF_APPEND + +/* non-RWF related bits - start at 16 */ +#define IOCB_EVENTFD (1 << 16) +#define IOCB_DIRECT (1 << 17) +#define IOCB_WRITE (1 << 18) /* iocb->ki_waitq is valid */ -#define IOCB_WAITQ (1 << 8) -#define IOCB_NOIO (1 << 9) +#define IOCB_WAITQ (1 << 19) +#define IOCB_NOIO (1 << 20) struct kiocb { struct file *ki_filp; @@ -3302,6 +3305,9 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags) { int kiocb_flags = 0; + /* make sure there's no overlap between RWF and private IOCB flags */ + BUILD_BUG_ON((__force int) RWF_SUPPORTED & IOCB_EVENTFD); + if (!flags) return 0; if (unlikely(flags & ~RWF_SUPPORTED)) @@ -3310,16 +3316,11 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags) if (flags & RWF_NOWAIT) { if (!(ki->ki_filp->f_mode & FMODE_NOWAIT)) return -EOPNOTSUPP; - kiocb_flags |= IOCB_NOWAIT | IOCB_NOIO; + kiocb_flags |= IOCB_NOIO; } - if (flags & RWF_HIPRI) - kiocb_flags |= IOCB_HIPRI; - if (flags & RWF_DSYNC) - kiocb_flags |= IOCB_DSYNC; + kiocb_flags |= (__force int) (flags & RWF_SUPPORTED); if (flags & RWF_SYNC) - kiocb_flags |= (IOCB_DSYNC | IOCB_SYNC); - if (flags & RWF_APPEND) - kiocb_flags |= IOCB_APPEND; + kiocb_flags |= IOCB_DSYNC; ki->ki_flags |= kiocb_flags; return 0; @@ -3499,15 +3500,6 @@ extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len, extern int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice); -#if defined(CONFIG_IO_URING) -extern struct sock *io_uring_get_socket(struct file *file); -#else -static inline struct sock *io_uring_get_socket(struct file *file) -{ - return NULL; -} -#endif - int vfs_ioc_setflags_prepare(struct inode *inode, unsigned int oldflags, unsigned int flags); diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h new file mode 100644 index 000000000000..96315cfaf6d1 --- /dev/null +++ b/include/linux/io_uring.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _LINUX_IO_URING_H +#define _LINUX_IO_URING_H + +#include <linux/sched.h> +#include <linux/xarray.h> +#include <linux/percpu-refcount.h> + +struct io_uring_task { + /* submission side */ + struct xarray xa; + struct wait_queue_head wait; + struct file *last; + atomic_long_t req_issue; + + /* completion side */ + bool in_idle ____cacheline_aligned_in_smp; + atomic_long_t req_complete; +}; + +#if defined(CONFIG_IO_URING) +struct sock *io_uring_get_socket(struct file *file); +void __io_uring_task_cancel(void); +void __io_uring_files_cancel(struct files_struct *files); +void __io_uring_free(struct task_struct *tsk); + +static inline void io_uring_task_cancel(void) +{ + if (current->io_uring && !xa_empty(¤t->io_uring->xa)) + __io_uring_task_cancel(); +} +static inline void io_uring_files_cancel(struct files_struct *files) +{ + if (current->io_uring && !xa_empty(¤t->io_uring->xa)) + __io_uring_files_cancel(files); +} +static inline void io_uring_free(struct task_struct *tsk) +{ + if (tsk->io_uring) + __io_uring_free(tsk); +} +#else +static inline struct sock *io_uring_get_socket(struct file *file) +{ + return NULL; +} +static inline void io_uring_task_cancel(void) +{ +} +static inline void io_uring_files_cancel(struct files_struct *files) +{ +} +static inline void io_uring_free(struct task_struct *tsk) +{ +} +#endif + +#endif diff --git a/include/linux/sched.h b/include/linux/sched.h index d383cf09e78f..829b0697d19c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -63,6 +63,7 @@ struct sighand_struct; struct signal_struct; struct task_delay_info; struct task_group; +struct io_uring_task; /* * Task state bitmask. NOTE! These bits are also @@ -935,6 +936,10 @@ struct task_struct { /* Open file information: */ struct files_struct *files; +#ifdef CONFIG_IO_URING + struct io_uring_task *io_uring; +#endif + /* Namespaces: */ struct nsproxy *nsproxy; diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index d65fde732518..98d8e06dea22 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -95,6 +95,7 @@ enum { #define IORING_SETUP_CQSIZE (1U << 3) /* app defines CQ size */ #define IORING_SETUP_CLAMP (1U << 4) /* clamp SQ/CQ ring sizes */ #define IORING_SETUP_ATTACH_WQ (1U << 5) /* attach to existing wq */ +#define IORING_SETUP_R_DISABLED (1U << 6) /* start with ring disabled */ enum { IORING_OP_NOP, @@ -224,6 +225,7 @@ struct io_cqring_offsets { */ #define IORING_ENTER_GETEVENTS (1U << 0) #define IORING_ENTER_SQ_WAKEUP (1U << 1) +#define IORING_ENTER_SQ_WAIT (1U << 2) /* * Passed in for io_uring_setup(2). Copied back with updated info on success @@ -255,17 +257,24 @@ struct io_uring_params { /* * io_uring_register(2) opcodes and arguments */ -#define IORING_REGISTER_BUFFERS 0 -#define IORING_UNREGISTER_BUFFERS 1 -#define IORING_REGISTER_FILES 2 -#define IORING_UNREGISTER_FILES 3 -#define IORING_REGISTER_EVENTFD 4 -#define IORING_UNREGISTER_EVENTFD 5 -#define IORING_REGISTER_FILES_UPDATE 6 -#define IORING_REGISTER_EVENTFD_ASYNC 7 -#define IORING_REGISTER_PROBE 8 -#define IORING_REGISTER_PERSONALITY 9 -#define IORING_UNREGISTER_PERSONALITY 10 +enum { + IORING_REGISTER_BUFFERS = 0, + IORING_UNREGISTER_BUFFERS = 1, + IORING_REGISTER_FILES = 2, + IORING_UNREGISTER_FILES = 3, + IORING_REGISTER_EVENTFD = 4, + IORING_UNREGISTER_EVENTFD = 5, + IORING_REGISTER_FILES_UPDATE = 6, + IORING_REGISTER_EVENTFD_ASYNC = 7, + IORING_REGISTER_PROBE = 8, + IORING_REGISTER_PERSONALITY = 9, + IORING_UNREGISTER_PERSONALITY = 10, + IORING_REGISTER_RESTRICTIONS = 11, + IORING_REGISTER_ENABLE_RINGS = 12, + + /* this goes last */ + IORING_REGISTER_LAST +}; struct io_uring_files_update { __u32 offset; @@ -290,4 +299,34 @@ struct io_uring_probe { struct io_uring_probe_op ops[0]; }; +struct io_uring_restriction { + __u16 opcode; + union { + __u8 register_op; /* IORING_RESTRICTION_REGISTER_OP */ + __u8 sqe_op; /* IORING_RESTRICTION_SQE_OP */ + __u8 sqe_flags; /* IORING_RESTRICTION_SQE_FLAGS_* */ + }; + __u8 resv; + __u32 resv2[3]; +}; + +/* + * io_uring_restriction->opcode values + */ +enum { + /* Allow an io_uring_register(2) opcode */ + IORING_RESTRICTION_REGISTER_OP = 0, + + /* Allow an sqe opcode */ + IORING_RESTRICTION_SQE_OP = 1, + + /* Allow sqe flags */ + IORING_RESTRICTION_SQE_FLAGS_ALLOWED = 2, + + /* Require sqe flags (these flags must be set on each submission) */ + IORING_RESTRICTION_SQE_FLAGS_REQUIRED = 3, + + IORING_RESTRICTION_LAST +}; + #endif |