summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-07-22 10:46:30 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-07-22 10:46:30 -0700
commitbdd1d82e7d02bd2764a68a5cc54533dfc2ba452a (patch)
tree2fb501fab4fe4c34032b0f5a231d2c965303b85e
parent725d444db6b0a8ed98461583ed1e6f12b8a7f0e9 (diff)
parent07e981137f17e5275b6fa5fd0c28b0ddb4519702 (diff)
Merge tag 'io_uring-6.5-2023-07-21' of git://git.kernel.dk/linux
Pull io_uring fixes from Jens Axboe: - Fix for io-wq not always honoring REQ_F_NOWAIT, if it was set and punted directly (eg via DRAIN) (me) - Capability check fix (Ondrej) - Regression fix for the mmap changes that went into 6.4, which apparently broke IA64 (Helge) * tag 'io_uring-6.5-2023-07-21' of git://git.kernel.dk/linux: ia64: mmap: Consider pgoff when searching for free mapping io_uring: Fix io_uring mmap() by using architecture-provided get_unmapped_area() io_uring: treat -EAGAIN for REQ_F_NOWAIT as final for io-wq io_uring: don't audit the capability check in io_uring_create()
-rw-r--r--arch/ia64/kernel/sys_ia64.c2
-rw-r--r--arch/parisc/kernel/sys_parisc.c15
-rw-r--r--io_uring/io_uring.c52
3 files changed, 37 insertions, 32 deletions
diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c
index 6e948d015332..eb561cc93632 100644
--- a/arch/ia64/kernel/sys_ia64.c
+++ b/arch/ia64/kernel/sys_ia64.c
@@ -63,7 +63,7 @@ arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len
info.low_limit = addr;
info.high_limit = TASK_SIZE;
info.align_mask = align_mask;
- info.align_offset = 0;
+ info.align_offset = pgoff << PAGE_SHIFT;
return vm_unmapped_area(&info);
}
diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c
index 9915062d5243..ca2d537e25b1 100644
--- a/arch/parisc/kernel/sys_parisc.c
+++ b/arch/parisc/kernel/sys_parisc.c
@@ -27,12 +27,17 @@
#include <linux/elf-randomize.h>
/*
- * Construct an artificial page offset for the mapping based on the physical
+ * Construct an artificial page offset for the mapping based on the virtual
* address of the kernel file mapping variable.
+ * If filp is zero the calculated pgoff value aliases the memory of the given
+ * address. This is useful for io_uring where the mapping shall alias a kernel
+ * address and a userspace adress where both the kernel and the userspace
+ * access the same memory region.
*/
-#define GET_FILP_PGOFF(filp) \
- (filp ? (((unsigned long) filp->f_mapping) >> 8) \
- & ((SHM_COLOUR-1) >> PAGE_SHIFT) : 0UL)
+#define GET_FILP_PGOFF(filp, addr) \
+ ((filp ? (((unsigned long) filp->f_mapping) >> 8) \
+ & ((SHM_COLOUR-1) >> PAGE_SHIFT) : 0UL) \
+ + (addr >> PAGE_SHIFT))
static unsigned long shared_align_offset(unsigned long filp_pgoff,
unsigned long pgoff)
@@ -112,7 +117,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp,
do_color_align = 0;
if (filp || (flags & MAP_SHARED))
do_color_align = 1;
- filp_pgoff = GET_FILP_PGOFF(filp);
+ filp_pgoff = GET_FILP_PGOFF(filp, addr);
if (flags & MAP_FIXED) {
/* Even MAP_FIXED mappings must reside within TASK_SIZE */
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 7505de2428e0..89a611541bc4 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1948,6 +1948,14 @@ fail:
ret = io_issue_sqe(req, issue_flags);
if (ret != -EAGAIN)
break;
+
+ /*
+ * If REQ_F_NOWAIT is set, then don't wait or retry with
+ * poll. -EAGAIN is final for that case.
+ */
+ if (req->flags & REQ_F_NOWAIT)
+ break;
+
/*
* We can get EAGAIN for iopolled IO even though we're
* forcing a sync submission from here, since we can't
@@ -3429,8 +3437,6 @@ static unsigned long io_uring_mmu_get_unmapped_area(struct file *filp,
unsigned long addr, unsigned long len,
unsigned long pgoff, unsigned long flags)
{
- const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags);
- struct vm_unmapped_area_info info;
void *ptr;
/*
@@ -3445,32 +3451,26 @@ static unsigned long io_uring_mmu_get_unmapped_area(struct file *filp,
if (IS_ERR(ptr))
return -ENOMEM;
- info.flags = VM_UNMAPPED_AREA_TOPDOWN;
- info.length = len;
- info.low_limit = max(PAGE_SIZE, mmap_min_addr);
- info.high_limit = arch_get_mmap_base(addr, current->mm->mmap_base);
+ /*
+ * Some architectures have strong cache aliasing requirements.
+ * For such architectures we need a coherent mapping which aliases
+ * kernel memory *and* userspace memory. To achieve that:
+ * - use a NULL file pointer to reference physical memory, and
+ * - use the kernel virtual address of the shared io_uring context
+ * (instead of the userspace-provided address, which has to be 0UL
+ * anyway).
+ * For architectures without such aliasing requirements, the
+ * architecture will return any suitable mapping because addr is 0.
+ */
+ filp = NULL;
+ flags |= MAP_SHARED;
+ pgoff = 0; /* has been translated to ptr above */
#ifdef SHM_COLOUR
- info.align_mask = PAGE_MASK & (SHM_COLOUR - 1UL);
+ addr = (uintptr_t) ptr;
#else
- info.align_mask = PAGE_MASK & (SHMLBA - 1UL);
+ addr = 0UL;
#endif
- info.align_offset = (unsigned long) ptr;
-
- /*
- * A failed mmap() very likely causes application failure,
- * so fall back to the bottom-up function here. This scenario
- * can happen with large stack limits and large mmap()
- * allocations.
- */
- addr = vm_unmapped_area(&info);
- if (offset_in_page(addr)) {
- info.flags = 0;
- info.low_limit = TASK_UNMAPPED_BASE;
- info.high_limit = mmap_end;
- addr = vm_unmapped_area(&info);
- }
-
- return addr;
+ return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
}
#else /* !CONFIG_MMU */
@@ -3870,7 +3870,7 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
ctx->syscall_iopoll = 1;
ctx->compat = in_compat_syscall();
- if (!capable(CAP_IPC_LOCK))
+ if (!ns_capable_noaudit(&init_user_ns, CAP_IPC_LOCK))
ctx->user = get_uid(current_user());
/*