author    Linus Torvalds <torvalds@linux-foundation.org>  2024-07-16 12:59:20 -0700
committer Linus Torvalds <torvalds@linux-foundation.org>  2024-07-16 12:59:20 -0700
commit    72fda6c8e553699f6ba8d3ddc34f0bbe7a5898df (patch)
tree      3ba8d2f3ed3417390f404131f4efd2f00fd503a7 /fs
parent    f83e38fc9f1092f8a7a65ff2ea6a1ea6502efaf0 (diff)
parent    21f93108306026b8066db31c24a097192c8c36c7 (diff)
Merge tag 'execve-v6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux
Pull execve updates from Kees Cook:

 - Use value of kernel.randomize_va_space once per exec (Alexey Dobriyan)

 - Honor PT_LOAD alignment for static PIE

 - Make bprm->argmin only visible under CONFIG_MMU

 - Add KUnit testing of bprm_stack_limits()

* tag 'execve-v6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux:
  exec: Avoid pathological argc, envc, and bprm->p values
  execve: Keep bprm->argmin behind CONFIG_MMU
  ELF: fix kernel.randomize_va_space double read
  exec: Add KUnit test for bprm_stack_limits()
  binfmt_elf: Honor PT_LOAD alignment for static PIE
  binfmt_elf: Calculate total_size earlier
  selftests/exec: Build both static and non-static load_address tests
Diffstat (limited to 'fs')
-rw-r--r--  fs/Kconfig.binfmt |   8
-rw-r--r--  fs/binfmt_elf.c   |  99
-rw-r--r--  fs/exec.c         |  49
-rw-r--r--  fs/exec_test.c    | 141
4 files changed, 258 insertions(+), 39 deletions(-)
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index f5693164ca9a..bd2f530e5740 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -176,4 +176,12 @@ config COREDUMP
certainly want to say Y here. Not necessary on systems that never
need debugging or only ever run flawless code.
+config EXEC_KUNIT_TEST
+ bool "Build execve tests" if !KUNIT_ALL_TESTS
+ depends on KUNIT=y
+ default KUNIT_ALL_TESTS
+ help
+ This builds the exec KUnit tests, which test boundary conditions
+ of various aspects of the exec internals.
+
endmenu
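To exercise the new option, here is a minimal .kunitconfig sketch and the usual in-tree kunit.py invocation (paths as in a standard kernel tree; the trailing 'exec' filter glob is optional):

    # .kunitconfig (sketch)
    CONFIG_KUNIT=y
    CONFIG_EXEC_KUNIT_TEST=y

    # Build and run under UML via the KUnit wrapper:
    ./tools/testing/kunit/kunit.py run --kunitconfig=.kunitconfig exec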
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 6fdec541f8bf..5ae8045f4df4 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1003,7 +1003,8 @@ out_free_interp:
if (elf_read_implies_exec(*elf_ex, executable_stack))
current->personality |= READ_IMPLIES_EXEC;
- if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
+ const int snapshot_randomize_va_space = READ_ONCE(randomize_va_space);
+ if (!(current->personality & ADDR_NO_RANDOMIZE) && snapshot_randomize_va_space)
current->flags |= PF_RANDOMIZE;
setup_new_exec(bprm);
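The snapshot above closes a race against a concurrent sysctl write: the PF_RANDOMIZE decision here and the later snapshot_randomize_va_space > 1 test must observe the same value. A standalone sketch of the hazard, using a plain volatile variable as a stand-in for the sysctl (illustrative only, not kernel code):

    #include <stdio.h>

    static volatile int randomize_va_space = 2;  /* sysctl stand-in */

    int main(void)
    {
            int pf_randomize = randomize_va_space != 0;  /* read #1 */
            randomize_va_space = 0;      /* simulated concurrent write */
            int do_brk_rnd = randomize_va_space > 1;     /* read #2 disagrees */

            int snap = randomize_va_space;  /* snapshot pattern: read once */
            printf("split reads: %d vs %d; snapshot: %d and %d\n",
                   pf_randomize, do_brk_rnd, snap != 0, snap > 1);
            return 0;
    }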
@@ -1061,10 +1062,40 @@ out_free_interp:
* Header for ET_DYN binaries to calculate the
* randomization (load_bias) for all the LOAD
* Program Headers.
+ */
+
+ /*
+ * Calculate the entire size of the ELF mapping
+ * (total_size), used for the initial mapping,
+ * due to load_addr_set which is set to true later
+ * once the initial mapping is performed.
+ *
+ * Note that this is only sensible when the LOAD
+ * segments are contiguous (or overlapping). If
+ * used for LOADs that are far apart, this would
+ * cause the holes between LOADs to be mapped,
+ * running the risk of having the mapping fail,
+ * as it would be larger than the ELF file itself.
*
+ * As a result, only ET_DYN does this, since
+ * some ET_EXEC (e.g. ia64) may have large virtual
+ * memory holes between LOADs.
+ *
+ */
+ total_size = total_mapping_size(elf_phdata,
+ elf_ex->e_phnum);
+ if (!total_size) {
+ retval = -EINVAL;
+ goto out_free_dentry;
+ }
+
+ /* Calculate any requested alignment. */
+ alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
+
+ /*
* There are effectively two types of ET_DYN
- * binaries: programs (i.e. PIE: ET_DYN with INTERP)
- * and loaders (ET_DYN without INTERP, since they
+ * binaries: programs (i.e. PIE: ET_DYN with PT_INTERP)
+ * and loaders (ET_DYN without PT_INTERP, since they
* _are_ the ELF interpreter). The loaders must
* be loaded away from programs since the program
* may otherwise collide with the loader (especially
@@ -1084,15 +1115,44 @@ out_free_interp:
* without MAP_FIXED nor MAP_FIXED_NOREPLACE).
*/
if (interpreter) {
+ /* On ET_DYN with PT_INTERP, we do the ASLR. */
load_bias = ELF_ET_DYN_BASE;
if (current->flags & PF_RANDOMIZE)
load_bias += arch_mmap_rnd();
- alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
+ /* Adjust alignment as requested. */
if (alignment)
load_bias &= ~(alignment - 1);
elf_flags |= MAP_FIXED_NOREPLACE;
- } else
- load_bias = 0;
+ } else {
+ /*
+ * For ET_DYN without PT_INTERP, we rely on
+ * the architecture's (potentially ASLR) mmap
+ * base address (via a load_bias of 0).
+ *
+ * When a large alignment is requested, we
+ * must do the allocation at address "0" right
+ * now to discover where things will load so
+ * that we can adjust the resulting alignment.
+ * In this case (load_bias != 0), we can use
+ * MAP_FIXED_NOREPLACE to make sure the mapping
+ * doesn't collide with anything.
+ */
+ if (alignment > ELF_MIN_ALIGN) {
+ load_bias = elf_load(bprm->file, 0, elf_ppnt,
+ elf_prot, elf_flags, total_size);
+ if (BAD_ADDR(load_bias)) {
+ retval = IS_ERR_VALUE(load_bias) ?
+ PTR_ERR((void*)load_bias) : -EINVAL;
+ goto out_free_dentry;
+ }
+ vm_munmap(load_bias, total_size);
+ /* Adjust alignment as requested. */
+ if (alignment)
+ load_bias &= ~(alignment - 1);
+ elf_flags |= MAP_FIXED_NOREPLACE;
+ } else
+ load_bias = 0;
+ }
/*
* Since load_bias is used for all subsequent loading
@@ -1102,31 +1162,6 @@ out_free_interp:
* is then page aligned.
*/
load_bias = ELF_PAGESTART(load_bias - vaddr);
-
- /*
- * Calculate the entire size of the ELF mapping
- * (total_size), used for the initial mapping,
- * due to load_addr_set which is set to true later
- * once the initial mapping is performed.
- *
- * Note that this is only sensible when the LOAD
- * segments are contiguous (or overlapping). If
- * used for LOADs that are far apart, this would
- * cause the holes between LOADs to be mapped,
- * running the risk of having the mapping fail,
- * as it would be larger than the ELF file itself.
- *
- * As a result, only ET_DYN does this, since
- * some ET_EXEC (e.g. ia64) may have large virtual
- * memory holes between LOADs.
- *
- */
- total_size = total_mapping_size(elf_phdata,
- elf_ex->e_phnum);
- if (!total_size) {
- retval = -EINVAL;
- goto out_free_dentry;
- }
}
error = elf_load(bprm->file, load_bias + vaddr, elf_ppnt,
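The load_bias &= ~(alignment - 1) idiom used in both branches rounds the chosen base down to the requested power-of-two alignment. A standalone illustration with hypothetical values (in the kernel, alignment comes from maximum_alignment() scanning the PT_LOAD p_align fields):

    #include <stdio.h>

    int main(void)
    {
            unsigned long alignment = 0x200000;        /* e.g. a 2 MiB p_align */
            unsigned long load_bias = 0x7f1234567000;  /* hypothetical random base */

            load_bias &= ~(alignment - 1);  /* power-of-two round-down */
            printf("aligned base: 0x%lx\n", load_bias);  /* 0x7f1234400000 */
            return 0;
    }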
@@ -1250,7 +1285,7 @@ out_free_interp:
mm->end_data = end_data;
mm->start_stack = bprm->p;
- if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
+ if ((current->flags & PF_RANDOMIZE) && (snapshot_randomize_va_space > 1)) {
/*
* For architectures with ELF randomization, when executing
* a loader directly (i.e. no interpreter listed in ELF
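For reference, the total_size used for the initial mapping spans from the page start of the lowest PT_LOAD to the end of the highest one. A simplified sketch modeled on the kernel's total_mapping_size() helper (kernel types and macros paraphrased):

    /* Returns 0 when no PT_LOAD entries exist, which is why the caller
     * above treats !total_size as -EINVAL. */
    static unsigned long total_mapping_size(const struct elf_phdr *phdr, int nr)
    {
            unsigned long min_addr = -1UL, max_addr = 0;
            int i, pt_load = 0;

            for (i = 0; i < nr; i++) {
                    if (phdr[i].p_type != PT_LOAD)
                            continue;
                    if (ELF_PAGESTART(phdr[i].p_vaddr) < min_addr)
                            min_addr = ELF_PAGESTART(phdr[i].p_vaddr);
                    if (phdr[i].p_vaddr + phdr[i].p_memsz > max_addr)
                            max_addr = phdr[i].p_vaddr + phdr[i].p_memsz;
                    pt_load = 1;
            }
            return pt_load ? max_addr - min_addr : 0;
    }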
diff --git a/fs/exec.c b/fs/exec.c
index 4dee205452e2..a47d0e4c54f6 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -486,6 +486,35 @@ static int count_strings_kernel(const char *const *argv)
return i;
}
+static inline int bprm_set_stack_limit(struct linux_binprm *bprm,
+ unsigned long limit)
+{
+#ifdef CONFIG_MMU
+ /* Avoid a pathological bprm->p. */
+ if (bprm->p < limit)
+ return -E2BIG;
+ bprm->argmin = bprm->p - limit;
+#endif
+ return 0;
+}
+static inline bool bprm_hit_stack_limit(struct linux_binprm *bprm)
+{
+#ifdef CONFIG_MMU
+ return bprm->p < bprm->argmin;
+#else
+ return false;
+#endif
+}
+
+/*
+ * Calculate bprm->argmin from:
+ * - _STK_LIM
+ * - ARG_MAX
+ * - bprm->rlim_stack.rlim_cur
+ * - bprm->argc
+ * - bprm->envc
+ * - bprm->p
+ */
static int bprm_stack_limits(struct linux_binprm *bprm)
{
unsigned long limit, ptr_size;
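Note the bprm->p < limit test in bprm_set_stack_limit(): it guards the unsigned subtraction that derives argmin. A tiny standalone demonstration of the wraparound it prevents (hypothetical values):

    #include <stdio.h>

    int main(void)
    {
            unsigned long p = 64, limit = 128;  /* pathological: p < limit */

            /* Unguarded subtraction wraps to a huge argmin, making the
             * later "p < argmin" stack check behave nonsensically. */
            unsigned long argmin = p - limit;
            printf("wrapped argmin: 0x%lx\n", argmin);

            if (p < limit)  /* the guard: fail cleanly instead */
                    puts("reject with -E2BIG");
            return 0;
    }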
@@ -505,6 +534,9 @@ static int bprm_stack_limits(struct linux_binprm *bprm)
* of argument strings even with small stacks
*/
limit = max_t(unsigned long, limit, ARG_MAX);
+ /* Reject totally pathological counts. */
+ if (bprm->argc < 0 || bprm->envc < 0)
+ return -E2BIG;
/*
* We must account for the size of all the argv and envp pointers to
* the argv and envp strings, since they will also take up space in
@@ -518,13 +550,14 @@ static int bprm_stack_limits(struct linux_binprm *bprm)
* argc can never be 0, to keep them from walking envp by accident.
* See do_execveat_common().
*/
- ptr_size = (max(bprm->argc, 1) + bprm->envc) * sizeof(void *);
+ if (check_add_overflow(max(bprm->argc, 1), bprm->envc, &ptr_size) ||
+ check_mul_overflow(ptr_size, sizeof(void *), &ptr_size))
+ return -E2BIG;
if (limit <= ptr_size)
return -E2BIG;
limit -= ptr_size;
- bprm->argmin = bprm->p - limit;
- return 0;
+ return bprm_set_stack_limit(bprm, limit);
}
/*
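check_add_overflow() and check_mul_overflow() are thin wrappers around the compiler overflow builtins; a userspace sketch of the 32-bit case they close off here (the same 0x20000001 counts probed by the KUnit table below):

    #include <stdio.h>

    int main(void)
    {
            /* With 32-bit arithmetic, (0x20000001 + 0x20000001) pointers
             * times sizeof(void *) == 4 wraps to just 8 bytes, which would
             * sneak past a plain "ptr_size <= limit" comparison. */
            unsigned int ptrs, bytes;

            if (__builtin_add_overflow(0x20000001u, 0x20000001u, &ptrs) ||
                __builtin_mul_overflow(ptrs, 4u, &bytes))
                    puts("overflow caught: reject with -E2BIG");
            else
                    printf("unchecked ptr_size: %u bytes\n", bytes);
            return 0;
    }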
@@ -562,10 +595,8 @@ static int copy_strings(int argc, struct user_arg_ptr argv,
pos = bprm->p;
str += len;
bprm->p -= len;
-#ifdef CONFIG_MMU
- if (bprm->p < bprm->argmin)
+ if (bprm_hit_stack_limit(bprm))
goto out;
-#endif
while (len > 0) {
int offset, bytes_to_copy;
@@ -640,7 +671,7 @@ int copy_string_kernel(const char *arg, struct linux_binprm *bprm)
/* We're going to work our way backwards. */
arg += len;
bprm->p -= len;
- if (IS_ENABLED(CONFIG_MMU) && bprm->p < bprm->argmin)
+ if (bprm_hit_stack_limit(bprm))
return -E2BIG;
while (len > 0) {
@@ -2203,3 +2234,7 @@ static int __init init_fs_exec_sysctls(void)
fs_initcall(init_fs_exec_sysctls);
#endif /* CONFIG_SYSCTL */
+
+#ifdef CONFIG_EXEC_KUNIT_TEST
+#include "exec_test.c"
+#endif
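From userspace, these limits surface as execve() failing with E2BIG. A quick sketch, assuming the common 8 MiB RLIMIT_STACK default (which yields a 2 MiB argv+envp budget via rlim_cur / 4):

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
            /* ~30000 args x 100 bytes ~= 3 MiB of strings: over budget. */
            enum { NARGS = 30000 };
            static char *argv[NARGS + 2];
            static char arg[100];

            memset(arg, 'A', sizeof(arg) - 1);
            argv[0] = "/bin/true";
            for (int i = 1; i <= NARGS; i++)
                    argv[i] = arg;
            argv[NARGS + 1] = NULL;

            execve("/bin/true", argv, NULL);
            if (errno == E2BIG)
                    puts("execve: E2BIG, as the stack limits intend");
            return 1;
    }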
diff --git a/fs/exec_test.c b/fs/exec_test.c
new file mode 100644
index 000000000000..7c77d039680b
--- /dev/null
+++ b/fs/exec_test.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <kunit/test.h>
+
+struct bprm_stack_limits_result {
+ struct linux_binprm bprm;
+ int expected_rc;
+ unsigned long expected_argmin;
+};
+
+static const struct bprm_stack_limits_result bprm_stack_limits_results[] = {
+ /* Negative argc/envc counts produce -E2BIG */
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ULONG_MAX,
+ .argc = INT_MIN, .envc = INT_MIN }, .expected_rc = -E2BIG },
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ULONG_MAX,
+ .argc = 5, .envc = -1 }, .expected_rc = -E2BIG },
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ULONG_MAX,
+ .argc = -1, .envc = 10 }, .expected_rc = -E2BIG },
+ /* The max value of argc or envc is MAX_ARG_STRINGS. */
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ULONG_MAX,
+ .argc = INT_MAX, .envc = INT_MAX }, .expected_rc = -E2BIG },
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ULONG_MAX,
+ .argc = MAX_ARG_STRINGS, .envc = MAX_ARG_STRINGS }, .expected_rc = -E2BIG },
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ULONG_MAX,
+ .argc = 0, .envc = MAX_ARG_STRINGS }, .expected_rc = -E2BIG },
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ULONG_MAX,
+ .argc = MAX_ARG_STRINGS, .envc = 0 }, .expected_rc = -E2BIG },
+ /*
+ * On 32-bit system these argc and envc counts, while likely impossible
+ * to represent within the associated TASK_SIZE, could overflow the
+ * limit calculation, and bypass the ptr_size <= limit check.
+ */
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ULONG_MAX,
+ .argc = 0x20000001, .envc = 0x20000001 }, .expected_rc = -E2BIG },
+#ifdef CONFIG_MMU
+ /* Make sure a pathological bprm->p doesn't cause an overflow. */
+ { { .p = sizeof(void *), .rlim_stack.rlim_cur = ULONG_MAX,
+ .argc = 10, .envc = 10 }, .expected_rc = -E2BIG },
+#endif
+ /*
+ * 0 rlim_stack will get raised to ARG_MAX. With 1 string pointer,
+ * we should see p - ARG_MAX + sizeof(void *).
+ */
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 0,
+ .argc = 1, .envc = 0 }, .expected_argmin = ULONG_MAX - ARG_MAX + sizeof(void *)},
+ /* Validate that argc is always raised to a minimum of 1. */
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 0,
+ .argc = 0, .envc = 0 }, .expected_argmin = ULONG_MAX - ARG_MAX + sizeof(void *)},
+ /*
+ * 0 rlim_stack will get raised to ARG_MAX. With pointers filling ARG_MAX,
+ * we should see -E2BIG. (Note argc is always raised to at least 1.)
+ */
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 0,
+ .argc = ARG_MAX / sizeof(void *), .envc = 0 }, .expected_rc = -E2BIG },
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 0,
+ .argc = 0, .envc = ARG_MAX / sizeof(void *) - 1 }, .expected_rc = -E2BIG },
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 0,
+ .argc = ARG_MAX / sizeof(void *) + 1, .envc = 0 }, .expected_rc = -E2BIG },
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 0,
+ .argc = 0, .envc = ARG_MAX / sizeof(void *) }, .expected_rc = -E2BIG },
+ /* And with one less, we see space for exactly 1 pointer. */
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 0,
+ .argc = (ARG_MAX / sizeof(void *)) - 1, .envc = 0 },
+ .expected_argmin = ULONG_MAX - sizeof(void *) },
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 0,
+ .argc = 0, .envc = (ARG_MAX / sizeof(void *)) - 2, },
+ .expected_argmin = ULONG_MAX - sizeof(void *) },
+ /* If we raise rlim_stack / 4 to exactly ARG_MAX, nothing changes. */
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ARG_MAX * 4,
+ .argc = ARG_MAX / sizeof(void *), .envc = 0 }, .expected_rc = -E2BIG },
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ARG_MAX * 4,
+ .argc = 0, .envc = ARG_MAX / sizeof(void *) - 1 }, .expected_rc = -E2BIG },
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ARG_MAX * 4,
+ .argc = ARG_MAX / sizeof(void *) + 1, .envc = 0 }, .expected_rc = -E2BIG },
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ARG_MAX * 4,
+ .argc = 0, .envc = ARG_MAX / sizeof(void *) }, .expected_rc = -E2BIG },
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ARG_MAX * 4,
+ .argc = (ARG_MAX / sizeof(void *)) - 1, .envc = 0 },
+ .expected_argmin = ULONG_MAX - sizeof(void *) },
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ARG_MAX * 4,
+ .argc = 0, .envc = (ARG_MAX / sizeof(void *)) - 2, },
+ .expected_argmin = ULONG_MAX - sizeof(void *) },
+ /* But raising it another pointer * 4 will provide space for 1 more pointer. */
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = (ARG_MAX + sizeof(void *)) * 4,
+ .argc = ARG_MAX / sizeof(void *), .envc = 0 },
+ .expected_argmin = ULONG_MAX - sizeof(void *) },
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = (ARG_MAX + sizeof(void *)) * 4,
+ .argc = 0, .envc = ARG_MAX / sizeof(void *) - 1 },
+ .expected_argmin = ULONG_MAX - sizeof(void *) },
+ /* Raising rlim_stack / 4 to _STK_LIM / 4 * 3 will see more space. */
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 4 * (_STK_LIM / 4 * 3),
+ .argc = 0, .envc = 0 },
+ .expected_argmin = ULONG_MAX - (_STK_LIM / 4 * 3) + sizeof(void *) },
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 4 * (_STK_LIM / 4 * 3),
+ .argc = 0, .envc = 0 },
+ .expected_argmin = ULONG_MAX - (_STK_LIM / 4 * 3) + sizeof(void *) },
+ /* But raising it any further will see no increase. */
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 4 * (_STK_LIM / 4 * 3 + sizeof(void *)),
+ .argc = 0, .envc = 0 },
+ .expected_argmin = ULONG_MAX - (_STK_LIM / 4 * 3) + sizeof(void *) },
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 4 * (_STK_LIM / 4 * 3 + sizeof(void *)),
+ .argc = 0, .envc = 0 },
+ .expected_argmin = ULONG_MAX - (_STK_LIM / 4 * 3) + sizeof(void *) },
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 4 * _STK_LIM,
+ .argc = 0, .envc = 0 },
+ .expected_argmin = ULONG_MAX - (_STK_LIM / 4 * 3) + sizeof(void *) },
+ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 4 * _STK_LIM,
+ .argc = 0, .envc = 0 },
+ .expected_argmin = ULONG_MAX - (_STK_LIM / 4 * 3) + sizeof(void *) },
+};
+
+static void exec_test_bprm_stack_limits(struct kunit *test)
+{
+ /* Double-check the constants. */
+ KUNIT_EXPECT_EQ(test, _STK_LIM, SZ_8M);
+ KUNIT_EXPECT_EQ(test, ARG_MAX, 32 * SZ_4K);
+ KUNIT_EXPECT_EQ(test, MAX_ARG_STRINGS, 0x7FFFFFFF);
+
+ for (int i = 0; i < ARRAY_SIZE(bprm_stack_limits_results); i++) {
+ const struct bprm_stack_limits_result *result = &bprm_stack_limits_results[i];
+ struct linux_binprm bprm = result->bprm;
+ int rc;
+
+ rc = bprm_stack_limits(&bprm);
+ KUNIT_EXPECT_EQ_MSG(test, rc, result->expected_rc, "on loop %d", i);
+#ifdef CONFIG_MMU
+ KUNIT_EXPECT_EQ_MSG(test, bprm.argmin, result->expected_argmin, "on loop %d", i);
+#endif
+ }
+}
+
+static struct kunit_case exec_test_cases[] = {
+ KUNIT_CASE(exec_test_bprm_stack_limits),
+ {},
+};
+
+static struct kunit_suite exec_test_suite = {
+ .name = "exec",
+ .test_cases = exec_test_cases,
+};
+
+kunit_test_suite(exec_test_suite);
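To make the expected_argmin entries concrete, here is the arithmetic for the .argc = 1, .envc = 0, .rlim_cur = 0 case above, assuming a 64-bit build (sizeof(void *) == 8) and following bprm_stack_limits() step by step:

    /*
     *   limit    = _STK_LIM / 4 * 3               = 6 MiB
     *   limit    = min(limit, rlim_cur / 4)       = 0
     *   limit    = max(limit, ARG_MAX)            = 131072
     *   ptr_size = (max(argc, 1) + envc) * 8      = 8
     *   limit   -= ptr_size                       = 131064
     *   argmin   = p - limit = ULONG_MAX - ARG_MAX + 8
     *
     * matching .expected_argmin = ULONG_MAX - ARG_MAX + sizeof(void *).
     */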