mm: introduce execmem_alloc() and execmem_free()

module_alloc() is used everywhere as a mean to allocate memory for code. Beside being semantically wrong, this unnecessarily ties all subsystems that need to allocate code, such as ftrace, kprobes and BPF to modules and puts the burden of code allocation to the modules code. Several architectures override module_alloc() because of various constraints where the executable memory can be located and this causes additional obstacles for improvements of code allocation. Start splitting code allocation from modules by introducing execmem_alloc() and execmem_free() APIs. Initially, execmem_alloc() is a wrapper for module_alloc() and execmem_free() is a replacement of module_memfree() to allow updating all call sites to use the new APIs. Since architectures define different restrictions on placement, permissions, alignment and other parameters for memory that can be used by different subsystems that allocate executable memory, execmem_alloc() takes a type argument, that will be used to identify the calling subsystem and to allow architectures define parameters for ranges suitable for that subsystem. No functional changes. Signed-off-by: Mike Rapoport (IBM) <rppt@kernel.org> Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org> Acked-by: Song Liu <song@kernel.org> Acked-by: Steven Rostedt (Google) <rostedt@goodmis.org> Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
author: Mike Rapoport (IBM) <rppt@kernel.org> 2024-05-05 19:06:18 +0300
committer: Luis Chamberlain <mcgrof@kernel.org> 2024-05-14 00:31:43 -0700
commit: 12af2b83d0b17ec8b379b721dd4a8fbcd5d791f3 (patch)
tree: c04ce85d6f0d1c09aec941554f4623719581ce84 /kernel
parent: bc6b94d3ea062454ca889884db99e145efffcb93 (diff)
4 files changed, 16 insertions, 24 deletions
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 1ea5ce5bb599..892e50afda59 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -22,7 +22,6 @@
 #include <linux/skbuff.h>
 #include <linux/vmalloc.h>
 #include <linux/random.h>
-#include <linux/moduleloader.h>
 #include <linux/bpf.h>
 #include <linux/btf.h>
 #include <linux/objtool.h>
@@ -37,6 +36,7 @@
 #include <linux/nospec.h>
 #include <linux/bpf_mem_alloc.h>
 #include <linux/memcontrol.h>
+#include <linux/execmem.h>
 
 #include <asm/barrier.h>
 #include <asm/unaligned.h>
@@ -1050,12 +1050,12 @@ void bpf_jit_uncharge_modmem(u32 size)
 
 void *__weak bpf_jit_alloc_exec(unsigned long size)
 {
-	return module_alloc(size);
+	return execmem_alloc(EXECMEM_BPF, size);
 }
 
 void __weak bpf_jit_free_exec(void *addr)
 {
-	module_memfree(addr);
+	execmem_free(addr);
 }
 
 struct bpf_binary_header *
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 65adc815fc6e..ddd7cdc16edf 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -26,7 +26,6 @@
 #include <linux/slab.h>
 #include <linux/stddef.h>
 #include <linux/export.h>
-#include <linux/moduleloader.h>
 #include <linux/kallsyms.h>
 #include <linux/freezer.h>
 #include <linux/seq_file.h>
@@ -39,6 +38,7 @@
 #include <linux/jump_label.h>
 #include <linux/static_call.h>
 #include <linux/perf_event.h>
+#include <linux/execmem.h>
 
 #include <asm/sections.h>
 #include <asm/cacheflush.h>
@@ -113,17 +113,17 @@ enum kprobe_slot_state {
 void __weak *alloc_insn_page(void)
 {
 	/*
-	 * Use module_alloc() so this page is within +/- 2GB of where the
+	 * Use execmem_alloc() so this page is within +/- 2GB of where the
 	 * kernel image and loaded module images reside. This is required
 	 * for most of the architectures.
 	 * (e.g. x86-64 needs this to handle the %rip-relative fixups.)
 	 */
-	return module_alloc(PAGE_SIZE);
+	return execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
 }
 
 static void free_insn_page(void *page)
 {
-	module_memfree(page);
+	execmem_free(page);
 }
 
 struct kprobe_insn_cache kprobe_insn_slots = {
diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig
index cb8377a18927..4047b6d48255 100644
--- a/kernel/module/Kconfig
+++ b/kernel/module/Kconfig
@@ -2,6 +2,7 @@
 menuconfig MODULES
 	bool "Enable loadable module support"
 	modules
+	select EXECMEM
 	help
 	  Kernel modules are small pieces of compiled code which can
 	  be inserted in the running kernel, rather than being
diff --git a/kernel/module/main.c b/kernel/module/main.c
index 5b82b069e0d3..d56b7df0cbb6 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -57,6 +57,7 @@
 #include <linux/audit.h>
 #include <linux/cfi.h>
 #include <linux/debugfs.h>
+#include <linux/execmem.h>
 #include <uapi/linux/module.h>
 #include "internal.h"
 
@@ -1179,16 +1180,6 @@ resolve_symbol_wait(struct module *mod,
 	return ksym;
 }
 
-void __weak module_memfree(void *module_region)
-{
-	/*
-	 * This memory may be RO, and freeing RO memory in an interrupt is not
-	 * supported by vmalloc.
-	 */
-	WARN_ON(in_interrupt());
-	vfree(module_region);
-}
-
 void __weak module_arch_cleanup(struct module *mod)
 {
 }
@@ -1213,7 +1204,7 @@ static int module_memory_alloc(struct module *mod, enum mod_mem_type type)
 	if (mod_mem_use_vmalloc(type))
 		ptr = vmalloc(size);
 	else
-		ptr = module_alloc(size);
+		ptr = execmem_alloc(EXECMEM_MODULE_TEXT, size);
 
 	if (!ptr)
 		return -ENOMEM;
@@ -1244,7 +1235,7 @@ static void module_memory_free(struct module *mod, enum mod_mem_type type)
 	if (mod_mem_use_vmalloc(type))
 		vfree(ptr);
 	else
-		module_memfree(ptr);
+		execmem_free(ptr);
 }
 
 static void free_mod_mem(struct module *mod)
@@ -2496,9 +2487,9 @@ static void do_free_init(struct work_struct *w)
 
 	llist_for_each_safe(pos, n, list) {
 		initfree = container_of(pos, struct mod_initfree, node);
-		module_memfree(initfree->init_text);
-		module_memfree(initfree->init_data);
-		module_memfree(initfree->init_rodata);
+		execmem_free(initfree->init_text);
+		execmem_free(initfree->init_data);
+		execmem_free(initfree->init_rodata);
 		kfree(initfree);
 	}
 }
@@ -2608,10 +2599,10 @@ static noinline int do_init_module(struct module *mod)
 	 * We want to free module_init, but be aware that kallsyms may be
 	 * walking this with preempt disabled.  In all the failure paths, we
 	 * call synchronize_rcu(), but we don't want to slow down the success
-	 * path. module_memfree() cannot be called in an interrupt, so do the
+	 * path. execmem_free() cannot be called in an interrupt, so do the
 	 * work and call synchronize_rcu() in a work queue.
 	 *
-	 * Note that module_alloc() on most architectures creates W+X page
+	 * Note that execmem_alloc() on most architectures creates W+X page
 	 * mappings which won't be cleaned up until do_free_init() runs.  Any
 	 * code such as mark_rodata_ro() which depends on those mappings to
 	 * be cleaned up needs to sync with the queued work by invoking
author	Mike Rapoport (IBM) <rppt@kernel.org>	2024-05-05 19:06:18 +0300
committer	Luis Chamberlain <mcgrof@kernel.org>	2024-05-14 00:31:43 -0700
commit	12af2b83d0b17ec8b379b721dd4a8fbcd5d791f3 (patch)
tree	c04ce85d6f0d1c09aec941554f4623719581ce84 /kernel
parent	bc6b94d3ea062454ca889884db99e145efffcb93 (diff)