summaryrefslogtreecommitdiff
path: root/arch/arm64
diff options
context:
space:
mode:
author Will Deacon <will@kernel.org> 2024-01-04 12:27:29 +0000
committer Will Deacon <will@kernel.org> 2024-01-04 12:27:29 +0000
commit 79eb42b269d425ec9c9e1d0613f62b273026de78 (patch)
tree 0fdd7d88b1637a5b94464f9ecd9c6f05e4bebb32 /arch/arm64
parent e90a8a210fd0fc633ed9c6c7d76ad35ae65d37dc (diff)
parent 2632e25217696712681dd1f3ecc0d71624ea3b23 (diff)
Merge branch 'for-next/fpsimd' into for-next/core
* for-next/fpsimd:
  arm64: fpsimd: Implement lazy restore for kernel mode FPSIMD
  arm64: fpsimd: Preserve/restore kernel mode NEON at context switch
  arm64: fpsimd: Drop unneeded 'busy' flag
Diffstat (limited to 'arch/arm64')
-rw-r--r-- arch/arm64/include/asm/processor.h 3
-rw-r--r-- arch/arm64/include/asm/simd.h 11
-rw-r--r-- arch/arm64/include/asm/thread_info.h 1
-rw-r--r-- arch/arm64/kernel/fpsimd.c 165
4 files changed, 111 insertions, 69 deletions
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index e5bc54522e71..5b0a04810b23 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -167,6 +167,9 @@ struct thread_struct {
unsigned long fault_address; /* fault info */
unsigned long fault_code; /* ESR_EL1 value */
struct debug_info debug; /* debugging */
+
+ struct user_fpsimd_state kernel_fpsimd_state;
+ unsigned int kernel_fpsimd_cpu;
#ifdef CONFIG_ARM64_PTR_AUTH
struct ptrauth_keys_user keys_user;
#ifdef CONFIG_ARM64_PTR_AUTH_KERNEL
diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h
index 6a75d7ecdcaa..8e86c9e70e48 100644
--- a/arch/arm64/include/asm/simd.h
+++ b/arch/arm64/include/asm/simd.h
@@ -12,8 +12,6 @@
#include <linux/preempt.h>
#include <linux/types.h>
-DECLARE_PER_CPU(bool, fpsimd_context_busy);
-
#ifdef CONFIG_KERNEL_MODE_NEON
/*
@@ -28,17 +26,10 @@ static __must_check inline bool may_use_simd(void)
/*
* We must make sure that the SVE has been initialized properly
* before using the SIMD in kernel.
- * fpsimd_context_busy is only set while preemption is disabled,
- * and is clear whenever preemption is enabled. Since
- * this_cpu_read() is atomic w.r.t. preemption, fpsimd_context_busy
- * cannot change under our feet -- if it's set we cannot be
- * migrated, and if it's clear we cannot be migrated to a CPU
- * where it is set.
*/
return !WARN_ON(!system_capabilities_finalized()) &&
system_supports_fpsimd() &&
- !in_hardirq() && !irqs_disabled() && !in_nmi() &&
- !this_cpu_read(fpsimd_context_busy);
+ !in_hardirq() && !irqs_disabled() && !in_nmi();
}
#else /* ! CONFIG_KERNEL_MODE_NEON */
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 553d1bc559c6..e72a3bf9e563 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -80,6 +80,7 @@ void arch_setup_new_exec(void);
#define TIF_TAGGED_ADDR 26 /* Allow tagged user addresses */
#define TIF_SME 27 /* SME in use */
#define TIF_SME_VL_INHERIT 28 /* Inherit SME vl_onexec across exec */
+#define TIF_KERNEL_FPSTATE 29 /* Task is in a kernel mode FPSIMD section */
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index bc9384517db3..505f389be3e0 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -85,13 +85,13 @@
* softirq kicks in. Upon vcpu_put(), KVM will save the vcpu FP state and
* flag the register state as invalid.
*
- * In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may
- * save the task's FPSIMD context back to task_struct from softirq context.
- * To prevent this from racing with the manipulation of the task's FPSIMD state
- * from task context and thereby corrupting the state, it is necessary to
- * protect any manipulation of a task's fpsimd_state or TIF_FOREIGN_FPSTATE
- * flag with {, __}get_cpu_fpsimd_context(). This will still allow softirqs to
- * run but prevent them to use FPSIMD.
+ * In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may be
+ * called from softirq context, which will save the task's FPSIMD context back
+ * to task_struct. To prevent this from racing with the manipulation of the
+ * task's FPSIMD state from task context and thereby corrupting the state, it
+ * is necessary to protect any manipulation of a task's fpsimd_state or
+ * TIF_FOREIGN_FPSTATE flag with get_cpu_fpsimd_context(), which will suspend
+ * softirq servicing entirely until put_cpu_fpsimd_context() is called.
*
* For a certain task, the sequence may look something like this:
* - the task gets scheduled in; if both the task's fpsimd_cpu field
@@ -209,27 +209,14 @@ static inline void sme_free(struct task_struct *t) { }
#endif
-DEFINE_PER_CPU(bool, fpsimd_context_busy);
-EXPORT_PER_CPU_SYMBOL(fpsimd_context_busy);
-
static void fpsimd_bind_task_to_cpu(void);
-static void __get_cpu_fpsimd_context(void)
-{
- bool busy = __this_cpu_xchg(fpsimd_context_busy, true);
-
- WARN_ON(busy);
-}
-
/*
* Claim ownership of the CPU FPSIMD context for use by the calling context.
*
* The caller may freely manipulate the FPSIMD context metadata until
* put_cpu_fpsimd_context() is called.
*
- * The double-underscore version must only be called if you know the task
- * can't be preempted.
- *
* On RT kernels local_bh_disable() is not sufficient because it only
* serializes soft interrupt related sections via a local lock, but stays
* preemptible. Disabling preemption is the right choice here as bottom
@@ -242,14 +229,6 @@ static void get_cpu_fpsimd_context(void)
local_bh_disable();
else
preempt_disable();
- __get_cpu_fpsimd_context();
-}
-
-static void __put_cpu_fpsimd_context(void)
-{
- bool busy = __this_cpu_xchg(fpsimd_context_busy, false);
-
- WARN_ON(!busy); /* No matching get_cpu_fpsimd_context()? */
}
/*
@@ -261,18 +240,12 @@ static void __put_cpu_fpsimd_context(void)
*/
static void put_cpu_fpsimd_context(void)
{
- __put_cpu_fpsimd_context();
if (!IS_ENABLED(CONFIG_PREEMPT_RT))
local_bh_enable();
else
preempt_enable();
}
-static bool have_cpu_fpsimd_context(void)
-{
- return !preemptible() && __this_cpu_read(fpsimd_context_busy);
-}
-
unsigned int task_get_vl(const struct task_struct *task, enum vec_type type)
{
return task->thread.vl[type];
@@ -383,7 +356,8 @@ static void task_fpsimd_load(void)
bool restore_ffr;
WARN_ON(!system_supports_fpsimd());
- WARN_ON(!have_cpu_fpsimd_context());
+ WARN_ON(preemptible());
+ WARN_ON(test_thread_flag(TIF_KERNEL_FPSTATE));
if (system_supports_sve() || system_supports_sme()) {
switch (current->thread.fp_type) {
@@ -406,7 +380,7 @@ static void task_fpsimd_load(void)
default:
/*
* This indicates either a bug in
- * fpsimd_save() or memory corruption, we
+ * fpsimd_save_user_state() or memory corruption, we
* should always record an explicit format
* when we save. We always at least have the
* memory allocated for FPSMID registers so
@@ -457,7 +431,7 @@ static void task_fpsimd_load(void)
* than via current, if we are saving KVM state then it will have
* ensured that the type of registers to save is set in last->to_save.
*/
-static void fpsimd_save(void)
+static void fpsimd_save_user_state(void)
{
struct cpu_fp_state const *last =
this_cpu_ptr(&fpsimd_last_state);
@@ -467,7 +441,7 @@ static void fpsimd_save(void)
unsigned int vl;
WARN_ON(!system_supports_fpsimd());
- WARN_ON(!have_cpu_fpsimd_context());
+ WARN_ON(preemptible());
if (test_thread_flag(TIF_FOREIGN_FPSTATE))
return;
@@ -888,7 +862,7 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,
if (task == current) {
get_cpu_fpsimd_context();
- fpsimd_save();
+ fpsimd_save_user_state();
}
fpsimd_flush_task_state(task);
@@ -1500,6 +1474,34 @@ void do_fpsimd_exc(unsigned long esr, struct pt_regs *regs)
current);
}
+static void fpsimd_load_kernel_state(struct task_struct *task)
+{
+ struct cpu_fp_state *last = this_cpu_ptr(&fpsimd_last_state);
+
+ /*
+ * Elide the load if this CPU holds the most recent kernel mode
+ * FPSIMD context of the current task.
+ */
+ if (last->st == &task->thread.kernel_fpsimd_state &&
+ task->thread.kernel_fpsimd_cpu == smp_processor_id())
+ return;
+
+ fpsimd_load_state(&task->thread.kernel_fpsimd_state);
+}
+
+static void fpsimd_save_kernel_state(struct task_struct *task)
+{
+ struct cpu_fp_state cpu_fp_state = {
+ .st = &task->thread.kernel_fpsimd_state,
+ .to_save = FP_STATE_FPSIMD,
+ };
+
+ fpsimd_save_state(&task->thread.kernel_fpsimd_state);
+ fpsimd_bind_state_to_cpu(&cpu_fp_state);
+
+ task->thread.kernel_fpsimd_cpu = smp_processor_id();
+}
+
void fpsimd_thread_switch(struct task_struct *next)
{
bool wrong_task, wrong_cpu;
@@ -1507,24 +1509,31 @@ void fpsimd_thread_switch(struct task_struct *next)
if (!system_supports_fpsimd())
return;
- __get_cpu_fpsimd_context();
+ WARN_ON_ONCE(!irqs_disabled());
/* Save unsaved fpsimd state, if any: */
- fpsimd_save();
-
- /*
- * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's
- * state. For kernel threads, FPSIMD registers are never loaded
- * and wrong_task and wrong_cpu will always be true.
- */
- wrong_task = __this_cpu_read(fpsimd_last_state.st) !=
- &next->thread.uw.fpsimd_state;
- wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id();
+ if (test_thread_flag(TIF_KERNEL_FPSTATE))
+ fpsimd_save_kernel_state(current);
+ else
+ fpsimd_save_user_state();
- update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
- wrong_task || wrong_cpu);
+ if (test_tsk_thread_flag(next, TIF_KERNEL_FPSTATE)) {
+ fpsimd_load_kernel_state(next);
+ set_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
+ } else {
+ /*
+ * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's
+ * state. For kernel threads, FPSIMD registers are never
+ * loaded with user mode FPSIMD state and so wrong_task and
+ * wrong_cpu will always be true.
+ */
+ wrong_task = __this_cpu_read(fpsimd_last_state.st) !=
+ &next->thread.uw.fpsimd_state;
+ wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id();
- __put_cpu_fpsimd_context();
+ update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
+ wrong_task || wrong_cpu);
+ }
}
static void fpsimd_flush_thread_vl(enum vec_type type)
@@ -1614,7 +1623,7 @@ void fpsimd_preserve_current_state(void)
return;
get_cpu_fpsimd_context();
- fpsimd_save();
+ fpsimd_save_user_state();
put_cpu_fpsimd_context();
}
@@ -1826,13 +1835,15 @@ static void fpsimd_flush_cpu_state(void)
*/
void fpsimd_save_and_flush_cpu_state(void)
{
+ unsigned long flags;
+
if (!system_supports_fpsimd())
return;
WARN_ON(preemptible());
- __get_cpu_fpsimd_context();
- fpsimd_save();
+ local_irq_save(flags);
+ fpsimd_save_user_state();
fpsimd_flush_cpu_state();
- __put_cpu_fpsimd_context();
+ local_irq_restore(flags);
}
#ifdef CONFIG_KERNEL_MODE_NEON
@@ -1864,10 +1875,37 @@ void kernel_neon_begin(void)
get_cpu_fpsimd_context();
/* Save unsaved fpsimd state, if any: */
- fpsimd_save();
+ if (test_thread_flag(TIF_KERNEL_FPSTATE)) {
+ BUG_ON(IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq());
+ fpsimd_save_kernel_state(current);
+ } else {
+ fpsimd_save_user_state();
+
+ /*
+ * Set the thread flag so that the kernel mode FPSIMD state
+ * will be context switched along with the rest of the task
+ * state.
+ *
+ * On non-PREEMPT_RT, softirqs may interrupt task level kernel
+ * mode FPSIMD, but the task will not be preemptible so setting
+ * TIF_KERNEL_FPSTATE for those would be both wrong (as it
+ * would mark the task context FPSIMD state as requiring a
+ * context switch) and unnecessary.
+ *
+ * On PREEMPT_RT, softirqs are serviced from a separate thread,
+ * which is scheduled as usual, and this guarantees that these
+ * softirqs are not interrupting use of the FPSIMD in kernel
+ * mode in task context. So in this case, setting the flag here
+ * is always appropriate.
+ */
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq())
+ set_thread_flag(TIF_KERNEL_FPSTATE);
+ }
/* Invalidate any task state remaining in the fpsimd regs: */
fpsimd_flush_cpu_state();
+
+ put_cpu_fpsimd_context();
}
EXPORT_SYMBOL_GPL(kernel_neon_begin);
@@ -1885,7 +1923,16 @@ void kernel_neon_end(void)
if (!system_supports_fpsimd())
return;
- put_cpu_fpsimd_context();
+ /*
+ * If we are returning from a nested use of kernel mode FPSIMD, restore
+ * the task context kernel mode FPSIMD state. This can only happen when
+ * running in softirq context on non-PREEMPT_RT.
+ */
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq() &&
+ test_thread_flag(TIF_KERNEL_FPSTATE))
+ fpsimd_load_kernel_state(current);
+ else
+ clear_thread_flag(TIF_KERNEL_FPSTATE);
}
EXPORT_SYMBOL_GPL(kernel_neon_end);