Diffstat (limited to 'arch/x86/kernel/fpu')
-rw-r--r--   arch/x86/kernel/fpu/bugs.c      |   2
-rw-r--r--   arch/x86/kernel/fpu/context.h   |  83
-rw-r--r--   arch/x86/kernel/fpu/core.c      | 392
-rw-r--r--   arch/x86/kernel/fpu/init.c      |  76
-rw-r--r--   arch/x86/kernel/fpu/internal.h  |  28
-rw-r--r--   arch/x86/kernel/fpu/legacy.h    | 115
-rw-r--r--   arch/x86/kernel/fpu/regset.c    |  36
-rw-r--r--   arch/x86/kernel/fpu/signal.c    | 285
-rw-r--r--   arch/x86/kernel/fpu/xstate.c    | 898
-rw-r--r--   arch/x86/kernel/fpu/xstate.h    | 278
10 files changed, 1731 insertions(+), 462 deletions(-)
diff --git a/arch/x86/kernel/fpu/bugs.c b/arch/x86/kernel/fpu/bugs.c index 2954fab15e51..794e70151203 100644 --- a/arch/x86/kernel/fpu/bugs.c +++ b/arch/x86/kernel/fpu/bugs.c @@ -2,7 +2,7 @@ /* * x86 FPU bug checks: */ -#include <asm/fpu/internal.h> +#include <asm/fpu/api.h> /* * Boot time CPU/FPU FDIV bug detection code: diff --git a/arch/x86/kernel/fpu/context.h b/arch/x86/kernel/fpu/context.h new file mode 100644 index 000000000000..958accf2ccf0 --- /dev/null +++ b/arch/x86/kernel/fpu/context.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __X86_KERNEL_FPU_CONTEXT_H +#define __X86_KERNEL_FPU_CONTEXT_H + +#include <asm/fpu/xstate.h> +#include <asm/trace/fpu.h> + +/* Functions related to FPU context tracking */ + +/* + * The in-register FPU state for an FPU context on a CPU is assumed to be + * valid if the fpu->last_cpu matches the CPU, and the fpu_fpregs_owner_ctx + * matches the FPU. + * + * If the FPU register state is valid, the kernel can skip restoring the + * FPU state from memory. + * + * Any code that clobbers the FPU registers or updates the in-memory + * FPU state for a task MUST let the rest of the kernel know that the + * FPU registers are no longer valid for this task. + * + * Either one of these invalidation functions is enough. Invalidate + * a resource you control: CPU if using the CPU for something else + * (with preemption disabled), FPU for the current task, or a task that + * is prevented from running by the current task. + */ +static inline void __cpu_invalidate_fpregs_state(void) +{ + __this_cpu_write(fpu_fpregs_owner_ctx, NULL); +} + +static inline void __fpu_invalidate_fpregs_state(struct fpu *fpu) +{ + fpu->last_cpu = -1; +} + +static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu) +{ + return fpu == this_cpu_read(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu; +} + +static inline void fpregs_deactivate(struct fpu *fpu) +{ + __this_cpu_write(fpu_fpregs_owner_ctx, NULL); + trace_x86_fpu_regs_deactivated(fpu); +} + +static inline void fpregs_activate(struct fpu *fpu) +{ + __this_cpu_write(fpu_fpregs_owner_ctx, fpu); + trace_x86_fpu_regs_activated(fpu); +} + +/* Internal helper for switch_fpu_return() and signal frame setup */ +static inline void fpregs_restore_userregs(void) +{ + struct fpu *fpu = ¤t->thread.fpu; + int cpu = smp_processor_id(); + + if (WARN_ON_ONCE(current->flags & PF_KTHREAD)) + return; + + if (!fpregs_state_valid(fpu, cpu)) { + /* + * This restores _all_ xstate which has not been + * established yet. + * + * If PKRU is enabled, then the PKRU value is already + * correct because it was either set in switch_to() or in + * flush_thread(). So it is excluded because it might be + * not up to date in current->thread.fpu.xsave state. + * + * XFD state is handled in restore_fpregs_from_fpstate(). 
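As a minimal sketch of how the invalidation rules in context.h are honoured in practice (modeled on kernel_fpu_begin(), not part of this commit): a path that is about to clobber the FPU registers first saves current's state if it is still live in the registers, then invalidates the per-CPU ownership tracking.

static void example_clobber_fpu_regs(void)
{
	struct fpu *fpu = &current->thread.fpu;

	fpregs_lock();				/* disables preemption */

	/* If the registers still hold current's state, preserve it */
	if (!test_thread_flag(TIF_NEED_FPU_LOAD))
		save_fpregs_to_fpstate(fpu);

	/* This CPU's FPU registers no longer belong to any task */
	__cpu_invalidate_fpregs_state();

	/* ... use/clobber the FPU registers ... */

	fpregs_unlock();
}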
+ */ + restore_fpregs_from_fpstate(fpu->fpstate, XFEATURE_MASK_FPSTATE); + + fpregs_activate(fpu); + fpu->last_cpu = cpu; + } + clear_thread_flag(TIF_NEED_FPU_LOAD); +} + +#endif diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 7ada7bd03a32..8ea306b1bf8e 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -6,8 +6,9 @@ * General FPU state handling cleanups * Gareth Hughes <gareth@valinux.com>, May 2000 */ -#include <asm/fpu/internal.h> +#include <asm/fpu/api.h> #include <asm/fpu/regset.h> +#include <asm/fpu/sched.h> #include <asm/fpu/signal.h> #include <asm/fpu/types.h> #include <asm/traps.h> @@ -15,15 +16,30 @@ #include <linux/hardirq.h> #include <linux/pkeys.h> +#include <linux/vmalloc.h> + +#include "context.h" +#include "internal.h" +#include "legacy.h" +#include "xstate.h" #define CREATE_TRACE_POINTS #include <asm/trace/fpu.h> +#ifdef CONFIG_X86_64 +DEFINE_STATIC_KEY_FALSE(__fpu_state_size_dynamic); +DEFINE_PER_CPU(u64, xfd_state); +#endif + +/* The FPU state configuration data for kernel and user space */ +struct fpu_state_config fpu_kernel_cfg __ro_after_init; +struct fpu_state_config fpu_user_cfg __ro_after_init; + /* * Represents the initial FPU state. It's mostly (but not completely) zeroes, * depending on the FPU hardware format: */ -union fpregs_state init_fpstate __ro_after_init; +struct fpstate init_fpstate __ro_after_init; /* * Track whether the kernel is using the FPU state @@ -83,7 +99,7 @@ bool irq_fpu_usable(void) EXPORT_SYMBOL(irq_fpu_usable); /* - * Save the FPU register state in fpu->state. The register state is + * Save the FPU register state in fpu->fpstate->regs. The register state is * preserved. * * Must be called with fpregs_lock() held. @@ -99,19 +115,19 @@ EXPORT_SYMBOL(irq_fpu_usable); void save_fpregs_to_fpstate(struct fpu *fpu) { if (likely(use_xsave())) { - os_xsave(&fpu->state.xsave); + os_xsave(fpu->fpstate); /* * AVX512 state is tracked here because its use is * known to slow the max clock speed of the core. */ - if (fpu->state.xsave.header.xfeatures & XFEATURE_MASK_AVX512) + if (fpu->fpstate->regs.xsave.header.xfeatures & XFEATURE_MASK_AVX512) fpu->avx512_timestamp = jiffies; return; } if (likely(use_fxsr())) { - fxsave(&fpu->state.fxsave); + fxsave(&fpu->fpstate->regs.fxsave); return; } @@ -119,12 +135,11 @@ void save_fpregs_to_fpstate(struct fpu *fpu) * Legacy FPU register saving, FNSAVE always clears FPU registers, * so we have to reload them from the memory state. */ - asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave)); - frstor(&fpu->state.fsave); + asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->fpstate->regs.fsave)); + frstor(&fpu->fpstate->regs.fsave); } -EXPORT_SYMBOL(save_fpregs_to_fpstate); -void __restore_fpregs_from_fpstate(union fpregs_state *fpstate, u64 mask) +void restore_fpregs_from_fpstate(struct fpstate *fpstate, u64 mask) { /* * AMD K7/K8 and later CPUs up to Zen don't save/restore @@ -141,15 +156,181 @@ void __restore_fpregs_from_fpstate(union fpregs_state *fpstate, u64 mask) } if (use_xsave()) { - os_xrstor(&fpstate->xsave, mask); + /* + * Dynamically enabled features are enabled in XCR0, but + * usage requires also that the corresponding bits in XFD + * are cleared. If the bits are set then using a related + * instruction will raise #NM. This allows to do the + * allocation of the larger FPU buffer lazy from #NM or if + * the task has no permission to kill it which would happen + * via #UD if the feature is disabled in XCR0. 
+ * + * XFD state is following the same life time rules as + * XSTATE and to restore state correctly XFD has to be + * updated before XRSTORS otherwise the component would + * stay in or go into init state even if the bits are set + * in fpstate::regs::xsave::xfeatures. + */ + xfd_update_state(fpstate); + + /* + * Restoring state always needs to modify all features + * which are in @mask even if the current task cannot use + * extended features. + * + * So fpstate->xfeatures cannot be used here, because then + * a feature for which the task has no permission but was + * used by the previous task would not go into init state. + */ + mask = fpu_kernel_cfg.max_features & mask; + + os_xrstor(fpstate, mask); } else { if (use_fxsr()) - fxrstor(&fpstate->fxsave); + fxrstor(&fpstate->regs.fxsave); else - frstor(&fpstate->fsave); + frstor(&fpstate->regs.fsave); } } -EXPORT_SYMBOL_GPL(__restore_fpregs_from_fpstate); + +void fpu_reset_from_exception_fixup(void) +{ + restore_fpregs_from_fpstate(&init_fpstate, XFEATURE_MASK_FPSTATE); +} + +#if IS_ENABLED(CONFIG_KVM) +static void __fpstate_reset(struct fpstate *fpstate); + +bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu) +{ + struct fpstate *fpstate; + unsigned int size; + + size = fpu_user_cfg.default_size + ALIGN(offsetof(struct fpstate, regs), 64); + fpstate = vzalloc(size); + if (!fpstate) + return false; + + __fpstate_reset(fpstate); + fpstate_init_user(fpstate); + fpstate->is_valloc = true; + fpstate->is_guest = true; + + gfpu->fpstate = fpstate; + return true; +} +EXPORT_SYMBOL_GPL(fpu_alloc_guest_fpstate); + +void fpu_free_guest_fpstate(struct fpu_guest *gfpu) +{ + struct fpstate *fps = gfpu->fpstate; + + if (!fps) + return; + + if (WARN_ON_ONCE(!fps->is_valloc || !fps->is_guest || fps->in_use)) + return; + + gfpu->fpstate = NULL; + vfree(fps); +} +EXPORT_SYMBOL_GPL(fpu_free_guest_fpstate); + +int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest) +{ + struct fpstate *guest_fps = guest_fpu->fpstate; + struct fpu *fpu = ¤t->thread.fpu; + struct fpstate *cur_fps = fpu->fpstate; + + fpregs_lock(); + if (!cur_fps->is_confidential && !test_thread_flag(TIF_NEED_FPU_LOAD)) + save_fpregs_to_fpstate(fpu); + + /* Swap fpstate */ + if (enter_guest) { + fpu->__task_fpstate = cur_fps; + fpu->fpstate = guest_fps; + guest_fps->in_use = true; + } else { + guest_fps->in_use = false; + fpu->fpstate = fpu->__task_fpstate; + fpu->__task_fpstate = NULL; + } + + cur_fps = fpu->fpstate; + + if (!cur_fps->is_confidential) { + /* Includes XFD update */ + restore_fpregs_from_fpstate(cur_fps, XFEATURE_MASK_FPSTATE); + } else { + /* + * XSTATE is restored by firmware from encrypted + * memory. 
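A hedged sketch of the intended calling convention for fpu_swap_kvm_fpstate() (kvm_arch_vcpu_run_once() is a placeholder, not a real symbol): the hypervisor swaps in the guest fpstate around VM entry and swaps the task fpstate back afterwards.

static int example_vcpu_enter_exit(struct fpu_guest *gfpu)
{
	int ret;

	/* Task state is saved and parked in fpu->__task_fpstate */
	fpu_swap_kvm_fpstate(gfpu, true);

	ret = kvm_arch_vcpu_run_once();		/* placeholder for the VM entry */

	/* Guest state is saved; task fpstate and XFD are restored */
	fpu_swap_kvm_fpstate(gfpu, false);
	return ret;
}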
Make sure XFD state is correct while + * running with guest fpstate + */ + xfd_update_state(cur_fps); + } + + fpregs_mark_activate(); + fpregs_unlock(); + return 0; +} +EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpstate); + +void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, + unsigned int size, u32 pkru) +{ + struct fpstate *kstate = gfpu->fpstate; + union fpregs_state *ustate = buf; + struct membuf mb = { .p = buf, .left = size }; + + if (cpu_feature_enabled(X86_FEATURE_XSAVE)) { + __copy_xstate_to_uabi_buf(mb, kstate, pkru, XSTATE_COPY_XSAVE); + } else { + memcpy(&ustate->fxsave, &kstate->regs.fxsave, + sizeof(ustate->fxsave)); + /* Make it restorable on a XSAVE enabled host */ + ustate->xsave.header.xfeatures = XFEATURE_MASK_FPSSE; + } +} +EXPORT_SYMBOL_GPL(fpu_copy_guest_fpstate_to_uabi); + +int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, + u64 xcr0, u32 *vpkru) +{ + struct fpstate *kstate = gfpu->fpstate; + const union fpregs_state *ustate = buf; + struct pkru_state *xpkru; + int ret; + + if (!cpu_feature_enabled(X86_FEATURE_XSAVE)) { + if (ustate->xsave.header.xfeatures & ~XFEATURE_MASK_FPSSE) + return -EINVAL; + if (ustate->fxsave.mxcsr & ~mxcsr_feature_mask) + return -EINVAL; + memcpy(&kstate->regs.fxsave, &ustate->fxsave, sizeof(ustate->fxsave)); + return 0; + } + + if (ustate->xsave.header.xfeatures & ~xcr0) + return -EINVAL; + + ret = copy_uabi_from_kernel_to_xstate(kstate, ustate); + if (ret) + return ret; + + /* Retrieve PKRU if not in init state */ + if (kstate->regs.xsave.header.xfeatures & XFEATURE_MASK_PKRU) { + xpkru = get_xsave_addr(&kstate->regs.xsave, XFEATURE_PKRU); + *vpkru = xpkru->pkru; + } + + /* Ensure that XCOMP_BV is set up for XSAVES */ + xstate_init_xcomp_bv(&kstate->regs.xsave, kstate->xfeatures); + return 0; +} +EXPORT_SYMBOL_GPL(fpu_copy_uabi_to_guest_fpstate); +#endif /* CONFIG_KVM */ void kernel_fpu_begin_mask(unsigned int kfpu_mask) { @@ -203,52 +384,88 @@ void fpu_sync_fpstate(struct fpu *fpu) fpregs_unlock(); } -static inline void fpstate_init_xstate(struct xregs_state *xsave) +static inline unsigned int init_fpstate_copy_size(void) { - /* - * XRSTORS requires these bits set in xcomp_bv, or it will - * trigger #GP: - */ - xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xfeatures_mask_all; + if (!use_xsave()) + return fpu_kernel_cfg.default_size; + + /* XSAVE(S) just needs the legacy and the xstate header part */ + return sizeof(init_fpstate.regs.xsave); } -static inline void fpstate_init_fxstate(struct fxregs_state *fx) +static inline void fpstate_init_fxstate(struct fpstate *fpstate) { - fx->cwd = 0x37f; - fx->mxcsr = MXCSR_DEFAULT; + fpstate->regs.fxsave.cwd = 0x37f; + fpstate->regs.fxsave.mxcsr = MXCSR_DEFAULT; } /* * Legacy x87 fpstate state init: */ -static inline void fpstate_init_fstate(struct fregs_state *fp) +static inline void fpstate_init_fstate(struct fpstate *fpstate) { - fp->cwd = 0xffff037fu; - fp->swd = 0xffff0000u; - fp->twd = 0xffffffffu; - fp->fos = 0xffff0000u; + fpstate->regs.fsave.cwd = 0xffff037fu; + fpstate->regs.fsave.swd = 0xffff0000u; + fpstate->regs.fsave.twd = 0xffffffffu; + fpstate->regs.fsave.fos = 0xffff0000u; } -void fpstate_init(union fpregs_state *state) +/* + * Used in two places: + * 1) Early boot to setup init_fpstate for non XSAVE systems + * 2) fpu_init_fpstate_user() which is invoked from KVM + */ +void fpstate_init_user(struct fpstate *fpstate) { - if (!static_cpu_has(X86_FEATURE_FPU)) { - fpstate_init_soft(&state->soft); + if (!cpu_feature_enabled(X86_FEATURE_FPU)) { 
+ fpstate_init_soft(&fpstate->regs.soft); return; } - memset(state, 0, fpu_kernel_xstate_size); + xstate_init_xcomp_bv(&fpstate->regs.xsave, fpstate->xfeatures); - if (static_cpu_has(X86_FEATURE_XSAVES)) - fpstate_init_xstate(&state->xsave); - if (static_cpu_has(X86_FEATURE_FXSR)) - fpstate_init_fxstate(&state->fxsave); + if (cpu_feature_enabled(X86_FEATURE_FXSR)) + fpstate_init_fxstate(fpstate); else - fpstate_init_fstate(&state->fsave); + fpstate_init_fstate(fpstate); +} + +static void __fpstate_reset(struct fpstate *fpstate) +{ + /* Initialize sizes and feature masks */ + fpstate->size = fpu_kernel_cfg.default_size; + fpstate->user_size = fpu_user_cfg.default_size; + fpstate->xfeatures = fpu_kernel_cfg.default_features; + fpstate->user_xfeatures = fpu_user_cfg.default_features; + fpstate->xfd = init_fpstate.xfd; +} + +void fpstate_reset(struct fpu *fpu) +{ + /* Set the fpstate pointer to the default fpstate */ + fpu->fpstate = &fpu->__fpstate; + __fpstate_reset(fpu->fpstate); + + /* Initialize the permission related info in fpu */ + fpu->perm.__state_perm = fpu_kernel_cfg.default_features; + fpu->perm.__state_size = fpu_kernel_cfg.default_size; + fpu->perm.__user_state_size = fpu_user_cfg.default_size; +} + +static inline void fpu_inherit_perms(struct fpu *dst_fpu) +{ + if (fpu_state_size_dynamic()) { + struct fpu *src_fpu = ¤t->group_leader->thread.fpu; + + spin_lock_irq(¤t->sighand->siglock); + /* Fork also inherits the permissions of the parent */ + dst_fpu->perm = src_fpu->perm; + spin_unlock_irq(¤t->sighand->siglock); + } } -EXPORT_SYMBOL_GPL(fpstate_init); /* Clone current's FPU state on fork */ -int fpu_clone(struct task_struct *dst) +int fpu_clone(struct task_struct *dst, unsigned long clone_flags) { struct fpu *src_fpu = ¤t->thread.fpu; struct fpu *dst_fpu = &dst->thread.fpu; @@ -256,30 +473,51 @@ int fpu_clone(struct task_struct *dst) /* The new task's FPU state cannot be valid in the hardware. */ dst_fpu->last_cpu = -1; + fpstate_reset(dst_fpu); + if (!cpu_feature_enabled(X86_FEATURE_FPU)) return 0; /* - * Don't let 'init optimized' areas of the XSAVE area - * leak into the child task: + * Enforce reload for user space tasks and prevent kernel threads + * from trying to save the FPU registers on context switch. + */ + set_tsk_thread_flag(dst, TIF_NEED_FPU_LOAD); + + /* + * No FPU state inheritance for kernel threads and IO + * worker threads. + */ + if (dst->flags & (PF_KTHREAD | PF_IO_WORKER)) { + /* Clear out the minimal state */ + memcpy(&dst_fpu->fpstate->regs, &init_fpstate.regs, + init_fpstate_copy_size()); + return 0; + } + + /* + * If a new feature is added, ensure all dynamic features are + * caller-saved from here! */ - memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size); + BUILD_BUG_ON(XFEATURE_MASK_USER_DYNAMIC != XFEATURE_MASK_XTILE_DATA); /* - * If the FPU registers are not owned by current just memcpy() the - * state. Otherwise save the FPU registers directly into the - * child's FPU context, without any memory-to-memory copying. + * Save the default portion of the current FPU state into the + * clone. Assume all dynamic features to be defined as caller- + * saved, which enables skipping both the expansion of fpstate + * and the copying of any dynamic state. + * + * Do not use memcpy() when TIF_NEED_FPU_LOAD is set because + * copying is not valid when current uses non-default states. 
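xstate_init_xcomp_bv() is not shown in this hunk, but judging from the removed fpstate_init_xstate() helper it replaces, it amounts to the following sketch. XRSTORS raises #GP unless xcomp_bv carries the compacted-format bit plus the enabled feature mask.

static inline void example_xstate_init_xcomp_bv(struct xregs_state *xsave, u64 mask)
{
	if (cpu_feature_enabled(X86_FEATURE_XSAVES))
		xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | mask;
}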
*/ fpregs_lock(); if (test_thread_flag(TIF_NEED_FPU_LOAD)) - memcpy(&dst_fpu->state, &src_fpu->state, fpu_kernel_xstate_size); - - else - save_fpregs_to_fpstate(dst_fpu); + fpregs_restore_userregs(); + save_fpregs_to_fpstate(dst_fpu); + if (!(clone_flags & CLONE_THREAD)) + fpu_inherit_perms(dst_fpu); fpregs_unlock(); - set_tsk_thread_flag(dst, TIF_NEED_FPU_LOAD); - trace_x86_fpu_copy_src(src_fpu); trace_x86_fpu_copy_dst(dst_fpu); @@ -287,6 +525,16 @@ int fpu_clone(struct task_struct *dst) } /* + * Whitelist the FPU register state embedded into task_struct for hardened + * usercopy. + */ +void fpu_thread_struct_whitelist(unsigned long *offset, unsigned long *size) +{ + *offset = offsetof(struct thread_struct, fpu.__fpstate.regs); + *size = fpu_kernel_cfg.default_size; +} + +/* * Drops current FPU state: deactivates the fpregs and * the fpstate. NOTE: it still leaves previous contents * in the fpregs in the eager-FPU case. @@ -319,28 +567,19 @@ void fpu__drop(struct fpu *fpu) static inline void restore_fpregs_from_init_fpstate(u64 features_mask) { if (use_xsave()) - os_xrstor(&init_fpstate.xsave, features_mask); + os_xrstor(&init_fpstate, features_mask); else if (use_fxsr()) - fxrstor(&init_fpstate.fxsave); + fxrstor(&init_fpstate.regs.fxsave); else - frstor(&init_fpstate.fsave); + frstor(&init_fpstate.regs.fsave); pkru_write_default(); } -static inline unsigned int init_fpstate_copy_size(void) -{ - if (!use_xsave()) - return fpu_kernel_xstate_size; - - /* XSAVE(S) just needs the legacy and the xstate header part */ - return sizeof(init_fpstate.xsave); -} - /* * Reset current->fpu memory state to the init values. */ -static void fpu_reset_fpstate(void) +static void fpu_reset_fpregs(void) { struct fpu *fpu = ¤t->thread.fpu; @@ -359,7 +598,7 @@ static void fpu_reset_fpstate(void) * user space as PKRU is eagerly written in switch_to() and * flush_thread(). */ - memcpy(&fpu->state, &init_fpstate, init_fpstate_copy_size()); + memcpy(&fpu->fpstate->regs, &init_fpstate.regs, init_fpstate_copy_size()); set_thread_flag(TIF_NEED_FPU_LOAD); fpregs_unlock(); } @@ -375,7 +614,7 @@ void fpu__clear_user_states(struct fpu *fpu) fpregs_lock(); if (!cpu_feature_enabled(X86_FEATURE_FPU)) { - fpu_reset_fpstate(); + fpu_reset_fpregs(); fpregs_unlock(); return; } @@ -385,12 +624,11 @@ void fpu__clear_user_states(struct fpu *fpu) * corresponding registers. */ if (xfeatures_mask_supervisor() && - !fpregs_state_valid(fpu, smp_processor_id())) { - os_xrstor(&fpu->state.xsave, xfeatures_mask_supervisor()); - } + !fpregs_state_valid(fpu, smp_processor_id())) + os_xrstor_supervisor(fpu->fpstate); /* Reset user states in registers. */ - restore_fpregs_from_init_fpstate(xfeatures_mask_restore_user()); + restore_fpregs_from_init_fpstate(XFEATURE_MASK_USER_RESTORE); /* * Now all FPU registers have their desired values. Inform the FPU @@ -405,7 +643,8 @@ void fpu__clear_user_states(struct fpu *fpu) void fpu_flush_thread(void) { - fpu_reset_fpstate(); + fpstate_reset(¤t->thread.fpu); + fpu_reset_fpregs(); } /* * Load FPU context before returning to userspace. @@ -445,7 +684,6 @@ void fpregs_mark_activate(void) fpu->last_cpu = smp_processor_id(); clear_thread_flag(TIF_NEED_FPU_LOAD); } -EXPORT_SYMBOL_GPL(fpregs_mark_activate); /* * x87 math exception handling: @@ -468,11 +706,11 @@ int fpu__exception_code(struct fpu *fpu, int trap_nr) * fully reproduce the context of the exception. 
*/ if (boot_cpu_has(X86_FEATURE_FXSR)) { - cwd = fpu->state.fxsave.cwd; - swd = fpu->state.fxsave.swd; + cwd = fpu->fpstate->regs.fxsave.cwd; + swd = fpu->fpstate->regs.fxsave.swd; } else { - cwd = (unsigned short)fpu->state.fsave.cwd; - swd = (unsigned short)fpu->state.fsave.swd; + cwd = (unsigned short)fpu->fpstate->regs.fsave.cwd; + swd = (unsigned short)fpu->fpstate->regs.fsave.swd; } err = swd & ~cwd; @@ -486,7 +724,7 @@ int fpu__exception_code(struct fpu *fpu, int trap_nr) unsigned short mxcsr = MXCSR_DEFAULT; if (boot_cpu_has(X86_FEATURE_XMM)) - mxcsr = fpu->state.fxsave.mxcsr; + mxcsr = fpu->fpstate->regs.fxsave.mxcsr; err = ~(mxcsr >> 7) & mxcsr; } diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 64e29927cc32..621f4b6cac4a 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -2,7 +2,7 @@ /* * x86 FPU boot time init code: */ -#include <asm/fpu/internal.h> +#include <asm/fpu/api.h> #include <asm/tlbflush.h> #include <asm/setup.h> @@ -10,6 +10,10 @@ #include <linux/sched/task.h> #include <linux/init.h> +#include "internal.h" +#include "legacy.h" +#include "xstate.h" + /* * Initialize the registers found in all CPUs, CR0 and CR4: */ @@ -34,7 +38,7 @@ static void fpu__init_cpu_generic(void) /* Flush out any pending x87 state: */ #ifdef CONFIG_MATH_EMULATION if (!boot_cpu_has(X86_FEATURE_FPU)) - fpstate_init_soft(¤t->thread.fpu.state.soft); + fpstate_init_soft(¤t->thread.fpu.fpstate->regs.soft); else #endif asm volatile ("fninit"); @@ -121,23 +125,14 @@ static void __init fpu__init_system_mxcsr(void) static void __init fpu__init_system_generic(void) { /* - * Set up the legacy init FPU context. (xstate init might overwrite this - * with a more modern format, if the CPU supports it.) + * Set up the legacy init FPU context. Will be updated when the + * CPU supports XSAVE[S]. */ - fpstate_init(&init_fpstate); + fpstate_init_user(&init_fpstate); fpu__init_system_mxcsr(); } -/* - * Size of the FPU context state. All tasks in the system use the - * same context size, regardless of what portion they use. - * This is inherent to the XSAVE architecture which puts all state - * components into a single, continuous memory block: - */ -unsigned int fpu_kernel_xstate_size __ro_after_init; -EXPORT_SYMBOL_GPL(fpu_kernel_xstate_size); - /* Get alignment of the TYPE. */ #define TYPE_ALIGN(TYPE) offsetof(struct { char x; TYPE test; }, test) @@ -162,13 +157,13 @@ static void __init fpu__init_task_struct_size(void) * Subtract off the static size of the register state. * It potentially has a bunch of padding. */ - task_size -= sizeof(((struct task_struct *)0)->thread.fpu.state); + task_size -= sizeof(current->thread.fpu.__fpstate.regs); /* * Add back the dynamically-calculated register state * size. */ - task_size += fpu_kernel_xstate_size; + task_size += fpu_kernel_cfg.default_size; /* * We dynamically size 'struct fpu', so we require that @@ -177,7 +172,7 @@ static void __init fpu__init_task_struct_size(void) * you hit a compile error here, check the structure to * see if something got added to the end. 
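A standalone worked example of the condition-code arithmetic in fpu__exception_code() (register values are illustrative): an unmasked x87 divide-by-zero leaves err with the ZE bit set, which the handler maps to FPE_FLTDIV.

#include <stdio.h>

int main(void)
{
	unsigned short cwd = 0x037b;	/* control word: ZM (bit 2) unmasked */
	unsigned short swd = 0x0004;	/* status word: ZE (bit 2) raised */
	unsigned short err = swd & ~cwd;

	printf("err=%#06x -> divide-by-zero: %s\n",
	       err, (err & 0x004) ? "yes (FPE_FLTDIV)" : "no");
	return 0;
}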
*/ - CHECK_MEMBER_AT_END_OF(struct fpu, state); + CHECK_MEMBER_AT_END_OF(struct fpu, __fpstate); CHECK_MEMBER_AT_END_OF(struct thread_struct, fpu); CHECK_MEMBER_AT_END_OF(struct task_struct, thread); @@ -192,37 +187,34 @@ static void __init fpu__init_task_struct_size(void) */ static void __init fpu__init_system_xstate_size_legacy(void) { - static int on_boot_cpu __initdata = 1; - - WARN_ON_FPU(!on_boot_cpu); - on_boot_cpu = 0; + unsigned int size; /* - * Note that xstate sizes might be overwritten later during - * fpu__init_system_xstate(). + * Note that the size configuration might be overwritten later + * during fpu__init_system_xstate(). */ - - if (!boot_cpu_has(X86_FEATURE_FPU)) { - fpu_kernel_xstate_size = sizeof(struct swregs_state); + if (!cpu_feature_enabled(X86_FEATURE_FPU)) { + size = sizeof(struct swregs_state); + } else if (cpu_feature_enabled(X86_FEATURE_FXSR)) { + size = sizeof(struct fxregs_state); + fpu_user_cfg.legacy_features = XFEATURE_MASK_FPSSE; } else { - if (boot_cpu_has(X86_FEATURE_FXSR)) - fpu_kernel_xstate_size = - sizeof(struct fxregs_state); - else - fpu_kernel_xstate_size = - sizeof(struct fregs_state); + size = sizeof(struct fregs_state); + fpu_user_cfg.legacy_features = XFEATURE_MASK_FP; } - fpu_user_xstate_size = fpu_kernel_xstate_size; + fpu_kernel_cfg.max_size = size; + fpu_kernel_cfg.default_size = size; + fpu_user_cfg.max_size = size; + fpu_user_cfg.default_size = size; + fpstate_reset(¤t->thread.fpu); } -/* Legacy code to initialize eager fpu mode. */ -static void __init fpu__init_system_ctx_switch(void) +static void __init fpu__init_init_fpstate(void) { - static bool on_boot_cpu __initdata = 1; - - WARN_ON_FPU(!on_boot_cpu); - on_boot_cpu = 0; + /* Bring init_fpstate size and features up to date */ + init_fpstate.size = fpu_kernel_cfg.max_size; + init_fpstate.xfeatures = fpu_kernel_cfg.max_features; } /* @@ -231,6 +223,7 @@ static void __init fpu__init_system_ctx_switch(void) */ void __init fpu__init_system(struct cpuinfo_x86 *c) { + fpstate_reset(¤t->thread.fpu); fpu__init_system_early_generic(c); /* @@ -241,8 +234,7 @@ void __init fpu__init_system(struct cpuinfo_x86 *c) fpu__init_system_generic(); fpu__init_system_xstate_size_legacy(); - fpu__init_system_xstate(); + fpu__init_system_xstate(fpu_kernel_cfg.max_size); fpu__init_task_struct_size(); - - fpu__init_system_ctx_switch(); + fpu__init_init_fpstate(); } diff --git a/arch/x86/kernel/fpu/internal.h b/arch/x86/kernel/fpu/internal.h new file mode 100644 index 000000000000..dbdb31f55fc7 --- /dev/null +++ b/arch/x86/kernel/fpu/internal.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __X86_KERNEL_FPU_INTERNAL_H +#define __X86_KERNEL_FPU_INTERNAL_H + +extern struct fpstate init_fpstate; + +/* CPU feature check wrappers */ +static __always_inline __pure bool use_xsave(void) +{ + return cpu_feature_enabled(X86_FEATURE_XSAVE); +} + +static __always_inline __pure bool use_fxsr(void) +{ + return cpu_feature_enabled(X86_FEATURE_FXSR); +} + +#ifdef CONFIG_X86_DEBUG_FPU +# define WARN_ON_FPU(x) WARN_ON_ONCE(x) +#else +# define WARN_ON_FPU(x) ({ (void)(x); 0; }) +#endif + +/* Used in init.c */ +extern void fpstate_init_user(struct fpstate *fpstate); +extern void fpstate_reset(struct fpu *fpu); + +#endif diff --git a/arch/x86/kernel/fpu/legacy.h b/arch/x86/kernel/fpu/legacy.h new file mode 100644 index 000000000000..17c26b164c63 --- /dev/null +++ b/arch/x86/kernel/fpu/legacy.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __X86_KERNEL_FPU_LEGACY_H +#define 
__X86_KERNEL_FPU_LEGACY_H + +#include <asm/fpu/types.h> + +extern unsigned int mxcsr_feature_mask; + +static inline void ldmxcsr(u32 mxcsr) +{ + asm volatile("ldmxcsr %0" :: "m" (mxcsr)); +} + +/* + * Returns 0 on success or the trap number when the operation raises an + * exception. + */ +#define user_insn(insn, output, input...) \ +({ \ + int err; \ + \ + might_fault(); \ + \ + asm volatile(ASM_STAC "\n" \ + "1: " #insn "\n" \ + "2: " ASM_CLAC "\n" \ + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FAULT_MCE_SAFE) \ + : [err] "=a" (err), output \ + : "0"(0), input); \ + err; \ +}) + +#define kernel_insn_err(insn, output, input...) \ +({ \ + int err; \ + asm volatile("1:" #insn "\n\t" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: movl $-1,%[err]\n" \ + " jmp 2b\n" \ + ".previous\n" \ + _ASM_EXTABLE(1b, 3b) \ + : [err] "=r" (err), output \ + : "0"(0), input); \ + err; \ +}) + +#define kernel_insn(insn, output, input...) \ + asm volatile("1:" #insn "\n\t" \ + "2:\n" \ + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FPU_RESTORE) \ + : output : input) + +static inline int fnsave_to_user_sigframe(struct fregs_state __user *fx) +{ + return user_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx)); +} + +static inline int fxsave_to_user_sigframe(struct fxregs_state __user *fx) +{ + if (IS_ENABLED(CONFIG_X86_32)) + return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx)); + else + return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx)); + +} + +static inline void fxrstor(struct fxregs_state *fx) +{ + if (IS_ENABLED(CONFIG_X86_32)) + kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); + else + kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); +} + +static inline int fxrstor_safe(struct fxregs_state *fx) +{ + if (IS_ENABLED(CONFIG_X86_32)) + return kernel_insn_err(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); + else + return kernel_insn_err(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); +} + +static inline int fxrstor_from_user_sigframe(struct fxregs_state __user *fx) +{ + if (IS_ENABLED(CONFIG_X86_32)) + return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); + else + return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); +} + +static inline void frstor(struct fregs_state *fx) +{ + kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); +} + +static inline int frstor_safe(struct fregs_state *fx) +{ + return kernel_insn_err(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); +} + +static inline int frstor_from_user_sigframe(struct fregs_state __user *fx) +{ + return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); +} + +static inline void fxsave(struct fxregs_state *fx) +{ + if (IS_ENABLED(CONFIG_X86_32)) + asm volatile( "fxsave %[fx]" : [fx] "=m" (*fx)); + else + asm volatile("fxsaveq %[fx]" : [fx] "=m" (*fx)); +} + +#endif diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 66ed317ebc0d..437d7c930c0b 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -5,10 +5,14 @@ #include <linux/sched/task_stack.h> #include <linux/vmalloc.h> -#include <asm/fpu/internal.h> +#include <asm/fpu/api.h> #include <asm/fpu/signal.h> #include <asm/fpu/regset.h> -#include <asm/fpu/xstate.h> + +#include "context.h" +#include "internal.h" +#include "legacy.h" +#include "xstate.h" /* * The xstateregs_active() routine is the same as the regset_fpregs_active() routine, @@ -74,8 +78,8 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, sync_fpstate(fpu); if (!use_xsave()) { - return membuf_write(&to, &fpu->state.fxsave, - 
sizeof(fpu->state.fxsave)); + return membuf_write(&to, &fpu->fpstate->regs.fxsave, + sizeof(fpu->fpstate->regs.fxsave)); } copy_xstate_to_uabi_buf(to, target, XSTATE_COPY_FX); @@ -110,15 +114,15 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, fpu_force_restore(fpu); /* Copy the state */ - memcpy(&fpu->state.fxsave, &newstate, sizeof(newstate)); + memcpy(&fpu->fpstate->regs.fxsave, &newstate, sizeof(newstate)); /* Clear xmm8..15 */ - BUILD_BUG_ON(sizeof(fpu->state.fxsave.xmm_space) != 16 * 16); - memset(&fpu->state.fxsave.xmm_space[8], 0, 8 * 16); + BUILD_BUG_ON(sizeof(fpu->__fpstate.regs.fxsave.xmm_space) != 16 * 16); + memset(&fpu->fpstate->regs.fxsave.xmm_space[8], 0, 8 * 16); /* Mark FP and SSE as in use when XSAVE is enabled */ if (use_xsave()) - fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FPSSE; + fpu->fpstate->regs.xsave.header.xfeatures |= XFEATURE_MASK_FPSSE; return 0; } @@ -149,7 +153,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, /* * A whole standard-format XSAVE buffer is needed: */ - if (pos != 0 || count != fpu_user_xstate_size) + if (pos != 0 || count != fpu_user_cfg.max_size) return -EFAULT; if (!kbuf) { @@ -164,7 +168,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, } fpu_force_restore(fpu); - ret = copy_uabi_from_kernel_to_xstate(&fpu->state.xsave, kbuf ?: tmpbuf); + ret = copy_uabi_from_kernel_to_xstate(fpu->fpstate, kbuf ?: tmpbuf); out: vfree(tmpbuf); @@ -283,7 +287,7 @@ static void __convert_from_fxsr(struct user_i387_ia32_struct *env, void convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) { - __convert_from_fxsr(env, tsk, &tsk->thread.fpu.state.fxsave); + __convert_from_fxsr(env, tsk, &tsk->thread.fpu.fpstate->regs.fxsave); } void convert_to_fxsr(struct fxregs_state *fxsave, @@ -326,7 +330,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, return fpregs_soft_get(target, regset, to); if (!cpu_feature_enabled(X86_FEATURE_FXSR)) { - return membuf_write(&to, &fpu->state.fsave, + return membuf_write(&to, &fpu->fpstate->regs.fsave, sizeof(struct fregs_state)); } @@ -337,7 +341,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, copy_xstate_to_uabi_buf(mb, target, XSTATE_COPY_FP); fx = &fxsave; } else { - fx = &fpu->state.fxsave; + fx = &fpu->fpstate->regs.fxsave; } __convert_from_fxsr(&env, target, fx); @@ -366,16 +370,16 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, fpu_force_restore(fpu); if (cpu_feature_enabled(X86_FEATURE_FXSR)) - convert_to_fxsr(&fpu->state.fxsave, &env); + convert_to_fxsr(&fpu->fpstate->regs.fxsave, &env); else - memcpy(&fpu->state.fsave, &env, sizeof(env)); + memcpy(&fpu->fpstate->regs.fsave, &env, sizeof(env)); /* * Update the header bit in the xsave header, indicating the * presence of FP. 
*/ if (cpu_feature_enabled(X86_FEATURE_XSAVE)) - fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FP; + fpu->fpstate->regs.xsave.header.xfeatures |= XFEATURE_MASK_FP; return 0; } diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 831b25c5e705..cc977da6e128 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -7,23 +7,25 @@ #include <linux/cpu.h> #include <linux/pagemap.h> -#include <asm/fpu/internal.h> #include <asm/fpu/signal.h> #include <asm/fpu/regset.h> #include <asm/fpu/xstate.h> #include <asm/sigframe.h> +#include <asm/trapnr.h> #include <asm/trace/fpu.h> -static struct _fpx_sw_bytes fx_sw_reserved __ro_after_init; -static struct _fpx_sw_bytes fx_sw_reserved_ia32 __ro_after_init; +#include "context.h" +#include "internal.h" +#include "legacy.h" +#include "xstate.h" /* * Check for the presence of extended state information in the * user fpstate pointer in the sigcontext. */ -static inline int check_xstate_in_sigframe(struct fxregs_state __user *fxbuf, - struct _fpx_sw_bytes *fx_sw) +static inline bool check_xstate_in_sigframe(struct fxregs_state __user *fxbuf, + struct _fpx_sw_bytes *fx_sw) { int min_xstate_size = sizeof(struct fxregs_state) + sizeof(struct xstate_header); @@ -31,12 +33,12 @@ static inline int check_xstate_in_sigframe(struct fxregs_state __user *fxbuf, unsigned int magic2; if (__copy_from_user(fx_sw, &fxbuf->sw_reserved[0], sizeof(*fx_sw))) - return -EFAULT; + return false; /* Check for the first magic field and other error scenarios. */ if (fx_sw->magic1 != FP_XSTATE_MAGIC1 || fx_sw->xstate_size < min_xstate_size || - fx_sw->xstate_size > fpu_user_xstate_size || + fx_sw->xstate_size > current->thread.fpu.fpstate->user_size || fx_sw->xstate_size > fx_sw->extended_size) goto setfx; @@ -47,10 +49,10 @@ static inline int check_xstate_in_sigframe(struct fxregs_state __user *fxbuf, * in the memory layout. */ if (__get_user(magic2, (__u32 __user *)(fpstate + fx_sw->xstate_size))) - return -EFAULT; + return false; if (likely(magic2 == FP_XSTATE_MAGIC2)) - return 0; + return true; setfx: trace_x86_fpu_xstate_check_failed(¤t->thread.fpu); @@ -58,22 +60,22 @@ setfx: fx_sw->magic1 = 0; fx_sw->xstate_size = sizeof(struct fxregs_state); fx_sw->xfeatures = XFEATURE_MASK_FPSSE; - return 0; + return true; } /* * Signal frame handlers. 
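To make the magic1/magic2 checks concrete, a standalone sketch of the 64-bit frame geometry that check_xstate_in_sigframe() validates (sizes are illustrative; 832 bytes would correspond to an AVX-only xstate):

#include <stdio.h>

#define FXSAVE_AREA_SIZE	512	/* legacy area; sw_reserved is its last 48 bytes */
#define FP_XSTATE_MAGIC2_SIZE	4

int main(void)
{
	unsigned int xstate_size = 832;	/* assumed fx_sw->xstate_size */

	printf("sw_reserved (magic1) at offset %u\n", FXSAVE_AREA_SIZE - 48);
	printf("magic2 at offset               %u\n", xstate_size);
	printf("extended_size                  %u\n", xstate_size + FP_XSTATE_MAGIC2_SIZE);
	return 0;
}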
*/ -static inline int save_fsave_header(struct task_struct *tsk, void __user *buf) +static inline bool save_fsave_header(struct task_struct *tsk, void __user *buf) { if (use_fxsr()) { - struct xregs_state *xsave = &tsk->thread.fpu.state.xsave; + struct xregs_state *xsave = &tsk->thread.fpu.fpstate->regs.xsave; struct user_i387_ia32_struct env; struct _fpstate_32 __user *fp = buf; fpregs_lock(); if (!test_thread_flag(TIF_NEED_FPU_LOAD)) - fxsave(&tsk->thread.fpu.state.fxsave); + fxsave(&tsk->thread.fpu.fpstate->regs.fxsave); fpregs_unlock(); convert_from_fxsr(&env, tsk); @@ -81,33 +83,54 @@ static inline int save_fsave_header(struct task_struct *tsk, void __user *buf) if (__copy_to_user(buf, &env, sizeof(env)) || __put_user(xsave->i387.swd, &fp->status) || __put_user(X86_FXSR_MAGIC, &fp->magic)) - return -1; + return false; } else { struct fregs_state __user *fp = buf; u32 swd; + if (__get_user(swd, &fp->swd) || __put_user(swd, &fp->status)) - return -1; + return false; } - return 0; + return true; +} + +/* + * Prepare the SW reserved portion of the fxsave memory layout, indicating + * the presence of the extended state information in the memory layout + * pointed to by the fpstate pointer in the sigcontext. + * This is saved when ever the FP and extended state context is + * saved on the user stack during the signal handler delivery to the user. + */ +static inline void save_sw_bytes(struct _fpx_sw_bytes *sw_bytes, bool ia32_frame, + struct fpstate *fpstate) +{ + sw_bytes->magic1 = FP_XSTATE_MAGIC1; + sw_bytes->extended_size = fpstate->user_size + FP_XSTATE_MAGIC2_SIZE; + sw_bytes->xfeatures = fpstate->user_xfeatures; + sw_bytes->xstate_size = fpstate->user_size; + + if (ia32_frame) + sw_bytes->extended_size += sizeof(struct fregs_state); } -static inline int save_xstate_epilog(void __user *buf, int ia32_frame) +static inline bool save_xstate_epilog(void __user *buf, int ia32_frame, + struct fpstate *fpstate) { struct xregs_state __user *x = buf; - struct _fpx_sw_bytes *sw_bytes; + struct _fpx_sw_bytes sw_bytes; u32 xfeatures; int err; /* Setup the bytes not touched by the [f]xsave and reserved for SW. */ - sw_bytes = ia32_frame ? 
&fx_sw_reserved_ia32 : &fx_sw_reserved; - err = __copy_to_user(&x->i387.sw_reserved, sw_bytes, sizeof(*sw_bytes)); + save_sw_bytes(&sw_bytes, ia32_frame, fpstate); + err = __copy_to_user(&x->i387.sw_reserved, &sw_bytes, sizeof(sw_bytes)); if (!use_xsave()) - return err; + return !err; err |= __put_user(FP_XSTATE_MAGIC2, - (__u32 __user *)(buf + fpu_user_xstate_size)); + (__u32 __user *)(buf + fpstate->user_size)); /* * Read the xfeatures which we copied (directly from the cpu or @@ -130,23 +153,17 @@ static inline int save_xstate_epilog(void __user *buf, int ia32_frame) err |= __put_user(xfeatures, (__u32 __user *)&x->header.xfeatures); - return err; + return !err; } static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf) { - int err; - if (use_xsave()) - err = xsave_to_user_sigframe(buf); - else if (use_fxsr()) - err = fxsave_to_user_sigframe((struct fxregs_state __user *) buf); + return xsave_to_user_sigframe(buf); + if (use_fxsr()) + return fxsave_to_user_sigframe((struct fxregs_state __user *) buf); else - err = fnsave_to_user_sigframe((struct fregs_state __user *) buf); - - if (unlikely(err) && __clear_user(buf, fpu_user_xstate_size)) - err = -EFAULT; - return err; + return fnsave_to_user_sigframe((struct fregs_state __user *) buf); } /* @@ -159,10 +176,8 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf) * buf == buf_fx for 64-bit frames and 32-bit fsave frame. * buf != buf_fx for 32-bit frames with fxstate. * - * Try to save it directly to the user frame with disabled page fault handler. - * If this fails then do the slow path where the FPU state is first saved to - * task's fpu->state and then copy it to the user frame pointed to by the - * aligned pointer 'buf_fx'. + * Save it directly to the user frame with disabled page fault handler. If + * that faults, try to clear the frame which handles the page fault. * * If this is a 32-bit frame with fxstate, put a fsave header before * the aligned state at 'buf_fx'. @@ -170,10 +185,11 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf) * For [f]xsave state, update the SW reserved fields in the [f]xsave frame * indicating the absence/presence of the extended state to the user. */ -int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) +bool copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) { struct task_struct *tsk = current; - int ia32_fxstate = (buf != buf_fx); + struct fpstate *fpstate = tsk->thread.fpu.fpstate; + bool ia32_fxstate = (buf != buf_fx); int ret; ia32_fxstate &= (IS_ENABLED(CONFIG_X86_32) || @@ -181,13 +197,25 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) if (!static_cpu_has(X86_FEATURE_FPU)) { struct user_i387_ia32_struct fp; + fpregs_soft_get(current, NULL, (struct membuf){.p = &fp, .left = sizeof(fp)}); - return copy_to_user(buf, &fp, sizeof(fp)) ? -EFAULT : 0; + return !copy_to_user(buf, &fp, sizeof(fp)); } if (!access_ok(buf, size)) - return -EACCES; + return false; + + if (use_xsave()) { + struct xregs_state __user *xbuf = buf_fx; + + /* + * Clear the xsave header first, so that reserved fields are + * initialized to zero. + */ + if (__clear_user(&xbuf->header, sizeof(xbuf->header))) + return false; + } retry: /* * Load the FPU registers if they are not valid for the current task. 
@@ -205,26 +233,26 @@ retry: fpregs_unlock(); if (ret) { - if (!fault_in_pages_writeable(buf_fx, fpu_user_xstate_size)) + if (!__clear_user(buf_fx, fpstate->user_size)) goto retry; - return -EFAULT; + return false; } /* Save the fsave header for the 32-bit frames. */ - if ((ia32_fxstate || !use_fxsr()) && save_fsave_header(tsk, buf)) - return -1; + if ((ia32_fxstate || !use_fxsr()) && !save_fsave_header(tsk, buf)) + return false; - if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate)) - return -1; + if (use_fxsr() && !save_xstate_epilog(buf_fx, ia32_fxstate, fpstate)) + return false; - return 0; + return true; } -static int __restore_fpregs_from_user(void __user *buf, u64 xrestore, - bool fx_only) +static int __restore_fpregs_from_user(void __user *buf, u64 ufeatures, + u64 xrestore, bool fx_only) { if (use_xsave()) { - u64 init_bv = xfeatures_mask_uabi() & ~xrestore; + u64 init_bv = ufeatures & ~xrestore; int ret; if (likely(!fx_only)) @@ -233,7 +261,7 @@ static int __restore_fpregs_from_user(void __user *buf, u64 xrestore, ret = fxrstor_from_user_sigframe(buf); if (!ret && unlikely(init_bv)) - os_xrstor(&init_fpstate.xsave, init_bv); + os_xrstor(&init_fpstate, init_bv); return ret; } else if (use_fxsr()) { return fxrstor_from_user_sigframe(buf); @@ -246,16 +274,19 @@ static int __restore_fpregs_from_user(void __user *buf, u64 xrestore, * Attempt to restore the FPU registers directly from user memory. * Pagefaults are handled and any errors returned are fatal. */ -static int restore_fpregs_from_user(void __user *buf, u64 xrestore, - bool fx_only, unsigned int size) +static bool restore_fpregs_from_user(void __user *buf, u64 xrestore, + bool fx_only, unsigned int size) { struct fpu *fpu = ¤t->thread.fpu; int ret; retry: fpregs_lock(); + /* Ensure that XFD is up to date */ + xfd_update_state(fpu->fpstate); pagefault_disable(); - ret = __restore_fpregs_from_user(buf, xrestore, fx_only); + ret = __restore_fpregs_from_user(buf, fpu->fpstate->user_xfeatures, + xrestore, fx_only); pagefault_enable(); if (unlikely(ret)) { @@ -275,13 +306,12 @@ retry: fpregs_unlock(); /* Try to handle #PF, but anything else is fatal. */ - if (ret != -EFAULT) - return -EINVAL; + if (ret != X86_TRAP_PF) + return false; - ret = fault_in_pages_readable(buf, size); - if (!ret) + if (!fault_in_pages_readable(buf, size)) goto retry; - return ret; + return false; } /* @@ -294,45 +324,40 @@ retry: * been restored from a user buffer directly. 
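The restore side uses the same shape of retry loop as the save side: attempt the user access with pagefaults disabled under fpregs_lock(), and if the returned trap number says #PF, fault the pages in with preemption enabled and try again. A generic sketch of the idiom (do_user_access() is a placeholder returning 0 or a trap number):

static bool example_retry_pattern(void __user *buf, unsigned int size)
{
	int ret;

	for (;;) {
		fpregs_lock();
		pagefault_disable();
		ret = do_user_access(buf);	/* placeholder */
		pagefault_enable();
		fpregs_unlock();

		if (!ret)
			return true;
		if (ret != X86_TRAP_PF)
			return false;		/* anything but a pagefault is fatal */
		if (fault_in_pages_readable(buf, size))
			return false;		/* the address really is bad */
	}
}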
*/ if (test_thread_flag(TIF_NEED_FPU_LOAD) && xfeatures_mask_supervisor()) - os_xrstor(&fpu->state.xsave, xfeatures_mask_supervisor()); + os_xrstor_supervisor(fpu->fpstate); fpregs_mark_activate(); fpregs_unlock(); - return 0; + return true; } -static int __fpu_restore_sig(void __user *buf, void __user *buf_fx, - bool ia32_fxstate) +static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx, + bool ia32_fxstate) { - int state_size = fpu_kernel_xstate_size; struct task_struct *tsk = current; struct fpu *fpu = &tsk->thread.fpu; struct user_i387_ia32_struct env; + bool success, fx_only = false; + union fpregs_state *fpregs; + unsigned int state_size; u64 user_xfeatures = 0; - bool fx_only = false; - int ret; if (use_xsave()) { struct _fpx_sw_bytes fx_sw_user; - ret = check_xstate_in_sigframe(buf_fx, &fx_sw_user); - if (unlikely(ret)) - return ret; + if (!check_xstate_in_sigframe(buf_fx, &fx_sw_user)) + return false; fx_only = !fx_sw_user.magic1; state_size = fx_sw_user.xstate_size; user_xfeatures = fx_sw_user.xfeatures; } else { user_xfeatures = XFEATURE_MASK_FPSSE; + state_size = fpu->fpstate->user_size; } if (likely(!ia32_fxstate)) { - /* - * Attempt to restore the FPU registers directly from user - * memory. For that to succeed, the user access cannot cause page - * faults. If it does, fall back to the slow path below, going - * through the kernel buffer with the enabled pagefault handler. - */ + /* Restore the FPU registers directly from user memory. */ return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only, state_size); } @@ -342,9 +367,8 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx, * to be ignored for histerical raisins. The legacy state is folded * in once the larger state has been copied. */ - ret = __copy_from_user(&env, buf, sizeof(env)); - if (ret) - return ret; + if (__copy_from_user(&env, buf, sizeof(env))) + return false; /* * By setting TIF_NEED_FPU_LOAD it is ensured that our xstate is @@ -363,38 +387,38 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx, * the right place in memory. It's ia32 mode. Shrug. */ if (xfeatures_mask_supervisor()) - os_xsave(&fpu->state.xsave); + os_xsave(fpu->fpstate); set_thread_flag(TIF_NEED_FPU_LOAD); } __fpu_invalidate_fpregs_state(fpu); __cpu_invalidate_fpregs_state(); fpregs_unlock(); + fpregs = &fpu->fpstate->regs; if (use_xsave() && !fx_only) { - ret = copy_sigframe_from_user_to_xstate(&fpu->state.xsave, buf_fx); - if (ret) - return ret; + if (copy_sigframe_from_user_to_xstate(fpu->fpstate, buf_fx)) + return false; } else { - if (__copy_from_user(&fpu->state.fxsave, buf_fx, - sizeof(fpu->state.fxsave))) - return -EFAULT; + if (__copy_from_user(&fpregs->fxsave, buf_fx, + sizeof(fpregs->fxsave))) + return false; if (IS_ENABLED(CONFIG_X86_64)) { /* Reject invalid MXCSR values. */ - if (fpu->state.fxsave.mxcsr & ~mxcsr_feature_mask) - return -EINVAL; + if (fpregs->fxsave.mxcsr & ~mxcsr_feature_mask) + return false; } else { /* Mask invalid bits out for historical reasons (broken hardware). 
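A small standalone illustration of the two MXCSR policies above (mask value assumed; bit 6 is DAZ, which very old CPUs lack): the 64-bit restore path rejects unknown bits, the 32-bit path silently clears them.

#include <stdbool.h>
#include <stdio.h>

int main(void)
{
	unsigned int mxcsr_feature_mask = 0xffbf;	/* assumed: DAZ unsupported */
	unsigned int mxcsr = 0x1f80 | 0x0040;		/* signal frame sets DAZ */

	bool reject = mxcsr & ~mxcsr_feature_mask;	/* 64-bit path: restore fails */
	mxcsr &= mxcsr_feature_mask;			/* 32-bit path: bit masked out */

	printf("reject=%d masked mxcsr=%#x\n", reject, mxcsr);
	return 0;
}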
*/ - fpu->state.fxsave.mxcsr &= mxcsr_feature_mask; + fpregs->fxsave.mxcsr &= mxcsr_feature_mask; } /* Enforce XFEATURE_MASK_FPSSE when XSAVE is enabled */ if (use_xsave()) - fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FPSSE; + fpregs->xsave.header.xfeatures |= XFEATURE_MASK_FPSSE; } /* Fold the legacy FP storage */ - convert_to_fxsr(&fpu->state.fxsave, &env); + convert_to_fxsr(&fpregs->fxsave, &env); fpregs_lock(); if (use_xsave()) { @@ -409,40 +433,45 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx, */ u64 mask = user_xfeatures | xfeatures_mask_supervisor(); - fpu->state.xsave.header.xfeatures &= mask; - ret = os_xrstor_safe(&fpu->state.xsave, xfeatures_mask_all); + fpregs->xsave.header.xfeatures &= mask; + success = !os_xrstor_safe(fpu->fpstate, + fpu_kernel_cfg.max_features); } else { - ret = fxrstor_safe(&fpu->state.fxsave); + success = !fxrstor_safe(&fpregs->fxsave); } - if (likely(!ret)) + if (likely(success)) fpregs_mark_activate(); fpregs_unlock(); - return ret; + return success; } -static inline int xstate_sigframe_size(void) + +static inline unsigned int xstate_sigframe_size(struct fpstate *fpstate) { - return use_xsave() ? fpu_user_xstate_size + FP_XSTATE_MAGIC2_SIZE : - fpu_user_xstate_size; + unsigned int size = fpstate->user_size; + + return use_xsave() ? size + FP_XSTATE_MAGIC2_SIZE : size; } /* * Restore FPU state from a sigframe: */ -int fpu__restore_sig(void __user *buf, int ia32_frame) +bool fpu__restore_sig(void __user *buf, int ia32_frame) { - unsigned int size = xstate_sigframe_size(); struct fpu *fpu = ¤t->thread.fpu; void __user *buf_fx = buf; bool ia32_fxstate = false; - int ret; + bool success = false; + unsigned int size; if (unlikely(!buf)) { fpu__clear_user_states(fpu); - return 0; + return true; } + size = xstate_sigframe_size(fpu->fpstate); + ia32_frame &= (IS_ENABLED(CONFIG_X86_32) || IS_ENABLED(CONFIG_IA32_EMULATION)); @@ -456,30 +485,28 @@ int fpu__restore_sig(void __user *buf, int ia32_frame) ia32_fxstate = true; } - if (!access_ok(buf, size)) { - ret = -EACCES; + if (!access_ok(buf, size)) goto out; - } if (!IS_ENABLED(CONFIG_X86_64) && !cpu_feature_enabled(X86_FEATURE_FPU)) { - ret = fpregs_soft_set(current, NULL, 0, - sizeof(struct user_i387_ia32_struct), - NULL, buf); + success = !fpregs_soft_set(current, NULL, 0, + sizeof(struct user_i387_ia32_struct), + NULL, buf); } else { - ret = __fpu_restore_sig(buf, buf_fx, ia32_fxstate); + success = __fpu_restore_sig(buf, buf_fx, ia32_fxstate); } out: - if (unlikely(ret)) + if (unlikely(!success)) fpu__clear_user_states(fpu); - return ret; + return success; } unsigned long fpu__alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx, unsigned long *size) { - unsigned long frame_size = xstate_sigframe_size(); + unsigned long frame_size = xstate_sigframe_size(current->thread.fpu.fpstate); *buf_fx = sp = round_down(sp - frame_size, 64); if (ia32_frame && use_fxsr()) { @@ -492,9 +519,12 @@ fpu__alloc_mathframe(unsigned long sp, int ia32_frame, return sp; } -unsigned long fpu__get_fpstate_size(void) +unsigned long __init fpu__get_fpstate_size(void) { - unsigned long ret = xstate_sigframe_size(); + unsigned long ret = fpu_user_cfg.max_size; + + if (use_xsave()) + ret += FP_XSTATE_MAGIC2_SIZE; /* * This space is needed on (most) 32-bit kernels, or when a 32-bit @@ -510,28 +540,3 @@ unsigned long fpu__get_fpstate_size(void) return ret; } -/* - * Prepare the SW reserved portion of the fxsave memory layout, indicating - * the presence of the extended state information in the 
memory layout - * pointed by the fpstate pointer in the sigcontext. - * This will be saved when ever the FP and extended state context is - * saved on the user stack during the signal handler delivery to the user. - */ -void fpu__init_prepare_fx_sw_frame(void) -{ - int size = fpu_user_xstate_size + FP_XSTATE_MAGIC2_SIZE; - - fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; - fx_sw_reserved.extended_size = size; - fx_sw_reserved.xfeatures = xfeatures_mask_uabi(); - fx_sw_reserved.xstate_size = fpu_user_xstate_size; - - if (IS_ENABLED(CONFIG_IA32_EMULATION) || - IS_ENABLED(CONFIG_X86_32)) { - int fsave_header_size = sizeof(struct fregs_state); - - fx_sw_reserved_ia32 = fx_sw_reserved; - fx_sw_reserved_ia32.extended_size = size + fsave_header_size; - } -} - diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index c8def1b7f8fb..d28829403ed0 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -4,21 +4,33 @@ * * Author: Suresh Siddha <suresh.b.siddha@intel.com> */ +#include <linux/bitops.h> #include <linux/compat.h> #include <linux/cpu.h> #include <linux/mman.h> +#include <linux/nospec.h> #include <linux/pkeys.h> #include <linux/seq_file.h> #include <linux/proc_fs.h> +#include <linux/vmalloc.h> #include <asm/fpu/api.h> -#include <asm/fpu/internal.h> -#include <asm/fpu/signal.h> #include <asm/fpu/regset.h> -#include <asm/fpu/xstate.h> +#include <asm/fpu/signal.h> +#include <asm/fpu/xcr.h> #include <asm/tlbflush.h> -#include <asm/cpufeature.h> +#include <asm/prctl.h> +#include <asm/elf.h> + +#include "context.h" +#include "internal.h" +#include "legacy.h" +#include "xstate.h" + +#define for_each_extended_xfeature(bit, mask) \ + (bit) = FIRST_EXTENDED_XFEATURE; \ + for_each_set_bit_from(bit, (unsigned long *)&(mask), 8 * sizeof(mask)) /* * Although we spell it out in here, the Processor Trace @@ -39,29 +51,32 @@ static const char *xfeature_names[] = "Protection Keys User registers", "PASID state", "unknown xstate feature" , + "unknown xstate feature" , + "unknown xstate feature" , + "unknown xstate feature" , + "unknown xstate feature" , + "unknown xstate feature" , + "AMX Tile config" , + "AMX Tile data" , + "unknown xstate feature" , }; -static short xsave_cpuid_features[] __initdata = { - X86_FEATURE_FPU, - X86_FEATURE_XMM, - X86_FEATURE_AVX, - X86_FEATURE_MPX, - X86_FEATURE_MPX, - X86_FEATURE_AVX512F, - X86_FEATURE_AVX512F, - X86_FEATURE_AVX512F, - X86_FEATURE_INTEL_PT, - X86_FEATURE_PKU, - X86_FEATURE_ENQCMD, +static unsigned short xsave_cpuid_features[] __initdata = { + [XFEATURE_FP] = X86_FEATURE_FPU, + [XFEATURE_SSE] = X86_FEATURE_XMM, + [XFEATURE_YMM] = X86_FEATURE_AVX, + [XFEATURE_BNDREGS] = X86_FEATURE_MPX, + [XFEATURE_BNDCSR] = X86_FEATURE_MPX, + [XFEATURE_OPMASK] = X86_FEATURE_AVX512F, + [XFEATURE_ZMM_Hi256] = X86_FEATURE_AVX512F, + [XFEATURE_Hi16_ZMM] = X86_FEATURE_AVX512F, + [XFEATURE_PT_UNIMPLEMENTED_SO_FAR] = X86_FEATURE_INTEL_PT, + [XFEATURE_PKRU] = X86_FEATURE_PKU, + [XFEATURE_PASID] = X86_FEATURE_ENQCMD, + [XFEATURE_XTILE_CFG] = X86_FEATURE_AMX_TILE, + [XFEATURE_XTILE_DATA] = X86_FEATURE_AMX_TILE, }; -/* - * This represents the full set of bits that should ever be set in a kernel - * XSAVE buffer, both supervisor and user xstates. - */ -u64 xfeatures_mask_all __ro_after_init; -EXPORT_SYMBOL_GPL(xfeatures_mask_all); - static unsigned int xstate_offsets[XFEATURE_MAX] __ro_after_init = { [ 0 ... 
XFEATURE_MAX - 1] = -1}; static unsigned int xstate_sizes[XFEATURE_MAX] __ro_after_init = @@ -72,20 +87,13 @@ static unsigned int xstate_supervisor_only_offsets[XFEATURE_MAX] __ro_after_init { [ 0 ... XFEATURE_MAX - 1] = -1}; /* - * The XSAVE area of kernel can be in standard or compacted format; - * it is always in standard format for user mode. This is the user - * mode standard format size used for signal and ptrace frames. - */ -unsigned int fpu_user_xstate_size __ro_after_init; - -/* * Return whether the system supports a given xfeature. * * Also return the name of the (most advanced) feature that the caller requested: */ int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name) { - u64 xfeatures_missing = xfeatures_needed & ~xfeatures_mask_all; + u64 xfeatures_missing = xfeatures_needed & ~fpu_kernel_cfg.max_features; if (unlikely(feature_name)) { long xfeature_idx, max_idx; @@ -135,17 +143,26 @@ static bool xfeature_is_supervisor(int xfeature_nr) */ void fpu__init_cpu_xstate(void) { - if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask_all) + if (!boot_cpu_has(X86_FEATURE_XSAVE) || !fpu_kernel_cfg.max_features) return; cr4_set_bits(X86_CR4_OSXSAVE); /* + * Must happen after CR4 setup and before xsetbv() to allow KVM + * lazy passthrough. Write independent of the dynamic state static + * key as that does not work on the boot CPU. This also ensures + * that any stale state is wiped out from XFD. + */ + if (cpu_feature_enabled(X86_FEATURE_XFD)) + wrmsrl(MSR_IA32_XFD, init_fpstate.xfd); + + /* * XCR_XFEATURE_ENABLED_MASK (aka. XCR0) sets user features * managed by XSAVE{C, OPT, S} and XRSTOR{S}. Only XSAVE user * states can be set here. */ - xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask_uabi()); + xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features); /* * MSR_IA32_XSS sets supervisor states managed by XSAVES. 
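The xfd_update_state() helper used throughout lives in the new xstate.h, whose hunk is not shown; in sketch form it caches the last written MSR_IA32_XFD value in the per-CPU xfd_state variable declared in core.c, which is why any stale XFD state has to be wiped here on the boot CPU before the static key works.

static inline void example_xfd_update_state(struct fpstate *fpstate)
{
	if (fpu_state_size_dynamic()) {
		u64 xfd = fpstate->xfd;

		/* Avoid the WRMSR when the per-CPU shadow already matches */
		if (__this_cpu_read(xfd_state) != xfd) {
			wrmsrl(MSR_IA32_XFD, xfd);
			__this_cpu_write(xfd_state, xfd);
		}
	}
}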
@@ -158,7 +175,7 @@ void fpu__init_cpu_xstate(void) static bool xfeature_enabled(enum xfeature xfeature) { - return xfeatures_mask_all & BIT_ULL(xfeature); + return fpu_kernel_cfg.max_features & BIT_ULL(xfeature); } /* @@ -184,10 +201,7 @@ static void __init setup_xstate_features(void) xstate_sizes[XFEATURE_SSE] = sizeof_field(struct fxregs_state, xmm_space); - for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { - if (!xfeature_enabled(i)) - continue; - + for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) { cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx); xstate_sizes[i] = eax; @@ -236,6 +250,8 @@ static void __init print_xstate_features(void) print_xstate_feature(XFEATURE_MASK_Hi16_ZMM); print_xstate_feature(XFEATURE_MASK_PKRU); print_xstate_feature(XFEATURE_MASK_PASID); + print_xstate_feature(XFEATURE_MASK_XTILE_CFG); + print_xstate_feature(XFEATURE_MASK_XTILE_DATA); } /* @@ -291,20 +307,15 @@ static void __init setup_xstate_comp_offsets(void) xstate_comp_offsets[XFEATURE_SSE] = offsetof(struct fxregs_state, xmm_space); - if (!boot_cpu_has(X86_FEATURE_XSAVES)) { - for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { - if (xfeature_enabled(i)) - xstate_comp_offsets[i] = xstate_offsets[i]; - } + if (!cpu_feature_enabled(X86_FEATURE_XSAVES)) { + for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) + xstate_comp_offsets[i] = xstate_offsets[i]; return; } next_offset = FXSAVE_SIZE + XSAVE_HDR_SIZE; - for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { - if (!xfeature_enabled(i)) - continue; - + for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) { if (xfeature_is_aligned(i)) next_offset = ALIGN(next_offset, 64); @@ -328,8 +339,8 @@ static void __init setup_supervisor_only_offsets(void) next_offset = FXSAVE_SIZE + XSAVE_HDR_SIZE; - for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { - if (!xfeature_enabled(i) || !xfeature_is_supervisor(i)) + for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) { + if (!xfeature_is_supervisor(i)) continue; if (xfeature_is_aligned(i)) @@ -347,15 +358,36 @@ static void __init print_xstate_offset_size(void) { int i; - for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { - if (!xfeature_enabled(i)) - continue; + for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) { pr_info("x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n", i, xstate_comp_offsets[i], i, xstate_sizes[i]); } } /* + * This function is called only during boot time when x86 caps are not set + * up and alternative can not be used yet. + */ +static __init void os_xrstor_booting(struct xregs_state *xstate) +{ + u64 mask = fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSTATE; + u32 lmask = mask; + u32 hmask = mask >> 32; + int err; + + if (cpu_feature_enabled(X86_FEATURE_XSAVES)) + XSTATE_OP(XRSTORS, xstate, lmask, hmask, err); + else + XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); + + /* + * We should never fault when copying from a kernel buffer, and the FPU + * state we set at boot time should be valid. + */ + WARN_ON_FPU(err); +} + +/* * All supported features have either init state all zeros or are * handled in setup_init_fpu() individually. 
This is an explicit * feature list and does not use XFEATURE_MASK*SUPPORTED to catch @@ -372,36 +404,30 @@ static void __init print_xstate_offset_size(void) XFEATURE_MASK_PKRU | \ XFEATURE_MASK_BNDREGS | \ XFEATURE_MASK_BNDCSR | \ - XFEATURE_MASK_PASID) + XFEATURE_MASK_PASID | \ + XFEATURE_MASK_XTILE) /* * setup the xstate image representing the init state */ static void __init setup_init_fpu_buf(void) { - static int on_boot_cpu __initdata = 1; - BUILD_BUG_ON((XFEATURE_MASK_USER_SUPPORTED | XFEATURE_MASK_SUPERVISOR_SUPPORTED) != XFEATURES_INIT_FPSTATE_HANDLED); - WARN_ON_FPU(!on_boot_cpu); - on_boot_cpu = 0; - if (!boot_cpu_has(X86_FEATURE_XSAVE)) return; setup_xstate_features(); print_xstate_features(); - if (boot_cpu_has(X86_FEATURE_XSAVES)) - init_fpstate.xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | - xfeatures_mask_all; + xstate_init_xcomp_bv(&init_fpstate.regs.xsave, fpu_kernel_cfg.max_features); /* * Init all the features state with header.xfeatures being 0x0 */ - os_xrstor_booting(&init_fpstate.xsave); + os_xrstor_booting(&init_fpstate.regs.xsave); /* * All components are now in init state. Read the state back so @@ -419,7 +445,7 @@ static void __init setup_init_fpu_buf(void) * state is all zeroes or if not to add the necessary handling * here. */ - fxsave(&init_fpstate.fxsave); + fxsave(&init_fpstate.regs.fxsave); } static int xfeature_uncompacted_offset(int xfeature_nr) @@ -451,10 +477,11 @@ int xfeature_size(int xfeature_nr) } /* Validate an xstate header supplied by userspace (ptrace or sigreturn) */ -static int validate_user_xstate_header(const struct xstate_header *hdr) +static int validate_user_xstate_header(const struct xstate_header *hdr, + struct fpstate *fpstate) { /* No unknown or supervisor features may be set */ - if (hdr->xfeatures & ~xfeatures_mask_uabi()) + if (hdr->xfeatures & ~fpstate->user_xfeatures) return -EINVAL; /* Userspace must use the uncompacted format */ @@ -474,7 +501,7 @@ static int validate_user_xstate_header(const struct xstate_header *hdr) return 0; } -static void __xstate_dump_leaves(void) +static void __init __xstate_dump_leaves(void) { int i; u32 eax, ebx, ecx, edx; @@ -509,12 +536,73 @@ static void __xstate_dump_leaves(void) } \ } while (0) +/** + * check_xtile_data_against_struct - Check tile data state size. + * + * Calculate the state size by multiplying the single tile size which is + * recorded in a C struct, and the number of tiles that the CPU informs. + * Compare the provided size with the calculation. + * + * @size: The tile data state size + * + * Returns: 0 on success, -EINVAL on mismatch. + */ +static int __init check_xtile_data_against_struct(int size) +{ + u32 max_palid, palid, state_size; + u32 eax, ebx, ecx, edx; + u16 max_tile; + + /* + * Check the maximum palette id: + * eax: the highest numbered palette subleaf. + */ + cpuid_count(TILE_CPUID, 0, &max_palid, &ebx, &ecx, &edx); + + /* + * Cross-check each tile size and find the maximum number of + * supported tiles. 
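The palette walk that follows queries CPUID leaf 0x1D (TILE_CPUID). The same enumeration can be reproduced from userspace on an AMX-capable CPU; a hypothetical sketch (subleaf field layout per the SDM, printed values are informational only):

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx, max_palid, palid;

	/* Subleaf 0: EAX = highest numbered palette subleaf */
	__cpuid_count(0x1d, 0, max_palid, ebx, ecx, edx);

	for (palid = 1; palid <= max_palid; palid++) {
		__cpuid_count(0x1d, palid, eax, ebx, ecx, edx);
		/* EAX[15:0] total bytes, EAX[31:16] bytes per tile,
		 * EBX[31:16] max names (tiles), ECX[15:0] max rows */
		printf("palette %u: total %u, per-tile %u, tiles %u, rows %u\n",
		       palid, eax & 0xffff, eax >> 16, ebx >> 16, ecx & 0xffff);
	}
	return 0;
}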
+ */ + for (palid = 1, max_tile = 0; palid <= max_palid; palid++) { + u16 tile_size, max; + + /* + * Check the tile size info: + * eax[31:16]: bytes per tile + * ebx[31:16]: the max names (or max number of tiles) + */ + cpuid_count(TILE_CPUID, palid, &eax, &ebx, &ecx, &edx); + tile_size = eax >> 16; + max = ebx >> 16; + + if (tile_size != sizeof(struct xtile_data)) { + pr_err("%s: struct is %zu bytes, cpu xtile %d bytes\n", + __stringify(XFEATURE_XTILE_DATA), + sizeof(struct xtile_data), tile_size); + __xstate_dump_leaves(); + return -EINVAL; + } + + if (max > max_tile) + max_tile = max; + } + + state_size = sizeof(struct xtile_data) * max_tile; + if (size != state_size) { + pr_err("%s: calculated size is %u bytes, cpu state %d bytes\n", + __stringify(XFEATURE_XTILE_DATA), state_size, size); + __xstate_dump_leaves(); + return -EINVAL; + } + return 0; +} + /* * We have a C struct for each 'xstate'. We need to ensure * that our software representation matches what the CPU * tells us about the state's size. */ -static void check_xstate_against_struct(int nr) +static bool __init check_xstate_against_struct(int nr) { /* * Ask the CPU for the size of the state. @@ -532,6 +620,11 @@ static void check_xstate_against_struct(int nr) XCHECK_SZ(sz, nr, XFEATURE_Hi16_ZMM, struct avx_512_hi16_state); XCHECK_SZ(sz, nr, XFEATURE_PKRU, struct pkru_state); XCHECK_SZ(sz, nr, XFEATURE_PASID, struct ia32_pasid_state); + XCHECK_SZ(sz, nr, XFEATURE_XTILE_CFG, struct xtile_cfg); + + /* The tile data size varies between implementations. */ + if (nr == XFEATURE_XTILE_DATA) + check_xtile_data_against_struct(sz); /* * Make *SURE* to add any feature numbers in below if @@ -541,10 +634,39 @@ static void check_xstate_against_struct(int nr) if ((nr < XFEATURE_YMM) || (nr >= XFEATURE_MAX) || (nr == XFEATURE_PT_UNIMPLEMENTED_SO_FAR) || - ((nr >= XFEATURE_RSRVD_COMP_11) && (nr <= XFEATURE_LBR))) { + ((nr >= XFEATURE_RSRVD_COMP_11) && (nr <= XFEATURE_RSRVD_COMP_16))) { WARN_ONCE(1, "no structure for xstate: %d\n", nr); XSTATE_WARN_ON(1); + return false; + } + return true; +} + +static unsigned int xstate_calculate_size(u64 xfeatures, bool compacted) +{ + unsigned int size = FXSAVE_SIZE + XSAVE_HDR_SIZE; + int i; + + for_each_extended_xfeature(i, xfeatures) { + /* Align from the end of the previous feature */ + if (xfeature_is_aligned(i)) + size = ALIGN(size, 64); + /* + * In compacted format the enabled features are packed, + * i.e. disabled features do not occupy space. + * + * In non-compacted format the offsets are fixed and + * disabled states still occupy space in the memory buffer. + */ + if (!compacted) + size = xfeature_uncompacted_offset(i); + /* + * Add the feature size even for non-compacted format + * to make the end result correct + */ + size += xfeature_size(i); } + return size; } /* @@ -556,44 +678,29 @@ static void check_xstate_against_struct(int nr) * covered by these checks. Only the size of the buffer for task->fpu * is checked here.
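A standalone rendering of the xstate_calculate_size() rules, with illustrative offsets and sizes (AVX 576/256, XTILE_CFG 2752/64, XTILE_DATA 2816/8192, as reported on one AMX implementation; real values come from CPUID). With these three extended features it prints 9088 for the compacted and 11008 for the standard format:

#include <stdbool.h>
#include <stdio.h>

#define ALIGN64(x)	(((x) + 63) & ~63u)

struct xfeat { unsigned int size, uncompacted_off; bool aligned; };

static unsigned int calc_size(const struct xfeat *f, int n, bool compacted)
{
	unsigned int size = 512 + 64;	/* FXSAVE_SIZE + XSAVE_HDR_SIZE */
	int i;

	for (i = 0; i < n; i++) {
		if (f[i].aligned)
			size = ALIGN64(size);	/* pack on a 64-byte boundary */
		if (!compacted)
			size = f[i].uncompacted_off; /* fixed CPUID offsets */
		size += f[i].size;
	}
	return size;
}

int main(void)
{
	struct xfeat f[] = {
		{ 256, 576, false },	/* AVX		*/
		{ 64, 2752, false },	/* XTILE_CFG	*/
		{ 8192, 2816, true },	/* XTILE_DATA	*/
	};

	printf("compacted: %u, standard: %u\n",
	       calc_size(f, 3, true), calc_size(f, 3, false));
	return 0;
}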
*/ -static void do_extra_xstate_size_checks(void) +static bool __init paranoid_xstate_size_valid(unsigned int kernel_size) { - int paranoid_xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE; + bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES); + unsigned int size = FXSAVE_SIZE + XSAVE_HDR_SIZE; int i; - for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { - if (!xfeature_enabled(i)) - continue; - - check_xstate_against_struct(i); + for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) { + if (!check_xstate_against_struct(i)) + return false; /* * Supervisor state components can be managed only by * XSAVES. */ - if (!cpu_feature_enabled(X86_FEATURE_XSAVES)) - XSTATE_WARN_ON(xfeature_is_supervisor(i)); - - /* Align from the end of the previous feature */ - if (xfeature_is_aligned(i)) - paranoid_xstate_size = ALIGN(paranoid_xstate_size, 64); - /* - * The offset of a given state in the non-compacted - * format is given to us in a CPUID leaf. We check - * them for being ordered (increasing offsets) in - * setup_xstate_features(). XSAVES uses compacted format. - */ - if (!cpu_feature_enabled(X86_FEATURE_XSAVES)) - paranoid_xstate_size = xfeature_uncompacted_offset(i); - /* - * The compacted-format offset always depends on where - * the previous state ended. - */ - paranoid_xstate_size += xfeature_size(i); + if (!compacted && xfeature_is_supervisor(i)) { + XSTATE_WARN_ON(1); + return false; + } } - XSTATE_WARN_ON(paranoid_xstate_size != fpu_kernel_xstate_size); + size = xstate_calculate_size(fpu_kernel_cfg.max_features, compacted); + XSTATE_WARN_ON(size != kernel_size); + return size == kernel_size; } - /* * Get total size of enabled xstates in XCR0 | IA32_XSS. * @@ -644,7 +751,7 @@ static unsigned int __init get_xsaves_size_no_independent(void) return size; } -static unsigned int __init get_xsave_size(void) +static unsigned int __init get_xsave_size_user(void) { unsigned int eax, ebx, ecx, edx; /* @@ -662,44 +769,54 @@ static unsigned int __init get_xsave_size(void) * Will the runtime-enumerated 'xstate_size' fit in the init * task's statically-allocated buffer? */ -static bool is_supported_xstate_size(unsigned int test_xstate_size) +static bool __init is_supported_xstate_size(unsigned int test_xstate_size) { - if (test_xstate_size <= sizeof(union fpregs_state)) + if (test_xstate_size <= sizeof(init_fpstate.regs)) return true; pr_warn("x86/fpu: xstate buffer too small (%zu < %d), disabling xsave\n", - sizeof(union fpregs_state), test_xstate_size); + sizeof(init_fpstate.regs), test_xstate_size); return false; } static int __init init_xstate_size(void) { /* Recompute the context size for enabled features: */ - unsigned int possible_xstate_size; - unsigned int xsave_size; + unsigned int user_size, kernel_size, kernel_default_size; + bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES); - xsave_size = get_xsave_size(); + /* Uncompacted user space size */ + user_size = get_xsave_size_user(); - if (boot_cpu_has(X86_FEATURE_XSAVES)) - possible_xstate_size = get_xsaves_size_no_independent(); + /* + * XSAVES kernel size includes supervisor states and + * uses compacted format when available. + * + * XSAVE does not support supervisor states so + * kernel and user size is identical. 
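The sizing helpers above boil down to three CPUID(0xD) size fields, which can also be inspected directly; a hypothetical userspace sketch (the kernel additionally subtracts the independent features from the compacted figure):

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	__cpuid_count(0xd, 0, eax, ebx, ecx, edx);
	printf("user, enabled, standard format: %u bytes\n", ebx);
	printf("user, all supported features:   %u bytes\n", ecx);

	__cpuid_count(0xd, 1, eax, ebx, ecx, edx);
	printf("user+supervisor, compacted:     %u bytes\n", ebx);
	return 0;
}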
+ */ + if (compacted) + kernel_size = get_xsaves_size_no_independent(); else - possible_xstate_size = xsave_size; + kernel_size = user_size; + + kernel_default_size = + xstate_calculate_size(fpu_kernel_cfg.default_features, compacted); - /* Ensure we have the space to store all enabled: */ - if (!is_supported_xstate_size(possible_xstate_size)) + /* Ensure we have the space to store all default enabled features. */ + if (!is_supported_xstate_size(kernel_default_size)) return -EINVAL; - /* - * The size is OK, we are definitely going to use xsave, - * make it known to the world that we need more space. - */ - fpu_kernel_xstate_size = possible_xstate_size; - do_extra_xstate_size_checks(); + if (!paranoid_xstate_size_valid(kernel_size)) + return -EINVAL; + + fpu_kernel_cfg.max_size = kernel_size; + fpu_user_cfg.max_size = user_size; + + fpu_kernel_cfg.default_size = kernel_default_size; + fpu_user_cfg.default_size = + xstate_calculate_size(fpu_user_cfg.default_features, false); - /* - * User space is always in standard format. - */ - fpu_user_xstate_size = xsave_size; return 0; } @@ -707,28 +824,38 @@ static int __init init_xstate_size(void) * We enabled the XSAVE hardware, but something went wrong and * we cannot use it. Disable it. */ -static void fpu__init_disable_system_xstate(void) +static void __init fpu__init_disable_system_xstate(unsigned int legacy_size) { - xfeatures_mask_all = 0; + fpu_kernel_cfg.max_features = 0; cr4_clear_bits(X86_CR4_OSXSAVE); setup_clear_cpu_cap(X86_FEATURE_XSAVE); + + /* Restore the legacy size. */ + fpu_kernel_cfg.max_size = legacy_size; + fpu_kernel_cfg.default_size = legacy_size; + fpu_user_cfg.max_size = legacy_size; + fpu_user_cfg.default_size = legacy_size; + + /* + * Prevent enabling the static branch which enables writes to the + * XFD MSR. + */ + init_fpstate.xfd = 0; + + fpstate_reset(&current->thread.fpu); } /* * Enable and initialize the xsave feature. * Called once per system bootup. */ -void __init fpu__init_system_xstate(void) +void __init fpu__init_system_xstate(unsigned int legacy_size) { unsigned int eax, ebx, ecx, edx; - static int on_boot_cpu __initdata = 1; u64 xfeatures; int err; int i; - WARN_ON_FPU(!on_boot_cpu); - on_boot_cpu = 0; - if (!boot_cpu_has(X86_FEATURE_FPU)) { pr_info("x86/fpu: No FPU detected\n"); return; @@ -749,22 +876,22 @@ void __init fpu__init_system_xstate(void) * Find user xstates supported by the processor. */ cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); - xfeatures_mask_all = eax + ((u64)edx << 32); + fpu_kernel_cfg.max_features = eax + ((u64)edx << 32); /* * Find supervisor xstates supported by the processor. */ cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx); - xfeatures_mask_all |= ecx + ((u64)edx << 32); + fpu_kernel_cfg.max_features |= ecx + ((u64)edx << 32); - if ((xfeatures_mask_uabi() & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) { + if ((fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) { /* * This indicates that something really unexpected happened * with the enumeration. Disable XSAVE and try to continue * booting without it. This is too early to BUG(). */ pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n", - xfeatures_mask_all); + fpu_kernel_cfg.max_features); goto out_disable; } @@ -772,15 +899,39 @@ void __init fpu__init_system_xstate(void) * Clear XSAVE features that are disabled in the normal CPUID.
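Net result of init_xstate_size(), summarized as a sketch (the 11008 and 2816 figures are illustrative for an AMX part, consistent with the sizing example earlier; they are assumptions, not values from this patch):

/*
 *	fpu_kernel_cfg.max_size		compacted XSAVES size, all features
 *	fpu_kernel_cfg.default_size	same, minus dynamic features (AMX)
 *	fpu_user_cfg.max_size		uncompacted UABI size, e.g. 11008
 *	fpu_user_cfg.default_size	uncompacted minus dynamic, e.g. 2816
 */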
*/ for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) { - if (!boot_cpu_has(xsave_cpuid_features[i])) - xfeatures_mask_all &= ~BIT_ULL(i); + unsigned short cid = xsave_cpuid_features[i]; + + /* Careful: X86_FEATURE_FPU is 0! */ + if ((i != XFEATURE_FP && !cid) || !boot_cpu_has(cid)) + fpu_kernel_cfg.max_features &= ~BIT_ULL(i); } - xfeatures_mask_all &= XFEATURE_MASK_USER_SUPPORTED | + if (!cpu_feature_enabled(X86_FEATURE_XFD)) + fpu_kernel_cfg.max_features &= ~XFEATURE_MASK_USER_DYNAMIC; + + fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED | XFEATURE_MASK_SUPERVISOR_SUPPORTED; + fpu_user_cfg.max_features = fpu_kernel_cfg.max_features; + fpu_user_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED; + + /* Clean out dynamic features from default */ + fpu_kernel_cfg.default_features = fpu_kernel_cfg.max_features; + fpu_kernel_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC; + + fpu_user_cfg.default_features = fpu_user_cfg.max_features; + fpu_user_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC; + /* Store it for paranoia check at the end */ - xfeatures = xfeatures_mask_all; + xfeatures = fpu_kernel_cfg.max_features; + + /* + * Initialize the default XFD state in init_fpstate and enable the + * dynamic sizing mechanism if dynamic states are available. The + * static key cannot be enabled here because this runs before + * jump_label_init(). This is delayed to an initcall. + */ + init_fpstate.xfd = fpu_user_cfg.max_features & XFEATURE_MASK_USER_DYNAMIC; /* Enable xstate instructions to be able to continue with initialization: */ fpu__init_cpu_xstate(); @@ -788,13 +939,16 @@ void __init fpu__init_system_xstate(void) if (err) goto out_disable; + /* Reset the state for the current task */ + fpstate_reset(&current->thread.fpu); + /* * Update info used for ptrace frames; use standard-format size and no * supervisor xstates: */ - update_regset_xstate_info(fpu_user_xstate_size, xfeatures_mask_uabi()); + update_regset_xstate_info(fpu_user_cfg.max_size, + fpu_user_cfg.max_features); - fpu__init_prepare_fx_sw_frame(); setup_init_fpu_buf(); setup_xstate_comp_offsets(); setup_supervisor_only_offsets(); @@ -803,22 +957,22 @@ void __init fpu__init_system_xstate(void) * Paranoia check whether something in the setup modified the * xfeatures mask. */ - if (xfeatures != xfeatures_mask_all) { + if (xfeatures != fpu_kernel_cfg.max_features) { pr_err("x86/fpu: xfeatures modified from 0x%016llx to 0x%016llx during init, disabling XSAVE\n", - xfeatures, xfeatures_mask_all); + xfeatures, fpu_kernel_cfg.max_features); goto out_disable; } print_xstate_offset_size(); pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n", - xfeatures_mask_all, - fpu_kernel_xstate_size, + fpu_kernel_cfg.max_features, + fpu_kernel_cfg.max_size, boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard"); return; out_disable: /* something went wrong, try to boot without any XSAVE support */ - fpu__init_disable_system_xstate(); + fpu__init_disable_system_xstate(legacy_size); } /* @@ -830,7 +984,7 @@ void fpu__resume_cpu(void) * Restore XCR0 on xsave capable CPUs: */ if (cpu_feature_enabled(X86_FEATURE_XSAVE)) - xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask_uabi()); + xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features); /* * Restore IA32_XSS.
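The mask derivation above condenses into a small worked example; all constants are illustrative stand-ins for the XFEATURE_MASK_* definitions (PT, bit 8, is included in the fake enumeration only to show it being filtered out):

#include <stdint.h>
#include <stdio.h>

#define USER_SUPPORTED	0x602ffULL	/* FP..PKRU + XTILE_CFG/DATA */
#define SUPER_SUPPORTED	0x00400ULL	/* PASID */
#define USER_DYNAMIC	0x40000ULL	/* XTILE_DATA */

int main(void)
{
	uint64_t enumerated = 0x607ffULL;	/* pretend CPUID result */
	uint64_t kmax = enumerated & (USER_SUPPORTED | SUPER_SUPPORTED);
	uint64_t umax = kmax & USER_SUPPORTED;
	uint64_t kdef = kmax & ~USER_DYNAMIC;
	uint64_t udef = umax & ~USER_DYNAMIC;
	uint64_t xfd  = umax & USER_DYNAMIC;	/* armed in init_fpstate.xfd */

	printf("kernel max %#llx def %#llx\nuser   max %#llx def %#llx\nXFD    %#llx\n",
	       (unsigned long long)kmax, (unsigned long long)kdef,
	       (unsigned long long)umax, (unsigned long long)udef,
	       (unsigned long long)xfd);
	return 0;
}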
The same CPUID bit enumerates support @@ -840,6 +994,9 @@ void fpu__resume_cpu(void) wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | xfeatures_mask_independent()); } + + if (fpu_state_size_dynamic()) + wrmsrl(MSR_IA32_XFD, current->thread.fpu.fpstate->xfd); } /* @@ -886,7 +1043,7 @@ void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr) * We should not ever be requesting features that we * have not enabled. */ - WARN_ONCE(!(xfeatures_mask_all & BIT_ULL(xfeature_nr)), + WARN_ONCE(!(fpu_kernel_cfg.max_features & BIT_ULL(xfeature_nr)), "get of unsupported state"); /* * This assumes the last 'xsave*' instruction to @@ -904,7 +1061,6 @@ void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr) return __raw_xsave_addr(xsave, xfeature_nr); } -EXPORT_SYMBOL_GPL(get_xsave_addr); #ifdef CONFIG_ARCH_HAS_PKEYS @@ -961,9 +1117,10 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate, } /** - * copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer + * __copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer * @to: membuf descriptor - * @tsk: The task from which to copy the saved xstate + * @fpstate: The fpstate buffer from which to copy + * @pkru_val: The PKRU value to store in the PKRU component * @copy_mode: The requested copy mode * * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming @@ -972,14 +1129,15 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate, * * It supports partial copy but @to.pos always starts from zero. */ -void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, - enum xstate_copy_mode copy_mode) +void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, + u32 pkru_val, enum xstate_copy_mode copy_mode) { const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr); - struct xregs_state *xsave = &tsk->thread.fpu.state.xsave; - struct xregs_state *xinit = &init_fpstate.xsave; + struct xregs_state *xinit = &init_fpstate.regs.xsave; + struct xregs_state *xsave = &fpstate->regs.xsave; struct xstate_header header; unsigned int zerofrom; + u64 mask; int i; memset(&header, 0, sizeof(header)); @@ -996,7 +1154,7 @@ void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, break; case XSTATE_COPY_XSAVE: - header.xfeatures &= xfeatures_mask_uabi(); + header.xfeatures &= fpstate->user_xfeatures; break; } @@ -1033,17 +1191,15 @@ void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, zerofrom = offsetof(struct xregs_state, extended_state_area); - for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { - /* - * The ptrace buffer is in non-compacted XSAVE format. - * In non-compacted format disabled features still occupy - * state space, but there is no state to copy from in the - * compacted init_fpstate. The gap tracking will zero this - * later. - */ - if (!(xfeatures_mask_uabi() & BIT_ULL(i))) - continue; + /* + * The ptrace buffer is in non-compacted XSAVE format. In + * non-compacted format disabled features still occupy state space, + * but there is no state to copy from in the compacted + * init_fpstate. The gap tracking will zero these states. + */ + mask = fpstate->user_xfeatures; + for_each_extended_xfeature(i, mask) { /* * If there was a feature or alignment gap, zero the space * in the destination buffer. 
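The gap handling referenced above uses the membuf helpers from include/linux/regset.h. A sketch (not from the patch) of the copy-or-zero step for one extended feature, built on the xstate.c internals __raw_xsave_addr(), xstate_offsets[] and xstate_sizes[]:

static void copy_one_uabi_feature(struct membuf *to, unsigned int *zerofrom,
				  struct xregs_state *xsave, u64 present, int i)
{
	unsigned int offset = xstate_offsets[i];

	/* Fill any hole left by a disabled or aligned feature */
	if (*zerofrom < offset)
		membuf_zero(to, offset - *zerofrom);

	if (present & BIT_ULL(i))
		membuf_write(to, __raw_xsave_addr(xsave, i), xstate_sizes[i]);
	else	/* not in the saved image: substitute init content */
		membuf_zero(to, xstate_sizes[i]);

	*zerofrom = offset + xstate_sizes[i];
}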
@@ -1055,10 +1211,9 @@ void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, struct pkru_state pkru = {0}; /* * PKRU is not necessarily up to date in the - * thread's XSAVE buffer. Fill this part from the - * per-thread storage. + * XSAVE buffer. Use the provided value. */ - pkru.pkru = tsk->thread.pkru; + pkru.pkru = pkru_val; membuf_write(&to, &pkru, sizeof(pkru)); } else { copy_feature(header.xfeatures & BIT_ULL(i), &to, @@ -1078,6 +1233,25 @@ out: membuf_zero(&to, to.left); } +/** + * copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer + * @to: membuf descriptor + * @tsk: The task from which to copy the saved xstate + * @copy_mode: The requested copy mode + * + * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming + * format, i.e. from the kernel internal hardware dependent storage format + * to the requested @mode. UABI XSTATE is always uncompacted! + * + * It supports partial copy but @to.pos always starts from zero. + */ +void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, + enum xstate_copy_mode copy_mode) +{ + __copy_xstate_to_uabi_buf(to, tsk->thread.fpu.fpstate, + tsk->thread.pkru, copy_mode); +} + static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size, const void *kbuf, const void __user *ubuf) { @@ -1091,9 +1265,10 @@ static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size, } -static int copy_uabi_to_xstate(struct xregs_state *xsave, const void *kbuf, +static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf, const void __user *ubuf) { + struct xregs_state *xsave = &fpstate->regs.xsave; unsigned int offset, size; struct xstate_header hdr; u64 mask; @@ -1103,7 +1278,7 @@ static int copy_uabi_to_xstate(struct xregs_state *xsave, const void *kbuf, if (copy_from_buffer(&hdr, offset, sizeof(hdr), kbuf, ubuf)) return -EFAULT; - if (validate_user_xstate_header(&hdr)) + if (validate_user_xstate_header(&hdr, fpstate)) return -EINVAL; /* Validate MXCSR when any of the related features is in use */ @@ -1156,12 +1331,11 @@ static int copy_uabi_to_xstate(struct xregs_state *xsave, const void *kbuf, /* * Convert from a ptrace standard-format kernel buffer to kernel XSAVE[S] - * format and copy to the target thread. This is called from - * xstateregs_set(). + * format and copy to the target thread. Used by ptrace and KVM. */ -int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf) +int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf) { - return copy_uabi_to_xstate(xsave, kbuf, NULL); + return copy_uabi_to_xstate(fpstate, kbuf, NULL); } /* @@ -1169,26 +1343,20 @@ int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf) * XSAVE[S] format and copy to the target thread. This is called from the * sigreturn() and rt_sigreturn() system calls. */ -int copy_sigframe_from_user_to_xstate(struct xregs_state *xsave, +int copy_sigframe_from_user_to_xstate(struct fpstate *fpstate, const void __user *ubuf) { - return copy_uabi_to_xstate(xsave, NULL, ubuf); + return copy_uabi_to_xstate(fpstate, NULL, ubuf); } -static bool validate_xsaves_xrstors(u64 mask) +static bool validate_independent_components(u64 mask) { u64 xchk; if (WARN_ON_FPU(!cpu_feature_enabled(X86_FEATURE_XSAVES))) return false; - /* - * Validate that this is either a task->fpstate related component - * subset or an independent one. 
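copy_from_buffer()'s body falls outside the hunks shown; it plausibly looks like this (sketch reconstructing the kbuf/ubuf dual-source convention used by the ptrace and sigreturn callers):

static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size,
			    const void *kbuf, const void __user *ubuf)
{
	if (kbuf) {
		memcpy(dst, kbuf + offset, size);		/* ptrace path */
	} else if (copy_from_user(dst, ubuf + offset, size)) {	/* sigreturn path */
		return -EFAULT;
	}
	return 0;
}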
- */ - if (mask & xfeatures_mask_independent()) - xchk = ~xfeatures_mask_independent(); - else - xchk = ~xfeatures_mask_all; + + xchk = ~xfeatures_mask_independent(); if (WARN_ON_ONCE(!mask || mask & xchk)) return false; @@ -1206,14 +1374,13 @@ static bool validate_xsaves_xrstors(u64 mask) * buffer should be zeroed otherwise a consecutive XRSTORS from that buffer * can #GP. * - * The feature mask must either be a subset of the independent features or - * a subset of the task->fpstate related features. + * The feature mask must be a subset of the independent features. */ void xsaves(struct xregs_state *xstate, u64 mask) { int err; - if (!validate_xsaves_xrstors(mask)) + if (!validate_independent_components(mask)) return; XSTATE_OP(XSAVES, xstate, (u32)mask, (u32)(mask >> 32), err); @@ -1231,20 +1398,379 @@ void xsaves(struct xregs_state *xstate, u64 mask) * Proper usage is to restore the state which was saved with * xsaves() into @xstate. * - * The feature mask must either be a subset of the independent features or - * a subset of the task->fpstate related features. + * The feature mask must be a subset of the independent features. */ void xrstors(struct xregs_state *xstate, u64 mask) { int err; - if (!validate_xsaves_xrstors(mask)) + if (!validate_independent_components(mask)) return; XSTATE_OP(XRSTORS, xstate, (u32)mask, (u32)(mask >> 32), err); WARN_ON_ONCE(err); } +#if IS_ENABLED(CONFIG_KVM) +void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature) +{ + void *addr = get_xsave_addr(&fps->regs.xsave, xfeature); + + if (addr) + memset(addr, 0, xstate_sizes[xfeature]); +} +EXPORT_SYMBOL_GPL(fpstate_clear_xstate_component); +#endif + +#ifdef CONFIG_X86_64 + +#ifdef CONFIG_X86_DEBUG_FPU +/* + * Ensure that a subsequent XSAVE* or XRSTOR* instruction with RFBM=@mask + * can safely operate on the @fpstate buffer. + */ +static bool xstate_op_valid(struct fpstate *fpstate, u64 mask, bool rstor) +{ + u64 xfd = __this_cpu_read(xfd_state); + + if (fpstate->xfd == xfd) + return true; + + /* + * The XFD MSR does not match fpstate->xfd. That's invalid when + * the passed in fpstate is current's fpstate. + */ + if (fpstate->xfd == current->thread.fpu.fpstate->xfd) + return false; + + /* + * XRSTOR(S) from init_fpstate are always correct as it will just + * bring all components into init state and not read from the + * buffer. XSAVE(S) raises #PF after init. + */ + if (fpstate == &init_fpstate) + return rstor; + + /* + * XSAVE(S): clone(), fpu_swap_kvm_fpu() + * XRSTOR(S): fpu_swap_kvm_fpu() + */ + + /* + * No XSAVE/XRSTOR instructions (except XSAVE itself) touch + * the buffer area for XFD-disabled state components. + */ + mask &= ~xfd; + + /* + * Remove features which are valid in fpstate. They + * have space allocated in fpstate. + */ + mask &= ~fpstate->xfeatures; + + /* + * Any remaining state components in 'mask' might be written + * by XSAVE/XRSTOR. Fail validation if found. + */ + return !mask; +} + +void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor) +{ + WARN_ON_ONCE(!xstate_op_valid(fpstate, mask, rstor)); +} +#endif /* CONFIG_X86_DEBUG_FPU */ + +static int __init xfd_update_static_branch(void) +{ + /* + * If init_fpstate.xfd has bits set then dynamic features are + * available and the dynamic sizing must be enabled.
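With the task-fpstate subset case gone, the only legitimate xsaves()/xrstors() callers handle independent components. A sketch of the Arch LBR-style usage (the real caller lives in the perf code; the function name here is illustrative):

static void lbr_swap_state(struct xregs_state *prev, struct xregs_state *next)
{
	/* XFEATURE_MASK_LBR is independent: enabled in neither XCR0 nor XSS */
	xsaves(prev, XFEATURE_MASK_LBR);	/* save outgoing LBR stack */
	xrstors(next, XFEATURE_MASK_LBR);	/* load incoming LBR stack */
}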
+ */ + if (init_fpstate.xfd) + static_branch_enable(&__fpu_state_size_dynamic); + return 0; +} +arch_initcall(xfd_update_static_branch) + +void fpstate_free(struct fpu *fpu) +{ + if (fpu->fpstate && fpu->fpstate != &fpu->__fpstate) + vfree(fpu->fpstate); +} + +/** + * fpu_install_fpstate - Update the active fpstate in the FPU + * + * @fpu: A struct fpu * pointer + * @newfps: A struct fpstate * pointer + * + * Returns: A null pointer if the last active fpstate is the embedded + * one or the new fpstate is already installed; + * otherwise, a pointer to the old fpstate which has to + * be freed by the caller. + */ +static struct fpstate *fpu_install_fpstate(struct fpu *fpu, + struct fpstate *newfps) +{ + struct fpstate *oldfps = fpu->fpstate; + + if (fpu->fpstate == newfps) + return NULL; + + fpu->fpstate = newfps; + return oldfps != &fpu->__fpstate ? oldfps : NULL; +} + +/** + * fpstate_realloc - Reallocate struct fpstate for the requested new features + * + * @xfeatures: A bitmap of xstate features which extend the enabled features + * of that task + * @ksize: The required size for the kernel buffer + * @usize: The required size for user space buffers + * + * Note vs. vmalloc(): If the task with a vzalloc()-allocated buffer + * terminates quickly, vfree()-induced IPIs may be a concern, but tasks + * with large states are likely to live longer. + * + * Returns: 0 on success, -ENOMEM on allocation error. + */ +static int fpstate_realloc(u64 xfeatures, unsigned int ksize, + unsigned int usize) +{ + struct fpu *fpu = &current->thread.fpu; + struct fpstate *curfps, *newfps = NULL; + unsigned int fpsize; + + curfps = fpu->fpstate; + fpsize = ksize + ALIGN(offsetof(struct fpstate, regs), 64); + + newfps = vzalloc(fpsize); + if (!newfps) + return -ENOMEM; + newfps->size = ksize; + newfps->user_size = usize; + newfps->is_valloc = true; + + fpregs_lock(); + /* + * Ensure that the current state is in the registers before + * swapping fpstate as that might invalidate it due to layout + * changes. + */ + if (test_thread_flag(TIF_NEED_FPU_LOAD)) + fpregs_restore_userregs(); + + newfps->xfeatures = curfps->xfeatures | xfeatures; + newfps->user_xfeatures = curfps->user_xfeatures | xfeatures; + newfps->xfd = curfps->xfd & ~xfeatures; + + curfps = fpu_install_fpstate(fpu, newfps); + + /* Do the final updates within the locked region */ + xstate_init_xcomp_bv(&newfps->regs.xsave, newfps->xfeatures); + xfd_update_state(newfps); + + fpregs_unlock(); + + vfree(curfps); + return 0; +} + +static int validate_sigaltstack(unsigned int usize) +{ + struct task_struct *thread, *leader = current->group_leader; + unsigned long framesize = get_sigframe_size(); + + lockdep_assert_held(&current->sighand->siglock); + + /* get_sigframe_size() is based on fpu_user_cfg.max_size */ + framesize -= fpu_user_cfg.max_size; + framesize += usize; + for_each_thread(leader, thread) { + if (thread->sas_ss_size && thread->sas_ss_size < framesize) + return -ENOSPC; + } + return 0; +} + +static int __xstate_request_perm(u64 permitted, u64 requested) +{ + /* + * This deliberately does not exclude !XSAVES as we still might + * decide to optionally context switch XCR0 or talk the silicon + * vendors into extending XFD for the pre AMX states, especially + * AVX512.
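How the reallocation machinery above fits together at runtime (sketch; the #NM wiring lives in traps.c, outside this hunk):

/*
 *   user executes a TILE instruction while XFD[18] is armed
 *      -> #NM, handler reads MSR_IA32_XFD_ERR
 *         -> xfd_enable_feature(xfd_err)
 *            -> fpstate_realloc(XFEATURE_MASK_XTILE_DATA, ksize, usize)
 *               -> vzalloc() buffer, fpu_install_fpstate() swap,
 *                  newfps->xfd clears bit 18, xfd_update_state()
 *      -> faulting instruction restarts and now succeeds
 */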
+ */ + bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES); + struct fpu *fpu = &current->group_leader->thread.fpu; + unsigned int ksize, usize; + u64 mask; + int ret; + + /* Check whether fully enabled */ + if ((permitted & requested) == requested) + return 0; + + /* Calculate the resulting kernel state size */ + mask = permitted | requested; + ksize = xstate_calculate_size(mask, compacted); + + /* Calculate the resulting user state size */ + mask &= XFEATURE_MASK_USER_SUPPORTED; + usize = xstate_calculate_size(mask, false); + + ret = validate_sigaltstack(usize); + if (ret) + return ret; + + /* Pairs with the READ_ONCE() in xstate_get_group_perm() */ + WRITE_ONCE(fpu->perm.__state_perm, requested); + /* Protected by sighand lock */ + fpu->perm.__state_size = ksize; + fpu->perm.__user_state_size = usize; + return ret; +} + +/* + * Permissions array to map facilities with more than one component + */ +static const u64 xstate_prctl_req[XFEATURE_MAX] = { + [XFEATURE_XTILE_DATA] = XFEATURE_MASK_XTILE_DATA, +}; + +static int xstate_request_perm(unsigned long idx) +{ + u64 permitted, requested; + int ret; + + if (idx >= XFEATURE_MAX) + return -EINVAL; + + /* + * Look up the facility mask which can require more than + * one xstate component. + */ + idx = array_index_nospec(idx, ARRAY_SIZE(xstate_prctl_req)); + requested = xstate_prctl_req[idx]; + if (!requested) + return -EOPNOTSUPP; + + if ((fpu_user_cfg.max_features & requested) != requested) + return -EOPNOTSUPP; + + /* Lockless quick check */ + permitted = xstate_get_host_group_perm(); + if ((permitted & requested) == requested) + return 0; + + /* Protect against concurrent modifications */ + spin_lock_irq(&current->sighand->siglock); + permitted = xstate_get_host_group_perm(); + ret = __xstate_request_perm(permitted, requested); + spin_unlock_irq(&current->sighand->siglock); + return ret; +} + +int xfd_enable_feature(u64 xfd_err) +{ + u64 xfd_event = xfd_err & XFEATURE_MASK_USER_DYNAMIC; + unsigned int ksize, usize; + struct fpu *fpu; + + if (!xfd_event) { + pr_err_once("XFD: Invalid xfd error: %016llx\n", xfd_err); + return 0; + } + + /* Protect against concurrent modifications */ + spin_lock_irq(&current->sighand->siglock); + + /* If not permitted let it die */ + if ((xstate_get_host_group_perm() & xfd_event) != xfd_event) { + spin_unlock_irq(&current->sighand->siglock); + return -EPERM; + } + + fpu = &current->group_leader->thread.fpu; + ksize = fpu->perm.__state_size; + usize = fpu->perm.__user_state_size; + /* + * The feature is permitted. State size is sufficient. Dropping + * the lock is safe here even if more features are added from + * another task; the retrieved buffer sizes are valid for the + * currently requested feature(s). + */ + spin_unlock_irq(&current->sighand->siglock); + + /* + * Try to allocate a new fpstate. If that fails there is no way + * out.
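From userspace, the permission half of this machinery is driven via arch_prctl(); a hypothetical test program (the ARCH_* constants, 0x1021 through 0x1023, come from asm/prctl.h in this series):

#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#define ARCH_GET_XCOMP_SUPP	0x1021
#define ARCH_GET_XCOMP_PERM	0x1022
#define ARCH_REQ_XCOMP_PERM	0x1023
#define XFEATURE_XTILE_DATA	18

int main(void)
{
	unsigned long long supp = 0, perm = 0;

	syscall(SYS_arch_prctl, ARCH_GET_XCOMP_SUPP, &supp);
	if (!(supp & (1ULL << XFEATURE_XTILE_DATA))) {
		puts("AMX not supported");
		return 1;
	}

	/* Whole-process permission request; must precede any tile use */
	if (syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILE_DATA)) {
		perror("ARCH_REQ_XCOMP_PERM");
		return 1;
	}

	syscall(SYS_arch_prctl, ARCH_GET_XCOMP_PERM, &perm);
	printf("permitted features: %#llx\n", perm);
	return 0;
}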
+ */ + if (fpstate_realloc(xfd_event, ksize, usize)) + return -EFAULT; + return 0; +} +#else /* CONFIG_X86_64 */ +static inline int xstate_request_perm(unsigned long idx) +{ + return -EPERM; +} +#endif /* !CONFIG_X86_64 */ + +/** + * fpu_xstate_prctl - xstate permission operations + * @tsk: Redundant pointer to current + * @option: A subfunction of arch_prctl() + * @arg2: option argument + * Return: 0 if successful; otherwise, an error code + * + * Option arguments: + * + * ARCH_GET_XCOMP_SUPP: Pointer to user space u64 to store the info + * ARCH_GET_XCOMP_PERM: Pointer to user space u64 to store the info + * ARCH_REQ_XCOMP_PERM: Facility number requested + * + * For facilities which require more than one XSTATE component, the request + * must be the highest state component number related to that facility, + * e.g. for AMX which requires XFEATURE_XTILE_CFG(17) and + * XFEATURE_XTILE_DATA(18) this would be XFEATURE_XTILE_DATA(18). + */ +long fpu_xstate_prctl(struct task_struct *tsk, int option, unsigned long arg2) +{ + u64 __user *uptr = (u64 __user *)arg2; + u64 permitted, supported; + unsigned long idx = arg2; + + if (tsk != current) + return -EPERM; + + switch (option) { + case ARCH_GET_XCOMP_SUPP: + supported = fpu_user_cfg.max_features | fpu_user_cfg.legacy_features; + return put_user(supported, uptr); + + case ARCH_GET_XCOMP_PERM: + /* + * Lockless snapshot as it can also change right after + * dropping the lock. + */ + permitted = xstate_get_host_group_perm(); + permitted &= XFEATURE_MASK_USER_SUPPORTED; + return put_user(permitted, uptr); + + case ARCH_REQ_XCOMP_PERM: + if (!IS_ENABLED(CONFIG_X86_64)) + return -EOPNOTSUPP; + + return xstate_request_perm(idx); + + default: + return -EINVAL; + } } + #ifdef CONFIG_PROC_PID_ARCH_STATUS /* * Report the amount of time elapsed in milliseconds since last AVX512 diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h new file mode 100644 index 000000000000..e18210dff88c --- /dev/null +++ b/arch/x86/kernel/fpu/xstate.h @@ -0,0 +1,278 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __X86_KERNEL_FPU_XSTATE_H +#define __X86_KERNEL_FPU_XSTATE_H + +#include <asm/cpufeature.h> +#include <asm/fpu/xstate.h> + +#ifdef CONFIG_X86_64 +DECLARE_PER_CPU(u64, xfd_state); +#endif + +static inline void xstate_init_xcomp_bv(struct xregs_state *xsave, u64 mask) +{ + /* + * XRSTORS requires these bits set in xcomp_bv, or it will + * trigger #GP: + */ + if (cpu_feature_enabled(X86_FEATURE_XSAVES)) + xsave->header.xcomp_bv = mask | XCOMP_BV_COMPACTED_FORMAT; +} + +static inline u64 xstate_get_host_group_perm(void) +{ + /* Pairs with WRITE_ONCE() in xstate_request_perm() */ + return READ_ONCE(current->group_leader->thread.fpu.perm.__state_perm); +} + +enum xstate_copy_mode { + XSTATE_COPY_FP, + XSTATE_COPY_FX, + XSTATE_COPY_XSAVE, +}; + +struct membuf; +extern void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, + u32 pkru_val, enum xstate_copy_mode copy_mode); +extern void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, + enum xstate_copy_mode mode); +extern int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf); +extern int copy_sigframe_from_user_to_xstate(struct fpstate *fpstate, const void __user *ubuf); + + +extern void fpu__init_cpu_xstate(void); +extern void fpu__init_system_xstate(unsigned int legacy_size); + +extern void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr); + +static inline u64 xfeatures_mask_supervisor(void) +{ + return
fpu_kernel_cfg.max_features & XFEATURE_MASK_SUPERVISOR_SUPPORTED; +} + +static inline u64 xfeatures_mask_independent(void) +{ + if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR)) + return XFEATURE_MASK_INDEPENDENT & ~XFEATURE_MASK_LBR; + + return XFEATURE_MASK_INDEPENDENT; +} + +/* XSAVE/XRSTOR wrapper functions */ + +#ifdef CONFIG_X86_64 +#define REX_PREFIX "0x48, " +#else +#define REX_PREFIX +#endif + +/* These macros all use (%edi)/(%rdi) as the single memory argument. */ +#define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27" +#define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37" +#define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f" +#define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f" +#define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f" + +/* + * After this @err contains 0 on success or the trap number when the + * operation raises an exception. + */ +#define XSTATE_OP(op, st, lmask, hmask, err) \ + asm volatile("1:" op "\n\t" \ + "xor %[err], %[err]\n" \ + "2:\n\t" \ + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FAULT_MCE_SAFE) \ + : [err] "=a" (err) \ + : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ + : "memory") + +/* + * If XSAVES is enabled, it replaces XSAVEOPT because it supports a compact + * format and supervisor states in addition to modified optimization in + * XSAVEOPT. + * + * Otherwise, if XSAVEOPT is enabled, XSAVEOPT replaces XSAVE because XSAVEOPT + * supports modified optimization which is not supported by XSAVE. + * + * We use XSAVE as a fallback. + * + * The 661 label is defined in the ALTERNATIVE* macros as the address of the + * original instruction which gets replaced. We need to use it here as the + * address of the instruction where we might get an exception at. + */ +#define XSTATE_XSAVE(st, lmask, hmask, err) \ + asm volatile(ALTERNATIVE_2(XSAVE, \ + XSAVEOPT, X86_FEATURE_XSAVEOPT, \ + XSAVES, X86_FEATURE_XSAVES) \ + "\n" \ + "xor %[err], %[err]\n" \ + "3:\n" \ + ".pushsection .fixup,\"ax\"\n" \ + "4: movl $-2, %[err]\n" \ + "jmp 3b\n" \ + ".popsection\n" \ + _ASM_EXTABLE(661b, 4b) \ + : [err] "=r" (err) \ + : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ + : "memory") + +/* + * Use XRSTORS to restore context if it is enabled. XRSTORS supports compact + * XSAVE area format. + */ +#define XSTATE_XRESTORE(st, lmask, hmask) \ + asm volatile(ALTERNATIVE(XRSTOR, \ + XRSTORS, X86_FEATURE_XSAVES) \ + "\n" \ + "3:\n" \ + _ASM_EXTABLE_TYPE(661b, 3b, EX_TYPE_FPU_RESTORE) \ + : \ + : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ + : "memory") + +#if defined(CONFIG_X86_64) && defined(CONFIG_X86_DEBUG_FPU) +extern void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor); +#else +static inline void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor) { } +#endif + +#ifdef CONFIG_X86_64 +static inline void xfd_update_state(struct fpstate *fpstate) +{ + if (fpu_state_size_dynamic()) { + u64 xfd = fpstate->xfd; + + if (__this_cpu_read(xfd_state) != xfd) { + wrmsrl(MSR_IA32_XFD, xfd); + __this_cpu_write(xfd_state, xfd); + } + } +} +#else +static inline void xfd_update_state(struct fpstate *fpstate) { } +#endif + +/* + * Save processor xstate to xsave area. + * + * Uses either XSAVE or XSAVEOPT or XSAVES depending on the CPU features + * and command line options. The choice is permanent until the next reboot. 
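Decoding notes for the hand-assembled opcodes above (informational, per the instruction set reference):

/*
 * 0x0f,0xae is the XSAVE/XRSTOR opcode group and 0x0f,0xc7 the
 * XSAVES/XRSTORS group; the ModRM byte selects the group member with
 * (%rdi) as the memory operand: 0x27 = /4 XSAVE, 0x37 = /6 XSAVEOPT,
 * 0x2f = /5 (XRSTOR in the 0xae group, XSAVES in the 0xc7 group),
 * 0x1f = /3 XRSTORS. REX_PREFIX (0x48) selects the 64-bit forms on
 * x86-64.
 */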
+ */ +static inline void os_xsave(struct fpstate *fpstate) +{ + u64 mask = fpstate->xfeatures; + u32 lmask = mask; + u32 hmask = mask >> 32; + int err; + + WARN_ON_FPU(!alternatives_patched); + xfd_validate_state(fpstate, mask, false); + + XSTATE_XSAVE(&fpstate->regs.xsave, lmask, hmask, err); + + /* We should never fault when copying to a kernel buffer: */ + WARN_ON_FPU(err); +} + +/* + * Restore processor xstate from xsave area. + * + * Uses XRSTORS when XSAVES is used, XRSTOR otherwise. + */ +static inline void os_xrstor(struct fpstate *fpstate, u64 mask) +{ + u32 lmask = mask; + u32 hmask = mask >> 32; + + xfd_validate_state(fpstate, mask, true); + XSTATE_XRESTORE(&fpstate->regs.xsave, lmask, hmask); +} + +/* Restore of supervisor state. Does not require XFD */ +static inline void os_xrstor_supervisor(struct fpstate *fpstate) +{ + u64 mask = xfeatures_mask_supervisor(); + u32 lmask = mask; + u32 hmask = mask >> 32; + + XSTATE_XRESTORE(&fpstate->regs.xsave, lmask, hmask); +} + +/* + * Save xstate to user space xsave area. + * + * We don't use modified optimization because xrstor/xrstors might track + * a different application. + * + * We don't use compacted format xsave area for backward compatibility for + * old applications which don't understand the compacted format of the + * xsave area. + * + * The caller has to zero buf::header before calling this because XSAVE* + * does not touch the reserved fields in the header. + */ +static inline int xsave_to_user_sigframe(struct xregs_state __user *buf) +{ + /* + * Include the features which are not xsaved/rstored by the kernel + * internally, e.g. PKRU. That's user space ABI and also required + * to allow the signal handler to modify PKRU. + */ + struct fpstate *fpstate = current->thread.fpu.fpstate; + u64 mask = fpstate->user_xfeatures; + u32 lmask = mask; + u32 hmask = mask >> 32; + int err; + + xfd_validate_state(fpstate, mask, false); + + stac(); + XSTATE_OP(XSAVE, buf, lmask, hmask, err); + clac(); + + return err; +} + +/* + * Restore xstate from user space xsave area. + */ +static inline int xrstor_from_user_sigframe(struct xregs_state __user *buf, u64 mask) +{ + struct xregs_state *xstate = ((__force struct xregs_state *)buf); + u32 lmask = mask; + u32 hmask = mask >> 32; + int err; + + xfd_validate_state(current->thread.fpu.fpstate, mask, true); + + stac(); + XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); + clac(); + + return err; +} + +/* + * Restore xstate from kernel space xsave area, return an error code instead of + * an exception. + */ +static inline int os_xrstor_safe(struct fpstate *fpstate, u64 mask) +{ + struct xregs_state *xstate = &fpstate->regs.xsave; + u32 lmask = mask; + u32 hmask = mask >> 32; + int err; + + /* Ensure that XFD is up to date */ + xfd_update_state(fpstate); + + if (cpu_feature_enabled(X86_FEATURE_XSAVES)) + XSTATE_OP(XRSTORS, xstate, lmask, hmask, err); + else + XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); + + return err; +} + + +#endif
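A sketch of the kind of caller os_xrstor_safe() serves (the helper name is hypothetical; the real users are the KVM FPU swap and PM resume paths): a rejected image yields an error to recover from rather than a fault:

static inline int restore_saved_fpstate(struct fpstate *fps)
{
	int err = os_xrstor_safe(fps, fps->xfeatures);

	/* Fall back to a clean init state on a bad image */
	if (err)
		os_xrstor(&init_fpstate, XFEATURE_MASK_FPSTATE);
	return err;
}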