From 5a83d60c074ddf4f6364be25654a643d0e941824 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 31 Oct 2016 15:18:44 -0700 Subject: x86/fpu: Remove irq_ts_save() and irq_ts_restore() Now that lazy FPU is gone, we don't use CR0.TS (except possibly in KVM guest mode). Remove irq_ts_save(), irq_ts_restore(), and all of their callers. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Rik van Riel Cc: Rusty Russell Cc: Thomas Gleixner Cc: kvm list Link: http://lkml.kernel.org/r/70b9b9e7ba70659bedcb08aba63d0f9214f338f2.1477951965.git.luto@kernel.org Signed-off-by: Ingo Molnar --- drivers/char/hw_random/via-rng.c | 8 ++------ drivers/crypto/padlock-aes.c | 23 ++--------------------- drivers/crypto/padlock-sha.c | 18 ------------------ 3 files changed, 4 insertions(+), 45 deletions(-) (limited to 'drivers') diff --git a/drivers/char/hw_random/via-rng.c b/drivers/char/hw_random/via-rng.c index 44ce80606944..d1f5bb534e0e 100644 --- a/drivers/char/hw_random/via-rng.c +++ b/drivers/char/hw_random/via-rng.c @@ -70,21 +70,17 @@ enum { * until we have 4 bytes, thus returning a u32 at a time, * instead of the current u8-at-a-time. * - * Padlock instructions can generate a spurious DNA fault, so - * we have to call them in the context of irq_ts_save/restore() + * Padlock instructions can generate a spurious DNA fault, but the + * kernel doesn't use CR0.TS, so this doesn't matter. */ static inline u32 xstore(u32 *addr, u32 edx_in) { u32 eax_out; - int ts_state; - - ts_state = irq_ts_save(); asm(".byte 0x0F,0xA7,0xC0 /* xstore %%edi (addr=%0) */" : "=m" (*addr), "=a" (eax_out), "+d" (edx_in), "+D" (addr)); - irq_ts_restore(ts_state); return eax_out; } diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index 441e86b23571..b3869748cc6b 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -183,8 +183,8 @@ static inline void padlock_store_cword(struct cword *cword) /* * While the padlock instructions don't use FP/SSE registers, they - * generate a spurious DNA fault when cr0.ts is '1'. These instructions - * should be used only inside the irq_ts_save/restore() context + * generate a spurious DNA fault when CR0.TS is '1'. Fortunately, + * the kernel doesn't use CR0.TS. */ static inline void rep_xcrypt_ecb(const u8 *input, u8 *output, void *key, @@ -298,24 +298,18 @@ static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key, static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { struct aes_ctx *ctx = aes_ctx(tfm); - int ts_state; padlock_reset_key(&ctx->cword.encrypt); - ts_state = irq_ts_save(); ecb_crypt(in, out, ctx->E, &ctx->cword.encrypt, 1); - irq_ts_restore(ts_state); padlock_store_cword(&ctx->cword.encrypt); } static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { struct aes_ctx *ctx = aes_ctx(tfm); - int ts_state; padlock_reset_key(&ctx->cword.encrypt); - ts_state = irq_ts_save(); ecb_crypt(in, out, ctx->D, &ctx->cword.decrypt, 1); - irq_ts_restore(ts_state); padlock_store_cword(&ctx->cword.encrypt); } @@ -346,14 +340,12 @@ static int ecb_aes_encrypt(struct blkcipher_desc *desc, struct aes_ctx *ctx = blk_aes_ctx(desc->tfm); struct blkcipher_walk walk; int err; - int ts_state; padlock_reset_key(&ctx->cword.encrypt); blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); - ts_state = irq_ts_save(); while ((nbytes = walk.nbytes)) { padlock_xcrypt_ecb(walk.src.virt.addr, walk.dst.virt.addr, ctx->E, &ctx->cword.encrypt, @@ -361,7 +353,6 @@ static int ecb_aes_encrypt(struct blkcipher_desc *desc, nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } - irq_ts_restore(ts_state); padlock_store_cword(&ctx->cword.encrypt); @@ -375,14 +366,12 @@ static int ecb_aes_decrypt(struct blkcipher_desc *desc, struct aes_ctx *ctx = blk_aes_ctx(desc->tfm); struct blkcipher_walk walk; int err; - int ts_state; padlock_reset_key(&ctx->cword.decrypt); blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); - ts_state = irq_ts_save(); while ((nbytes = walk.nbytes)) { padlock_xcrypt_ecb(walk.src.virt.addr, walk.dst.virt.addr, ctx->D, &ctx->cword.decrypt, @@ -390,7 +379,6 @@ static int ecb_aes_decrypt(struct blkcipher_desc *desc, nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } - irq_ts_restore(ts_state); padlock_store_cword(&ctx->cword.encrypt); @@ -425,14 +413,12 @@ static int cbc_aes_encrypt(struct blkcipher_desc *desc, struct aes_ctx *ctx = blk_aes_ctx(desc->tfm); struct blkcipher_walk walk; int err; - int ts_state; padlock_reset_key(&ctx->cword.encrypt); blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); - ts_state = irq_ts_save(); while ((nbytes = walk.nbytes)) { u8 *iv = padlock_xcrypt_cbc(walk.src.virt.addr, walk.dst.virt.addr, ctx->E, @@ -442,7 +428,6 @@ static int cbc_aes_encrypt(struct blkcipher_desc *desc, nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } - irq_ts_restore(ts_state); padlock_store_cword(&ctx->cword.decrypt); @@ -456,14 +441,12 @@ static int cbc_aes_decrypt(struct blkcipher_desc *desc, struct aes_ctx *ctx = blk_aes_ctx(desc->tfm); struct blkcipher_walk walk; int err; - int ts_state; padlock_reset_key(&ctx->cword.encrypt); blkcipher_walk_init(&walk, dst, src, nbytes); err = blkcipher_walk_virt(desc, &walk); - ts_state = irq_ts_save(); while ((nbytes = walk.nbytes)) { padlock_xcrypt_cbc(walk.src.virt.addr, walk.dst.virt.addr, ctx->D, walk.iv, &ctx->cword.decrypt, @@ -472,8 +455,6 @@ static int cbc_aes_decrypt(struct blkcipher_desc *desc, err = blkcipher_walk_done(desc, &walk, nbytes); } - irq_ts_restore(ts_state); - padlock_store_cword(&ctx->cword.encrypt); return err; diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c index 8c5f90647b7a..bc72d20c32c3 100644 --- a/drivers/crypto/padlock-sha.c +++ b/drivers/crypto/padlock-sha.c @@ -89,7 +89,6 @@ static int padlock_sha1_finup(struct shash_desc *desc, const u8 *in, struct sha1_state state; unsigned int space; unsigned int leftover; - int ts_state; int err; dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP; @@ -120,14 +119,11 @@ static int padlock_sha1_finup(struct shash_desc *desc, const u8 *in, memcpy(result, &state.state, SHA1_DIGEST_SIZE); - /* prevent taking the spurious DNA fault with padlock. */ - ts_state = irq_ts_save(); asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" /* rep xsha1 */ : \ : "c"((unsigned long)state.count + count), \ "a"((unsigned long)state.count), \ "S"(in), "D"(result)); - irq_ts_restore(ts_state); padlock_output_block((uint32_t *)result, (uint32_t *)out, 5); @@ -155,7 +151,6 @@ static int padlock_sha256_finup(struct shash_desc *desc, const u8 *in, struct sha256_state state; unsigned int space; unsigned int leftover; - int ts_state; int err; dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP; @@ -186,14 +181,11 @@ static int padlock_sha256_finup(struct shash_desc *desc, const u8 *in, memcpy(result, &state.state, SHA256_DIGEST_SIZE); - /* prevent taking the spurious DNA fault with padlock. */ - ts_state = irq_ts_save(); asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" /* rep xsha256 */ : \ : "c"((unsigned long)state.count + count), \ "a"((unsigned long)state.count), \ "S"(in), "D"(result)); - irq_ts_restore(ts_state); padlock_output_block((uint32_t *)result, (uint32_t *)out, 8); @@ -312,7 +304,6 @@ static int padlock_sha1_update_nano(struct shash_desc *desc, u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__ ((aligned(STACK_ALIGN))); u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT); - int ts_state; partial = sctx->count & 0x3f; sctx->count += len; @@ -328,23 +319,19 @@ static int padlock_sha1_update_nano(struct shash_desc *desc, memcpy(sctx->buffer + partial, data, done + SHA1_BLOCK_SIZE); src = sctx->buffer; - ts_state = irq_ts_save(); asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" : "+S"(src), "+D"(dst) \ : "a"((long)-1), "c"((unsigned long)1)); - irq_ts_restore(ts_state); done += SHA1_BLOCK_SIZE; src = data + done; } /* Process the left bytes from the input data */ if (len - done >= SHA1_BLOCK_SIZE) { - ts_state = irq_ts_save(); asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" : "+S"(src), "+D"(dst) : "a"((long)-1), "c"((unsigned long)((len - done) / SHA1_BLOCK_SIZE))); - irq_ts_restore(ts_state); done += ((len - done) - (len - done) % SHA1_BLOCK_SIZE); src = data + done; } @@ -401,7 +388,6 @@ static int padlock_sha256_update_nano(struct shash_desc *desc, const u8 *data, u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__ ((aligned(STACK_ALIGN))); u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT); - int ts_state; partial = sctx->count & 0x3f; sctx->count += len; @@ -417,23 +403,19 @@ static int padlock_sha256_update_nano(struct shash_desc *desc, const u8 *data, memcpy(sctx->buf + partial, data, done + SHA256_BLOCK_SIZE); src = sctx->buf; - ts_state = irq_ts_save(); asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" : "+S"(src), "+D"(dst) : "a"((long)-1), "c"((unsigned long)1)); - irq_ts_restore(ts_state); done += SHA256_BLOCK_SIZE; src = data + done; } /* Process the left bytes from input data*/ if (len - done >= SHA256_BLOCK_SIZE) { - ts_state = irq_ts_save(); asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" : "+S"(src), "+D"(dst) : "a"((long)-1), "c"((unsigned long)((len - done) / 64))); - irq_ts_restore(ts_state); done += ((len - done) - (len - done) % 64); src = data + done; } -- cgit v1.2.3-58-ga151 From cd95ea81f25608c403052d0508ee5c9b32e2bc7d Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 31 Oct 2016 15:18:46 -0700 Subject: x86/fpu, lguest: Remove CR0.TS support Now that Linux never sets CR0.TS, lguest doesn't need to support it. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Rik van Riel Cc: Rusty Russell Cc: Thomas Gleixner Cc: kvm list Link: http://lkml.kernel.org/r/8a7bf2c11231c082258fd67705d0f275639b8475.1477951965.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/lguest_hcall.h | 1 - arch/x86/lguest/boot.c | 17 +++++++---------- drivers/lguest/hypercalls.c | 4 ---- drivers/lguest/lg.h | 1 - drivers/lguest/x86/core.c | 19 +------------------ 5 files changed, 8 insertions(+), 34 deletions(-) (limited to 'drivers') diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h index ef01fef3eebc..6c119cfae218 100644 --- a/arch/x86/include/asm/lguest_hcall.h +++ b/arch/x86/include/asm/lguest_hcall.h @@ -9,7 +9,6 @@ #define LHCALL_FLUSH_TLB 5 #define LHCALL_LOAD_IDT_ENTRY 6 #define LHCALL_SET_STACK 7 -#define LHCALL_TS 8 #define LHCALL_SET_CLOCKEVENT 9 #define LHCALL_HALT 10 #define LHCALL_SET_PMD 13 diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 25da5bc8d83d..d74afcdbc580 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -497,27 +497,24 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx, * a whole series of functions like read_cr0() and write_cr0(). * * We start with cr0. cr0 allows you to turn on and off all kinds of basic - * features, but Linux only really cares about one: the horrifically-named Task - * Switched (TS) bit at bit 3 (ie. 8) + * features, but the only cr0 bit that Linux ever used at runtime was the + * horrifically-named Task Switched (TS) bit at bit 3 (ie. 8) * * What does the TS bit do? Well, it causes the CPU to trap (interrupt 7) if * the floating point unit is used. Which allows us to restore FPU state - * lazily after a task switch, and Linux uses that gratefully, but wouldn't a - * name like "FPUTRAP bit" be a little less cryptic? + * lazily after a task switch if we wanted to, but wouldn't a name like + * "FPUTRAP bit" be a little less cryptic? * - * We store cr0 locally because the Host never changes it. The Guest sometimes - * wants to read it and we'd prefer not to bother the Host unnecessarily. + * Fortunately, Linux keeps it simple and doesn't use TS, so we can ignore + * cr0. */ -static unsigned long current_cr0; static void lguest_write_cr0(unsigned long val) { - lazy_hcall1(LHCALL_TS, val & X86_CR0_TS); - current_cr0 = val; } static unsigned long lguest_read_cr0(void) { - return current_cr0; + return 0; } /* diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c index 19a32280731d..601f81c04873 100644 --- a/drivers/lguest/hypercalls.c +++ b/drivers/lguest/hypercalls.c @@ -109,10 +109,6 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args) case LHCALL_SET_CLOCKEVENT: guest_set_clockevent(cpu, args->arg1); break; - case LHCALL_TS: - /* This sets the TS flag, as we saw used in run_guest(). */ - cpu->ts = args->arg1; - break; case LHCALL_HALT: /* Similarly, this sets the halted flag for run_guest(). */ cpu->halted = 1; diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 69b3814afd2f..2356a2318034 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -43,7 +43,6 @@ struct lg_cpu { struct mm_struct *mm; /* == tsk->mm, but that becomes NULL on exit */ u32 cr2; - int ts; u32 esp1; u16 ss1; diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index 6e9042e3d2a9..743253fc638f 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -246,14 +246,6 @@ unsigned long *lguest_arch_regptr(struct lg_cpu *cpu, size_t reg_off, bool any) */ void lguest_arch_run_guest(struct lg_cpu *cpu) { - /* - * Remember the awfully-named TS bit? If the Guest has asked to set it - * we set it now, so we can trap and pass that trap to the Guest if it - * uses the FPU. - */ - if (cpu->ts && fpregs_active()) - stts(); - /* * SYSENTER is an optimized way of doing system calls. We can't allow * it because it always jumps to privilege level 0. A normal Guest @@ -282,10 +274,6 @@ void lguest_arch_run_guest(struct lg_cpu *cpu) if (boot_cpu_has(X86_FEATURE_SEP)) wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); - /* Clear the host TS bit if it was set above. */ - if (cpu->ts && fpregs_active()) - clts(); - /* * If the Guest page faulted, then the cr2 register will tell us the * bad virtual address. We have to grab this now, because once we @@ -421,12 +409,7 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu) kill_guest(cpu, "Writing cr2"); break; case 7: /* We've intercepted a Device Not Available fault. */ - /* - * If the Guest doesn't want to know, we already restored the - * Floating Point Unit, so we just continue without telling it. - */ - if (!cpu->ts) - return; + /* No special handling is needed here. */ break; case 32 ... 255: /* This might be a syscall. */ -- cgit v1.2.3-58-ga151