diff options
author | Ard Biesheuvel <ardb@kernel.org> | 2023-04-12 13:00:24 +0200 |
---|---|---|
committer | Herbert Xu <herbert@gondor.apana.org.au> | 2023-04-20 18:20:04 +0800 |
commit | c75962f1c439de1590b038cb18466a859d59f209 (patch) | |
tree | 00c46c9ce6e1b497da25679487d82bf90fcabcd8 | |
parent | 9d5aef1222337f593e52293bb94c5cf7139d4d83 (diff) |
crypto: x86/aesni - Use RIP-relative addressing
Prefer RIP-relative addressing where possible, which removes the need
for boot time relocation fixups. In the GCM case, we can get rid of the
oversized permutation array entirely while at it.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
-rw-r--r-- | arch/x86/crypto/aesni-intel_asm.S | 2 | ||||
-rw-r--r-- | arch/x86/crypto/aesni-intel_avx-x86_64.S | 36 |
2 files changed, 8 insertions, 30 deletions
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 837c1e0aa021..ca99a2274d55 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -2717,7 +2717,7 @@ SYM_FUNC_END(aesni_cts_cbc_dec)
  * BSWAP_MASK == endian swapping mask
  */
 SYM_FUNC_START_LOCAL(_aesni_inc_init)
-	movaps .Lbswap_mask, BSWAP_MASK
+	movaps .Lbswap_mask(%rip), BSWAP_MASK
 	movaps IV, CTR
 	pshufb BSWAP_MASK, CTR
 	mov $1, TCTR_LOW
diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S
index 0852ab573fd3..b6ca80f188ff 100644
--- a/arch/x86/crypto/aesni-intel_avx-x86_64.S
+++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S
@@ -154,30 +154,6 @@ SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100
 ALL_F: .octa 0xffffffffffffffffffffffffffffffff
 	.octa 0x00000000000000000000000000000000
 
-.section .rodata
-.align 16
-.type aad_shift_arr, @object
-.size aad_shift_arr, 272
-aad_shift_arr:
-	.octa 0xffffffffffffffffffffffffffffffff
-	.octa 0xffffffffffffffffffffffffffffff0C
-	.octa 0xffffffffffffffffffffffffffff0D0C
-	.octa 0xffffffffffffffffffffffffff0E0D0C
-	.octa 0xffffffffffffffffffffffff0F0E0D0C
-	.octa 0xffffffffffffffffffffff0C0B0A0908
-	.octa 0xffffffffffffffffffff0D0C0B0A0908
-	.octa 0xffffffffffffffffff0E0D0C0B0A0908
-	.octa 0xffffffffffffffff0F0E0D0C0B0A0908
-	.octa 0xffffffffffffff0C0B0A090807060504
-	.octa 0xffffffffffff0D0C0B0A090807060504
-	.octa 0xffffffffff0E0D0C0B0A090807060504
-	.octa 0xffffffff0F0E0D0C0B0A090807060504
-	.octa 0xffffff0C0B0A09080706050403020100
-	.octa 0xffff0D0C0B0A09080706050403020100
-	.octa 0xff0E0D0C0B0A09080706050403020100
-	.octa 0x0F0E0D0C0B0A09080706050403020100
-
-
 .text
 
 
@@ -646,11 +622,13 @@ _get_AAD_rest4\@:
 _get_AAD_rest0\@:
 	/* finalize: shift out the extra bytes we read, and align left.
 	since pslldq can only shift by an immediate, we use
-	vpshufb and an array of shuffle masks */
-	movq %r12, %r11
-	salq $4, %r11
-	vmovdqu aad_shift_arr(%r11), \T1
-	vpshufb \T1, \T7, \T7
+	vpshufb and a pair of shuffle masks */
+	leaq ALL_F(%rip), %r11
+	subq %r12, %r11
+	vmovdqu 16(%r11), \T1
+	andq $~3, %r11
+	vpshufb (%r11), \T7, \T7
+	vpand \T1, \T7, \T7
 _get_AAD_rest_final\@:
 	vpshufb SHUF_MASK(%rip), \T7, \T7
 	vpxor \T8, \T7, \T7