From da3c6c836fb1a0b9f08a7efabbfb7e31a0c816f7 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Mon, 8 Feb 2021 17:10:38 +0800 Subject: crypto: powepc/sha1 - remove unneeded semicolon Eliminate the following coccicheck warning: ./arch/powerpc/crypto/sha1-spe-glue.c:110:2-3: Unneeded semicolon Reported-by: Abaci Robot Signed-off-by: Yang Li Signed-off-by: Herbert Xu --- arch/powerpc/crypto/sha1-spe-glue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/crypto/sha1-spe-glue.c b/arch/powerpc/crypto/sha1-spe-glue.c index b1e577cbf00c..88e8ea73bfa7 100644 --- a/arch/powerpc/crypto/sha1-spe-glue.c +++ b/arch/powerpc/crypto/sha1-spe-glue.c @@ -107,7 +107,7 @@ static int ppc_spe_sha1_update(struct shash_desc *desc, const u8 *data, src += bytes; len -= bytes; - }; + } memcpy((char *)sctx->buffer, src, len); return 0; -- cgit v1.2.3-58-ga151 From 6131e970770da0e1d667f96efafd3f859aa4ea74 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 8 Feb 2021 18:28:16 -0800 Subject: crypto: arm/blake2b - drop unnecessary return statement Neither crypto_unregister_shashes() nor the module_exit function return a value, so the explicit 'return' is unnecessary. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/arm/crypto/blake2b-neon-glue.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/crypto/blake2b-neon-glue.c b/arch/arm/crypto/blake2b-neon-glue.c index 34d73200e7fa..4b59d027ba4a 100644 --- a/arch/arm/crypto/blake2b-neon-glue.c +++ b/arch/arm/crypto/blake2b-neon-glue.c @@ -85,8 +85,8 @@ static int __init blake2b_neon_mod_init(void) static void __exit blake2b_neon_mod_exit(void) { - return crypto_unregister_shashes(blake2b_neon_algs, - ARRAY_SIZE(blake2b_neon_algs)); + crypto_unregister_shashes(blake2b_neon_algs, + ARRAY_SIZE(blake2b_neon_algs)); } module_init(blake2b_neon_mod_init); -- cgit v1.2.3-58-ga151 From d2f2516a3882c0c6463e33c9b112b39bd483f821 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 9 Mar 2021 23:27:26 -0800 Subject: crypto: arm/blake2s - fix for big endian The new ARM BLAKE2s code doesn't work correctly (fails the self-tests) in big endian kernel builds because it doesn't swap the endianness of the message words when loading them. Fix this. Fixes: 5172d322d34c ("crypto: arm/blake2s - add ARM scalar optimized BLAKE2s") Signed-off-by: Eric Biggers Acked-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/blake2s-core.S | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'arch') diff --git a/arch/arm/crypto/blake2s-core.S b/arch/arm/crypto/blake2s-core.S index bed897e9a181..86345751bbf3 100644 --- a/arch/arm/crypto/blake2s-core.S +++ b/arch/arm/crypto/blake2s-core.S @@ -8,6 +8,7 @@ */ #include +#include // Registers used to hold message words temporarily. There aren't // enough ARM registers to hold the whole message block, so we have to @@ -38,6 +39,23 @@ #endif .endm +.macro _le32_bswap a, tmp +#ifdef __ARMEB__ + rev_l \a, \tmp +#endif +.endm + +.macro _le32_bswap_8x a, b, c, d, e, f, g, h, tmp + _le32_bswap \a, \tmp + _le32_bswap \b, \tmp + _le32_bswap \c, \tmp + _le32_bswap \d, \tmp + _le32_bswap \e, \tmp + _le32_bswap \f, \tmp + _le32_bswap \g, \tmp + _le32_bswap \h, \tmp +.endm + // Execute a quarter-round of BLAKE2s by mixing two columns or two diagonals. // (a0, b0, c0, d0) and (a1, b1, c1, d1) give the registers containing the two // columns/diagonals. s0-s1 are the word offsets to the message words the first @@ -180,8 +198,10 @@ ENTRY(blake2s_compress_arch) tst r1, #3 bne .Lcopy_block_misaligned ldmia r1!, {r2-r9} + _le32_bswap_8x r2, r3, r4, r5, r6, r7, r8, r9, r14 stmia r12!, {r2-r9} ldmia r1!, {r2-r9} + _le32_bswap_8x r2, r3, r4, r5, r6, r7, r8, r9, r14 stmia r12, {r2-r9} .Lcopy_block_done: str r1, [sp, #68] // Update message pointer @@ -268,6 +288,7 @@ ENTRY(blake2s_compress_arch) 1: #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS ldr r3, [r1], #4 + _le32_bswap r3, r4 #else ldrb r3, [r1, #0] ldrb r4, [r1, #1] -- cgit v1.2.3-58-ga151 From d5adb9d1f7f8ccabbfa105e148d1465dfebd8cd2 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 10 Mar 2021 11:14:20 +0100 Subject: crypto: arm/aes-scalar - switch to common rev_l/mov_l macros The scalar AES implementation has some locally defined macros which reimplement things that are now available in macros defined in assembler.h. So let's switch to those. Signed-off-by: Ard Biesheuvel Reviewed-by: Nicolas Pitre Reviewed-by: Geert Uytterhoeven Reviewed-by: Linus Walleij Reviewed-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/arm/crypto/aes-cipher-core.S | 42 ++++++++++----------------------------- 1 file changed, 10 insertions(+), 32 deletions(-) (limited to 'arch') diff --git a/arch/arm/crypto/aes-cipher-core.S b/arch/arm/crypto/aes-cipher-core.S index 472e56d09eea..1da3f41359aa 100644 --- a/arch/arm/crypto/aes-cipher-core.S +++ b/arch/arm/crypto/aes-cipher-core.S @@ -99,28 +99,6 @@ __hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr .endm - .macro __rev, out, in - .if __LINUX_ARM_ARCH__ < 6 - lsl t0, \in, #24 - and t1, \in, #0xff00 - and t2, \in, #0xff0000 - orr \out, t0, \in, lsr #24 - orr \out, \out, t1, lsl #8 - orr \out, \out, t2, lsr #8 - .else - rev \out, \in - .endif - .endm - - .macro __adrl, out, sym, c - .if __LINUX_ARM_ARCH__ < 7 - ldr\c \out, =\sym - .else - movw\c \out, #:lower16:\sym - movt\c \out, #:upper16:\sym - .endif - .endm - .macro do_crypt, round, ttab, ltab, bsz push {r3-r11, lr} @@ -133,10 +111,10 @@ ldr r7, [in, #12] #ifdef CONFIG_CPU_BIG_ENDIAN - __rev r4, r4 - __rev r5, r5 - __rev r6, r6 - __rev r7, r7 + rev_l r4, t0 + rev_l r5, t0 + rev_l r6, t0 + rev_l r7, t0 #endif eor r4, r4, r8 @@ -144,7 +122,7 @@ eor r6, r6, r10 eor r7, r7, r11 - __adrl ttab, \ttab + mov_l ttab, \ttab /* * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into * L1 cache, assuming cacheline size >= 32. This is a hardening measure @@ -180,7 +158,7 @@ 2: .ifb \ltab add ttab, ttab, #1 .else - __adrl ttab, \ltab + mov_l ttab, \ltab // Prefetch inverse S-box for final round; see explanation above .set i, 0 .rept 256 / 64 @@ -194,10 +172,10 @@ \round r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds #ifdef CONFIG_CPU_BIG_ENDIAN - __rev r4, r4 - __rev r5, r5 - __rev r6, r6 - __rev r7, r7 + rev_l r4, t0 + rev_l r5, t0 + rev_l r6, t0 + rev_l r7, t0 #endif ldr out, [sp] -- cgit v1.2.3-58-ga151 From e0ba808db7bae1837249c19fe24cc95364a4d483 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 10 Mar 2021 11:14:21 +0100 Subject: crypto: arm/chacha-scalar - switch to common rev_l macro Drop the local definition of a byte swapping macro and use the common one instead. Signed-off-by: Ard Biesheuvel Reviewed-by: Nicolas Pitre Reviewed-by: Geert Uytterhoeven Reviewed-by: Linus Walleij Reviewed-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/arm/crypto/chacha-scalar-core.S | 43 +++++++++++------------------------- 1 file changed, 13 insertions(+), 30 deletions(-) (limited to 'arch') diff --git a/arch/arm/crypto/chacha-scalar-core.S b/arch/arm/crypto/chacha-scalar-core.S index 2985b80a45b5..083fe1ab96d0 100644 --- a/arch/arm/crypto/chacha-scalar-core.S +++ b/arch/arm/crypto/chacha-scalar-core.S @@ -41,32 +41,15 @@ X14 .req r12 X15 .req r14 -.macro __rev out, in, t0, t1, t2 -.if __LINUX_ARM_ARCH__ >= 6 - rev \out, \in -.else - lsl \t0, \in, #24 - and \t1, \in, #0xff00 - and \t2, \in, #0xff0000 - orr \out, \t0, \in, lsr #24 - orr \out, \out, \t1, lsl #8 - orr \out, \out, \t2, lsr #8 -.endif -.endm - -.macro _le32_bswap x, t0, t1, t2 +.macro _le32_bswap_4x a, b, c, d, tmp #ifdef __ARMEB__ - __rev \x, \x, \t0, \t1, \t2 + rev_l \a, \tmp + rev_l \b, \tmp + rev_l \c, \tmp + rev_l \d, \tmp #endif .endm -.macro _le32_bswap_4x a, b, c, d, t0, t1, t2 - _le32_bswap \a, \t0, \t1, \t2 - _le32_bswap \b, \t0, \t1, \t2 - _le32_bswap \c, \t0, \t1, \t2 - _le32_bswap \d, \t0, \t1, \t2 -.endm - .macro __ldrd a, b, src, offset #if __LINUX_ARM_ARCH__ >= 6 ldrd \a, \b, [\src, #\offset] @@ -200,7 +183,7 @@ add X1, X1, r9 add X2, X2, r10 add X3, X3, r11 - _le32_bswap_4x X0, X1, X2, X3, r8, r9, r10 + _le32_bswap_4x X0, X1, X2, X3, r8 ldmia r12!, {r8-r11} eor X0, X0, r8 eor X1, X1, r9 @@ -216,7 +199,7 @@ ldmia r12!, {X0-X3} add X6, r10, X6, ror #brot add X7, r11, X7, ror #brot - _le32_bswap_4x X4, X5, X6, X7, r8, r9, r10 + _le32_bswap_4x X4, X5, X6, X7, r8 eor X4, X4, X0 eor X5, X5, X1 eor X6, X6, X2 @@ -231,7 +214,7 @@ add r1, r1, r9 // x9 add r6, r6, r10 // x10 add r7, r7, r11 // x11 - _le32_bswap_4x r0, r1, r6, r7, r8, r9, r10 + _le32_bswap_4x r0, r1, r6, r7, r8 ldmia r12!, {r8-r11} eor r0, r0, r8 // x8 eor r1, r1, r9 // x9 @@ -245,7 +228,7 @@ add r3, r9, r3, ror #drot // x13 add r4, r10, r4, ror #drot // x14 add r5, r11, r5, ror #drot // x15 - _le32_bswap_4x r2, r3, r4, r5, r9, r10, r11 + _le32_bswap_4x r2, r3, r4, r5, r9 ldr r9, [sp, #72] // load LEN eor r2, r2, r0 // x12 eor r3, r3, r1 // x13 @@ -301,7 +284,7 @@ add X1, X1, r9 add X2, X2, r10 add X3, X3, r11 - _le32_bswap_4x X0, X1, X2, X3, r8, r9, r10 + _le32_bswap_4x X0, X1, X2, X3, r8 stmia r14!, {X0-X3} // Save keystream for x4-x7 @@ -311,7 +294,7 @@ add X5, r9, X5, ror #brot add X6, r10, X6, ror #brot add X7, r11, X7, ror #brot - _le32_bswap_4x X4, X5, X6, X7, r8, r9, r10 + _le32_bswap_4x X4, X5, X6, X7, r8 add r8, sp, #64 stmia r14!, {X4-X7} @@ -323,7 +306,7 @@ add r1, r1, r9 // x9 add r6, r6, r10 // x10 add r7, r7, r11 // x11 - _le32_bswap_4x r0, r1, r6, r7, r8, r9, r10 + _le32_bswap_4x r0, r1, r6, r7, r8 stmia r14!, {r0,r1,r6,r7} __ldrd r8, r9, sp, 144 __ldrd r10, r11, sp, 152 @@ -331,7 +314,7 @@ add r3, r9, r3, ror #drot // x13 add r4, r10, r4, ror #drot // x14 add r5, r11, r5, ror #drot // x15 - _le32_bswap_4x r2, r3, r4, r5, r9, r10, r11 + _le32_bswap_4x r2, r3, r4, r5, r9 stmia r14, {r2-r5} // Stack: ks0-ks15 unused0-unused7 x0-x15 OUT IN LEN -- cgit v1.2.3-58-ga151 From 8d195e7a8ada68928f2aedb2c18302a4518fe68e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 22 Mar 2021 18:05:15 +0100 Subject: crypto: poly1305 - fix poly1305_core_setkey() declaration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gcc-11 points out a mismatch between the declaration and the definition of poly1305_core_setkey(): lib/crypto/poly1305-donna32.c:13:67: error: argument 2 of type ‘const u8[16]’ {aka ‘const unsigned char[16]’} with mismatched bound [-Werror=array-parameter=] 13 | void poly1305_core_setkey(struct poly1305_core_key *key, const u8 raw_key[16]) | ~~~~~~~~~^~~~~~~~~~~ In file included from lib/crypto/poly1305-donna32.c:11: include/crypto/internal/poly1305.h:21:68: note: previously declared as ‘const u8 *’ {aka ‘const unsigned char *’} 21 | void poly1305_core_setkey(struct poly1305_core_key *key, const u8 *raw_key); This is harmless in principle, as the calling conventions are the same, but the more specific prototype allows better type checking in the caller. Change the declaration to match the actual function definition. The poly1305_simd_init() is a bit suspicious here, as it previously had a 32-byte argument type, but looks like it needs to take the 16-byte POLY1305_BLOCK_SIZE array instead. Fixes: 1c08a104360f ("crypto: poly1305 - add new 32 and 64-bit generic versions") Signed-off-by: Arnd Bergmann Reviewed-by: Ard Biesheuvel Reviewed-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/arm/crypto/poly1305-glue.c | 2 +- arch/arm64/crypto/poly1305-glue.c | 2 +- arch/mips/crypto/poly1305-glue.c | 2 +- arch/x86/crypto/poly1305_glue.c | 6 +++--- include/crypto/internal/poly1305.h | 3 ++- include/crypto/poly1305.h | 6 ++++-- lib/crypto/poly1305-donna32.c | 3 ++- lib/crypto/poly1305-donna64.c | 3 ++- lib/crypto/poly1305.c | 3 ++- 9 files changed, 18 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/arm/crypto/poly1305-glue.c b/arch/arm/crypto/poly1305-glue.c index 3023c1acfa19..c31bd8f7c092 100644 --- a/arch/arm/crypto/poly1305-glue.c +++ b/arch/arm/crypto/poly1305-glue.c @@ -29,7 +29,7 @@ void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit) static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); -void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key) +void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) { poly1305_init_arm(&dctx->h, key); dctx->s[0] = get_unaligned_le32(key + 16); diff --git a/arch/arm64/crypto/poly1305-glue.c b/arch/arm64/crypto/poly1305-glue.c index 683de671741a..9c3d86e397bf 100644 --- a/arch/arm64/crypto/poly1305-glue.c +++ b/arch/arm64/crypto/poly1305-glue.c @@ -25,7 +25,7 @@ asmlinkage void poly1305_emit(void *state, u8 *digest, const u32 *nonce); static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); -void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key) +void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) { poly1305_init_arm64(&dctx->h, key); dctx->s[0] = get_unaligned_le32(key + 16); diff --git a/arch/mips/crypto/poly1305-glue.c b/arch/mips/crypto/poly1305-glue.c index fc881b46d911..bc6110fb98e0 100644 --- a/arch/mips/crypto/poly1305-glue.c +++ b/arch/mips/crypto/poly1305-glue.c @@ -17,7 +17,7 @@ asmlinkage void poly1305_init_mips(void *state, const u8 *key); asmlinkage void poly1305_blocks_mips(void *state, const u8 *src, u32 len, u32 hibit); asmlinkage void poly1305_emit_mips(void *state, u8 *digest, const u32 *nonce); -void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key) +void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) { poly1305_init_mips(&dctx->h, key); dctx->s[0] = get_unaligned_le32(key + 16); diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c index 646da46e8d10..1dfb8af48a3c 100644 --- a/arch/x86/crypto/poly1305_glue.c +++ b/arch/x86/crypto/poly1305_glue.c @@ -16,7 +16,7 @@ #include asmlinkage void poly1305_init_x86_64(void *ctx, - const u8 key[POLY1305_KEY_SIZE]); + const u8 key[POLY1305_BLOCK_SIZE]); asmlinkage void poly1305_blocks_x86_64(void *ctx, const u8 *inp, const size_t len, const u32 padbit); asmlinkage void poly1305_emit_x86_64(void *ctx, u8 mac[POLY1305_DIGEST_SIZE], @@ -81,7 +81,7 @@ static void convert_to_base2_64(void *ctx) state->is_base2_26 = 0; } -static void poly1305_simd_init(void *ctx, const u8 key[POLY1305_KEY_SIZE]) +static void poly1305_simd_init(void *ctx, const u8 key[POLY1305_BLOCK_SIZE]) { poly1305_init_x86_64(ctx, key); } @@ -129,7 +129,7 @@ static void poly1305_simd_emit(void *ctx, u8 mac[POLY1305_DIGEST_SIZE], poly1305_emit_avx(ctx, mac, nonce); } -void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key) +void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) { poly1305_simd_init(&dctx->h, key); dctx->s[0] = get_unaligned_le32(&key[16]); diff --git a/include/crypto/internal/poly1305.h b/include/crypto/internal/poly1305.h index 064e52ca5248..196aa769f296 100644 --- a/include/crypto/internal/poly1305.h +++ b/include/crypto/internal/poly1305.h @@ -18,7 +18,8 @@ * only the ε-almost-∆-universal hash function (not the full MAC) is computed. */ -void poly1305_core_setkey(struct poly1305_core_key *key, const u8 *raw_key); +void poly1305_core_setkey(struct poly1305_core_key *key, + const u8 raw_key[POLY1305_BLOCK_SIZE]); static inline void poly1305_core_init(struct poly1305_state *state) { *state = (struct poly1305_state){}; diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h index f1f67fc749cf..090692ec3bc7 100644 --- a/include/crypto/poly1305.h +++ b/include/crypto/poly1305.h @@ -58,8 +58,10 @@ struct poly1305_desc_ctx { }; }; -void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key); -void poly1305_init_generic(struct poly1305_desc_ctx *desc, const u8 *key); +void poly1305_init_arch(struct poly1305_desc_ctx *desc, + const u8 key[POLY1305_KEY_SIZE]); +void poly1305_init_generic(struct poly1305_desc_ctx *desc, + const u8 key[POLY1305_KEY_SIZE]); static inline void poly1305_init(struct poly1305_desc_ctx *desc, const u8 *key) { diff --git a/lib/crypto/poly1305-donna32.c b/lib/crypto/poly1305-donna32.c index 3cc77d94390b..7fb71845cc84 100644 --- a/lib/crypto/poly1305-donna32.c +++ b/lib/crypto/poly1305-donna32.c @@ -10,7 +10,8 @@ #include #include -void poly1305_core_setkey(struct poly1305_core_key *key, const u8 raw_key[16]) +void poly1305_core_setkey(struct poly1305_core_key *key, + const u8 raw_key[POLY1305_BLOCK_SIZE]) { /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ key->key.r[0] = (get_unaligned_le32(&raw_key[0])) & 0x3ffffff; diff --git a/lib/crypto/poly1305-donna64.c b/lib/crypto/poly1305-donna64.c index 6ae181bb4345..d34cf4053668 100644 --- a/lib/crypto/poly1305-donna64.c +++ b/lib/crypto/poly1305-donna64.c @@ -12,7 +12,8 @@ typedef __uint128_t u128; -void poly1305_core_setkey(struct poly1305_core_key *key, const u8 raw_key[16]) +void poly1305_core_setkey(struct poly1305_core_key *key, + const u8 raw_key[POLY1305_BLOCK_SIZE]) { u64 t0, t1; diff --git a/lib/crypto/poly1305.c b/lib/crypto/poly1305.c index 9d2d14df0fee..26d87fc3823e 100644 --- a/lib/crypto/poly1305.c +++ b/lib/crypto/poly1305.c @@ -12,7 +12,8 @@ #include #include -void poly1305_init_generic(struct poly1305_desc_ctx *desc, const u8 *key) +void poly1305_init_generic(struct poly1305_desc_ctx *desc, + const u8 key[POLY1305_KEY_SIZE]) { poly1305_core_setkey(&desc->core_r, key); desc->s[0] = get_unaligned_le32(key + 16); -- cgit v1.2.3-58-ga151 From 0f19dbc994dcb7f7137f2e056e813c84530b7538 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 6 Apr 2021 16:25:23 +0200 Subject: crypto: arm64/aes-ce - deal with oversight in new CTR carry code The new carry handling code in the CTR driver can deal with a carry occurring in the 4x/5x parallel code path, by using a computed goto to jump into the carry sequence at the right place as to only apply the carry to a subset of the blocks being processed. If the lower half of the counter wraps and ends up at exactly 0x0, a carry needs to be applied to the counter, but not to the counter values taken for the 4x/5x parallel sequence. In this case, the computed goto skips all register assignments, and branches straight to the jump instruction that gets us back to the fast path. This produces the correct result, but due to the fact that this branch target does not carry the correct BTI annotation, this fails when BTI is enabled. Let's omit the computed goto entirely in this case, and jump straight back to the fast path after applying the carry to the main counter. Fixes: 5318d3db465d ("crypto: arm64/aes-ctr - improve tail handling") Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-modes.S | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S index bbdb54702aa7..247011356d11 100644 --- a/arch/arm64/crypto/aes-modes.S +++ b/arch/arm64/crypto/aes-modes.S @@ -359,6 +359,7 @@ ST5( mov v4.16b, vctr.16b ) ins vctr.d[0], x8 /* apply carry to N counter blocks for N := x12 */ + cbz x12, 2f adr x16, 1f sub x16, x16, x12, lsl #3 br x16 -- cgit v1.2.3-58-ga151 From 44200f2d9b8b52389c70e6c7bbe51e0dc6eaf938 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 9 Apr 2021 15:11:55 -0700 Subject: crypto: arm/curve25519 - Move '.fpu' after '.arch' Debian's clang carries a patch that makes the default FPU mode 'vfp3-d16' instead of 'neon' for 'armv7-a' to avoid generating NEON instructions on hardware that does not support them: https://salsa.debian.org/pkg-llvm-team/llvm-toolchain/-/raw/5a61ca6f21b4ad8c6ac4970e5ea5a7b5b4486d22/debian/patches/clang-arm-default-vfp3-on-armv7a.patch https://bugs.debian.org/841474 https://bugs.debian.org/842142 https://bugs.debian.org/914268 This results in the following build error when clang's integrated assembler is used because the '.arch' directive overrides the '.fpu' directive: arch/arm/crypto/curve25519-core.S:25:2: error: instruction requires: NEON vmov.i32 q0, #1 ^ arch/arm/crypto/curve25519-core.S:26:2: error: instruction requires: NEON vshr.u64 q1, q0, #7 ^ arch/arm/crypto/curve25519-core.S:27:2: error: instruction requires: NEON vshr.u64 q0, q0, #8 ^ arch/arm/crypto/curve25519-core.S:28:2: error: instruction requires: NEON vmov.i32 d4, #19 ^ Shuffle the order of the '.arch' and '.fpu' directives so that the code builds regardless of the default FPU mode. This has been tested against both clang with and without Debian's patch and GCC. Cc: stable@vger.kernel.org Fixes: d8f1308a025f ("crypto: arm/curve25519 - wire up NEON implementation") Link: https://github.com/ClangBuiltLinux/continuous-integration2/issues/118 Reported-by: Arnd Bergmann Suggested-by: Arnd Bergmann Suggested-by: Jessica Clarke Signed-off-by: Nathan Chancellor Acked-by: Jason A. Donenfeld Reviewed-by: Nick Desaulniers Tested-by: Nick Desaulniers Signed-off-by: Herbert Xu --- arch/arm/crypto/curve25519-core.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/crypto/curve25519-core.S b/arch/arm/crypto/curve25519-core.S index be18af52e7dc..b697fa5d059a 100644 --- a/arch/arm/crypto/curve25519-core.S +++ b/arch/arm/crypto/curve25519-core.S @@ -10,8 +10,8 @@ #include .text -.fpu neon .arch armv7-a +.fpu neon .align 4 ENTRY(curve25519_neon) -- cgit v1.2.3-58-ga151