1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
|
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* aes-ce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
*
* Copyright (C) 2013 - 2017 Linaro Ltd.
* Copyright (C) 2024 Google LLC
*
* Author: Ard Biesheuvel <ardb@kernel.org>
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
.text
.arch armv8-a+crypto
.macro load_round_keys, rk, nr, tmp
sub w\tmp, \nr, #10
add \tmp, \rk, w\tmp, sxtw #4
ld1 {v10.4s-v13.4s}, [\rk]
ld1 {v14.4s-v17.4s}, [\tmp], #64
ld1 {v18.4s-v21.4s}, [\tmp], #64
ld1 {v3.4s-v5.4s}, [\tmp]
.endm
.macro dround, va, vb, vk
aese \va\().16b, \vk\().16b
aesmc \va\().16b, \va\().16b
aese \vb\().16b, \vk\().16b
aesmc \vb\().16b, \vb\().16b
.endm
.macro aes_encrypt, va, vb, nr
tbz \nr, #2, .L\@
dround \va, \vb, v10
dround \va, \vb, v11
tbz \nr, #1, .L\@
dround \va, \vb, v12
dround \va, \vb, v13
.L\@: .irp v, v14, v15, v16, v17, v18, v19, v20, v21, v3
dround \va, \vb, \v
.endr
aese \va\().16b, v4.16b
aese \vb\().16b, v4.16b
.endm
.macro aes_ccm_do_crypt,enc
load_round_keys x3, w4, x10
ld1 {v0.16b}, [x5] /* load mac */
cbz x2, ce_aes_ccm_final
ldr x8, [x6, #8] /* load lower ctr */
CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */
0: /* outer loop */
ld1 {v1.8b}, [x6] /* load upper ctr */
prfm pldl1strm, [x1]
add x8, x8, #1
rev x9, x8
ins v1.d[1], x9 /* no carry in lower ctr */
aes_encrypt v0, v1, w4
subs w2, w2, #16
bmi ce_aes_ccm_crypt_tail
ld1 {v2.16b}, [x1], #16 /* load next input block */
.if \enc == 1
eor v2.16b, v2.16b, v5.16b /* final round enc+mac */
eor v6.16b, v1.16b, v2.16b /* xor with crypted ctr */
.else
eor v2.16b, v2.16b, v1.16b /* xor with crypted ctr */
eor v6.16b, v2.16b, v5.16b /* final round enc */
.endif
eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */
st1 {v6.16b}, [x0], #16 /* write output block */
bne 0b
CPU_LE( rev x8, x8 )
str x8, [x6, #8] /* store lsb end of ctr (BE) */
cbnz x7, ce_aes_ccm_final
st1 {v0.16b}, [x5] /* store mac */
ret
.endm
SYM_FUNC_START_LOCAL(ce_aes_ccm_crypt_tail)
eor v0.16b, v0.16b, v5.16b /* final round mac */
eor v1.16b, v1.16b, v5.16b /* final round enc */
add x1, x1, w2, sxtw /* rewind the input pointer (w2 < 0) */
add x0, x0, w2, sxtw /* rewind the output pointer */
adr_l x8, .Lpermute /* load permute vectors */
add x9, x8, w2, sxtw
sub x8, x8, w2, sxtw
ld1 {v7.16b-v8.16b}, [x9]
ld1 {v9.16b}, [x8]
ld1 {v2.16b}, [x1] /* load a full block of input */
tbl v1.16b, {v1.16b}, v7.16b /* move keystream to end of register */
eor v7.16b, v2.16b, v1.16b /* encrypt partial input block */
bif v2.16b, v7.16b, v22.16b /* select plaintext */
tbx v7.16b, {v6.16b}, v8.16b /* insert output from previous iteration */
tbl v2.16b, {v2.16b}, v9.16b /* copy plaintext to start of v2 */
eor v0.16b, v0.16b, v2.16b /* fold plaintext into mac */
st1 {v7.16b}, [x0] /* store output block */
cbz x7, 0f
SYM_INNER_LABEL(ce_aes_ccm_final, SYM_L_LOCAL)
ld1 {v1.16b}, [x7] /* load 1st ctriv */
aes_encrypt v0, v1, w4
/* final round key cancels out */
eor v0.16b, v0.16b, v1.16b /* en-/decrypt the mac */
0: st1 {v0.16b}, [x5] /* store result */
ret
SYM_FUNC_END(ce_aes_ccm_crypt_tail)
/*
* void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
* u8 const rk[], u32 rounds, u8 mac[],
* u8 ctr[], u8 const final_iv[]);
* void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
* u8 const rk[], u32 rounds, u8 mac[],
* u8 ctr[], u8 const final_iv[]);
*/
SYM_FUNC_START(ce_aes_ccm_encrypt)
movi v22.16b, #255
aes_ccm_do_crypt 1
SYM_FUNC_END(ce_aes_ccm_encrypt)
SYM_FUNC_START(ce_aes_ccm_decrypt)
movi v22.16b, #0
aes_ccm_do_crypt 0
SYM_FUNC_END(ce_aes_ccm_decrypt)
.section ".rodata", "a"
.align 6
.fill 15, 1, 0xff
.Lpermute:
.byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
.byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
.fill 15, 1, 0xff
|