author    | Linus Torvalds <torvalds@linux-foundation.org> | 2023-02-21 18:10:50 -0800
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-02-21 18:10:50 -0800
commit    | 36289a03bcd3aabdf66de75cb6d1b4ee15726438 (patch)
tree      | 1230c6391678f9255f74d7a4f65e95ea8a39d452
parent    | 69308402ca6f5b80a5a090ade0b13bd146891420 (diff)
parent    | 8b84475318641c2b89320859332544cf187e1cbd (diff)
Merge tag 'v6.3-p1' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto update from Herbert Xu:
"API:
- Use kmap_local instead of kmap_atomic
- Change request callback to take void pointer
- Print FIPS status in /proc/crypto (when enabled)
Algorithms:
- Add rfc4106/gcm support on arm64
- Add ARIA AVX2/512 support on x86
Drivers:
- Add TRNG driver for StarFive SoC
- Delete ux500/hash driver (subsumed by stm32/hash)
- Add zlib support in qat
- Add RSA support in aspeed"
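The "Change request callback to take void pointer" item above is the API change that the "Remove completion function scaffolding" commits in the shortlog below finish off: completion handlers now receive the opaque data pointer registered with *_request_set_callback() directly, instead of a struct crypto_async_request whose ->data field had to be dereferenced. A minimal caller-side sketch of the new convention (my_req_ctx, my_aead_done and my_encrypt_sync are hypothetical names, not code from this merge):

```c
#include <crypto/aead.h>
#include <linux/completion.h>
#include <linux/errno.h>

struct my_req_ctx {
	struct completion done;
	int err;
};

/*
 * New-style completion callback: the first argument is the opaque data
 * pointer passed to aead_request_set_callback(), not a
 * struct crypto_async_request.
 */
static void my_aead_done(void *data, int err)
{
	struct my_req_ctx *ctx = data;

	if (err == -EINPROGRESS)
		return;	/* request left the backlog; the real completion follows */
	ctx->err = err;
	complete(&ctx->done);
}

static int my_encrypt_sync(struct aead_request *req, struct my_req_ctx *ctx)
{
	int err;

	init_completion(&ctx->done);
	aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
				       CRYPTO_TFM_REQ_MAY_SLEEP,
				  my_aead_done, ctx);

	err = crypto_aead_encrypt(req);
	if (err == -EINPROGRESS || err == -EBUSY) {
		wait_for_completion(&ctx->done);
		err = ctx->err;
	}
	return err;
}
```

Callers that only need to wait synchronously can keep using the generic crypto_req_done() helper with a DECLARE_CRYPTO_WAIT() object, which follows the same void-pointer convention after this series.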
* tag 'v6.3-p1' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (156 commits)
crypto: x86/aria-avx - Do not use avx2 instructions
crypto: aspeed - Fix modular aspeed-acry
crypto: hisilicon/qm - fix coding style issues
crypto: hisilicon/qm - update comments to match function
crypto: hisilicon/qm - change function names
crypto: hisilicon/qm - use min() instead of min_t()
crypto: hisilicon/qm - remove some unused defines
crypto: proc - Print fips status
crypto: crypto4xx - Call dma_unmap_page when done
crypto: octeontx2 - Fix objects shared between several modules
crypto: nx - Fix sparse warnings
crypto: ecc - Silence sparse warning
tls: Pass rec instead of aead_req into tls_encrypt_done
crypto: api - Remove completion function scaffolding
tls: Remove completion function scaffolding
tipc: Remove completion function scaffolding
net: ipv6: Remove completion function scaffolding
net: ipv4: Remove completion function scaffolding
net: macsec: Remove completion function scaffolding
dm: Remove completion function scaffolding
...
186 files changed, 6379 insertions, 4053 deletions
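The kmap conversion mentioned in the first API bullet swaps kmap_atomic()/kunmap_atomic() pairs for kmap_local_page()/kunmap_local(): the local mappings are still cheap and CPU-local, but they no longer implicitly disable preemption and page faults. A rough before/after sketch (copy_block_from_page() is a made-up helper, not taken from the diff below):

```c
#include <linux/highmem.h>
#include <linux/string.h>

/*
 * Hypothetical helper showing the kmap_atomic -> kmap_local_page swap.
 * Code between kmap_local_page() and kunmap_local() may now sleep.
 */
static void copy_block_from_page(void *dst, struct page *page,
				 unsigned int offset, unsigned int len)
{
	/* Before: void *src = kmap_atomic(page); ... kunmap_atomic(src); */
	void *src = kmap_local_page(page);

	memcpy(dst, src + offset, len);
	kunmap_local(src);
}
```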
diff --git a/Documentation/ABI/testing/sysfs-driver-qat b/Documentation/ABI/testing/sysfs-driver-qat index 185f81a2aab3..087842b1969e 100644 --- a/Documentation/ABI/testing/sysfs-driver-qat +++ b/Documentation/ABI/testing/sysfs-driver-qat @@ -1,6 +1,6 @@ What: /sys/bus/pci/devices/<BDF>/qat/state Date: June 2022 -KernelVersion: 5.20 +KernelVersion: 6.0 Contact: qat-linux@intel.com Description: (RW) Reports the current state of the QAT device. Write to the file to start or stop the device. @@ -18,7 +18,7 @@ Description: (RW) Reports the current state of the QAT device. Write to What: /sys/bus/pci/devices/<BDF>/qat/cfg_services Date: June 2022 -KernelVersion: 5.20 +KernelVersion: 6.0 Contact: qat-linux@intel.com Description: (RW) Reports the current configuration of the QAT device. Write to the file to change the configured services. diff --git a/Documentation/devicetree/bindings/bus/aspeed,ast2600-ahbc.yaml b/Documentation/devicetree/bindings/bus/aspeed,ast2600-ahbc.yaml new file mode 100644 index 000000000000..2894256c976d --- /dev/null +++ b/Documentation/devicetree/bindings/bus/aspeed,ast2600-ahbc.yaml @@ -0,0 +1,37 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/bus/aspeed,ast2600-ahbc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: ASPEED Advanced High-Performance Bus Controller (AHBC) + +maintainers: + - Neal Liu <neal_liu@aspeedtech.com> + - Chia-Wei Wang <chiawei_wang@aspeedtech.com> + +description: | + Advanced High-performance Bus Controller (AHBC) supports plenty of mechanisms + including a priority arbiter, an address decoder and a data multiplexer + to control the overall operations of Advanced High-performance Bus (AHB). + +properties: + compatible: + enum: + - aspeed,ast2600-ahbc + + reg: + maxItems: 1 + +required: + - compatible + - reg + +additionalProperties: false + +examples: + - | + ahbc@1e600000 { + compatible = "aspeed,ast2600-ahbc"; + reg = <0x1e600000 0x100>; + }; diff --git a/Documentation/devicetree/bindings/crypto/allwinner,sun8i-ce.yaml b/Documentation/devicetree/bindings/crypto/allwinner,sun8i-ce.yaml index 026a9f9e1aeb..4287678aa79f 100644 --- a/Documentation/devicetree/bindings/crypto/allwinner,sun8i-ce.yaml +++ b/Documentation/devicetree/bindings/crypto/allwinner,sun8i-ce.yaml @@ -14,6 +14,7 @@ properties: enum: - allwinner,sun8i-h3-crypto - allwinner,sun8i-r40-crypto + - allwinner,sun20i-d1-crypto - allwinner,sun50i-a64-crypto - allwinner,sun50i-h5-crypto - allwinner,sun50i-h6-crypto @@ -29,6 +30,7 @@ properties: - description: Bus clock - description: Module clock - description: MBus clock + - description: TRNG clock (RC oscillator) minItems: 2 clock-names: @@ -36,6 +38,7 @@ properties: - const: bus - const: mod - const: ram + - const: trng minItems: 2 resets: @@ -44,19 +47,33 @@ properties: if: properties: compatible: - const: allwinner,sun50i-h6-crypto + enum: + - allwinner,sun20i-d1-crypto then: properties: clocks: - minItems: 3 + minItems: 4 clock-names: - minItems: 3 + minItems: 4 else: - properties: - clocks: - maxItems: 2 - clock-names: - maxItems: 2 + if: + properties: + compatible: + const: allwinner,sun50i-h6-crypto + then: + properties: + clocks: + minItems: 3 + maxItems: 3 + clock-names: + minItems: 3 + maxItems: 3 + else: + properties: + clocks: + maxItems: 2 + clock-names: + maxItems: 2 required: - compatible diff --git a/Documentation/devicetree/bindings/crypto/aspeed,ast2600-acry.yaml b/Documentation/devicetree/bindings/crypto/aspeed,ast2600-acry.yaml 
new file mode 100644 index 000000000000..b18f178aac06 --- /dev/null +++ b/Documentation/devicetree/bindings/crypto/aspeed,ast2600-acry.yaml @@ -0,0 +1,49 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/crypto/aspeed,ast2600-acry.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: ASPEED ACRY ECDSA/RSA Hardware Accelerator Engines + +maintainers: + - Neal Liu <neal_liu@aspeedtech.com> + +description: + The ACRY ECDSA/RSA engines is designed to accelerate the throughput + of ECDSA/RSA signature and verification. Basically, ACRY can be + divided into two independent engines - ECC Engine and RSA Engine. + +properties: + compatible: + enum: + - aspeed,ast2600-acry + + reg: + items: + - description: acry base address & size + - description: acry sram base address & size + + clocks: + maxItems: 1 + + interrupts: + maxItems: 1 + +required: + - compatible + - reg + - clocks + - interrupts + +additionalProperties: false + +examples: + - | + #include <dt-bindings/clock/ast2600-clock.h> + acry: crypto@1e6fa000 { + compatible = "aspeed,ast2600-acry"; + reg = <0x1e6fa000 0x400>, <0x1e710000 0x1800>; + interrupts = <160>; + clocks = <&syscon ASPEED_CLK_GATE_RSACLK>; + }; diff --git a/Documentation/devicetree/bindings/crypto/st,stm32-hash.yaml b/Documentation/devicetree/bindings/crypto/st,stm32-hash.yaml index 4ccb335e8063..b767ec72a999 100644 --- a/Documentation/devicetree/bindings/crypto/st,stm32-hash.yaml +++ b/Documentation/devicetree/bindings/crypto/st,stm32-hash.yaml @@ -6,12 +6,18 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: STMicroelectronics STM32 HASH +description: The STM32 HASH block is built on the HASH block found in + the STn8820 SoC introduced in 2007, and subsequently used in the U8500 + SoC in 2010. 
+ maintainers: - Lionel Debieve <lionel.debieve@foss.st.com> properties: compatible: enum: + - st,stn8820-hash + - stericsson,ux500-hash - st,stm32f456-hash - st,stm32f756-hash @@ -41,11 +47,26 @@ properties: maximum: 2 default: 0 + power-domains: + maxItems: 1 + required: - compatible - reg - clocks - - interrupts + +allOf: + - if: + properties: + compatible: + items: + const: stericsson,ux500-hash + then: + properties: + interrupts: false + else: + required: + - interrupts additionalProperties: false diff --git a/Documentation/devicetree/bindings/rng/starfive,jh7110-trng.yaml b/Documentation/devicetree/bindings/rng/starfive,jh7110-trng.yaml new file mode 100644 index 000000000000..2b76ce25acc4 --- /dev/null +++ b/Documentation/devicetree/bindings/rng/starfive,jh7110-trng.yaml @@ -0,0 +1,55 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/rng/starfive,jh7110-trng.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: StarFive SoC TRNG Module + +maintainers: + - Jia Jie Ho <jiajie.ho@starfivetech.com> + +properties: + compatible: + const: starfive,jh7110-trng + + reg: + maxItems: 1 + + clocks: + items: + - description: Hardware reference clock + - description: AHB reference clock + + clock-names: + items: + - const: hclk + - const: ahb + + resets: + maxItems: 1 + + interrupts: + maxItems: 1 + +required: + - compatible + - reg + - clocks + - clock-names + - resets + - interrupts + +additionalProperties: false + +examples: + - | + rng: rng@1600C000 { + compatible = "starfive,jh7110-trng"; + reg = <0x1600C000 0x4000>; + clocks = <&clk 15>, <&clk 16>; + clock-names = "hclk", "ahb"; + resets = <&reset 3>; + interrupts = <30>; + }; +... diff --git a/MAINTAINERS b/MAINTAINERS index 2368a813e7ae..b4d198cca5aa 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3149,7 +3149,7 @@ ASPEED CRYPTO DRIVER M: Neal Liu <neal_liu@aspeedtech.com> L: linux-aspeed@lists.ozlabs.org (moderated for non-subscribers) S: Maintained -F: Documentation/devicetree/bindings/crypto/aspeed,ast2500-hace.yaml +F: Documentation/devicetree/bindings/crypto/aspeed,* F: drivers/crypto/aspeed/ ASUS NOTEBOOKS AND EEEPC ACPI/WMI EXTRAS DRIVERS @@ -19769,6 +19769,12 @@ F: Documentation/devicetree/bindings/reset/starfive,jh7100-reset.yaml F: drivers/reset/reset-starfive-jh7100.c F: include/dt-bindings/reset/starfive-jh7100.h +STARFIVE TRNG DRIVER +M: Jia Jie Ho <jiajie.ho@starfivetech.com> +S: Supported +F: Documentation/devicetree/bindings/rng/starfive* +F: drivers/char/hw_random/jh7110-trng.c + STATIC BRANCH/CALL M: Peter Zijlstra <peterz@infradead.org> M: Josh Poimboeuf <jpoimboe@kernel.org> diff --git a/arch/arm/boot/dts/aspeed-g6.dtsi b/arch/arm/boot/dts/aspeed-g6.dtsi index cc2f8b785917..8246a60de0d0 100644 --- a/arch/arm/boot/dts/aspeed-g6.dtsi +++ b/arch/arm/boot/dts/aspeed-g6.dtsi @@ -98,6 +98,11 @@ <0x40466000 0x2000>; }; + ahbc: bus@1e600000 { + compatible = "aspeed,ast2600-ahbc", "syscon"; + reg = <0x1e600000 0x100>; + }; + fmc: spi@1e620000 { reg = <0x1e620000 0xc4>, <0x20000000 0x10000000>; #address-cells = <1>; @@ -431,6 +436,14 @@ reg = <0x1e6f2000 0x1000>; }; + acry: crypto@1e6fa000 { + compatible = "aspeed,ast2600-acry"; + reg = <0x1e6fa000 0x400>, <0x1e710000 0x1800>; + interrupts = <GIC_SPI 160 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&syscon ASPEED_CLK_GATE_RSACLK>; + aspeed,ahbc = <&ahbc>; + }; + video: video@1e700000 { compatible = "aspeed,ast2600-video-engine"; reg = <0x1e700000 0x1000>; diff --git a/arch/arm/crypto/sha1_glue.c 
b/arch/arm/crypto/sha1_glue.c index 6c2b849e459d..95a727bcd664 100644 --- a/arch/arm/crypto/sha1_glue.c +++ b/arch/arm/crypto/sha1_glue.c @@ -21,31 +21,29 @@ #include "sha1.h" -asmlinkage void sha1_block_data_order(u32 *digest, - const unsigned char *data, unsigned int rounds); +asmlinkage void sha1_block_data_order(struct sha1_state *digest, + const u8 *data, int rounds); int sha1_update_arm(struct shash_desc *desc, const u8 *data, unsigned int len) { - /* make sure casting to sha1_block_fn() is safe */ + /* make sure signature matches sha1_block_fn() */ BUILD_BUG_ON(offsetof(struct sha1_state, state) != 0); - return sha1_base_do_update(desc, data, len, - (sha1_block_fn *)sha1_block_data_order); + return sha1_base_do_update(desc, data, len, sha1_block_data_order); } EXPORT_SYMBOL_GPL(sha1_update_arm); static int sha1_final(struct shash_desc *desc, u8 *out) { - sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_block_data_order); + sha1_base_do_finalize(desc, sha1_block_data_order); return sha1_base_finish(desc, out); } int sha1_finup_arm(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { - sha1_base_do_update(desc, data, len, - (sha1_block_fn *)sha1_block_data_order); + sha1_base_do_update(desc, data, len, sha1_block_data_order); return sha1_final(desc, out); } EXPORT_SYMBOL_GPL(sha1_finup_arm); diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c index c4f14415f5f0..25cd3808ecbe 100644 --- a/arch/arm64/crypto/aes-ce-ccm-glue.c +++ b/arch/arm64/crypto/aes-ce-ccm-glue.c @@ -161,43 +161,39 @@ static int ccm_encrypt(struct aead_request *req) memcpy(buf, req->iv, AES_BLOCK_SIZE); err = skcipher_walk_aead_encrypt(&walk, req, false); - if (unlikely(err)) - return err; kernel_neon_begin(); if (req->assoclen) ccm_calculate_auth_mac(req, mac); - do { + while (walk.nbytes) { u32 tail = walk.nbytes % AES_BLOCK_SIZE; + bool final = walk.nbytes == walk.total; - if (walk.nbytes == walk.total) + if (final) tail = 0; ce_aes_ccm_encrypt(walk.dst.virt.addr, walk.src.virt.addr, walk.nbytes - tail, ctx->key_enc, num_rounds(ctx), mac, walk.iv); - if (walk.nbytes == walk.total) - ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx)); + if (!final) + kernel_neon_end(); + err = skcipher_walk_done(&walk, tail); + if (!final) + kernel_neon_begin(); + } - kernel_neon_end(); + ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx)); - if (walk.nbytes) { - err = skcipher_walk_done(&walk, tail); - if (unlikely(err)) - return err; - if (unlikely(walk.nbytes)) - kernel_neon_begin(); - } - } while (walk.nbytes); + kernel_neon_end(); /* copy authtag to end of dst */ scatterwalk_map_and_copy(mac, req->dst, req->assoclen + req->cryptlen, crypto_aead_authsize(aead), 1); - return 0; + return err; } static int ccm_decrypt(struct aead_request *req) @@ -219,37 +215,36 @@ static int ccm_decrypt(struct aead_request *req) memcpy(buf, req->iv, AES_BLOCK_SIZE); err = skcipher_walk_aead_decrypt(&walk, req, false); - if (unlikely(err)) - return err; kernel_neon_begin(); if (req->assoclen) ccm_calculate_auth_mac(req, mac); - do { + while (walk.nbytes) { u32 tail = walk.nbytes % AES_BLOCK_SIZE; + bool final = walk.nbytes == walk.total; - if (walk.nbytes == walk.total) + if (final) tail = 0; ce_aes_ccm_decrypt(walk.dst.virt.addr, walk.src.virt.addr, walk.nbytes - tail, ctx->key_enc, num_rounds(ctx), mac, walk.iv); - if (walk.nbytes == walk.total) - ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx)); + if (!final) + kernel_neon_end(); + err = skcipher_walk_done(&walk, tail); 
+ if (!final) + kernel_neon_begin(); + } - kernel_neon_end(); + ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx)); - if (walk.nbytes) { - err = skcipher_walk_done(&walk, tail); - if (unlikely(err)) - return err; - if (unlikely(walk.nbytes)) - kernel_neon_begin(); - } - } while (walk.nbytes); + kernel_neon_end(); + + if (unlikely(err)) + return err; /* compare calculated auth tag with the stored one */ scatterwalk_map_and_copy(buf, req->src, diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index e5e9adc1fcf4..97331b454ea8 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -9,6 +9,7 @@ #include <asm/simd.h> #include <asm/unaligned.h> #include <crypto/aes.h> +#include <crypto/gcm.h> #include <crypto/algapi.h> #include <crypto/b128ops.h> #include <crypto/gf128mul.h> @@ -28,7 +29,8 @@ MODULE_ALIAS_CRYPTO("ghash"); #define GHASH_BLOCK_SIZE 16 #define GHASH_DIGEST_SIZE 16 -#define GCM_IV_SIZE 12 + +#define RFC4106_NONCE_SIZE 4 struct ghash_key { be128 k; @@ -43,6 +45,7 @@ struct ghash_desc_ctx { struct gcm_aes_ctx { struct crypto_aes_ctx aes_key; + u8 nonce[RFC4106_NONCE_SIZE]; struct ghash_key ghash_key; }; @@ -226,8 +229,8 @@ static int num_rounds(struct crypto_aes_ctx *ctx) return 6 + ctx->key_length / 4; } -static int gcm_setkey(struct crypto_aead *tfm, const u8 *inkey, - unsigned int keylen) +static int gcm_aes_setkey(struct crypto_aead *tfm, const u8 *inkey, + unsigned int keylen) { struct gcm_aes_ctx *ctx = crypto_aead_ctx(tfm); u8 key[GHASH_BLOCK_SIZE]; @@ -258,17 +261,9 @@ static int gcm_setkey(struct crypto_aead *tfm, const u8 *inkey, return 0; } -static int gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize) +static int gcm_aes_setauthsize(struct crypto_aead *tfm, unsigned int authsize) { - switch (authsize) { - case 4: - case 8: - case 12 ... 
16: - break; - default: - return -EINVAL; - } - return 0; + return crypto_gcm_check_authsize(authsize); } static void gcm_update_mac(u64 dg[], const u8 *src, int count, u8 buf[], @@ -302,13 +297,12 @@ static void gcm_update_mac(u64 dg[], const u8 *src, int count, u8 buf[], } } -static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[]) +static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[], u32 len) { struct crypto_aead *aead = crypto_aead_reqtfm(req); struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead); u8 buf[GHASH_BLOCK_SIZE]; struct scatter_walk walk; - u32 len = req->assoclen; int buf_count = 0; scatterwalk_start(&walk, req->src); @@ -338,27 +332,25 @@ static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[]) } } -static int gcm_encrypt(struct aead_request *req) +static int gcm_encrypt(struct aead_request *req, char *iv, int assoclen) { struct crypto_aead *aead = crypto_aead_reqtfm(req); struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead); int nrounds = num_rounds(&ctx->aes_key); struct skcipher_walk walk; u8 buf[AES_BLOCK_SIZE]; - u8 iv[AES_BLOCK_SIZE]; u64 dg[2] = {}; be128 lengths; u8 *tag; int err; - lengths.a = cpu_to_be64(req->assoclen * 8); + lengths.a = cpu_to_be64(assoclen * 8); lengths.b = cpu_to_be64(req->cryptlen * 8); - if (req->assoclen) - gcm_calculate_auth_mac(req, dg); + if (assoclen) + gcm_calculate_auth_mac(req, dg, assoclen); - memcpy(iv, req->iv, GCM_IV_SIZE); - put_unaligned_be32(2, iv + GCM_IV_SIZE); + put_unaligned_be32(2, iv + GCM_AES_IV_SIZE); err = skcipher_walk_aead_encrypt(&walk, req, false); @@ -403,7 +395,7 @@ static int gcm_encrypt(struct aead_request *req) return 0; } -static int gcm_decrypt(struct aead_request *req) +static int gcm_decrypt(struct aead_request *req, char *iv, int assoclen) { struct crypto_aead *aead = crypto_aead_reqtfm(req); struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead); @@ -412,21 +404,19 @@ static int gcm_decrypt(struct aead_request *req) struct skcipher_walk walk; u8 otag[AES_BLOCK_SIZE]; u8 buf[AES_BLOCK_SIZE]; - u8 iv[AES_BLOCK_SIZE]; u64 dg[2] = {}; be128 lengths; u8 *tag; int ret; int err; - lengths.a = cpu_to_be64(req->assoclen * 8); + lengths.a = cpu_to_be64(assoclen * 8); lengths.b = cpu_to_be64((req->cryptlen - authsize) * 8); - if (req->assoclen) - gcm_calculate_auth_mac(req, dg); + if (assoclen) + gcm_calculate_auth_mac(req, dg, assoclen); - memcpy(iv, req->iv, GCM_IV_SIZE); - put_unaligned_be32(2, iv + GCM_IV_SIZE); + put_unaligned_be32(2, iv + GCM_AES_IV_SIZE); scatterwalk_map_and_copy(otag, req->src, req->assoclen + req->cryptlen - authsize, @@ -471,14 +461,76 @@ static int gcm_decrypt(struct aead_request *req) return ret ? 
-EBADMSG : 0; } -static struct aead_alg gcm_aes_alg = { - .ivsize = GCM_IV_SIZE, +static int gcm_aes_encrypt(struct aead_request *req) +{ + u8 iv[AES_BLOCK_SIZE]; + + memcpy(iv, req->iv, GCM_AES_IV_SIZE); + return gcm_encrypt(req, iv, req->assoclen); +} + +static int gcm_aes_decrypt(struct aead_request *req) +{ + u8 iv[AES_BLOCK_SIZE]; + + memcpy(iv, req->iv, GCM_AES_IV_SIZE); + return gcm_decrypt(req, iv, req->assoclen); +} + +static int rfc4106_setkey(struct crypto_aead *tfm, const u8 *inkey, + unsigned int keylen) +{ + struct gcm_aes_ctx *ctx = crypto_aead_ctx(tfm); + int err; + + keylen -= RFC4106_NONCE_SIZE; + err = gcm_aes_setkey(tfm, inkey, keylen); + if (err) + return err; + + memcpy(ctx->nonce, inkey + keylen, RFC4106_NONCE_SIZE); + return 0; +} + +static int rfc4106_setauthsize(struct crypto_aead *tfm, unsigned int authsize) +{ + return crypto_rfc4106_check_authsize(authsize); +} + +static int rfc4106_encrypt(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead); + u8 iv[AES_BLOCK_SIZE]; + + memcpy(iv, ctx->nonce, RFC4106_NONCE_SIZE); + memcpy(iv + RFC4106_NONCE_SIZE, req->iv, GCM_RFC4106_IV_SIZE); + + return crypto_ipsec_check_assoclen(req->assoclen) ?: + gcm_encrypt(req, iv, req->assoclen - GCM_RFC4106_IV_SIZE); +} + +static int rfc4106_decrypt(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead); + u8 iv[AES_BLOCK_SIZE]; + + memcpy(iv, ctx->nonce, RFC4106_NONCE_SIZE); + memcpy(iv + RFC4106_NONCE_SIZE, req->iv, GCM_RFC4106_IV_SIZE); + + return crypto_ipsec_check_assoclen(req->assoclen) ?: + gcm_decrypt(req, iv, req->assoclen - GCM_RFC4106_IV_SIZE); +} + +static struct aead_alg gcm_aes_algs[] = {{ + .ivsize = GCM_AES_IV_SIZE, .chunksize = AES_BLOCK_SIZE, .maxauthsize = AES_BLOCK_SIZE, - .setkey = gcm_setkey, - .setauthsize = gcm_setauthsize, - .encrypt = gcm_encrypt, - .decrypt = gcm_decrypt, + .setkey = gcm_aes_setkey, + .setauthsize = gcm_aes_setauthsize, + .encrypt = gcm_aes_encrypt, + .decrypt = gcm_aes_decrypt, .base.cra_name = "gcm(aes)", .base.cra_driver_name = "gcm-aes-ce", @@ -487,7 +539,23 @@ static struct aead_alg gcm_aes_alg = { .base.cra_ctxsize = sizeof(struct gcm_aes_ctx) + 4 * sizeof(u64[2]), .base.cra_module = THIS_MODULE, -}; +}, { + .ivsize = GCM_RFC4106_IV_SIZE, + .chunksize = AES_BLOCK_SIZE, + .maxauthsize = AES_BLOCK_SIZE, + .setkey = rfc4106_setkey, + .setauthsize = rfc4106_setauthsize, + .encrypt = rfc4106_encrypt, + .decrypt = rfc4106_decrypt, + + .base.cra_name = "rfc4106(gcm(aes))", + .base.cra_driver_name = "rfc4106-gcm-aes-ce", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct gcm_aes_ctx) + + 4 * sizeof(u64[2]), + .base.cra_module = THIS_MODULE, +}}; static int __init ghash_ce_mod_init(void) { @@ -495,7 +563,8 @@ static int __init ghash_ce_mod_init(void) return -ENODEV; if (cpu_have_named_feature(PMULL)) - return crypto_register_aead(&gcm_aes_alg); + return crypto_register_aeads(gcm_aes_algs, + ARRAY_SIZE(gcm_aes_algs)); return crypto_register_shash(&ghash_alg); } @@ -503,7 +572,7 @@ static int __init ghash_ce_mod_init(void) static void __exit ghash_ce_mod_exit(void) { if (cpu_have_named_feature(PMULL)) - crypto_unregister_aead(&gcm_aes_alg); + crypto_unregister_aeads(gcm_aes_algs, ARRAY_SIZE(gcm_aes_algs)); else crypto_unregister_shash(&ghash_alg); } diff --git a/arch/arm64/crypto/sm4-ce-ccm-glue.c b/arch/arm64/crypto/sm4-ce-ccm-glue.c index 
f2cec7b52efc..5e7e17bbec81 100644 --- a/arch/arm64/crypto/sm4-ce-ccm-glue.c +++ b/arch/arm64/crypto/sm4-ce-ccm-glue.c @@ -166,7 +166,7 @@ static int ccm_crypt(struct aead_request *req, struct skcipher_walk *walk, unsigned int nbytes, u8 *mac)) { u8 __aligned(8) ctr0[SM4_BLOCK_SIZE]; - int err; + int err = 0; /* preserve the initial ctr0 for the TAG */ memcpy(ctr0, walk->iv, SM4_BLOCK_SIZE); @@ -177,33 +177,37 @@ static int ccm_crypt(struct aead_request *req, struct skcipher_walk *walk, if (req->assoclen) ccm_calculate_auth_mac(req, mac); - do { + while (walk->nbytes && walk->nbytes != walk->total) { unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE; - const u8 *src = walk->src.virt.addr; - u8 *dst = walk->dst.virt.addr; - if (walk->nbytes == walk->total) - tail = 0; + sm4_ce_ccm_crypt(rkey_enc, walk->dst.virt.addr, + walk->src.virt.addr, walk->iv, + walk->nbytes - tail, mac); + + kernel_neon_end(); + + err = skcipher_walk_done(walk, tail); + + kernel_neon_begin(); + } - if (walk->nbytes - tail) - sm4_ce_ccm_crypt(rkey_enc, dst, src, walk->iv, - walk->nbytes - tail, mac); + if (walk->nbytes) { + sm4_ce_ccm_crypt(rkey_enc, walk->dst.virt.addr, + walk->src.virt.addr, walk->iv, + walk->nbytes, mac); - if (walk->nbytes == walk->total) - sm4_ce_ccm_final(rkey_enc, ctr0, mac); + sm4_ce_ccm_final(rkey_enc, ctr0, mac); kernel_neon_end(); - if (walk->nbytes) { - err = skcipher_walk_done(walk, tail); - if (err) - return err; - if (walk->nbytes) - kernel_neon_begin(); - } - } while (walk->nbytes > 0); + err = skcipher_walk_done(walk, 0); + } else { + sm4_ce_ccm_final(rkey_enc, ctr0, mac); - return 0; + kernel_neon_end(); + } + + return err; } static int ccm_encrypt(struct aead_request *req) diff --git a/arch/arm64/crypto/sm4-ce-gcm-glue.c b/arch/arm64/crypto/sm4-ce-gcm-glue.c index c450a2025ca9..73bfb6972d3a 100644 --- a/arch/arm64/crypto/sm4-ce-gcm-glue.c +++ b/arch/arm64/crypto/sm4-ce-gcm-glue.c @@ -135,22 +135,23 @@ static void gcm_calculate_auth_mac(struct aead_request *req, u8 ghash[]) } static int gcm_crypt(struct aead_request *req, struct skcipher_walk *walk, - struct sm4_gcm_ctx *ctx, u8 ghash[], + u8 ghash[], int err, void (*sm4_ce_pmull_gcm_crypt)(const u32 *rkey_enc, u8 *dst, const u8 *src, u8 *iv, unsigned int nbytes, u8 *ghash, const u8 *ghash_table, const u8 *lengths)) { + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct sm4_gcm_ctx *ctx = crypto_aead_ctx(aead); u8 __aligned(8) iv[SM4_BLOCK_SIZE]; be128 __aligned(8) lengths; - int err; memset(ghash, 0, SM4_BLOCK_SIZE); lengths.a = cpu_to_be64(req->assoclen * 8); lengths.b = cpu_to_be64(walk->total * 8); - memcpy(iv, walk->iv, GCM_IV_SIZE); + memcpy(iv, req->iv, GCM_IV_SIZE); put_unaligned_be32(2, iv + GCM_IV_SIZE); kernel_neon_begin(); @@ -158,49 +159,51 @@ static int gcm_crypt(struct aead_request *req, struct skcipher_walk *walk, if (req->assoclen) gcm_calculate_auth_mac(req, ghash); - do { + while (walk->nbytes) { unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE; const u8 *src = walk->src.virt.addr; u8 *dst = walk->dst.virt.addr; if (walk->nbytes == walk->total) { - tail = 0; - sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv, walk->nbytes, ghash, ctx->ghash_table, (const u8 *)&lengths); - } else if (walk->nbytes - tail) { - sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv, - walk->nbytes - tail, ghash, - ctx->ghash_table, NULL); + + kernel_neon_end(); + + return skcipher_walk_done(walk, 0); } + sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv, + walk->nbytes - tail, ghash, + ctx->ghash_table, NULL); + 
kernel_neon_end(); err = skcipher_walk_done(walk, tail); - if (err) - return err; - if (walk->nbytes) - kernel_neon_begin(); - } while (walk->nbytes > 0); - return 0; + kernel_neon_begin(); + } + + sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, NULL, NULL, iv, + walk->nbytes, ghash, ctx->ghash_table, + (const u8 *)&lengths); + + kernel_neon_end(); + + return err; } static int gcm_encrypt(struct aead_request *req) { struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct sm4_gcm_ctx *ctx = crypto_aead_ctx(aead); u8 __aligned(8) ghash[SM4_BLOCK_SIZE]; struct skcipher_walk walk; int err; err = skcipher_walk_aead_encrypt(&walk, req, false); - if (err) - return err; - - err = gcm_crypt(req, &walk, ctx, ghash, sm4_ce_pmull_gcm_enc); + err = gcm_crypt(req, &walk, ghash, err, sm4_ce_pmull_gcm_enc); if (err) return err; @@ -215,17 +218,13 @@ static int gcm_decrypt(struct aead_request *req) { struct crypto_aead *aead = crypto_aead_reqtfm(req); unsigned int authsize = crypto_aead_authsize(aead); - struct sm4_gcm_ctx *ctx = crypto_aead_ctx(aead); u8 __aligned(8) ghash[SM4_BLOCK_SIZE]; u8 authtag[SM4_BLOCK_SIZE]; struct skcipher_walk walk; int err; err = skcipher_walk_aead_decrypt(&walk, req, false); - if (err) - return err; - - err = gcm_crypt(req, &walk, ctx, ghash, sm4_ce_pmull_gcm_dec); + err = gcm_crypt(req, &walk, ghash, err, sm4_ce_pmull_gcm_dec); if (err) return err; diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 526c3f40f6a2..c773820e4af9 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -398,10 +398,6 @@ static int xts_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, if (err) return err; - /* In fips mode only 128 bit or 256 bit keys are valid */ - if (fips_enabled && key_len != 32 && key_len != 64) - return -EINVAL; - /* Pick the correct function code based on the key length */ fc = (key_len == 32) ? CPACF_KM_XTS_128 : (key_len == 64) ? CPACF_KM_XTS_256 : 0; diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c index a279b7d23a5e..29dc827e0fe8 100644 --- a/arch/s390/crypto/paes_s390.c +++ b/arch/s390/crypto/paes_s390.c @@ -474,7 +474,7 @@ static int xts_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, return rc; /* - * xts_check_key verifies the key length is not odd and makes + * xts_verify_key verifies the key length is not odd and makes * sure that the two keys are not the same. This can be done * on the two protected keys as well */ diff --git a/arch/x86/Kconfig.assembler b/arch/x86/Kconfig.assembler index 26b8c08e2fc4..b88f784cb02e 100644 --- a/arch/x86/Kconfig.assembler +++ b/arch/x86/Kconfig.assembler @@ -19,3 +19,8 @@ config AS_TPAUSE def_bool $(as-instr,tpause %ecx) help Supported by binutils >= 2.31.1 and LLVM integrated assembler >= V7 + +config AS_GFNI + def_bool $(as-instr,vgf2p8mulb %xmm0$(comma)%xmm1$(comma)%xmm2) + help + Supported by binutils >= 2.30 and LLVM integrated assembler diff --git a/arch/x86/crypto/Kconfig b/arch/x86/crypto/Kconfig index 71c4c473d34b..9bbfd01cfa2f 100644 --- a/arch/x86/crypto/Kconfig +++ b/arch/x86/crypto/Kconfig @@ -304,6 +304,44 @@ config CRYPTO_ARIA_AESNI_AVX_X86_64 Processes 16 blocks in parallel. 
+config CRYPTO_ARIA_AESNI_AVX2_X86_64 + tristate "Ciphers: ARIA with modes: ECB, CTR (AES-NI/AVX2/GFNI)" + depends on X86 && 64BIT + select CRYPTO_SKCIPHER + select CRYPTO_SIMD + select CRYPTO_ALGAPI + select CRYPTO_ARIA + select CRYPTO_ARIA_AESNI_AVX_X86_64 + help + Length-preserving cipher: ARIA cipher algorithms + (RFC 5794) with ECB and CTR modes + + Architecture: x86_64 using: + - AES-NI (AES New Instructions) + - AVX2 (Advanced Vector Extensions) + - GFNI (Galois Field New Instructions) + + Processes 32 blocks in parallel. + +config CRYPTO_ARIA_GFNI_AVX512_X86_64 + tristate "Ciphers: ARIA with modes: ECB, CTR (AVX512/GFNI)" + depends on X86 && 64BIT && AS_AVX512 && AS_GFNI + select CRYPTO_SKCIPHER + select CRYPTO_SIMD + select CRYPTO_ALGAPI + select CRYPTO_ARIA + select CRYPTO_ARIA_AESNI_AVX_X86_64 + select CRYPTO_ARIA_AESNI_AVX2_X86_64 + help + Length-preserving cipher: ARIA cipher algorithms + (RFC 5794) with ECB and CTR modes + + Architecture: x86_64 using: + - AVX512 (Advanced Vector Extensions) + - GFNI (Galois Field New Instructions) + + Processes 64 blocks in parallel. + config CRYPTO_CHACHA20_X86_64 tristate "Ciphers: ChaCha20, XChaCha20, XChaCha12 (SSSE3/AVX2/AVX-512VL)" depends on X86 && 64BIT diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 3e7a329235bd..9aa46093c91b 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -103,6 +103,12 @@ sm4-aesni-avx2-x86_64-y := sm4-aesni-avx2-asm_64.o sm4_aesni_avx2_glue.o obj-$(CONFIG_CRYPTO_ARIA_AESNI_AVX_X86_64) += aria-aesni-avx-x86_64.o aria-aesni-avx-x86_64-y := aria-aesni-avx-asm_64.o aria_aesni_avx_glue.o +obj-$(CONFIG_CRYPTO_ARIA_AESNI_AVX2_X86_64) += aria-aesni-avx2-x86_64.o +aria-aesni-avx2-x86_64-y := aria-aesni-avx2-asm_64.o aria_aesni_avx2_glue.o + +obj-$(CONFIG_CRYPTO_ARIA_GFNI_AVX512_X86_64) += aria-gfni-avx512-x86_64.o +aria-gfni-avx512-x86_64-y := aria-gfni-avx512-asm_64.o aria_gfni_avx512_glue.o + quiet_cmd_perlasm = PERLASM $@ cmd_perlasm = $(PERL) $< > $@ $(obj)/%.S: $(src)/%.pl FORCE diff --git a/arch/x86/crypto/aria-aesni-avx-asm_64.S b/arch/x86/crypto/aria-aesni-avx-asm_64.S index 03ae4cd1d976..9243f6289d34 100644 --- a/arch/x86/crypto/aria-aesni-avx-asm_64.S +++ b/arch/x86/crypto/aria-aesni-avx-asm_64.S @@ -8,13 +8,9 @@ #include <linux/linkage.h> #include <linux/cfi_types.h> +#include <asm/asm-offsets.h> #include <asm/frame.h> -/* struct aria_ctx: */ -#define enc_key 0 -#define dec_key 272 -#define rounds 544 - /* register macros */ #define CTX %rdi @@ -271,34 +267,44 @@ #define aria_ark_8way(x0, x1, x2, x3, \ x4, x5, x6, x7, \ - t0, rk, idx, round) \ + t0, t1, t2, rk, \ + idx, round) \ /* AddRoundKey */ \ - vpbroadcastb ((round * 16) + idx + 3)(rk), t0; \ - vpxor t0, x0, x0; \ - vpbroadcastb ((round * 16) + idx + 2)(rk), t0; \ - vpxor t0, x1, x1; \ - vpbroadcastb ((round * 16) + idx + 1)(rk), t0; \ - vpxor t0, x2, x2; \ - vpbroadcastb ((round * 16) + idx + 0)(rk), t0; \ - vpxor t0, x3, x3; \ - vpbroadcastb ((round * 16) + idx + 7)(rk), t0; \ - vpxor t0, x4, x4; \ - vpbroadcastb ((round * 16) + idx + 6)(rk), t0; \ - vpxor t0, x5, x5; \ - vpbroadcastb ((round * 16) + idx + 5)(rk), t0; \ - vpxor t0, x6, x6; \ - vpbroadcastb ((round * 16) + idx + 4)(rk), t0; \ - vpxor t0, x7, x7; - + vbroadcastss ((round * 16) + idx + 0)(rk), t0; \ + vpsrld $24, t0, t2; \ + vpshufb t1, t2, t2; \ + vpxor t2, x0, x0; \ + vpsrld $16, t0, t2; \ + vpshufb t1, t2, t2; \ + vpxor t2, x1, x1; \ + vpsrld $8, t0, t2; \ + vpshufb t1, t2, t2; \ + vpxor t2, x2, x2; \ + vpshufb t1, t0, t2; \ + vpxor t2, x3, 
x3; \ + vbroadcastss ((round * 16) + idx + 4)(rk), t0; \ + vpsrld $24, t0, t2; \ + vpshufb t1, t2, t2; \ + vpxor t2, x4, x4; \ + vpsrld $16, t0, t2; \ + vpshufb t1, t2, t2; \ + vpxor t2, x5, x5; \ + vpsrld $8, t0, t2; \ + vpshufb t1, t2, t2; \ + vpxor t2, x6, x6; \ + vpshufb t1, t0, t2; \ + vpxor t2, x7, x7; + +#ifdef CONFIG_AS_GFNI #define aria_sbox_8way_gfni(x0, x1, x2, x3, \ x4, x5, x6, x7, \ t0, t1, t2, t3, \ t4, t5, t6, t7) \ - vpbroadcastq .Ltf_s2_bitmatrix, t0; \ - vpbroadcastq .Ltf_inv_bitmatrix, t1; \ - vpbroadcastq .Ltf_id_bitmatrix, t2; \ - vpbroadcastq .Ltf_aff_bitmatrix, t3; \ - vpbroadcastq .Ltf_x2_bitmatrix, t4; \ + vmovdqa .Ltf_s2_bitmatrix, t0; \ + vmovdqa .Ltf_inv_bitmatrix, t1; \ + vmovdqa .Ltf_id_bitmatrix, t2; \ + vmovdqa .Ltf_aff_bitmatrix, t3; \ + vmovdqa .Ltf_x2_bitmatrix, t4; \ vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1; \ vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5; \ vgf2p8affineqb $(tf_inv_const), t1, x2, x2; \ @@ -312,14 +318,15 @@ vgf2p8affineinvqb $0, t2, x3, x3; \ vgf2p8affineinvqb $0, t2, x7, x7 +#endif /* CONFIG_AS_GFNI */ + #define aria_sbox_8way(x0, x1, x2, x3, \ x4, x5, x6, x7, \ t0, t1, t2, t3, \ t4, t5, t6, t7) \ - vpxor t7, t7, t7; \ vmovdqa .Linv_shift_row, t0; \ vmovdqa .Lshift_row, t1; \ - vpbroadcastd .L0f0f0f0f, t6; \ + vbroadcastss .L0f0f0f0f, t6; \ vmovdqa .Ltf_lo__inv_aff__and__s2, t2; \ vmovdqa .Ltf_hi__inv_aff__and__s2, t3; \ vmovdqa .Ltf_lo__x2__and__fwd_aff, t4; \ @@ -414,8 +421,9 @@ y0, y1, y2, y3, \ y4, y5, y6, y7, \ mem_tmp, rk, round) \ + vpxor y7, y7, y7; \ aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ - y0, rk, 8, round); \ + y0, y7, y2, rk, 8, round); \ \ aria_sbox_8way(x2, x3, x0, x1, x6, x7, x4, x5, \ y0, y1, y2, y3, y4, y5, y6, y7); \ @@ -430,7 +438,7 @@ x4, x5, x6, x7, \ mem_tmp, 0); \ aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ - y0, rk, 0, round); \ + y0, y7, y2, rk, 0, round); \ \ aria_sbox_8way(x2, x3, x0, x1, x6, x7, x4, x5, \ y0, y1, y2, y3, y4, y5, y6, y7); \ @@ -468,8 +476,9 @@ y0, y1, y2, y3, \ y4, y5, y6, y7, \ mem_tmp, rk, round) \ + vpxor y7, y7, y7; \ aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ - y0, rk, 8, round); \ + y0, y7, y2, rk, 8, round); \ \ aria_sbox_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ y0, y1, y2, y3, y4, y5, y6, y7); \ @@ -484,7 +493,7 @@ x4, x5, x6, x7, \ mem_tmp, 0); \ aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ - y0, rk, 0, round); \ + y0, y7, y2, rk, 0, round); \ \ aria_sbox_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ y0, y1, y2, y3, y4, y5, y6, y7); \ @@ -522,14 +531,15 @@ y0, y1, y2, y3, \ y4, y5, y6, y7, \ mem_tmp, rk, round, last_round) \ + vpxor y7, y7, y7; \ aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ - y0, rk, 8, round); \ + y0, y7, y2, rk, 8, round); \ \ aria_sbox_8way(x2, x3, x0, x1, x6, x7, x4, x5, \ y0, y1, y2, y3, y4, y5, y6, y7); \ \ aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ - y0, rk, 8, last_round); \ + y0, y7, y2, rk, 8, last_round); \ \ aria_store_state_8way(x0, x1, x2, x3, \ x4, x5, x6, x7, \ @@ -539,25 +549,27 @@ x4, x5, x6, x7, \ mem_tmp, 0); \ aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ - y0, rk, 0, round); \ + y0, y7, y2, rk, 0, round); \ \ aria_sbox_8way(x2, x3, x0, x1, x6, x7, x4, x5, \ y0, y1, y2, y3, y4, y5, y6, y7); \ \ aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ - y0, rk, 0, last_round); \ + y0, y7, y2, rk, 0, last_round); \ \ aria_load_state_8way(y0, y1, y2, y3, \ y4, y5, y6, y7, \ mem_tmp, 8); +#ifdef CONFIG_AS_GFNI #define aria_fe_gfni(x0, x1, x2, x3, \ x4, x5, x6, x7, \ y0, y1, y2, y3, \ y4, y5, y6, y7, \ mem_tmp, rk, round) \ + vpxor y7, 
y7, y7; \ aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ - y0, rk, 8, round); \ + y0, y7, y2, rk, 8, round); \ \ aria_sbox_8way_gfni(x2, x3, x0, x1, \ x6, x7, x4, x5, \ @@ -574,7 +586,7 @@ x4, x5, x6, x7, \ mem_tmp, 0); \ aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ - y0, rk, 0, round); \ + y0, y7, y2, rk, 0, round); \ \ aria_sbox_8way_gfni(x2, x3, x0, x1, \ x6, x7, x4, x5, \ @@ -614,8 +626,9 @@ y0, y1, y2, y3, \ y4, y5, y6, y7, \ mem_tmp, rk, round) \ + vpxor y7, y7, y7; \ aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ - y0, rk, 8, round); \ + y0, y7, y2, rk, 8, round); \ \ aria_sbox_8way_gfni(x0, x1, x2, x3, \ x4, x5, x6, x7, \ @@ -632,7 +645,7 @@ x4, x5, x6, x7, \ mem_tmp, 0); \ aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ - y0, rk, 0, round); \ + y0, y7, y2, rk, 0, round); \ \ aria_sbox_8way_gfni(x0, x1, x2, x3, \ x4, x5, x6, x7, \ @@ -672,8 +685,9 @@ y0, y1, y2, y3, \ y4, y5, y6, y7, \ mem_tmp, rk, round, last_round) \ + vpxor y7, y7, y7; \ aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ - y0, rk, 8, round); \ + y0, y7, y2, rk, 8, round); \ \ aria_sbox_8way_gfni(x2, x3, x0, x1, \ x6, x7, x4, x5, \ @@ -681,7 +695,7 @@ y4, y5, y6, y7); \ \ aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ - y0, rk, 8, last_round); \ + y0, y7, y2, rk, 8, last_round); \ \ aria_store_state_8way(x0, x1, x2, x3, \ x4, x5, x6, x7, \ @@ -691,7 +705,7 @@ x4, x5, x6, x7, \ mem_tmp, 0); \ aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ - y0, rk, 0, round); \ + y0, y7, y2, rk, 0, round); \ \ aria_sbox_8way_gfni(x2, x3, x0, x1, \ x6, x7, x4, x5, \ @@ -699,12 +713,14 @@ y4, y5, y6, y7); \ \ aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ - y0, rk, 0, last_round); \ + y0, y7, y2, rk, 0, last_round); \ \ aria_load_state_8way(y0, y1, y2, y3, \ y4, y5, y6, y7, \ mem_tmp, 8); +#endif /* CONFIG_AS_GFNI */ + /* NB: section is mergeable, all elements must be aligned 16-byte blocks */ .section .rodata.cst16, "aM", @progbits, 16 .align 16 @@ -756,6 +772,7 @@ .Ltf_hi__x2__and__fwd_aff: .octa 0x3F893781E95FE1576CDA64D2BA0CB204 +#ifdef CONFIG_AS_GFNI .section .rodata.cst8, "aM", @progbits, 8 .align 8 /* AES affine: */ @@ -769,6 +786,14 @@ BV8(0, 1, 1, 1, 1, 1, 0, 0), BV8(0, 0, 1, 1, 1, 1, 1, 0), BV8(0, 0, 0, 1, 1, 1, 1, 1)) + .quad BM8X8(BV8(1, 0, 0, 0, 1, 1, 1, 1), + BV8(1, 1, 0, 0, 0, 1, 1, 1), + BV8(1, 1, 1, 0, 0, 0, 1, 1), + BV8(1, 1, 1, 1, 0, 0, 0, 1), + BV8(1, 1, 1, 1, 1, 0, 0, 0), + BV8(0, 1, 1, 1, 1, 1, 0, 0), + BV8(0, 0, 1, 1, 1, 1, 1, 0), + BV8(0, 0, 0, 1, 1, 1, 1, 1)) /* AES inverse affine: */ #define tf_inv_const BV8(1, 0, 1, 0, 0, 0, 0, 0) @@ -781,6 +806,14 @@ BV8(0, 0, 1, 0, 1, 0, 0, 1), BV8(1, 0, 0, 1, 0, 1, 0, 0), BV8(0, 1, 0, 0, 1, 0, 1, 0)) + .quad BM8X8(BV8(0, 0, 1, 0, 0, 1, 0, 1), + BV8(1, 0, 0, 1, 0, 0, 1, 0), + BV8(0, 1, 0, 0, 1, 0, 0, 1), + BV8(1, 0, 1, 0, 0, 1, 0, 0), + BV8(0, 1, 0, 1, 0, 0, 1, 0), + BV8(0, 0, 1, 0, 1, 0, 0, 1), + BV8(1, 0, 0, 1, 0, 1, 0, 0), + BV8(0, 1, 0, 0, 1, 0, 1, 0)) /* S2: */ #define tf_s2_const BV8(0, 1, 0, 0, 0, 1, 1, 1) @@ -793,6 +826,14 @@ BV8(1, 1, 0, 0, 1, 1, 1, 0), BV8(0, 1, 1, 0, 0, 0, 1, 1), BV8(1, 1, 1, 1, 0, 1, 1, 0)) + .quad BM8X8(BV8(0, 1, 0, 1, 0, 1, 1, 1), + BV8(0, 0, 1, 1, 1, 1, 1, 1), + BV8(1, 1, 1, 0, 1, 1, 0, 1), + BV8(1, 1, 0, 0, 0, 0, 1, 1), + BV8(0, 1, 0, 0, 0, 0, 1, 1), + BV8(1, 1, 0, 0, 1, 1, 1, 0), + BV8(0, 1, 1, 0, 0, 0, 1, 1), + BV8(1, 1, 1, 1, 0, 1, 1, 0)) /* X2: */ #define tf_x2_const BV8(0, 0, 1, 1, 0, 1, 0, 0) @@ -805,6 +846,14 @@ BV8(0, 1, 1, 0, 1, 0, 1, 1), BV8(1, 0, 1, 1, 1, 1, 0, 1), BV8(1, 0, 0, 1, 0, 0, 1, 1)) + .quad BM8X8(BV8(0, 0, 0, 1, 
1, 0, 0, 0), + BV8(0, 0, 1, 0, 0, 1, 1, 0), + BV8(0, 0, 0, 0, 1, 0, 1, 0), + BV8(1, 1, 1, 0, 0, 0, 1, 1), + BV8(1, 1, 1, 0, 1, 1, 0, 0), + BV8(0, 1, 1, 0, 1, 0, 1, 1), + BV8(1, 0, 1, 1, 1, 1, 0, 1), + BV8(1, 0, 0, 1, 0, 0, 1, 1)) /* Identity matrix: */ .Ltf_id_bitmatrix: @@ -816,6 +865,15 @@ BV8(0, 0, 0, 0, 0, 1, 0, 0), BV8(0, 0, 0, 0, 0, 0, 1, 0), BV8(0, 0, 0, 0, 0, 0, 0, 1)) + .quad BM8X8(BV8(1, 0, 0, 0, 0, 0, 0, 0), + BV8(0, 1, 0, 0, 0, 0, 0, 0), + BV8(0, 0, 1, 0, 0, 0, 0, 0), + BV8(0, 0, 0, 1, 0, 0, 0, 0), + BV8(0, 0, 0, 0, 1, 0, 0, 0), + BV8(0, 0, 0, 0, 0, 1, 0, 0), + BV8(0, 0, 0, 0, 0, 0, 1, 0), + BV8(0, 0, 0, 0, 0, 0, 0, 1)) +#endif /* CONFIG_AS_GFNI */ /* 4-bit mask */ .section .rodata.cst4.L0f0f0f0f, "aM", @progbits, 4 @@ -874,7 +932,7 @@ SYM_FUNC_START_LOCAL(__aria_aesni_avx_crypt_16way) aria_fo(%xmm9, %xmm8, %xmm11, %xmm10, %xmm12, %xmm13, %xmm14, %xmm15, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %rax, %r9, 10); - cmpl $12, rounds(CTX); + cmpl $12, ARIA_CTX_rounds(CTX); jne .Laria_192; aria_ff(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, @@ -887,7 +945,7 @@ SYM_FUNC_START_LOCAL(__aria_aesni_avx_crypt_16way) aria_fo(%xmm9, %xmm8, %xmm11, %xmm10, %xmm12, %xmm13, %xmm14, %xmm15, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %rax, %r9, 12); - cmpl $14, rounds(CTX); + cmpl $14, ARIA_CTX_rounds(CTX); jne .Laria_256; aria_ff(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, @@ -923,7 +981,7 @@ SYM_TYPED_FUNC_START(aria_aesni_avx_encrypt_16way) FRAME_BEGIN - leaq enc_key(CTX), %r9; + leaq ARIA_CTX_enc_key(CTX), %r9; inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, @@ -948,7 +1006,7 @@ SYM_TYPED_FUNC_START(aria_aesni_avx_decrypt_16way) FRAME_BEGIN - leaq dec_key(CTX), %r9; + leaq ARIA_CTX_dec_key(CTX), %r9; inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, @@ -1056,7 +1114,7 @@ SYM_TYPED_FUNC_START(aria_aesni_avx_ctr_crypt_16way) leaq (%rdx), %r11; leaq (%rcx), %rsi; leaq (%rcx), %rdx; - leaq enc_key(CTX), %r9; + leaq ARIA_CTX_enc_key(CTX), %r9; call __aria_aesni_avx_crypt_16way; @@ -1084,6 +1142,7 @@ SYM_TYPED_FUNC_START(aria_aesni_avx_ctr_crypt_16way) RET; SYM_FUNC_END(aria_aesni_avx_ctr_crypt_16way) +#ifdef CONFIG_AS_GFNI SYM_FUNC_START_LOCAL(__aria_aesni_avx_gfni_crypt_16way) /* input: * %r9: rk @@ -1157,7 +1216,7 @@ SYM_FUNC_START_LOCAL(__aria_aesni_avx_gfni_crypt_16way) %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %rax, %r9, 10); - cmpl $12, rounds(CTX); + cmpl $12, ARIA_CTX_rounds(CTX); jne .Laria_gfni_192; aria_ff_gfni(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, @@ -1174,7 +1233,7 @@ SYM_FUNC_START_LOCAL(__aria_aesni_avx_gfni_crypt_16way) %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %rax, %r9, 12); - cmpl $14, rounds(CTX); + cmpl $14, ARIA_CTX_rounds(CTX); jne .Laria_gfni_256; aria_ff_gfni(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, @@ -1218,7 +1277,7 @@ SYM_TYPED_FUNC_START(aria_aesni_avx_gfni_encrypt_16way) FRAME_BEGIN - leaq enc_key(CTX), %r9; + leaq ARIA_CTX_enc_key(CTX), %r9; inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, @@ -1243,7 +1302,7 @@ SYM_TYPED_FUNC_START(aria_aesni_avx_gfni_decrypt_16way) FRAME_BEGIN - 
leaq dec_key(CTX), %r9; + leaq ARIA_CTX_dec_key(CTX), %r9; inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, @@ -1275,7 +1334,7 @@ SYM_TYPED_FUNC_START(aria_aesni_avx_gfni_ctr_crypt_16way) leaq (%rdx), %r11; leaq (%rcx), %rsi; leaq (%rcx), %rdx; - leaq enc_key(CTX), %r9; + leaq ARIA_CTX_enc_key(CTX), %r9; call __aria_aesni_avx_gfni_crypt_16way; @@ -1302,3 +1361,4 @@ SYM_TYPED_FUNC_START(aria_aesni_avx_gfni_ctr_crypt_16way) FRAME_END RET; SYM_FUNC_END(aria_aesni_avx_gfni_ctr_crypt_16way) +#endif /* CONFIG_AS_GFNI */ diff --git a/arch/x86/crypto/aria-aesni-avx2-asm_64.S b/arch/x86/crypto/aria-aesni-avx2-asm_64.S new file mode 100644 index 000000000000..82a14b4ad920 --- /dev/null +++ b/arch/x86/crypto/aria-aesni-avx2-asm_64.S @@ -0,0 +1,1441 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * ARIA Cipher 32-way parallel algorithm (AVX2) + * + * Copyright (c) 2022 Taehee Yoo <ap420073@gmail.com> + * + */ + +#include <linux/linkage.h> +#include <asm/frame.h> +#include <asm/asm-offsets.h> +#include <linux/cfi_types.h> + +/* register macros */ +#define CTX %rdi + +#define ymm0_x xmm0 +#define ymm1_x xmm1 +#define ymm2_x xmm2 +#define ymm3_x xmm3 +#define ymm4_x xmm4 +#define ymm5_x xmm5 +#define ymm6_x xmm6 +#define ymm7_x xmm7 +#define ymm8_x xmm8 +#define ymm9_x xmm9 +#define ymm10_x xmm10 +#define ymm11_x xmm11 +#define ymm12_x xmm12 +#define ymm13_x xmm13 +#define ymm14_x xmm14 +#define ymm15_x xmm15 + +#define BV8(a0, a1, a2, a3, a4, a5, a6, a7) \ + ( (((a0) & 1) << 0) | \ + (((a1) & 1) << 1) | \ + (((a2) & 1) << 2) | \ + (((a3) & 1) << 3) | \ + (((a4) & 1) << 4) | \ + (((a5) & 1) << 5) | \ + (((a6) & 1) << 6) | \ + (((a7) & 1) << 7) ) + +#define BM8X8(l0, l1, l2, l3, l4, l5, l6, l7) \ + ( ((l7) << (0 * 8)) | \ + ((l6) << (1 * 8)) | \ + ((l5) << (2 * 8)) | \ + ((l4) << (3 * 8)) | \ + ((l3) << (4 * 8)) | \ + ((l2) << (5 * 8)) | \ + ((l1) << (6 * 8)) | \ + ((l0) << (7 * 8)) ) + +#define inc_le128(x, minus_one, tmp) \ + vpcmpeqq minus_one, x, tmp; \ + vpsubq minus_one, x, x; \ + vpslldq $8, tmp, tmp; \ + vpsubq tmp, x, x; + +#define filter_8bit(x, lo_t, hi_t, mask4bit, tmp0) \ + vpand x, mask4bit, tmp0; \ + vpandn x, mask4bit, x; \ + vpsrld $4, x, x; \ + \ + vpshufb tmp0, lo_t, tmp0; \ + vpshufb x, hi_t, x; \ + vpxor tmp0, x, x; + +#define transpose_4x4(x0, x1, x2, x3, t1, t2) \ + vpunpckhdq x1, x0, t2; \ + vpunpckldq x1, x0, x0; \ + \ + vpunpckldq x3, x2, t1; \ + vpunpckhdq x3, x2, x2; \ + \ + vpunpckhqdq t1, x0, x1; \ + vpunpcklqdq t1, x0, x0; \ + \ + vpunpckhqdq x2, t2, x3; \ + vpunpcklqdq x2, t2, x2; + +#define byteslice_16x16b(a0, b0, c0, d0, \ + a1, b1, c1, d1, \ + a2, b2, c2, d2, \ + a3, b3, c3, d3, \ + st0, st1) \ + vmovdqu d2, st0; \ + vmovdqu d3, st1; \ + transpose_4x4(a0, a1, a2, a3, d2, d3); \ + transpose_4x4(b0, b1, b2, b3, d2, d3); \ + vmovdqu st0, d2; \ + vmovdqu st1, d3; \ + \ + vmovdqu a0, st0; \ + vmovdqu a1, st1; \ + transpose_4x4(c0, c1, c2, c3, a0, a1); \ + transpose_4x4(d0, d1, d2, d3, a0, a1); \ + \ + vbroadcasti128 .Lshufb_16x16b, a0; \ + vmovdqu st1, a1; \ + vpshufb a0, a2, a2; \ + vpshufb a0, a3, a3; \ + vpshufb a0, b0, b0; \ + vpshufb a0, b1, b1; \ + vpshufb a0, b2, b2; \ + vpshufb a0, b3, b3; \ + vpshufb a0, a1, a1; \ + vpshufb a0, c0, c0; \ + vpshufb a0, c1, c1; \ + vpshufb a0, c2, c2; \ + vpshufb a0, c3, c3; \ + vpshufb a0, d0, d0; \ + vpshufb a0, d1, d1; \ + vpshufb a0, d2, d2; \ + vpshufb a0, d3, d3; \ + vmovdqu d3, st1; \ + vmovdqu st0, d3; \ + vpshufb a0, d3, a0; \ + vmovdqu d2, 
st0; \ + \ + transpose_4x4(a0, b0, c0, d0, d2, d3); \ + transpose_4x4(a1, b1, c1, d1, d2, d3); \ + vmovdqu st0, d2; \ + vmovdqu st1, d3; \ + \ + vmovdqu b0, st0; \ + vmovdqu b1, st1; \ + transpose_4x4(a2, b2, c2, d2, b0, b1); \ + transpose_4x4(a3, b3, c3, d3, b0, b1); \ + vmovdqu st0, b0; \ + vmovdqu st1, b1; \ + /* does not adjust output bytes inside vectors */ + +#define debyteslice_16x16b(a0, b0, c0, d0, \ + a1, b1, c1, d1, \ + a2, b2, c2, d2, \ + a3, b3, c3, d3, \ + st0, st1) \ + vmovdqu d2, st0; \ + vmovdqu d3, st1; \ + transpose_4x4(a0, a1, a2, a3, d2, d3); \ + transpose_4x4(b0, b1, b2, b3, d2, d3); \ + vmovdqu st0, d2; \ + vmovdqu st1, d3; \ + \ + vmovdqu a0, st0; \ + vmovdqu a1, st1; \ + transpose_4x4(c0, c1, c2, c3, a0, a1); \ + transpose_4x4(d0, d1, d2, d3, a0, a1); \ + \ + vbroadcasti128 .Lshufb_16x16b, a0; \ + vmovdqu st1, a1; \ + vpshufb a0, a2, a2; \ + vpshufb a0, a3, a3; \ + vpshufb a0, b0, b0; \ + vpshufb a0, b1, b1; \ + vpshufb a0, b2, b2; \ + vpshufb a0, b3, b3; \ + vpshufb a0, a1, a1; \ + vpshufb a0, c0, c0; \ + vpshufb a0, c1, c1; \ + vpshufb a0, c2, c2; \ + vpshufb a0, c3, c3; \ + vpshufb a0, d0, d0; \ + vpshufb a0, d1, d1; \ + vpshufb a0, d2, d2; \ + vpshufb a0, d3, d3; \ + vmovdqu d3, st1; \ + vmovdqu st0, d3; \ + vpshufb a0, d3, a0; \ + vmovdqu d2, st0; \ + \ + transpose_4x4(c0, d0, a0, b0, d2, d3); \ + transpose_4x4(c1, d1, a1, b1, d2, d3); \ + vmovdqu st0, d2; \ + vmovdqu st1, d3; \ + \ + vmovdqu b0, st0; \ + vmovdqu b1, st1; \ + transpose_4x4(c2, d2, a2, b2, b0, b1); \ + transpose_4x4(c3, d3, a3, b3, b0, b1); \ + vmovdqu st0, b0; \ + vmovdqu st1, b1; \ + /* does not adjust output bytes inside vectors */ + +/* load blocks to registers and apply pre-whitening */ +#define inpack16_pre(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + rio) \ + vmovdqu (0 * 32)(rio), x0; \ + vmovdqu (1 * 32)(rio), x1; \ + vmovdqu (2 * 32)(rio), x2; \ + vmovdqu (3 * 32)(rio), x3; \ + vmovdqu (4 * 32)(rio), x4; \ + vmovdqu (5 * 32)(rio), x5; \ + vmovdqu (6 * 32)(rio), x6; \ + vmovdqu (7 * 32)(rio), x7; \ + vmovdqu (8 * 32)(rio), y0; \ + vmovdqu (9 * 32)(rio), y1; \ + vmovdqu (10 * 32)(rio), y2; \ + vmovdqu (11 * 32)(rio), y3; \ + vmovdqu (12 * 32)(rio), y4; \ + vmovdqu (13 * 32)(rio), y5; \ + vmovdqu (14 * 32)(rio), y6; \ + vmovdqu (15 * 32)(rio), y7; + +/* byteslice pre-whitened blocks and store to temporary memory */ +#define inpack16_post(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem_ab, mem_cd) \ + byteslice_16x16b(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + (mem_ab), (mem_cd)); \ + \ + vmovdqu x0, 0 * 32(mem_ab); \ + vmovdqu x1, 1 * 32(mem_ab); \ + vmovdqu x2, 2 * 32(mem_ab); \ + vmovdqu x3, 3 * 32(mem_ab); \ + vmovdqu x4, 4 * 32(mem_ab); \ + vmovdqu x5, 5 * 32(mem_ab); \ + vmovdqu x6, 6 * 32(mem_ab); \ + vmovdqu x7, 7 * 32(mem_ab); \ + vmovdqu y0, 0 * 32(mem_cd); \ + vmovdqu y1, 1 * 32(mem_cd); \ + vmovdqu y2, 2 * 32(mem_cd); \ + vmovdqu y3, 3 * 32(mem_cd); \ + vmovdqu y4, 4 * 32(mem_cd); \ + vmovdqu y5, 5 * 32(mem_cd); \ + vmovdqu y6, 6 * 32(mem_cd); \ + vmovdqu y7, 7 * 32(mem_cd); + +#define write_output(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem) \ + vmovdqu x0, 0 * 32(mem); \ + vmovdqu x1, 1 * 32(mem); \ + vmovdqu x2, 2 * 32(mem); \ + vmovdqu x3, 3 * 32(mem); \ + vmovdqu x4, 4 * 32(mem); \ + vmovdqu x5, 5 * 32(mem); \ + vmovdqu x6, 6 * 32(mem); \ + vmovdqu x7, 7 * 32(mem); \ + vmovdqu y0, 8 * 32(mem); \ + vmovdqu y1, 9 * 32(mem); \ + 
vmovdqu y2, 10 * 32(mem); \ + vmovdqu y3, 11 * 32(mem); \ + vmovdqu y4, 12 * 32(mem); \ + vmovdqu y5, 13 * 32(mem); \ + vmovdqu y6, 14 * 32(mem); \ + vmovdqu y7, 15 * 32(mem); \ + +#define aria_store_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, idx) \ + vmovdqu x0, ((idx + 0) * 32)(mem_tmp); \ + vmovdqu x1, ((idx + 1) * 32)(mem_tmp); \ + vmovdqu x2, ((idx + 2) * 32)(mem_tmp); \ + vmovdqu x3, ((idx + 3) * 32)(mem_tmp); \ + vmovdqu x4, ((idx + 4) * 32)(mem_tmp); \ + vmovdqu x5, ((idx + 5) * 32)(mem_tmp); \ + vmovdqu x6, ((idx + 6) * 32)(mem_tmp); \ + vmovdqu x7, ((idx + 7) * 32)(mem_tmp); + +#define aria_load_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, idx) \ + vmovdqu ((idx + 0) * 32)(mem_tmp), x0; \ + vmovdqu ((idx + 1) * 32)(mem_tmp), x1; \ + vmovdqu ((idx + 2) * 32)(mem_tmp), x2; \ + vmovdqu ((idx + 3) * 32)(mem_tmp), x3; \ + vmovdqu ((idx + 4) * 32)(mem_tmp), x4; \ + vmovdqu ((idx + 5) * 32)(mem_tmp), x5; \ + vmovdqu ((idx + 6) * 32)(mem_tmp), x6; \ + vmovdqu ((idx + 7) * 32)(mem_tmp), x7; + +#define aria_ark_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + t0, rk, idx, round) \ + /* AddRoundKey */ \ + vpbroadcastb ((round * 16) + idx + 3)(rk), t0; \ + vpxor t0, x0, x0; \ + vpbroadcastb ((round * 16) + idx + 2)(rk), t0; \ + vpxor t0, x1, x1; \ + vpbroadcastb ((round * 16) + idx + 1)(rk), t0; \ + vpxor t0, x2, x2; \ + vpbroadcastb ((round * 16) + idx + 0)(rk), t0; \ + vpxor t0, x3, x3; \ + vpbroadcastb ((round * 16) + idx + 7)(rk), t0; \ + vpxor t0, x4, x4; \ + vpbroadcastb ((round * 16) + idx + 6)(rk), t0; \ + vpxor t0, x5, x5; \ + vpbroadcastb ((round * 16) + idx + 5)(rk), t0; \ + vpxor t0, x6, x6; \ + vpbroadcastb ((round * 16) + idx + 4)(rk), t0; \ + vpxor t0, x7, x7; + +#ifdef CONFIG_AS_GFNI +#define aria_sbox_8way_gfni(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + t0, t1, t2, t3, \ + t4, t5, t6, t7) \ + vpbroadcastq .Ltf_s2_bitmatrix, t0; \ + vpbroadcastq .Ltf_inv_bitmatrix, t1; \ + vpbroadcastq .Ltf_id_bitmatrix, t2; \ + vpbroadcastq .Ltf_aff_bitmatrix, t3; \ + vpbroadcastq .Ltf_x2_bitmatrix, t4; \ + vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1; \ + vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5; \ + vgf2p8affineqb $(tf_inv_const), t1, x2, x2; \ + vgf2p8affineqb $(tf_inv_const), t1, x6, x6; \ + vgf2p8affineinvqb $0, t2, x2, x2; \ + vgf2p8affineinvqb $0, t2, x6, x6; \ + vgf2p8affineinvqb $(tf_aff_const), t3, x0, x0; \ + vgf2p8affineinvqb $(tf_aff_const), t3, x4, x4; \ + vgf2p8affineqb $(tf_x2_const), t4, x3, x3; \ + vgf2p8affineqb $(tf_x2_const), t4, x7, x7; \ + vgf2p8affineinvqb $0, t2, x3, x3; \ + vgf2p8affineinvqb $0, t2, x7, x7 + +#endif /* CONFIG_AS_GFNI */ +#define aria_sbox_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + t0, t1, t2, t3, \ + t4, t5, t6, t7) \ + vpxor t7, t7, t7; \ + vpxor t6, t6, t6; \ + vbroadcasti128 .Linv_shift_row, t0; \ + vbroadcasti128 .Lshift_row, t1; \ + vbroadcasti128 .Ltf_lo__inv_aff__and__s2, t2; \ + vbroadcasti128 .Ltf_hi__inv_aff__and__s2, t3; \ + vbroadcasti128 .Ltf_lo__x2__and__fwd_aff, t4; \ + vbroadcasti128 .Ltf_hi__x2__and__fwd_aff, t5; \ + \ + vextracti128 $1, x0, t6##_x; \ + vaesenclast t7##_x, x0##_x, x0##_x; \ + vaesenclast t7##_x, t6##_x, t6##_x; \ + vinserti128 $1, t6##_x, x0, x0; \ + \ + vextracti128 $1, x4, t6##_x; \ + vaesenclast t7##_x, x4##_x, x4##_x; \ + vaesenclast t7##_x, t6##_x, t6##_x; \ + vinserti128 $1, t6##_x, x4, x4; \ + \ + vextracti128 $1, x1, t6##_x; \ + vaesenclast t7##_x, x1##_x, x1##_x; \ + vaesenclast t7##_x, t6##_x, t6##_x; \ + vinserti128 $1, t6##_x, x1, x1; \ + \ + vextracti128 $1, x5, t6##_x; \ 
+ vaesenclast t7##_x, x5##_x, x5##_x; \ + vaesenclast t7##_x, t6##_x, t6##_x; \ + vinserti128 $1, t6##_x, x5, x5; \ + \ + vextracti128 $1, x2, t6##_x; \ + vaesdeclast t7##_x, x2##_x, x2##_x; \ + vaesdeclast t7##_x, t6##_x, t6##_x; \ + vinserti128 $1, t6##_x, x2, x2; \ + \ + vextracti128 $1, x6, t6##_x; \ + vaesdeclast t7##_x, x6##_x, x6##_x; \ + vaesdeclast t7##_x, t6##_x, t6##_x; \ + vinserti128 $1, t6##_x, x6, x6; \ + \ + vpbroadcastd .L0f0f0f0f, t6; \ + \ + /* AES inverse shift rows */ \ + vpshufb t0, x0, x0; \ + vpshufb t0, x4, x4; \ + vpshufb t0, x1, x1; \ + vpshufb t0, x5, x5; \ + vpshufb t1, x3, x3; \ + vpshufb t1, x7, x7; \ + vpshufb t1, x2, x2; \ + vpshufb t1, x6, x6; \ + \ + /* affine transformation for S2 */ \ + filter_8bit(x1, t2, t3, t6, t0); \ + /* affine transformation for S2 */ \ + filter_8bit(x5, t2, t3, t6, t0); \ + \ + /* affine transformation for X2 */ \ + filter_8bit(x3, t4, t5, t6, t0); \ + /* affine transformation for X2 */ \ + filter_8bit(x7, t4, t5, t6, t0); \ + \ + vpxor t6, t6, t6; \ + vextracti128 $1, x3, t6##_x; \ + vaesdeclast t7##_x, x3##_x, x3##_x; \ + vaesdeclast t7##_x, t6##_x, t6##_x; \ + vinserti128 $1, t6##_x, x3, x3; \ + \ + vextracti128 $1, x7, t6##_x; \ + vaesdeclast t7##_x, x7##_x, x7##_x; \ + vaesdeclast t7##_x, t6##_x, t6##_x; \ + vinserti128 $1, t6##_x, x7, x7; \ + +#define aria_diff_m(x0, x1, x2, x3, \ + t0, t1, t2, t3) \ + /* T = rotr32(X, 8); */ \ + /* X ^= T */ \ + vpxor x0, x3, t0; \ + vpxor x1, x0, t1; \ + vpxor x2, x1, t2; \ + vpxor x3, x2, t3; \ + /* X = T ^ rotr(X, 16); */ \ + vpxor t2, x0, x0; \ + vpxor x1, t3, t3; \ + vpxor t0, x2, x2; \ + vpxor t1, x3, x1; \ + vmovdqu t3, x3; + +#define aria_diff_word(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7) \ + /* t1 ^= t2; */ \ + vpxor y0, x4, x4; \ + vpxor y1, x5, x5; \ + vpxor y2, x6, x6; \ + vpxor y3, x7, x7; \ + \ + /* t2 ^= t3; */ \ + vpxor y4, y0, y0; \ + vpxor y5, y1, y1; \ + vpxor y6, y2, y2; \ + vpxor y7, y3, y3; \ + \ + /* t0 ^= t1; */ \ + vpxor x4, x0, x0; \ + vpxor x5, x1, x1; \ + vpxor x6, x2, x2; \ + vpxor x7, x3, x3; \ + \ + /* t3 ^= t1; */ \ + vpxor x4, y4, y4; \ + vpxor x5, y5, y5; \ + vpxor x6, y6, y6; \ + vpxor x7, y7, y7; \ + \ + /* t2 ^= t0; */ \ + vpxor x0, y0, y0; \ + vpxor x1, y1, y1; \ + vpxor x2, y2, y2; \ + vpxor x3, y3, y3; \ + \ + /* t1 ^= t2; */ \ + vpxor y0, x4, x4; \ + vpxor y1, x5, x5; \ + vpxor y2, x6, x6; \ + vpxor y3, x7, x7; + +#define aria_fe(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem_tmp, rk, round) \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 8, round); \ + \ + aria_sbox_8way(x2, x3, x0, x1, x6, x7, x4, x5, \ + y0, y1, y2, y3, y4, y5, y6, y7); \ + \ + aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \ + aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \ + aria_store_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 8); \ + \ + aria_load_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 0); \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 0, round); \ + \ + aria_sbox_8way(x2, x3, x0, x1, x6, x7, x4, x5, \ + y0, y1, y2, y3, y4, y5, y6, y7); \ + \ + aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \ + aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \ + aria_store_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 0); \ + aria_load_state_8way(y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem_tmp, 8); \ + aria_diff_word(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7); \ + /* aria_diff_byte() \ + * T3 = ABCD -> BADC \ + * T3 = 
y4, y5, y6, y7 -> y5, y4, y7, y6 \ + * T0 = ABCD -> CDAB \ + * T0 = x0, x1, x2, x3 -> x2, x3, x0, x1 \ + * T1 = ABCD -> DCBA \ + * T1 = x4, x5, x6, x7 -> x7, x6, x5, x4 \ + */ \ + aria_diff_word(x2, x3, x0, x1, \ + x7, x6, x5, x4, \ + y0, y1, y2, y3, \ + y5, y4, y7, y6); \ + aria_store_state_8way(x3, x2, x1, x0, \ + x6, x7, x4, x5, \ + mem_tmp, 0); + +#define aria_fo(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem_tmp, rk, round) \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 8, round); \ + \ + aria_sbox_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, y1, y2, y3, y4, y5, y6, y7); \ + \ + aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \ + aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \ + aria_store_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 8); \ + \ + aria_load_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 0); \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 0, round); \ + \ + aria_sbox_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, y1, y2, y3, y4, y5, y6, y7); \ + \ + aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \ + aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \ + aria_store_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 0); \ + aria_load_state_8way(y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem_tmp, 8); \ + aria_diff_word(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7); \ + /* aria_diff_byte() \ + * T1 = ABCD -> BADC \ + * T1 = x4, x5, x6, x7 -> x5, x4, x7, x6 \ + * T2 = ABCD -> CDAB \ + * T2 = y0, y1, y2, y3, -> y2, y3, y0, y1 \ + * T3 = ABCD -> DCBA \ + * T3 = y4, y5, y6, y7 -> y7, y6, y5, y4 \ + */ \ + aria_diff_word(x0, x1, x2, x3, \ + x5, x4, x7, x6, \ + y2, y3, y0, y1, \ + y7, y6, y5, y4); \ + aria_store_state_8way(x3, x2, x1, x0, \ + x6, x7, x4, x5, \ + mem_tmp, 0); + +#define aria_ff(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem_tmp, rk, round, last_round) \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 8, round); \ + \ + aria_sbox_8way(x2, x3, x0, x1, x6, x7, x4, x5, \ + y0, y1, y2, y3, y4, y5, y6, y7); \ + \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 8, last_round); \ + \ + aria_store_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 8); \ + \ + aria_load_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 0); \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 0, round); \ + \ + aria_sbox_8way(x2, x3, x0, x1, x6, x7, x4, x5, \ + y0, y1, y2, y3, y4, y5, y6, y7); \ + \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 0, last_round); \ + \ + aria_load_state_8way(y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem_tmp, 8); +#ifdef CONFIG_AS_GFNI +#define aria_fe_gfni(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem_tmp, rk, round) \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 8, round); \ + \ + aria_sbox_8way_gfni(x2, x3, x0, x1, \ + x6, x7, x4, x5, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7); \ + \ + aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \ + aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \ + aria_store_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 8); \ + \ + aria_load_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 0); \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 0, round); \ + \ + aria_sbox_8way_gfni(x2, x3, x0, x1, \ + x6, x7, x4, x5, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7); \ + \ + aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \ + aria_diff_m(x4, x5, x6, x7, y0, 
y1, y2, y3); \ + aria_store_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 0); \ + aria_load_state_8way(y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem_tmp, 8); \ + aria_diff_word(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7); \ + /* aria_diff_byte() \ + * T3 = ABCD -> BADC \ + * T3 = y4, y5, y6, y7 -> y5, y4, y7, y6 \ + * T0 = ABCD -> CDAB \ + * T0 = x0, x1, x2, x3 -> x2, x3, x0, x1 \ + * T1 = ABCD -> DCBA \ + * T1 = x4, x5, x6, x7 -> x7, x6, x5, x4 \ + */ \ + aria_diff_word(x2, x3, x0, x1, \ + x7, x6, x5, x4, \ + y0, y1, y2, y3, \ + y5, y4, y7, y6); \ + aria_store_state_8way(x3, x2, x1, x0, \ + x6, x7, x4, x5, \ + mem_tmp, 0); + +#define aria_fo_gfni(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem_tmp, rk, round) \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 8, round); \ + \ + aria_sbox_8way_gfni(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7); \ + \ + aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \ + aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \ + aria_store_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 8); \ + \ + aria_load_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 0); \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 0, round); \ + \ + aria_sbox_8way_gfni(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7); \ + \ + aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \ + aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \ + aria_store_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 0); \ + aria_load_state_8way(y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem_tmp, 8); \ + aria_diff_word(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7); \ + /* aria_diff_byte() \ + * T1 = ABCD -> BADC \ + * T1 = x4, x5, x6, x7 -> x5, x4, x7, x6 \ + * T2 = ABCD -> CDAB \ + * T2 = y0, y1, y2, y3, -> y2, y3, y0, y1 \ + * T3 = ABCD -> DCBA \ + * T3 = y4, y5, y6, y7 -> y7, y6, y5, y4 \ + */ \ + aria_diff_word(x0, x1, x2, x3, \ + x5, x4, x7, x6, \ + y2, y3, y0, y1, \ + y7, y6, y5, y4); \ + aria_store_state_8way(x3, x2, x1, x0, \ + x6, x7, x4, x5, \ + mem_tmp, 0); + +#define aria_ff_gfni(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem_tmp, rk, round, last_round) \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 8, round); \ + \ + aria_sbox_8way_gfni(x2, x3, x0, x1, \ + x6, x7, x4, x5, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7); \ + \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 8, last_round); \ + \ + aria_store_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 8); \ + \ + aria_load_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, 0); \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 0, round); \ + \ + aria_sbox_8way_gfni(x2, x3, x0, x1, \ + x6, x7, x4, x5, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7); \ + \ + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, rk, 0, last_round); \ + \ + aria_load_state_8way(y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem_tmp, 8); +#endif /* CONFIG_AS_GFNI */ + +.section .rodata.cst32.shufb_16x16b, "aM", @progbits, 32 +.align 32 +#define SHUFB_BYTES(idx) \ + 0 + (idx), 4 + (idx), 8 + (idx), 12 + (idx) +.Lshufb_16x16b: + .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3) + .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3) + +.section .rodata.cst16, "aM", @progbits, 16 +.align 16 +/* For isolating SubBytes from AESENCLAST, inverse shift row */ 
+.Linv_shift_row: + .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b + .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03 +.Lshift_row: + .byte 0x00, 0x05, 0x0a, 0x0f, 0x04, 0x09, 0x0e, 0x03 + .byte 0x08, 0x0d, 0x02, 0x07, 0x0c, 0x01, 0x06, 0x0b +/* For CTR-mode IV byteswap */ +.Lbswap128_mask: + .byte 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08 + .byte 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 + +/* AES inverse affine and S2 combined: + * 1 1 0 0 0 0 0 1 x0 0 + * 0 1 0 0 1 0 0 0 x1 0 + * 1 1 0 0 1 1 1 1 x2 0 + * 0 1 1 0 1 0 0 1 x3 1 + * 0 1 0 0 1 1 0 0 * x4 + 0 + * 0 1 0 1 1 0 0 0 x5 0 + * 0 0 0 0 0 1 0 1 x6 0 + * 1 1 1 0 0 1 1 1 x7 1 + */ +.Ltf_lo__inv_aff__and__s2: + .octa 0x92172DA81A9FA520B2370D883ABF8500 +.Ltf_hi__inv_aff__and__s2: + .octa 0x2B15FFC1AF917B45E6D8320C625CB688 + +/* X2 and AES forward affine combined: + * 1 0 1 1 0 0 0 1 x0 0 + * 0 1 1 1 1 0 1 1 x1 0 + * 0 0 0 1 1 0 1 0 x2 1 + * 0 1 0 0 0 1 0 0 x3 0 + * 0 0 1 1 1 0 1 1 * x4 + 0 + * 0 1 0 0 1 0 0 0 x5 0 + * 1 1 0 1 0 0 1 1 x6 0 + * 0 1 0 0 1 0 1 0 x7 0 + */ +.Ltf_lo__x2__and__fwd_aff: + .octa 0xEFAE0544FCBD1657B8F95213ABEA4100 +.Ltf_hi__x2__and__fwd_aff: + .octa 0x3F893781E95FE1576CDA64D2BA0CB204 + +#ifdef CONFIG_AS_GFNI +.section .rodata.cst8, "aM", @progbits, 8 +.align 8 +/* AES affine: */ +#define tf_aff_const BV8(1, 1, 0, 0, 0, 1, 1, 0) +.Ltf_aff_bitmatrix: + .quad BM8X8(BV8(1, 0, 0, 0, 1, 1, 1, 1), + BV8(1, 1, 0, 0, 0, 1, 1, 1), + BV8(1, 1, 1, 0, 0, 0, 1, 1), + BV8(1, 1, 1, 1, 0, 0, 0, 1), + BV8(1, 1, 1, 1, 1, 0, 0, 0), + BV8(0, 1, 1, 1, 1, 1, 0, 0), + BV8(0, 0, 1, 1, 1, 1, 1, 0), + BV8(0, 0, 0, 1, 1, 1, 1, 1)) + +/* AES inverse affine: */ +#define tf_inv_const BV8(1, 0, 1, 0, 0, 0, 0, 0) +.Ltf_inv_bitmatrix: + .quad BM8X8(BV8(0, 0, 1, 0, 0, 1, 0, 1), + BV8(1, 0, 0, 1, 0, 0, 1, 0), + BV8(0, 1, 0, 0, 1, 0, 0, 1), + BV8(1, 0, 1, 0, 0, 1, 0, 0), + BV8(0, 1, 0, 1, 0, 0, 1, 0), + BV8(0, 0, 1, 0, 1, 0, 0, 1), + BV8(1, 0, 0, 1, 0, 1, 0, 0), + BV8(0, 1, 0, 0, 1, 0, 1, 0)) + +/* S2: */ +#define tf_s2_const BV8(0, 1, 0, 0, 0, 1, 1, 1) +.Ltf_s2_bitmatrix: + .quad BM8X8(BV8(0, 1, 0, 1, 0, 1, 1, 1), + BV8(0, 0, 1, 1, 1, 1, 1, 1), + BV8(1, 1, 1, 0, 1, 1, 0, 1), + BV8(1, 1, 0, 0, 0, 0, 1, 1), + BV8(0, 1, 0, 0, 0, 0, 1, 1), + BV8(1, 1, 0, 0, 1, 1, 1, 0), + BV8(0, 1, 1, 0, 0, 0, 1, 1), + BV8(1, 1, 1, 1, 0, 1, 1, 0)) + +/* X2: */ +#define tf_x2_const BV8(0, 0, 1, 1, 0, 1, 0, 0) +.Ltf_x2_bitmatrix: + .quad BM8X8(BV8(0, 0, 0, 1, 1, 0, 0, 0), + BV8(0, 0, 1, 0, 0, 1, 1, 0), + BV8(0, 0, 0, 0, 1, 0, 1, 0), + BV8(1, 1, 1, 0, 0, 0, 1, 1), + BV8(1, 1, 1, 0, 1, 1, 0, 0), + BV8(0, 1, 1, 0, 1, 0, 1, 1), + BV8(1, 0, 1, 1, 1, 1, 0, 1), + BV8(1, 0, 0, 1, 0, 0, 1, 1)) + +/* Identity matrix: */ +.Ltf_id_bitmatrix: + .quad BM8X8(BV8(1, 0, 0, 0, 0, 0, 0, 0), + BV8(0, 1, 0, 0, 0, 0, 0, 0), + BV8(0, 0, 1, 0, 0, 0, 0, 0), + BV8(0, 0, 0, 1, 0, 0, 0, 0), + BV8(0, 0, 0, 0, 1, 0, 0, 0), + BV8(0, 0, 0, 0, 0, 1, 0, 0), + BV8(0, 0, 0, 0, 0, 0, 1, 0), + BV8(0, 0, 0, 0, 0, 0, 0, 1)) + +#endif /* CONFIG_AS_GFNI */ + +/* 4-bit mask */ +.section .rodata.cst4.L0f0f0f0f, "aM", @progbits, 4 +.align 4 +.L0f0f0f0f: + .long 0x0f0f0f0f + +.text + +SYM_FUNC_START_LOCAL(__aria_aesni_avx2_crypt_32way) + /* input: + * %r9: rk + * %rsi: dst + * %rdx: src + * %ymm0..%ymm15: byte-sliced blocks + */ + + FRAME_BEGIN + + movq %rsi, %rax; + leaq 8 * 32(%rax), %r8; + + inpack16_post(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %r8); + aria_fo(%ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, 
%ymm14, %ymm15, + %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %rax, %r9, 0); + aria_fe(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %r9, 1); + aria_fo(%ymm9, %ymm8, %ymm11, %ymm10, %ymm12, %ymm13, %ymm14, %ymm15, + %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %rax, %r9, 2); + aria_fe(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %r9, 3); + aria_fo(%ymm9, %ymm8, %ymm11, %ymm10, %ymm12, %ymm13, %ymm14, %ymm15, + %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %rax, %r9, 4); + aria_fe(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %r9, 5); + aria_fo(%ymm9, %ymm8, %ymm11, %ymm10, %ymm12, %ymm13, %ymm14, %ymm15, + %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %rax, %r9, 6); + aria_fe(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %r9, 7); + aria_fo(%ymm9, %ymm8, %ymm11, %ymm10, %ymm12, %ymm13, %ymm14, %ymm15, + %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %rax, %r9, 8); + aria_fe(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %r9, 9); + aria_fo(%ymm9, %ymm8, %ymm11, %ymm10, %ymm12, %ymm13, %ymm14, %ymm15, + %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %rax, %r9, 10); + cmpl $12, ARIA_CTX_rounds(CTX); + jne .Laria_192; + aria_ff(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %r9, 11, 12); + jmp .Laria_end; +.Laria_192: + aria_fe(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %r9, 11); + aria_fo(%ymm9, %ymm8, %ymm11, %ymm10, %ymm12, %ymm13, %ymm14, %ymm15, + %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %rax, %r9, 12); + cmpl $14, ARIA_CTX_rounds(CTX); + jne .Laria_256; + aria_ff(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %r9, 13, 14); + jmp .Laria_end; +.Laria_256: + aria_fe(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %r9, 13); + aria_fo(%ymm9, %ymm8, %ymm11, %ymm10, %ymm12, %ymm13, %ymm14, %ymm15, + %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %rax, %r9, 14); + aria_ff(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %r9, 15, 16); +.Laria_end: + debyteslice_16x16b(%ymm8, %ymm12, %ymm1, %ymm4, + %ymm9, %ymm13, %ymm0, %ymm5, + %ymm10, %ymm14, %ymm3, %ymm6, + %ymm11, %ymm15, %ymm2, %ymm7, + (%rax), (%r8)); + + FRAME_END + RET; +SYM_FUNC_END(__aria_aesni_avx2_crypt_32way) + +SYM_TYPED_FUNC_START(aria_aesni_avx2_encrypt_32way) + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + + FRAME_BEGIN + + leaq ARIA_CTX_enc_key(CTX), %r9; + + inpack16_pre(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rdx); + + call __aria_aesni_avx2_crypt_32way; + + write_output(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rax); + + FRAME_END + RET; 
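The cmpl/jne ladder in __aria_aesni_avx2_crypt_32way above encodes ARIA's key-size dispatch: ARIA_CTX_rounds is 12, 14 or 16 for 128-, 192- and 256-bit keys (RFC 5794), the rounds alternate between the aria_fo and aria_fe macros (ARIA's odd/even round functions), and the final aria_ff applies the substitution layer between the last two round keys. A minimal C sketch of that control flow, not from the patch, with aria_f_odd/aria_f_even/aria_f_final as hypothetical stand-ins for the macro expansions:

	/*
	 * Sketch only: aria_f_odd/aria_f_even/aria_f_final stand in for the
	 * aria_fo/aria_fe/aria_ff macro expansions; ctx->rounds is 12, 14 or
	 * 16 for 128-, 192- and 256-bit keys.
	 */
	#include <crypto/aria.h>

	static void aria_f_odd(const struct aria_ctx *ctx, u8 *state, int round);
	static void aria_f_even(const struct aria_ctx *ctx, u8 *state, int round);
	static void aria_f_final(const struct aria_ctx *ctx, u8 *state,
				 int round, int last_round);

	static void aria_round_schedule(const struct aria_ctx *ctx, u8 *state)
	{
		int r;

		for (r = 0; r < ctx->rounds - 1; r++) {
			if (r & 1)
				aria_f_even(ctx, state, r);	/* aria_fe: rounds 1, 3, ... */
			else
				aria_f_odd(ctx, state, r);	/* aria_fo: rounds 0, 2, ... */
		}
		/* Final round: S-layer between round keys rounds-1 and rounds. */
		aria_f_final(ctx, state, ctx->rounds - 1, ctx->rounds);
	}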
+SYM_FUNC_END(aria_aesni_avx2_encrypt_32way) + +SYM_TYPED_FUNC_START(aria_aesni_avx2_decrypt_32way) + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + + FRAME_BEGIN + + leaq ARIA_CTX_dec_key(CTX), %r9; + + inpack16_pre(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rdx); + + call __aria_aesni_avx2_crypt_32way; + + write_output(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rax); + + FRAME_END + RET; +SYM_FUNC_END(aria_aesni_avx2_decrypt_32way) + +SYM_FUNC_START_LOCAL(__aria_aesni_avx2_ctr_gen_keystream_32way) + /* input: + * %rdi: ctx + * %rsi: dst + * %rdx: src + * %rcx: keystream + * %r8: iv (big endian, 128bit) + */ + + FRAME_BEGIN + movq 8(%r8), %r11; + bswapq %r11; + + vbroadcasti128 .Lbswap128_mask (%rip), %ymm6; + vpcmpeqd %ymm0, %ymm0, %ymm0; + vpsrldq $8, %ymm0, %ymm0; /* ab: -1:0 ; cd: -1:0 */ + vpaddq %ymm0, %ymm0, %ymm5; /* ab: -2:0 ; cd: -2:0 */ + + /* load IV and byteswap */ + vmovdqu (%r8), %xmm7; + vpshufb %xmm6, %xmm7, %xmm7; + vmovdqa %xmm7, %xmm3; + inc_le128(%xmm7, %xmm0, %xmm4); + vinserti128 $1, %xmm7, %ymm3, %ymm3; + vpshufb %ymm6, %ymm3, %ymm8; /* +1 ; +0 */ + + /* check need for handling 64-bit overflow and carry */ + cmpq $(0xffffffffffffffff - 32), %r11; + ja .Lhandle_ctr_carry; + + /* construct IVs */ + vpsubq %ymm5, %ymm3, %ymm3; /* +3 ; +2 */ + vpshufb %ymm6, %ymm3, %ymm9; + vpsubq %ymm5, %ymm3, %ymm3; /* +5 ; +4 */ + vpshufb %ymm6, %ymm3, %ymm10; + vpsubq %ymm5, %ymm3, %ymm3; /* +7 ; +6 */ + vpshufb %ymm6, %ymm3, %ymm11; + vpsubq %ymm5, %ymm3, %ymm3; /* +9 ; +8 */ + vpshufb %ymm6, %ymm3, %ymm12; + vpsubq %ymm5, %ymm3, %ymm3; /* +11 ; +10 */ + vpshufb %ymm6, %ymm3, %ymm13; + vpsubq %ymm5, %ymm3, %ymm3; /* +13 ; +12 */ + vpshufb %ymm6, %ymm3, %ymm14; + vpsubq %ymm5, %ymm3, %ymm3; /* +15 ; +14 */ + vpshufb %ymm6, %ymm3, %ymm15; + vmovdqu %ymm8, (0 * 32)(%rcx); + vmovdqu %ymm9, (1 * 32)(%rcx); + vmovdqu %ymm10, (2 * 32)(%rcx); + vmovdqu %ymm11, (3 * 32)(%rcx); + vmovdqu %ymm12, (4 * 32)(%rcx); + vmovdqu %ymm13, (5 * 32)(%rcx); + vmovdqu %ymm14, (6 * 32)(%rcx); + vmovdqu %ymm15, (7 * 32)(%rcx); + + vpsubq %ymm5, %ymm3, %ymm3; /* +17 ; +16 */ + vpshufb %ymm6, %ymm3, %ymm8; + vpsubq %ymm5, %ymm3, %ymm3; /* +19 ; +18 */ + vpshufb %ymm6, %ymm3, %ymm9; + vpsubq %ymm5, %ymm3, %ymm3; /* +21 ; +20 */ + vpshufb %ymm6, %ymm3, %ymm10; + vpsubq %ymm5, %ymm3, %ymm3; /* +23 ; +22 */ + vpshufb %ymm6, %ymm3, %ymm11; + vpsubq %ymm5, %ymm3, %ymm3; /* +25 ; +24 */ + vpshufb %ymm6, %ymm3, %ymm12; + vpsubq %ymm5, %ymm3, %ymm3; /* +27 ; +26 */ + vpshufb %ymm6, %ymm3, %ymm13; + vpsubq %ymm5, %ymm3, %ymm3; /* +29 ; +28 */ + vpshufb %ymm6, %ymm3, %ymm14; + vpsubq %ymm5, %ymm3, %ymm3; /* +31 ; +30 */ + vpshufb %ymm6, %ymm3, %ymm15; + vpsubq %ymm5, %ymm3, %ymm3; /* +32 */ + vpshufb %xmm6, %xmm3, %xmm3; + vmovdqu %xmm3, (%r8); + vmovdqu (0 * 32)(%rcx), %ymm0; + vmovdqu (1 * 32)(%rcx), %ymm1; + vmovdqu (2 * 32)(%rcx), %ymm2; + vmovdqu (3 * 32)(%rcx), %ymm3; + vmovdqu (4 * 32)(%rcx), %ymm4; + vmovdqu (5 * 32)(%rcx), %ymm5; + vmovdqu (6 * 32)(%rcx), %ymm6; + vmovdqu (7 * 32)(%rcx), %ymm7; + jmp .Lctr_carry_done; + + .Lhandle_ctr_carry: + /* construct IVs */ + inc_le128(%ymm3, %ymm0, %ymm4); + inc_le128(%ymm3, %ymm0, %ymm4); + vpshufb %ymm6, %ymm3, %ymm9; /* +3 ; +2 */ + inc_le128(%ymm3, %ymm0, %ymm4); + inc_le128(%ymm3, %ymm0, %ymm4); + vpshufb %ymm6, %ymm3, %ymm10; /* +5 ; +4 */ + inc_le128(%ymm3, %ymm0, %ymm4); + 
inc_le128(%ymm3, %ymm0, %ymm4); + vpshufb %ymm6, %ymm3, %ymm11; /* +7 ; +6 */ + inc_le128(%ymm3, %ymm0, %ymm4); + inc_le128(%ymm3, %ymm0, %ymm4); + vpshufb %ymm6, %ymm3, %ymm12; /* +9 ; +8 */ + inc_le128(%ymm3, %ymm0, %ymm4); + inc_le128(%ymm3, %ymm0, %ymm4); + vpshufb %ymm6, %ymm3, %ymm13; /* +11 ; +10 */ + inc_le128(%ymm3, %ymm0, %ymm4); + inc_le128(%ymm3, %ymm0, %ymm4); + vpshufb %ymm6, %ymm3, %ymm14; /* +13 ; +12 */ + inc_le128(%ymm3, %ymm0, %ymm4); + inc_le128(%ymm3, %ymm0, %ymm4); + vpshufb %ymm6, %ymm3, %ymm15; /* +15 ; +14 */ + vmovdqu %ymm8, (0 * 32)(%rcx); + vmovdqu %ymm9, (1 * 32)(%rcx); + vmovdqu %ymm10, (2 * 32)(%rcx); + vmovdqu %ymm11, (3 * 32)(%rcx); + vmovdqu %ymm12, (4 * 32)(%rcx); + vmovdqu %ymm13, (5 * 32)(%rcx); + vmovdqu %ymm14, (6 * 32)(%rcx); + vmovdqu %ymm15, (7 * 32)(%rcx); + + inc_le128(%ymm3, %ymm0, %ymm4); + inc_le128(%ymm3, %ymm0, %ymm4); + vpshufb %ymm6, %ymm3, %ymm8; /* +17 ; +16 */ + inc_le128(%ymm3, %ymm0, %ymm4); + inc_le128(%ymm3, %ymm0, %ymm4); + vpshufb %ymm6, %ymm3, %ymm9; /* +19 ; +18 */ + inc_le128(%ymm3, %ymm0, %ymm4); + inc_le128(%ymm3, %ymm0, %ymm4); + vpshufb %ymm6, %ymm3, %ymm10; /* +21 ; +20 */ + inc_le128(%ymm3, %ymm0, %ymm4); + inc_le128(%ymm3, %ymm0, %ymm4); + vpshufb %ymm6, %ymm3, %ymm11; /* +23 ; +22 */ + inc_le128(%ymm3, %ymm0, %ymm4); + inc_le128(%ymm3, %ymm0, %ymm4); + vpshufb %ymm6, %ymm3, %ymm12; /* +25 ; +24 */ + inc_le128(%ymm3, %ymm0, %ymm4); + inc_le128(%ymm3, %ymm0, %ymm4); + vpshufb %ymm6, %ymm3, %ymm13; /* +27 ; +26 */ + inc_le128(%ymm3, %ymm0, %ymm4); + inc_le128(%ymm3, %ymm0, %ymm4); + vpshufb %ymm6, %ymm3, %ymm14; /* +29 ; +28 */ + inc_le128(%ymm3, %ymm0, %ymm4); + inc_le128(%ymm3, %ymm0, %ymm4); + vpshufb %ymm6, %ymm3, %ymm15; /* +31 ; +30 */ + inc_le128(%ymm3, %ymm0, %ymm4); + vextracti128 $1, %ymm3, %xmm3; + vpshufb %xmm6, %xmm3, %xmm3; /* +32 */ + vmovdqu %xmm3, (%r8); + vmovdqu (0 * 32)(%rcx), %ymm0; + vmovdqu (1 * 32)(%rcx), %ymm1; + vmovdqu (2 * 32)(%rcx), %ymm2; + vmovdqu (3 * 32)(%rcx), %ymm3; + vmovdqu (4 * 32)(%rcx), %ymm4; + vmovdqu (5 * 32)(%rcx), %ymm5; + vmovdqu (6 * 32)(%rcx), %ymm6; + vmovdqu (7 * 32)(%rcx), %ymm7; + + .Lctr_carry_done: + + FRAME_END + RET; +SYM_FUNC_END(__aria_aesni_avx2_ctr_gen_keystream_32way) + +SYM_TYPED_FUNC_START(aria_aesni_avx2_ctr_crypt_32way) + /* input: + * %rdi: ctx + * %rsi: dst + * %rdx: src + * %rcx: keystream + * %r8: iv (big endian, 128bit) + */ + FRAME_BEGIN + + call __aria_aesni_avx2_ctr_gen_keystream_32way; + + leaq (%rsi), %r10; + leaq (%rdx), %r11; + leaq (%rcx), %rsi; + leaq (%rcx), %rdx; + leaq ARIA_CTX_enc_key(CTX), %r9; + + call __aria_aesni_avx2_crypt_32way; + + vpxor (0 * 32)(%r11), %ymm1, %ymm1; + vpxor (1 * 32)(%r11), %ymm0, %ymm0; + vpxor (2 * 32)(%r11), %ymm3, %ymm3; + vpxor (3 * 32)(%r11), %ymm2, %ymm2; + vpxor (4 * 32)(%r11), %ymm4, %ymm4; + vpxor (5 * 32)(%r11), %ymm5, %ymm5; + vpxor (6 * 32)(%r11), %ymm6, %ymm6; + vpxor (7 * 32)(%r11), %ymm7, %ymm7; + vpxor (8 * 32)(%r11), %ymm8, %ymm8; + vpxor (9 * 32)(%r11), %ymm9, %ymm9; + vpxor (10 * 32)(%r11), %ymm10, %ymm10; + vpxor (11 * 32)(%r11), %ymm11, %ymm11; + vpxor (12 * 32)(%r11), %ymm12, %ymm12; + vpxor (13 * 32)(%r11), %ymm13, %ymm13; + vpxor (14 * 32)(%r11), %ymm14, %ymm14; + vpxor (15 * 32)(%r11), %ymm15, %ymm15; + write_output(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %r10); + + FRAME_END + RET; +SYM_FUNC_END(aria_aesni_avx2_ctr_crypt_32way) + +#ifdef CONFIG_AS_GFNI +SYM_FUNC_START_LOCAL(__aria_aesni_avx2_gfni_crypt_32way) + 
/* input: + * %r9: rk + * %rsi: dst + * %rdx: src + * %ymm0..%ymm15: 16 byte-sliced blocks + */ + + FRAME_BEGIN + + movq %rsi, %rax; + leaq 8 * 32(%rax), %r8; + + inpack16_post(%ymm0, %ymm1, %ymm2, %ymm3, + %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, + %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %r8); + aria_fo_gfni(%ymm8, %ymm9, %ymm10, %ymm11, + %ymm12, %ymm13, %ymm14, %ymm15, + %ymm0, %ymm1, %ymm2, %ymm3, + %ymm4, %ymm5, %ymm6, %ymm7, + %rax, %r9, 0); + aria_fe_gfni(%ymm1, %ymm0, %ymm3, %ymm2, + %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, + %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %r9, 1); + aria_fo_gfni(%ymm9, %ymm8, %ymm11, %ymm10, + %ymm12, %ymm13, %ymm14, %ymm15, + %ymm0, %ymm1, %ymm2, %ymm3, + %ymm4, %ymm5, %ymm6, %ymm7, + %rax, %r9, 2); + aria_fe_gfni(%ymm1, %ymm0, %ymm3, %ymm2, + %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, + %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %r9, 3); + aria_fo_gfni(%ymm9, %ymm8, %ymm11, %ymm10, + %ymm12, %ymm13, %ymm14, %ymm15, + %ymm0, %ymm1, %ymm2, %ymm3, + %ymm4, %ymm5, %ymm6, %ymm7, + %rax, %r9, 4); + aria_fe_gfni(%ymm1, %ymm0, %ymm3, %ymm2, + %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, + %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %r9, 5); + aria_fo_gfni(%ymm9, %ymm8, %ymm11, %ymm10, + %ymm12, %ymm13, %ymm14, %ymm15, + %ymm0, %ymm1, %ymm2, %ymm3, + %ymm4, %ymm5, %ymm6, %ymm7, + %rax, %r9, 6); + aria_fe_gfni(%ymm1, %ymm0, %ymm3, %ymm2, + %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, + %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %r9, 7); + aria_fo_gfni(%ymm9, %ymm8, %ymm11, %ymm10, + %ymm12, %ymm13, %ymm14, %ymm15, + %ymm0, %ymm1, %ymm2, %ymm3, + %ymm4, %ymm5, %ymm6, %ymm7, + %rax, %r9, 8); + aria_fe_gfni(%ymm1, %ymm0, %ymm3, %ymm2, + %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, + %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %r9, 9); + aria_fo_gfni(%ymm9, %ymm8, %ymm11, %ymm10, + %ymm12, %ymm13, %ymm14, %ymm15, + %ymm0, %ymm1, %ymm2, %ymm3, + %ymm4, %ymm5, %ymm6, %ymm7, + %rax, %r9, 10); + cmpl $12, ARIA_CTX_rounds(CTX); + jne .Laria_gfni_192; + aria_ff_gfni(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %r9, 11, 12); + jmp .Laria_gfni_end; +.Laria_gfni_192: + aria_fe_gfni(%ymm1, %ymm0, %ymm3, %ymm2, + %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, + %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %r9, 11); + aria_fo_gfni(%ymm9, %ymm8, %ymm11, %ymm10, + %ymm12, %ymm13, %ymm14, %ymm15, + %ymm0, %ymm1, %ymm2, %ymm3, + %ymm4, %ymm5, %ymm6, %ymm7, + %rax, %r9, 12); + cmpl $14, ARIA_CTX_rounds(CTX); + jne .Laria_gfni_256; + aria_ff_gfni(%ymm1, %ymm0, %ymm3, %ymm2, + %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, + %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %r9, 13, 14); + jmp .Laria_gfni_end; +.Laria_gfni_256: + aria_fe_gfni(%ymm1, %ymm0, %ymm3, %ymm2, + %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, + %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %r9, 13); + aria_fo_gfni(%ymm9, %ymm8, %ymm11, %ymm10, + %ymm12, %ymm13, %ymm14, %ymm15, + %ymm0, %ymm1, %ymm2, %ymm3, + %ymm4, %ymm5, %ymm6, %ymm7, + %rax, %r9, 14); + aria_ff_gfni(%ymm1, %ymm0, %ymm3, %ymm2, + %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, + %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %r9, 15, 16); +.Laria_gfni_end: + debyteslice_16x16b(%ymm8, %ymm12, %ymm1, %ymm4, + %ymm9, %ymm13, %ymm0, %ymm5, + %ymm10, %ymm14, %ymm3, %ymm6, + %ymm11, %ymm15, %ymm2, %ymm7, + (%rax), (%r8)); + + FRAME_END + RET; 
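On the GFNI path just shown, the AESENCLAST plus inverse-ShiftRows trick used elsewhere in this file is replaced by direct GF(2^8) affine transforms against the bit-matrices in .rodata: ARIA's S1 box is the AES S-box, an affine map applied to the field inverse, so each application collapses to a single vgf2p8affineinvqb with .Ltf_aff_bitmatrix and tf_aff_const. A user-space intrinsics sketch of that one step, not from the patch; the packed 64-bit matrix value is derived here from the BV8()/BM8X8() rows in this file and should be treated as an assumption to check against the source:

	/* Requires GFNI and AVX2 (e.g. gcc -mgfni -mavx2). */
	#include <immintrin.h>
	#include <stdint.h>

	/* .Ltf_aff_bitmatrix rows l0..l7 packed with l0 in the most
	 * significant byte, as the BM8X8() macro does (assumed value). */
	#define ARIA_S1_AFF_MATRIX	0xf1e3c78f1f3e7cf8ULL
	/* tf_aff_const = BV8(1, 1, 0, 0, 0, 1, 1, 0) = 0x63 */
	#define ARIA_S1_AFF_CONST	0x63

	/* Apply ARIA S1 (the AES S-box) to 32 bytes at once:
	 * S1(x) = A * inverse(x) + c over GF(2^8). */
	static __m256i aria_s1_32bytes(__m256i x)
	{
		const __m256i A = _mm256_set1_epi64x((long long)ARIA_S1_AFF_MATRIX);

		return _mm256_gf2p8affineinv_epi64_epi8(x, A, ARIA_S1_AFF_CONST);
	}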
+SYM_FUNC_END(__aria_aesni_avx2_gfni_crypt_32way) + +SYM_TYPED_FUNC_START(aria_aesni_avx2_gfni_encrypt_32way) + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + + FRAME_BEGIN + + leaq ARIA_CTX_enc_key(CTX), %r9; + + inpack16_pre(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rdx); + + call __aria_aesni_avx2_gfni_crypt_32way; + + write_output(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rax); + + FRAME_END + RET; +SYM_FUNC_END(aria_aesni_avx2_gfni_encrypt_32way) + +SYM_TYPED_FUNC_START(aria_aesni_avx2_gfni_decrypt_32way) + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + + FRAME_BEGIN + + leaq ARIA_CTX_dec_key(CTX), %r9; + + inpack16_pre(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rdx); + + call __aria_aesni_avx2_gfni_crypt_32way; + + write_output(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rax); + + FRAME_END + RET; +SYM_FUNC_END(aria_aesni_avx2_gfni_decrypt_32way) + +SYM_TYPED_FUNC_START(aria_aesni_avx2_gfni_ctr_crypt_32way) + /* input: + * %rdi: ctx + * %rsi: dst + * %rdx: src + * %rcx: keystream + * %r8: iv (big endian, 128bit) + */ + FRAME_BEGIN + + call __aria_aesni_avx2_ctr_gen_keystream_32way + + leaq (%rsi), %r10; + leaq (%rdx), %r11; + leaq (%rcx), %rsi; + leaq (%rcx), %rdx; + leaq ARIA_CTX_enc_key(CTX), %r9; + + call __aria_aesni_avx2_gfni_crypt_32way; + + vpxor (0 * 32)(%r11), %ymm1, %ymm1; + vpxor (1 * 32)(%r11), %ymm0, %ymm0; + vpxor (2 * 32)(%r11), %ymm3, %ymm3; + vpxor (3 * 32)(%r11), %ymm2, %ymm2; + vpxor (4 * 32)(%r11), %ymm4, %ymm4; + vpxor (5 * 32)(%r11), %ymm5, %ymm5; + vpxor (6 * 32)(%r11), %ymm6, %ymm6; + vpxor (7 * 32)(%r11), %ymm7, %ymm7; + vpxor (8 * 32)(%r11), %ymm8, %ymm8; + vpxor (9 * 32)(%r11), %ymm9, %ymm9; + vpxor (10 * 32)(%r11), %ymm10, %ymm10; + vpxor (11 * 32)(%r11), %ymm11, %ymm11; + vpxor (12 * 32)(%r11), %ymm12, %ymm12; + vpxor (13 * 32)(%r11), %ymm13, %ymm13; + vpxor (14 * 32)(%r11), %ymm14, %ymm14; + vpxor (15 * 32)(%r11), %ymm15, %ymm15; + write_output(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %r10); + + FRAME_END + RET; +SYM_FUNC_END(aria_aesni_avx2_gfni_ctr_crypt_32way) +#endif /* CONFIG_AS_GFNI */ diff --git a/arch/x86/crypto/aria-avx.h b/arch/x86/crypto/aria-avx.h index 01e9a01dc157..6e1b2d8a31ed 100644 --- a/arch/x86/crypto/aria-avx.h +++ b/arch/x86/crypto/aria-avx.h @@ -5,12 +5,58 @@ #include <linux/types.h> #define ARIA_AESNI_PARALLEL_BLOCKS 16 -#define ARIA_AESNI_PARALLEL_BLOCK_SIZE (ARIA_BLOCK_SIZE * 16) +#define ARIA_AESNI_PARALLEL_BLOCK_SIZE (ARIA_BLOCK_SIZE * ARIA_AESNI_PARALLEL_BLOCKS) + +#define ARIA_AESNI_AVX2_PARALLEL_BLOCKS 32 +#define ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE (ARIA_BLOCK_SIZE * ARIA_AESNI_AVX2_PARALLEL_BLOCKS) + +#define ARIA_GFNI_AVX512_PARALLEL_BLOCKS 64 +#define ARIA_GFNI_AVX512_PARALLEL_BLOCK_SIZE (ARIA_BLOCK_SIZE * ARIA_GFNI_AVX512_PARALLEL_BLOCKS) + +asmlinkage void aria_aesni_avx_encrypt_16way(const void *ctx, u8 *dst, + const u8 *src); +asmlinkage void aria_aesni_avx_decrypt_16way(const void *ctx, u8 *dst, + const u8 *src); +asmlinkage void aria_aesni_avx_ctr_crypt_16way(const void *ctx, u8 *dst, + const u8 *src, + u8 *keystream, u8 *iv); +asmlinkage void aria_aesni_avx_gfni_encrypt_16way(const void 
*ctx, u8 *dst, + const u8 *src); +asmlinkage void aria_aesni_avx_gfni_decrypt_16way(const void *ctx, u8 *dst, + const u8 *src); +asmlinkage void aria_aesni_avx_gfni_ctr_crypt_16way(const void *ctx, u8 *dst, + const u8 *src, + u8 *keystream, u8 *iv); + +asmlinkage void aria_aesni_avx2_encrypt_32way(const void *ctx, u8 *dst, + const u8 *src); +asmlinkage void aria_aesni_avx2_decrypt_32way(const void *ctx, u8 *dst, + const u8 *src); +asmlinkage void aria_aesni_avx2_ctr_crypt_32way(const void *ctx, u8 *dst, + const u8 *src, + u8 *keystream, u8 *iv); +asmlinkage void aria_aesni_avx2_gfni_encrypt_32way(const void *ctx, u8 *dst, + const u8 *src); +asmlinkage void aria_aesni_avx2_gfni_decrypt_32way(const void *ctx, u8 *dst, + const u8 *src); +asmlinkage void aria_aesni_avx2_gfni_ctr_crypt_32way(const void *ctx, u8 *dst, + const u8 *src, + u8 *keystream, u8 *iv); struct aria_avx_ops { void (*aria_encrypt_16way)(const void *ctx, u8 *dst, const u8 *src); void (*aria_decrypt_16way)(const void *ctx, u8 *dst, const u8 *src); void (*aria_ctr_crypt_16way)(const void *ctx, u8 *dst, const u8 *src, u8 *keystream, u8 *iv); + void (*aria_encrypt_32way)(const void *ctx, u8 *dst, const u8 *src); + void (*aria_decrypt_32way)(const void *ctx, u8 *dst, const u8 *src); + void (*aria_ctr_crypt_32way)(const void *ctx, u8 *dst, const u8 *src, + u8 *keystream, u8 *iv); + void (*aria_encrypt_64way)(const void *ctx, u8 *dst, const u8 *src); + void (*aria_decrypt_64way)(const void *ctx, u8 *dst, const u8 *src); + void (*aria_ctr_crypt_64way)(const void *ctx, u8 *dst, const u8 *src, + u8 *keystream, u8 *iv); + + }; #endif diff --git a/arch/x86/crypto/aria-gfni-avx512-asm_64.S b/arch/x86/crypto/aria-gfni-avx512-asm_64.S new file mode 100644 index 000000000000..3193f0701450 --- /dev/null +++ b/arch/x86/crypto/aria-gfni-avx512-asm_64.S @@ -0,0 +1,971 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * ARIA Cipher 64-way parallel algorithm (AVX512) + * + * Copyright (c) 2022 Taehee Yoo <ap420073@gmail.com> + * + */ + +#include <linux/linkage.h> +#include <asm/frame.h> +#include <asm/asm-offsets.h> +#include <linux/cfi_types.h> + +/* register macros */ +#define CTX %rdi + + +#define BV8(a0, a1, a2, a3, a4, a5, a6, a7) \ + ( (((a0) & 1) << 0) | \ + (((a1) & 1) << 1) | \ + (((a2) & 1) << 2) | \ + (((a3) & 1) << 3) | \ + (((a4) & 1) << 4) | \ + (((a5) & 1) << 5) | \ + (((a6) & 1) << 6) | \ + (((a7) & 1) << 7) ) + +#define BM8X8(l0, l1, l2, l3, l4, l5, l6, l7) \ + ( ((l7) << (0 * 8)) | \ + ((l6) << (1 * 8)) | \ + ((l5) << (2 * 8)) | \ + ((l4) << (3 * 8)) | \ + ((l3) << (4 * 8)) | \ + ((l2) << (5 * 8)) | \ + ((l1) << (6 * 8)) | \ + ((l0) << (7 * 8)) ) + +#define add_le128(out, in, lo_counter, hi_counter1) \ + vpaddq lo_counter, in, out; \ + vpcmpuq $1, lo_counter, out, %k1; \ + kaddb %k1, %k1, %k1; \ + vpaddq hi_counter1, out, out{%k1}; + +#define filter_8bit(x, lo_t, hi_t, mask4bit, tmp0) \ + vpandq x, mask4bit, tmp0; \ + vpandqn x, mask4bit, x; \ + vpsrld $4, x, x; \ + \ + vpshufb tmp0, lo_t, tmp0; \ + vpshufb x, hi_t, x; \ + vpxorq tmp0, x, x; + +#define transpose_4x4(x0, x1, x2, x3, t1, t2) \ + vpunpckhdq x1, x0, t2; \ + vpunpckldq x1, x0, x0; \ + \ + vpunpckldq x3, x2, t1; \ + vpunpckhdq x3, x2, x2; \ + \ + vpunpckhqdq t1, x0, x1; \ + vpunpcklqdq t1, x0, x0; \ + \ + vpunpckhqdq x2, t2, x3; \ + vpunpcklqdq x2, t2, x2; + +#define byteslice_16x16b(a0, b0, c0, d0, \ + a1, b1, c1, d1, \ + a2, b2, c2, d2, \ + a3, b3, c3, d3, \ + st0, st1) \ + vmovdqu64 d2, st0; \ + vmovdqu64 d3, st1; \ + transpose_4x4(a0, a1, a2, a3, d2, d3); 
\ + transpose_4x4(b0, b1, b2, b3, d2, d3); \ + vmovdqu64 st0, d2; \ + vmovdqu64 st1, d3; \ + \ + vmovdqu64 a0, st0; \ + vmovdqu64 a1, st1; \ + transpose_4x4(c0, c1, c2, c3, a0, a1); \ + transpose_4x4(d0, d1, d2, d3, a0, a1); \ + \ + vbroadcasti64x2 .Lshufb_16x16b, a0; \ + vmovdqu64 st1, a1; \ + vpshufb a0, a2, a2; \ + vpshufb a0, a3, a3; \ + vpshufb a0, b0, b0; \ + vpshufb a0, b1, b1; \ + vpshufb a0, b2, b2; \ + vpshufb a0, b3, b3; \ + vpshufb a0, a1, a1; \ + vpshufb a0, c0, c0; \ + vpshufb a0, c1, c1; \ + vpshufb a0, c2, c2; \ + vpshufb a0, c3, c3; \ + vpshufb a0, d0, d0; \ + vpshufb a0, d1, d1; \ + vpshufb a0, d2, d2; \ + vpshufb a0, d3, d3; \ + vmovdqu64 d3, st1; \ + vmovdqu64 st0, d3; \ + vpshufb a0, d3, a0; \ + vmovdqu64 d2, st0; \ + \ + transpose_4x4(a0, b0, c0, d0, d2, d3); \ + transpose_4x4(a1, b1, c1, d1, d2, d3); \ + vmovdqu64 st0, d2; \ + vmovdqu64 st1, d3; \ + \ + vmovdqu64 b0, st0; \ + vmovdqu64 b1, st1; \ + transpose_4x4(a2, b2, c2, d2, b0, b1); \ + transpose_4x4(a3, b3, c3, d3, b0, b1); \ + vmovdqu64 st0, b0; \ + vmovdqu64 st1, b1; \ + /* does not adjust output bytes inside vectors */ + +#define debyteslice_16x16b(a0, b0, c0, d0, \ + a1, b1, c1, d1, \ + a2, b2, c2, d2, \ + a3, b3, c3, d3, \ + st0, st1) \ + vmovdqu64 d2, st0; \ + vmovdqu64 d3, st1; \ + transpose_4x4(a0, a1, a2, a3, d2, d3); \ + transpose_4x4(b0, b1, b2, b3, d2, d3); \ + vmovdqu64 st0, d2; \ + vmovdqu64 st1, d3; \ + \ + vmovdqu64 a0, st0; \ + vmovdqu64 a1, st1; \ + transpose_4x4(c0, c1, c2, c3, a0, a1); \ + transpose_4x4(d0, d1, d2, d3, a0, a1); \ + \ + vbroadcasti64x2 .Lshufb_16x16b, a0; \ + vmovdqu64 st1, a1; \ + vpshufb a0, a2, a2; \ + vpshufb a0, a3, a3; \ + vpshufb a0, b0, b0; \ + vpshufb a0, b1, b1; \ + vpshufb a0, b2, b2; \ + vpshufb a0, b3, b3; \ + vpshufb a0, a1, a1; \ + vpshufb a0, c0, c0; \ + vpshufb a0, c1, c1; \ + vpshufb a0, c2, c2; \ + vpshufb a0, c3, c3; \ + vpshufb a0, d0, d0; \ + vpshufb a0, d1, d1; \ + vpshufb a0, d2, d2; \ + vpshufb a0, d3, d3; \ + vmovdqu64 d3, st1; \ + vmovdqu64 st0, d3; \ + vpshufb a0, d3, a0; \ + vmovdqu64 d2, st0; \ + \ + transpose_4x4(c0, d0, a0, b0, d2, d3); \ + transpose_4x4(c1, d1, a1, b1, d2, d3); \ + vmovdqu64 st0, d2; \ + vmovdqu64 st1, d3; \ + \ + vmovdqu64 b0, st0; \ + vmovdqu64 b1, st1; \ + transpose_4x4(c2, d2, a2, b2, b0, b1); \ + transpose_4x4(c3, d3, a3, b3, b0, b1); \ + vmovdqu64 st0, b0; \ + vmovdqu64 st1, b1; \ + /* does not adjust output bytes inside vectors */ + +/* load blocks to registers and apply pre-whitening */ +#define inpack16_pre(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + rio) \ + vmovdqu64 (0 * 64)(rio), x0; \ + vmovdqu64 (1 * 64)(rio), x1; \ + vmovdqu64 (2 * 64)(rio), x2; \ + vmovdqu64 (3 * 64)(rio), x3; \ + vmovdqu64 (4 * 64)(rio), x4; \ + vmovdqu64 (5 * 64)(rio), x5; \ + vmovdqu64 (6 * 64)(rio), x6; \ + vmovdqu64 (7 * 64)(rio), x7; \ + vmovdqu64 (8 * 64)(rio), y0; \ + vmovdqu64 (9 * 64)(rio), y1; \ + vmovdqu64 (10 * 64)(rio), y2; \ + vmovdqu64 (11 * 64)(rio), y3; \ + vmovdqu64 (12 * 64)(rio), y4; \ + vmovdqu64 (13 * 64)(rio), y5; \ + vmovdqu64 (14 * 64)(rio), y6; \ + vmovdqu64 (15 * 64)(rio), y7; + +/* byteslice pre-whitened blocks and store to temporary memory */ +#define inpack16_post(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem_ab, mem_cd) \ + byteslice_16x16b(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + (mem_ab), (mem_cd)); \ + \ + vmovdqu64 x0, 0 * 64(mem_ab); \ + vmovdqu64 x1, 1 * 64(mem_ab); \ + vmovdqu64 x2, 2 * 
64(mem_ab); \ + vmovdqu64 x3, 3 * 64(mem_ab); \ + vmovdqu64 x4, 4 * 64(mem_ab); \ + vmovdqu64 x5, 5 * 64(mem_ab); \ + vmovdqu64 x6, 6 * 64(mem_ab); \ + vmovdqu64 x7, 7 * 64(mem_ab); \ + vmovdqu64 y0, 0 * 64(mem_cd); \ + vmovdqu64 y1, 1 * 64(mem_cd); \ + vmovdqu64 y2, 2 * 64(mem_cd); \ + vmovdqu64 y3, 3 * 64(mem_cd); \ + vmovdqu64 y4, 4 * 64(mem_cd); \ + vmovdqu64 y5, 5 * 64(mem_cd); \ + vmovdqu64 y6, 6 * 64(mem_cd); \ + vmovdqu64 y7, 7 * 64(mem_cd); + +#define write_output(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + mem) \ + vmovdqu64 x0, 0 * 64(mem); \ + vmovdqu64 x1, 1 * 64(mem); \ + vmovdqu64 x2, 2 * 64(mem); \ + vmovdqu64 x3, 3 * 64(mem); \ + vmovdqu64 x4, 4 * 64(mem); \ + vmovdqu64 x5, 5 * 64(mem); \ + vmovdqu64 x6, 6 * 64(mem); \ + vmovdqu64 x7, 7 * 64(mem); \ + vmovdqu64 y0, 8 * 64(mem); \ + vmovdqu64 y1, 9 * 64(mem); \ + vmovdqu64 y2, 10 * 64(mem); \ + vmovdqu64 y3, 11 * 64(mem); \ + vmovdqu64 y4, 12 * 64(mem); \ + vmovdqu64 y5, 13 * 64(mem); \ + vmovdqu64 y6, 14 * 64(mem); \ + vmovdqu64 y7, 15 * 64(mem); \ + +#define aria_store_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, idx) \ + vmovdqu64 x0, ((idx + 0) * 64)(mem_tmp); \ + vmovdqu64 x1, ((idx + 1) * 64)(mem_tmp); \ + vmovdqu64 x2, ((idx + 2) * 64)(mem_tmp); \ + vmovdqu64 x3, ((idx + 3) * 64)(mem_tmp); \ + vmovdqu64 x4, ((idx + 4) * 64)(mem_tmp); \ + vmovdqu64 x5, ((idx + 5) * 64)(mem_tmp); \ + vmovdqu64 x6, ((idx + 6) * 64)(mem_tmp); \ + vmovdqu64 x7, ((idx + 7) * 64)(mem_tmp); + +#define aria_load_state_8way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + mem_tmp, idx) \ + vmovdqu64 ((idx + 0) * 64)(mem_tmp), x0; \ + vmovdqu64 ((idx + 1) * 64)(mem_tmp), x1; \ + vmovdqu64 ((idx + 2) * 64)(mem_tmp), x2; \ + vmovdqu64 ((idx + 3) * 64)(mem_tmp), x3; \ + vmovdqu64 ((idx + 4) * 64)(mem_tmp), x4; \ + vmovdqu64 ((idx + 5) * 64)(mem_tmp), x5; \ + vmovdqu64 ((idx + 6) * 64)(mem_tmp), x6; \ + vmovdqu64 ((idx + 7) * 64)(mem_tmp), x7; + +#define aria_ark_16way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + t0, rk, round) \ + /* AddRoundKey */ \ + vpbroadcastb ((round * 16) + 3)(rk), t0; \ + vpxorq t0, x0, x0; \ + vpbroadcastb ((round * 16) + 2)(rk), t0; \ + vpxorq t0, x1, x1; \ + vpbroadcastb ((round * 16) + 1)(rk), t0; \ + vpxorq t0, x2, x2; \ + vpbroadcastb ((round * 16) + 0)(rk), t0; \ + vpxorq t0, x3, x3; \ + vpbroadcastb ((round * 16) + 7)(rk), t0; \ + vpxorq t0, x4, x4; \ + vpbroadcastb ((round * 16) + 6)(rk), t0; \ + vpxorq t0, x5, x5; \ + vpbroadcastb ((round * 16) + 5)(rk), t0; \ + vpxorq t0, x6, x6; \ + vpbroadcastb ((round * 16) + 4)(rk), t0; \ + vpxorq t0, x7, x7; \ + vpbroadcastb ((round * 16) + 11)(rk), t0; \ + vpxorq t0, y0, y0; \ + vpbroadcastb ((round * 16) + 10)(rk), t0; \ + vpxorq t0, y1, y1; \ + vpbroadcastb ((round * 16) + 9)(rk), t0; \ + vpxorq t0, y2, y2; \ + vpbroadcastb ((round * 16) + 8)(rk), t0; \ + vpxorq t0, y3, y3; \ + vpbroadcastb ((round * 16) + 15)(rk), t0; \ + vpxorq t0, y4, y4; \ + vpbroadcastb ((round * 16) + 14)(rk), t0; \ + vpxorq t0, y5, y5; \ + vpbroadcastb ((round * 16) + 13)(rk), t0; \ + vpxorq t0, y6, y6; \ + vpbroadcastb ((round * 16) + 12)(rk), t0; \ + vpxorq t0, y7, y7; + +#define aria_sbox_8way_gfni(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + t0, t1, t2, t3, \ + t4, t5, t6, t7) \ + vpbroadcastq .Ltf_s2_bitmatrix, t0; \ + vpbroadcastq .Ltf_inv_bitmatrix, t1; \ + vpbroadcastq .Ltf_id_bitmatrix, t2; \ + vpbroadcastq .Ltf_aff_bitmatrix, t3; \ + vpbroadcastq .Ltf_x2_bitmatrix, t4; \ + vgf2p8affineinvqb $(tf_s2_const), t0, x1, 
x1; \ + vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5; \ + vgf2p8affineqb $(tf_inv_const), t1, x2, x2; \ + vgf2p8affineqb $(tf_inv_const), t1, x6, x6; \ + vgf2p8affineinvqb $0, t2, x2, x2; \ + vgf2p8affineinvqb $0, t2, x6, x6; \ + vgf2p8affineinvqb $(tf_aff_const), t3, x0, x0; \ + vgf2p8affineinvqb $(tf_aff_const), t3, x4, x4; \ + vgf2p8affineqb $(tf_x2_const), t4, x3, x3; \ + vgf2p8affineqb $(tf_x2_const), t4, x7, x7; \ + vgf2p8affineinvqb $0, t2, x3, x3; \ + vgf2p8affineinvqb $0, t2, x7, x7; + +#define aria_sbox_16way_gfni(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + t0, t1, t2, t3, \ + t4, t5, t6, t7) \ + vpbroadcastq .Ltf_s2_bitmatrix, t0; \ + vpbroadcastq .Ltf_inv_bitmatrix, t1; \ + vpbroadcastq .Ltf_id_bitmatrix, t2; \ + vpbroadcastq .Ltf_aff_bitmatrix, t3; \ + vpbroadcastq .Ltf_x2_bitmatrix, t4; \ + vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1; \ + vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5; \ + vgf2p8affineqb $(tf_inv_const), t1, x2, x2; \ + vgf2p8affineqb $(tf_inv_const), t1, x6, x6; \ + vgf2p8affineinvqb $0, t2, x2, x2; \ + vgf2p8affineinvqb $0, t2, x6, x6; \ + vgf2p8affineinvqb $(tf_aff_const), t3, x0, x0; \ + vgf2p8affineinvqb $(tf_aff_const), t3, x4, x4; \ + vgf2p8affineqb $(tf_x2_const), t4, x3, x3; \ + vgf2p8affineqb $(tf_x2_const), t4, x7, x7; \ + vgf2p8affineinvqb $0, t2, x3, x3; \ + vgf2p8affineinvqb $0, t2, x7, x7; \ + vgf2p8affineinvqb $(tf_s2_const), t0, y1, y1; \ + vgf2p8affineinvqb $(tf_s2_const), t0, y5, y5; \ + vgf2p8affineqb $(tf_inv_const), t1, y2, y2; \ + vgf2p8affineqb $(tf_inv_const), t1, y6, y6; \ + vgf2p8affineinvqb $0, t2, y2, y2; \ + vgf2p8affineinvqb $0, t2, y6, y6; \ + vgf2p8affineinvqb $(tf_aff_const), t3, y0, y0; \ + vgf2p8affineinvqb $(tf_aff_const), t3, y4, y4; \ + vgf2p8affineqb $(tf_x2_const), t4, y3, y3; \ + vgf2p8affineqb $(tf_x2_const), t4, y7, y7; \ + vgf2p8affineinvqb $0, t2, y3, y3; \ + vgf2p8affineinvqb $0, t2, y7, y7; + + +#define aria_diff_m(x0, x1, x2, x3, \ + t0, t1, t2, t3) \ + /* T = rotr32(X, 8); */ \ + /* X ^= T */ \ + vpxorq x0, x3, t0; \ + vpxorq x1, x0, t1; \ + vpxorq x2, x1, t2; \ + vpxorq x3, x2, t3; \ + /* X = T ^ rotr(X, 16); */ \ + vpxorq t2, x0, x0; \ + vpxorq x1, t3, t3; \ + vpxorq t0, x2, x2; \ + vpxorq t1, x3, x1; \ + vmovdqu64 t3, x3; + +#define aria_diff_word(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7) \ + /* t1 ^= t2; */ \ + vpxorq y0, x4, x4; \ + vpxorq y1, x5, x5; \ + vpxorq y2, x6, x6; \ + vpxorq y3, x7, x7; \ + \ + /* t2 ^= t3; */ \ + vpxorq y4, y0, y0; \ + vpxorq y5, y1, y1; \ + vpxorq y6, y2, y2; \ + vpxorq y7, y3, y3; \ + \ + /* t0 ^= t1; */ \ + vpxorq x4, x0, x0; \ + vpxorq x5, x1, x1; \ + vpxorq x6, x2, x2; \ + vpxorq x7, x3, x3; \ + \ + /* t3 ^= t1; */ \ + vpxorq x4, y4, y4; \ + vpxorq x5, y5, y5; \ + vpxorq x6, y6, y6; \ + vpxorq x7, y7, y7; \ + \ + /* t2 ^= t0; */ \ + vpxorq x0, y0, y0; \ + vpxorq x1, y1, y1; \ + vpxorq x2, y2, y2; \ + vpxorq x3, y3, y3; \ + \ + /* t1 ^= t2; */ \ + vpxorq y0, x4, x4; \ + vpxorq y1, x5, x5; \ + vpxorq y2, x6, x6; \ + vpxorq y3, x7, x7; + +#define aria_fe_gfni(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + z0, z1, z2, z3, \ + z4, z5, z6, z7, \ + mem_tmp, rk, round) \ + aria_ark_16way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, y1, y2, y3, y4, y5, y6, y7, \ + z0, rk, round); \ + \ + aria_sbox_16way_gfni(x2, x3, x0, x1, \ + x6, x7, x4, x5, \ + y2, y3, y0, y1, \ + y6, y7, y4, y5, \ + z0, z1, z2, z3, \ + z4, z5, z6, z7); \ + \ + aria_diff_m(x0, x1, x2, x3, z0, z1, z2, z3); \ + 
aria_diff_m(x4, x5, x6, x7, z0, z1, z2, z3); \ + aria_diff_m(y0, y1, y2, y3, z0, z1, z2, z3); \ + aria_diff_m(y4, y5, y6, y7, z0, z1, z2, z3); \ + aria_diff_word(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7); \ + /* aria_diff_byte() \ + * T3 = ABCD -> BADC \ + * T3 = y4, y5, y6, y7 -> y5, y4, y7, y6 \ + * T0 = ABCD -> CDAB \ + * T0 = x0, x1, x2, x3 -> x2, x3, x0, x1 \ + * T1 = ABCD -> DCBA \ + * T1 = x4, x5, x6, x7 -> x7, x6, x5, x4 \ + */ \ + aria_diff_word(x2, x3, x0, x1, \ + x7, x6, x5, x4, \ + y0, y1, y2, y3, \ + y5, y4, y7, y6); \ + + +#define aria_fo_gfni(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + z0, z1, z2, z3, \ + z4, z5, z6, z7, \ + mem_tmp, rk, round) \ + aria_ark_16way(x0, x1, x2, x3, x4, x5, x6, x7, \ + y0, y1, y2, y3, y4, y5, y6, y7, \ + z0, rk, round); \ + \ + aria_sbox_16way_gfni(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + z0, z1, z2, z3, \ + z4, z5, z6, z7); \ + \ + aria_diff_m(x0, x1, x2, x3, z0, z1, z2, z3); \ + aria_diff_m(x4, x5, x6, x7, z0, z1, z2, z3); \ + aria_diff_m(y0, y1, y2, y3, z0, z1, z2, z3); \ + aria_diff_m(y4, y5, y6, y7, z0, z1, z2, z3); \ + aria_diff_word(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7); \ + /* aria_diff_byte() \ + * T1 = ABCD -> BADC \ + * T1 = x4, x5, x6, x7 -> x5, x4, x7, x6 \ + * T2 = ABCD -> CDAB \ + * T2 = y0, y1, y2, y3, -> y2, y3, y0, y1 \ + * T3 = ABCD -> DCBA \ + * T3 = y4, y5, y6, y7 -> y7, y6, y5, y4 \ + */ \ + aria_diff_word(x0, x1, x2, x3, \ + x5, x4, x7, x6, \ + y2, y3, y0, y1, \ + y7, y6, y5, y4); + +#define aria_ff_gfni(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + z0, z1, z2, z3, \ + z4, z5, z6, z7, \ + mem_tmp, rk, round, last_round) \ + aria_ark_16way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + z0, rk, round); \ + aria_sbox_16way_gfni(x2, x3, x0, x1, \ + x6, x7, x4, x5, \ + y2, y3, y0, y1, \ + y6, y7, y4, y5, \ + z0, z1, z2, z3, \ + z4, z5, z6, z7); \ + aria_ark_16way(x0, x1, x2, x3, \ + x4, x5, x6, x7, \ + y0, y1, y2, y3, \ + y4, y5, y6, y7, \ + z0, rk, last_round); + + +.section .rodata.cst64, "aM", @progbits, 64 +.align 64 +.Lcounter0123_lo: + .quad 0, 0 + .quad 1, 0 + .quad 2, 0 + .quad 3, 0 + +.section .rodata.cst32.shufb_16x16b, "aM", @progbits, 32 +.align 32 +#define SHUFB_BYTES(idx) \ + 0 + (idx), 4 + (idx), 8 + (idx), 12 + (idx) +.Lshufb_16x16b: + .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3) + .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3) + +.section .rodata.cst16, "aM", @progbits, 16 +.align 16 + +.Lcounter4444_lo: + .quad 4, 0 +.Lcounter8888_lo: + .quad 8, 0 +.Lcounter16161616_lo: + .quad 16, 0 +.Lcounter1111_hi: + .quad 0, 1 + +/* For CTR-mode IV byteswap */ +.Lbswap128_mask: + .byte 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08 + .byte 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 + +.section .rodata.cst8, "aM", @progbits, 8 +.align 8 +/* AES affine: */ +#define tf_aff_const BV8(1, 1, 0, 0, 0, 1, 1, 0) +.Ltf_aff_bitmatrix: + .quad BM8X8(BV8(1, 0, 0, 0, 1, 1, 1, 1), + BV8(1, 1, 0, 0, 0, 1, 1, 1), + BV8(1, 1, 1, 0, 0, 0, 1, 1), + BV8(1, 1, 1, 1, 0, 0, 0, 1), + BV8(1, 1, 1, 1, 1, 0, 0, 0), + BV8(0, 1, 1, 1, 1, 1, 0, 0), + BV8(0, 0, 1, 1, 1, 1, 1, 0), + BV8(0, 0, 0, 1, 1, 1, 1, 1)) + +/* AES inverse affine: */ +#define tf_inv_const BV8(1, 0, 1, 0, 0, 0, 0, 0) +.Ltf_inv_bitmatrix: + .quad BM8X8(BV8(0, 0, 1, 0, 0, 1, 0, 1), + BV8(1, 0, 0, 1, 0, 0, 1, 0), + BV8(0, 1, 
0, 0, 1, 0, 0, 1), + BV8(1, 0, 1, 0, 0, 1, 0, 0), + BV8(0, 1, 0, 1, 0, 0, 1, 0), + BV8(0, 0, 1, 0, 1, 0, 0, 1), + BV8(1, 0, 0, 1, 0, 1, 0, 0), + BV8(0, 1, 0, 0, 1, 0, 1, 0)) + +/* S2: */ +#define tf_s2_const BV8(0, 1, 0, 0, 0, 1, 1, 1) +.Ltf_s2_bitmatrix: + .quad BM8X8(BV8(0, 1, 0, 1, 0, 1, 1, 1), + BV8(0, 0, 1, 1, 1, 1, 1, 1), + BV8(1, 1, 1, 0, 1, 1, 0, 1), + BV8(1, 1, 0, 0, 0, 0, 1, 1), + BV8(0, 1, 0, 0, 0, 0, 1, 1), + BV8(1, 1, 0, 0, 1, 1, 1, 0), + BV8(0, 1, 1, 0, 0, 0, 1, 1), + BV8(1, 1, 1, 1, 0, 1, 1, 0)) + +/* X2: */ +#define tf_x2_const BV8(0, 0, 1, 1, 0, 1, 0, 0) +.Ltf_x2_bitmatrix: + .quad BM8X8(BV8(0, 0, 0, 1, 1, 0, 0, 0), + BV8(0, 0, 1, 0, 0, 1, 1, 0), + BV8(0, 0, 0, 0, 1, 0, 1, 0), + BV8(1, 1, 1, 0, 0, 0, 1, 1), + BV8(1, 1, 1, 0, 1, 1, 0, 0), + BV8(0, 1, 1, 0, 1, 0, 1, 1), + BV8(1, 0, 1, 1, 1, 1, 0, 1), + BV8(1, 0, 0, 1, 0, 0, 1, 1)) + +/* Identity matrix: */ +.Ltf_id_bitmatrix: + .quad BM8X8(BV8(1, 0, 0, 0, 0, 0, 0, 0), + BV8(0, 1, 0, 0, 0, 0, 0, 0), + BV8(0, 0, 1, 0, 0, 0, 0, 0), + BV8(0, 0, 0, 1, 0, 0, 0, 0), + BV8(0, 0, 0, 0, 1, 0, 0, 0), + BV8(0, 0, 0, 0, 0, 1, 0, 0), + BV8(0, 0, 0, 0, 0, 0, 1, 0), + BV8(0, 0, 0, 0, 0, 0, 0, 1)) + +.text +SYM_FUNC_START_LOCAL(__aria_gfni_avx512_crypt_64way) + /* input: + * %r9: rk + * %rsi: dst + * %rdx: src + * %zmm0..%zmm15: byte-sliced blocks + */ + + FRAME_BEGIN + + movq %rsi, %rax; + leaq 8 * 64(%rax), %r8; + + inpack16_post(%zmm0, %zmm1, %zmm2, %zmm3, + %zmm4, %zmm5, %zmm6, %zmm7, + %zmm8, %zmm9, %zmm10, %zmm11, + %zmm12, %zmm13, %zmm14, + %zmm15, %rax, %r8); + aria_fo_gfni(%zmm0, %zmm1, %zmm2, %zmm3, + %zmm4, %zmm5, %zmm6, %zmm7, + %zmm8, %zmm9, %zmm10, %zmm11, + %zmm12, %zmm13, %zmm14, %zmm15, + %zmm24, %zmm25, %zmm26, %zmm27, + %zmm28, %zmm29, %zmm30, %zmm31, + %rax, %r9, 0); + aria_fe_gfni(%zmm3, %zmm2, %zmm1, %zmm0, + %zmm6, %zmm7, %zmm4, %zmm5, + %zmm9, %zmm8, %zmm11, %zmm10, + %zmm12, %zmm13, %zmm14, %zmm15, + %zmm24, %zmm25, %zmm26, %zmm27, + %zmm28, %zmm29, %zmm30, %zmm31, + %rax, %r9, 1); + aria_fo_gfni(%zmm0, %zmm1, %zmm2, %zmm3, + %zmm4, %zmm5, %zmm6, %zmm7, + %zmm8, %zmm9, %zmm10, %zmm11, + %zmm12, %zmm13, %zmm14, %zmm15, + %zmm24, %zmm25, %zmm26, %zmm27, + %zmm28, %zmm29, %zmm30, %zmm31, + %rax, %r9, 2); + aria_fe_gfni(%zmm3, %zmm2, %zmm1, %zmm0, + %zmm6, %zmm7, %zmm4, %zmm5, + %zmm9, %zmm8, %zmm11, %zmm10, + %zmm12, %zmm13, %zmm14, %zmm15, + %zmm24, %zmm25, %zmm26, %zmm27, + %zmm28, %zmm29, %zmm30, %zmm31, + %rax, %r9, 3); + aria_fo_gfni(%zmm0, %zmm1, %zmm2, %zmm3, + %zmm4, %zmm5, %zmm6, %zmm7, + %zmm8, %zmm9, %zmm10, %zmm11, + %zmm12, %zmm13, %zmm14, %zmm15, + %zmm24, %zmm25, %zmm26, %zmm27, + %zmm28, %zmm29, %zmm30, %zmm31, + %rax, %r9, 4); + aria_fe_gfni(%zmm3, %zmm2, %zmm1, %zmm0, + %zmm6, %zmm7, %zmm4, %zmm5, + %zmm9, %zmm8, %zmm11, %zmm10, + %zmm12, %zmm13, %zmm14, %zmm15, + %zmm24, %zmm25, %zmm26, %zmm27, + %zmm28, %zmm29, %zmm30, %zmm31, + %rax, %r9, 5); + aria_fo_gfni(%zmm0, %zmm1, %zmm2, %zmm3, + %zmm4, %zmm5, %zmm6, %zmm7, + %zmm8, %zmm9, %zmm10, %zmm11, + %zmm12, %zmm13, %zmm14, %zmm15, + %zmm24, %zmm25, %zmm26, %zmm27, + %zmm28, %zmm29, %zmm30, %zmm31, + %rax, %r9, 6); + aria_fe_gfni(%zmm3, %zmm2, %zmm1, %zmm0, + %zmm6, %zmm7, %zmm4, %zmm5, + %zmm9, %zmm8, %zmm11, %zmm10, + %zmm12, %zmm13, %zmm14, %zmm15, + %zmm24, %zmm25, %zmm26, %zmm27, + %zmm28, %zmm29, %zmm30, %zmm31, + %rax, %r9, 7); + aria_fo_gfni(%zmm0, %zmm1, %zmm2, %zmm3, + %zmm4, %zmm5, %zmm6, %zmm7, + %zmm8, %zmm9, %zmm10, %zmm11, + %zmm12, %zmm13, %zmm14, %zmm15, + %zmm24, %zmm25, %zmm26, %zmm27, + %zmm28, %zmm29, %zmm30, %zmm31, + %rax, %r9, 
8); + aria_fe_gfni(%zmm3, %zmm2, %zmm1, %zmm0, + %zmm6, %zmm7, %zmm4, %zmm5, + %zmm9, %zmm8, %zmm11, %zmm10, + %zmm12, %zmm13, %zmm14, %zmm15, + %zmm24, %zmm25, %zmm26, %zmm27, + %zmm28, %zmm29, %zmm30, %zmm31, + %rax, %r9, 9); + aria_fo_gfni(%zmm0, %zmm1, %zmm2, %zmm3, + %zmm4, %zmm5, %zmm6, %zmm7, + %zmm8, %zmm9, %zmm10, %zmm11, + %zmm12, %zmm13, %zmm14, %zmm15, + %zmm24, %zmm25, %zmm26, %zmm27, + %zmm28, %zmm29, %zmm30, %zmm31, + %rax, %r9, 10); + cmpl $12, ARIA_CTX_rounds(CTX); + jne .Laria_gfni_192; + aria_ff_gfni(%zmm3, %zmm2, %zmm1, %zmm0, + %zmm6, %zmm7, %zmm4, %zmm5, + %zmm9, %zmm8, %zmm11, %zmm10, + %zmm12, %zmm13, %zmm14, %zmm15, + %zmm24, %zmm25, %zmm26, %zmm27, + %zmm28, %zmm29, %zmm30, %zmm31, + %rax, %r9, 11, 12); + jmp .Laria_gfni_end; +.Laria_gfni_192: + aria_fe_gfni(%zmm3, %zmm2, %zmm1, %zmm0, + %zmm6, %zmm7, %zmm4, %zmm5, + %zmm9, %zmm8, %zmm11, %zmm10, + %zmm12, %zmm13, %zmm14, %zmm15, + %zmm24, %zmm25, %zmm26, %zmm27, + %zmm28, %zmm29, %zmm30, %zmm31, + %rax, %r9, 11); + aria_fo_gfni(%zmm0, %zmm1, %zmm2, %zmm3, + %zmm4, %zmm5, %zmm6, %zmm7, + %zmm8, %zmm9, %zmm10, %zmm11, + %zmm12, %zmm13, %zmm14, %zmm15, + %zmm24, %zmm25, %zmm26, %zmm27, + %zmm28, %zmm29, %zmm30, %zmm31, + %rax, %r9, 12); + cmpl $14, ARIA_CTX_rounds(CTX); + jne .Laria_gfni_256; + aria_ff_gfni(%zmm3, %zmm2, %zmm1, %zmm0, + %zmm6, %zmm7, %zmm4, %zmm5, + %zmm9, %zmm8, %zmm11, %zmm10, + %zmm12, %zmm13, %zmm14, %zmm15, + %zmm24, %zmm25, %zmm26, %zmm27, + %zmm28, %zmm29, %zmm30, %zmm31, + %rax, %r9, 13, 14); + jmp .Laria_gfni_end; +.Laria_gfni_256: + aria_fe_gfni(%zmm3, %zmm2, %zmm1, %zmm0, + %zmm6, %zmm7, %zmm4, %zmm5, + %zmm9, %zmm8, %zmm11, %zmm10, + %zmm12, %zmm13, %zmm14, %zmm15, + %zmm24, %zmm25, %zmm26, %zmm27, + %zmm28, %zmm29, %zmm30, %zmm31, + %rax, %r9, 13); + aria_fo_gfni(%zmm0, %zmm1, %zmm2, %zmm3, + %zmm4, %zmm5, %zmm6, %zmm7, + %zmm8, %zmm9, %zmm10, %zmm11, + %zmm12, %zmm13, %zmm14, %zmm15, + %zmm24, %zmm25, %zmm26, %zmm27, + %zmm28, %zmm29, %zmm30, %zmm31, + %rax, %r9, 14); + aria_ff_gfni(%zmm3, %zmm2, %zmm1, %zmm0, + %zmm6, %zmm7, %zmm4, %zmm5, + %zmm9, %zmm8, %zmm11, %zmm10, + %zmm12, %zmm13, %zmm14, %zmm15, + %zmm24, %zmm25, %zmm26, %zmm27, + %zmm28, %zmm29, %zmm30, %zmm31, + %rax, %r9, 15, 16); +.Laria_gfni_end: + debyteslice_16x16b(%zmm9, %zmm12, %zmm3, %zmm6, + %zmm8, %zmm13, %zmm2, %zmm7, + %zmm11, %zmm14, %zmm1, %zmm4, + %zmm10, %zmm15, %zmm0, %zmm5, + (%rax), (%r8)); + FRAME_END + RET; +SYM_FUNC_END(__aria_gfni_avx512_crypt_64way) + +SYM_TYPED_FUNC_START(aria_gfni_avx512_encrypt_64way) + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + + FRAME_BEGIN + + leaq ARIA_CTX_enc_key(CTX), %r9; + + inpack16_pre(%zmm0, %zmm1, %zmm2, %zmm3, %zmm4, %zmm5, %zmm6, %zmm7, + %zmm8, %zmm9, %zmm10, %zmm11, %zmm12, %zmm13, %zmm14, + %zmm15, %rdx); + + call __aria_gfni_avx512_crypt_64way; + + write_output(%zmm3, %zmm2, %zmm1, %zmm0, %zmm6, %zmm7, %zmm4, %zmm5, + %zmm9, %zmm8, %zmm11, %zmm10, %zmm12, %zmm13, %zmm14, + %zmm15, %rax); + + FRAME_END + RET; +SYM_FUNC_END(aria_gfni_avx512_encrypt_64way) + +SYM_TYPED_FUNC_START(aria_gfni_avx512_decrypt_64way) + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + + FRAME_BEGIN + + leaq ARIA_CTX_dec_key(CTX), %r9; + + inpack16_pre(%zmm0, %zmm1, %zmm2, %zmm3, %zmm4, %zmm5, %zmm6, %zmm7, + %zmm8, %zmm9, %zmm10, %zmm11, %zmm12, %zmm13, %zmm14, + %zmm15, %rdx); + + call __aria_gfni_avx512_crypt_64way; + + write_output(%zmm3, %zmm2, %zmm1, %zmm0, %zmm6, %zmm7, %zmm4, %zmm5, + %zmm9, %zmm8, %zmm11, %zmm10, %zmm12, %zmm13, %zmm14, + %zmm15, 
%rax); + + FRAME_END + RET; +SYM_FUNC_END(aria_gfni_avx512_decrypt_64way) + +SYM_FUNC_START_LOCAL(__aria_gfni_avx512_ctr_gen_keystream_64way) + /* input: + * %rdi: ctx + * %rsi: dst + * %rdx: src + * %rcx: keystream + * %r8: iv (big endian, 128bit) + */ + + FRAME_BEGIN + + vbroadcasti64x2 .Lbswap128_mask (%rip), %zmm19; + vmovdqa64 .Lcounter0123_lo (%rip), %zmm21; + vbroadcasti64x2 .Lcounter4444_lo (%rip), %zmm22; + vbroadcasti64x2 .Lcounter8888_lo (%rip), %zmm23; + vbroadcasti64x2 .Lcounter16161616_lo (%rip), %zmm24; + vbroadcasti64x2 .Lcounter1111_hi (%rip), %zmm25; + + /* load IV and byteswap */ + movq 8(%r8), %r11; + movq (%r8), %r10; + bswapq %r11; + bswapq %r10; + vbroadcasti64x2 (%r8), %zmm20; + vpshufb %zmm19, %zmm20, %zmm20; + + /* check need for handling 64-bit overflow and carry */ + cmpq $(0xffffffffffffffff - 64), %r11; + ja .Lload_ctr_carry; + + /* construct IVs */ + vpaddq %zmm21, %zmm20, %zmm0; /* +0:+1:+2:+3 */ + vpaddq %zmm22, %zmm0, %zmm1; /* +4:+5:+6:+7 */ + vpaddq %zmm23, %zmm0, %zmm2; /* +8:+9:+10:+11 */ + vpaddq %zmm23, %zmm1, %zmm3; /* +12:+13:+14:+15 */ + vpaddq %zmm24, %zmm0, %zmm4; /* +16... */ + vpaddq %zmm24, %zmm1, %zmm5; /* +20... */ + vpaddq %zmm24, %zmm2, %zmm6; /* +24... */ + vpaddq %zmm24, %zmm3, %zmm7; /* +28... */ + vpaddq %zmm24, %zmm4, %zmm8; /* +32... */ + vpaddq %zmm24, %zmm5, %zmm9; /* +36... */ + vpaddq %zmm24, %zmm6, %zmm10; /* +40... */ + vpaddq %zmm24, %zmm7, %zmm11; /* +44... */ + vpaddq %zmm24, %zmm8, %zmm12; /* +48... */ + vpaddq %zmm24, %zmm9, %zmm13; /* +52... */ + vpaddq %zmm24, %zmm10, %zmm14; /* +56... */ + vpaddq %zmm24, %zmm11, %zmm15; /* +60... */ + jmp .Lload_ctr_done; + +.Lload_ctr_carry: + /* construct IVs */ + add_le128(%zmm0, %zmm20, %zmm21, %zmm25); /* +0:+1:+2:+3 */ + add_le128(%zmm1, %zmm0, %zmm22, %zmm25); /* +4:+5:+6:+7 */ + add_le128(%zmm2, %zmm0, %zmm23, %zmm25); /* +8:+9:+10:+11 */ + add_le128(%zmm3, %zmm1, %zmm23, %zmm25); /* +12:+13:+14:+15 */ + add_le128(%zmm4, %zmm0, %zmm24, %zmm25); /* +16... */ + add_le128(%zmm5, %zmm1, %zmm24, %zmm25); /* +20... */ + add_le128(%zmm6, %zmm2, %zmm24, %zmm25); /* +24... */ + add_le128(%zmm7, %zmm3, %zmm24, %zmm25); /* +28... */ + add_le128(%zmm8, %zmm4, %zmm24, %zmm25); /* +32... */ + add_le128(%zmm9, %zmm5, %zmm24, %zmm25); /* +36... */ + add_le128(%zmm10, %zmm6, %zmm24, %zmm25); /* +40... */ + add_le128(%zmm11, %zmm7, %zmm24, %zmm25); /* +44... */ + add_le128(%zmm12, %zmm8, %zmm24, %zmm25); /* +48... */ + add_le128(%zmm13, %zmm9, %zmm24, %zmm25); /* +52... */ + add_le128(%zmm14, %zmm10, %zmm24, %zmm25); /* +56... */ + add_le128(%zmm15, %zmm11, %zmm24, %zmm25); /* +60... */ + +.Lload_ctr_done: + /* Byte-swap IVs and update counter. 
*/ + addq $64, %r11; + adcq $0, %r10; + vpshufb %zmm19, %zmm15, %zmm15; + vpshufb %zmm19, %zmm14, %zmm14; + vpshufb %zmm19, %zmm13, %zmm13; + vpshufb %zmm19, %zmm12, %zmm12; + vpshufb %zmm19, %zmm11, %zmm11; + vpshufb %zmm19, %zmm10, %zmm10; + vpshufb %zmm19, %zmm9, %zmm9; + vpshufb %zmm19, %zmm8, %zmm8; + bswapq %r11; + bswapq %r10; + vpshufb %zmm19, %zmm7, %zmm7; + vpshufb %zmm19, %zmm6, %zmm6; + vpshufb %zmm19, %zmm5, %zmm5; + vpshufb %zmm19, %zmm4, %zmm4; + vpshufb %zmm19, %zmm3, %zmm3; + vpshufb %zmm19, %zmm2, %zmm2; + vpshufb %zmm19, %zmm1, %zmm1; + vpshufb %zmm19, %zmm0, %zmm0; + movq %r11, 8(%r8); + movq %r10, (%r8); + + FRAME_END + RET; +SYM_FUNC_END(__aria_gfni_avx512_ctr_gen_keystream_64way) + +SYM_TYPED_FUNC_START(aria_gfni_avx512_ctr_crypt_64way) + /* input: + * %rdi: ctx + * %rsi: dst + * %rdx: src + * %rcx: keystream + * %r8: iv (big endian, 128bit) + */ + FRAME_BEGIN + + call __aria_gfni_avx512_ctr_gen_keystream_64way + + leaq (%rsi), %r10; + leaq (%rdx), %r11; + leaq (%rcx), %rsi; + leaq (%rcx), %rdx; + leaq ARIA_CTX_enc_key(CTX), %r9; + + call __aria_gfni_avx512_crypt_64way; + + vpxorq (0 * 64)(%r11), %zmm3, %zmm3; + vpxorq (1 * 64)(%r11), %zmm2, %zmm2; + vpxorq (2 * 64)(%r11), %zmm1, %zmm1; + vpxorq (3 * 64)(%r11), %zmm0, %zmm0; + vpxorq (4 * 64)(%r11), %zmm6, %zmm6; + vpxorq (5 * 64)(%r11), %zmm7, %zmm7; + vpxorq (6 * 64)(%r11), %zmm4, %zmm4; + vpxorq (7 * 64)(%r11), %zmm5, %zmm5; + vpxorq (8 * 64)(%r11), %zmm9, %zmm9; + vpxorq (9 * 64)(%r11), %zmm8, %zmm8; + vpxorq (10 * 64)(%r11), %zmm11, %zmm11; + vpxorq (11 * 64)(%r11), %zmm10, %zmm10; + vpxorq (12 * 64)(%r11), %zmm12, %zmm12; + vpxorq (13 * 64)(%r11), %zmm13, %zmm13; + vpxorq (14 * 64)(%r11), %zmm14, %zmm14; + vpxorq (15 * 64)(%r11), %zmm15, %zmm15; + write_output(%zmm3, %zmm2, %zmm1, %zmm0, %zmm6, %zmm7, %zmm4, %zmm5, + %zmm9, %zmm8, %zmm11, %zmm10, %zmm12, %zmm13, %zmm14, + %zmm15, %r10); + + FRAME_END + RET; +SYM_FUNC_END(aria_gfni_avx512_ctr_crypt_64way) diff --git a/arch/x86/crypto/aria_aesni_avx2_glue.c b/arch/x86/crypto/aria_aesni_avx2_glue.c new file mode 100644 index 000000000000..87a11804fc77 --- /dev/null +++ b/arch/x86/crypto/aria_aesni_avx2_glue.c @@ -0,0 +1,254 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Glue Code for the AVX2/AES-NI/GFNI assembler implementation of the ARIA Cipher + * + * Copyright (c) 2022 Taehee Yoo <ap420073@gmail.com> + */ + +#include <crypto/algapi.h> +#include <crypto/internal/simd.h> +#include <crypto/aria.h> +#include <linux/crypto.h> +#include <linux/err.h> +#include <linux/module.h> +#include <linux/types.h> + +#include "ecb_cbc_helpers.h" +#include "aria-avx.h" + +asmlinkage void aria_aesni_avx2_encrypt_32way(const void *ctx, u8 *dst, + const u8 *src); +EXPORT_SYMBOL_GPL(aria_aesni_avx2_encrypt_32way); +asmlinkage void aria_aesni_avx2_decrypt_32way(const void *ctx, u8 *dst, + const u8 *src); +EXPORT_SYMBOL_GPL(aria_aesni_avx2_decrypt_32way); +asmlinkage void aria_aesni_avx2_ctr_crypt_32way(const void *ctx, u8 *dst, + const u8 *src, + u8 *keystream, u8 *iv); +EXPORT_SYMBOL_GPL(aria_aesni_avx2_ctr_crypt_32way); +#ifdef CONFIG_AS_GFNI +asmlinkage void aria_aesni_avx2_gfni_encrypt_32way(const void *ctx, u8 *dst, + const u8 *src); +EXPORT_SYMBOL_GPL(aria_aesni_avx2_gfni_encrypt_32way); +asmlinkage void aria_aesni_avx2_gfni_decrypt_32way(const void *ctx, u8 *dst, + const u8 *src); +EXPORT_SYMBOL_GPL(aria_aesni_avx2_gfni_decrypt_32way); +asmlinkage void aria_aesni_avx2_gfni_ctr_crypt_32way(const void *ctx, u8 *dst, + const u8 *src, + u8 *keystream, u8 *iv); 
+EXPORT_SYMBOL_GPL(aria_aesni_avx2_gfni_ctr_crypt_32way); +#endif /* CONFIG_AS_GFNI */ + +static struct aria_avx_ops aria_ops; + +struct aria_avx2_request_ctx { + u8 keystream[ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE]; +}; + +static int ecb_do_encrypt(struct skcipher_request *req, const u32 *rkey) +{ + ECB_WALK_START(req, ARIA_BLOCK_SIZE, ARIA_AESNI_PARALLEL_BLOCKS); + ECB_BLOCK(ARIA_AESNI_AVX2_PARALLEL_BLOCKS, aria_ops.aria_encrypt_32way); + ECB_BLOCK(ARIA_AESNI_PARALLEL_BLOCKS, aria_ops.aria_encrypt_16way); + ECB_BLOCK(1, aria_encrypt); + ECB_WALK_END(); +} + +static int ecb_do_decrypt(struct skcipher_request *req, const u32 *rkey) +{ + ECB_WALK_START(req, ARIA_BLOCK_SIZE, ARIA_AESNI_PARALLEL_BLOCKS); + ECB_BLOCK(ARIA_AESNI_AVX2_PARALLEL_BLOCKS, aria_ops.aria_decrypt_32way); + ECB_BLOCK(ARIA_AESNI_PARALLEL_BLOCKS, aria_ops.aria_decrypt_16way); + ECB_BLOCK(1, aria_decrypt); + ECB_WALK_END(); +} + +static int aria_avx2_ecb_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aria_ctx *ctx = crypto_skcipher_ctx(tfm); + + return ecb_do_encrypt(req, ctx->enc_key[0]); +} + +static int aria_avx2_ecb_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aria_ctx *ctx = crypto_skcipher_ctx(tfm); + + return ecb_do_decrypt(req, ctx->dec_key[0]); +} + +static int aria_avx2_set_key(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) +{ + return aria_set_key(&tfm->base, key, keylen); +} + +static int aria_avx2_ctr_encrypt(struct skcipher_request *req) +{ + struct aria_avx2_request_ctx *req_ctx = skcipher_request_ctx(req); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aria_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + + while (nbytes >= ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE) { + kernel_fpu_begin(); + aria_ops.aria_ctr_crypt_32way(ctx, dst, src, + &req_ctx->keystream[0], + walk.iv); + kernel_fpu_end(); + dst += ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE; + src += ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE; + nbytes -= ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE; + } + + while (nbytes >= ARIA_AESNI_PARALLEL_BLOCK_SIZE) { + kernel_fpu_begin(); + aria_ops.aria_ctr_crypt_16way(ctx, dst, src, + &req_ctx->keystream[0], + walk.iv); + kernel_fpu_end(); + dst += ARIA_AESNI_PARALLEL_BLOCK_SIZE; + src += ARIA_AESNI_PARALLEL_BLOCK_SIZE; + nbytes -= ARIA_AESNI_PARALLEL_BLOCK_SIZE; + } + + while (nbytes >= ARIA_BLOCK_SIZE) { + memcpy(&req_ctx->keystream[0], walk.iv, ARIA_BLOCK_SIZE); + crypto_inc(walk.iv, ARIA_BLOCK_SIZE); + + aria_encrypt(ctx, &req_ctx->keystream[0], + &req_ctx->keystream[0]); + + crypto_xor_cpy(dst, src, &req_ctx->keystream[0], + ARIA_BLOCK_SIZE); + dst += ARIA_BLOCK_SIZE; + src += ARIA_BLOCK_SIZE; + nbytes -= ARIA_BLOCK_SIZE; + } + + if (walk.nbytes == walk.total && nbytes > 0) { + memcpy(&req_ctx->keystream[0], walk.iv, + ARIA_BLOCK_SIZE); + crypto_inc(walk.iv, ARIA_BLOCK_SIZE); + + aria_encrypt(ctx, &req_ctx->keystream[0], + &req_ctx->keystream[0]); + + crypto_xor_cpy(dst, src, &req_ctx->keystream[0], + nbytes); + dst += nbytes; + src += nbytes; + nbytes = 0; + } + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static int aria_avx2_init_tfm(struct crypto_skcipher *tfm) +{ + crypto_skcipher_set_reqsize(tfm, sizeof(struct 
aria_avx2_request_ctx)); + + return 0; +} + +static struct skcipher_alg aria_algs[] = { + { + .base.cra_name = "__ecb(aria)", + .base.cra_driver_name = "__ecb-aria-avx2", + .base.cra_priority = 500, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = ARIA_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct aria_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = ARIA_MIN_KEY_SIZE, + .max_keysize = ARIA_MAX_KEY_SIZE, + .setkey = aria_avx2_set_key, + .encrypt = aria_avx2_ecb_encrypt, + .decrypt = aria_avx2_ecb_decrypt, + }, { + .base.cra_name = "__ctr(aria)", + .base.cra_driver_name = "__ctr-aria-avx2", + .base.cra_priority = 500, + .base.cra_flags = CRYPTO_ALG_INTERNAL | + CRYPTO_ALG_SKCIPHER_REQSIZE_LARGE, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct aria_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = ARIA_MIN_KEY_SIZE, + .max_keysize = ARIA_MAX_KEY_SIZE, + .ivsize = ARIA_BLOCK_SIZE, + .chunksize = ARIA_BLOCK_SIZE, + .setkey = aria_avx2_set_key, + .encrypt = aria_avx2_ctr_encrypt, + .decrypt = aria_avx2_ctr_encrypt, + .init = aria_avx2_init_tfm, + } +}; + +static struct simd_skcipher_alg *aria_simd_algs[ARRAY_SIZE(aria_algs)]; + +static int __init aria_avx2_init(void) +{ + const char *feature_name; + + if (!boot_cpu_has(X86_FEATURE_AVX) || + !boot_cpu_has(X86_FEATURE_AVX2) || + !boot_cpu_has(X86_FEATURE_AES) || + !boot_cpu_has(X86_FEATURE_OSXSAVE)) { + pr_info("AVX2 or AES-NI instructions are not detected.\n"); + return -ENODEV; + } + + if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, + &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); + return -ENODEV; + } + + if (boot_cpu_has(X86_FEATURE_GFNI) && IS_ENABLED(CONFIG_AS_GFNI)) { + aria_ops.aria_encrypt_16way = aria_aesni_avx_gfni_encrypt_16way; + aria_ops.aria_decrypt_16way = aria_aesni_avx_gfni_decrypt_16way; + aria_ops.aria_ctr_crypt_16way = aria_aesni_avx_gfni_ctr_crypt_16way; + aria_ops.aria_encrypt_32way = aria_aesni_avx2_gfni_encrypt_32way; + aria_ops.aria_decrypt_32way = aria_aesni_avx2_gfni_decrypt_32way; + aria_ops.aria_ctr_crypt_32way = aria_aesni_avx2_gfni_ctr_crypt_32way; + } else { + aria_ops.aria_encrypt_16way = aria_aesni_avx_encrypt_16way; + aria_ops.aria_decrypt_16way = aria_aesni_avx_decrypt_16way; + aria_ops.aria_ctr_crypt_16way = aria_aesni_avx_ctr_crypt_16way; + aria_ops.aria_encrypt_32way = aria_aesni_avx2_encrypt_32way; + aria_ops.aria_decrypt_32way = aria_aesni_avx2_decrypt_32way; + aria_ops.aria_ctr_crypt_32way = aria_aesni_avx2_ctr_crypt_32way; + } + + return simd_register_skciphers_compat(aria_algs, + ARRAY_SIZE(aria_algs), + aria_simd_algs); +} + +static void __exit aria_avx2_exit(void) +{ + simd_unregister_skciphers(aria_algs, ARRAY_SIZE(aria_algs), + aria_simd_algs); +} + +module_init(aria_avx2_init); +module_exit(aria_avx2_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Taehee Yoo <ap420073@gmail.com>"); +MODULE_DESCRIPTION("ARIA Cipher Algorithm, AVX2/AES-NI/GFNI optimized"); +MODULE_ALIAS_CRYPTO("aria"); +MODULE_ALIAS_CRYPTO("aria-aesni-avx2"); diff --git a/arch/x86/crypto/aria_aesni_avx_glue.c b/arch/x86/crypto/aria_aesni_avx_glue.c index c561ea4fefa5..4e1516b76669 100644 --- a/arch/x86/crypto/aria_aesni_avx_glue.c +++ b/arch/x86/crypto/aria_aesni_avx_glue.c @@ -18,21 +18,33 @@ asmlinkage void aria_aesni_avx_encrypt_16way(const void *ctx, u8 *dst, const u8 *src); +EXPORT_SYMBOL_GPL(aria_aesni_avx_encrypt_16way); asmlinkage void aria_aesni_avx_decrypt_16way(const void *ctx, u8 *dst, const u8 *src); 
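[Aside on the request-context keystream buffers] The glue code above (and the AVX/AVX-512 variants below) stops keeping the keystream scratch area on the kernel stack; instead each driver declares a per-request context, reports its size from the init_tfm hook, and fetches it inside the handlers. A minimal sketch of that pattern in isolation, with made-up "example_*" names rather than the driver's own:

	#include <crypto/internal/skcipher.h>
	#include <linux/string.h>

	struct example_request_ctx {
		u8 keystream[64];	/* scratch written by the .S code */
	};

	static int example_init_tfm(struct crypto_skcipher *tfm)
	{
		/* Reserve space for the scratch area in every request. */
		crypto_skcipher_set_reqsize(tfm,
					    sizeof(struct example_request_ctx));
		return 0;
	}

	static int example_crypt(struct skcipher_request *req)
	{
		struct example_request_ctx *rctx = skcipher_request_ctx(req);

		/* The buffer lives in the request, not on the stack. */
		memset(rctx->keystream, 0, sizeof(rctx->keystream));
		return 0;
	}

Moving the buffer into the request is what lets the wide (32/64-way) keystream blocks exist at all without blowing the stack, at the cost of a larger request size (hence the CRYPTO_ALG_SKCIPHER_REQSIZE_LARGE flag on the CTR algorithms).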
+EXPORT_SYMBOL_GPL(aria_aesni_avx_decrypt_16way); asmlinkage void aria_aesni_avx_ctr_crypt_16way(const void *ctx, u8 *dst, const u8 *src, u8 *keystream, u8 *iv); +EXPORT_SYMBOL_GPL(aria_aesni_avx_ctr_crypt_16way); +#ifdef CONFIG_AS_GFNI asmlinkage void aria_aesni_avx_gfni_encrypt_16way(const void *ctx, u8 *dst, const u8 *src); +EXPORT_SYMBOL_GPL(aria_aesni_avx_gfni_encrypt_16way); asmlinkage void aria_aesni_avx_gfni_decrypt_16way(const void *ctx, u8 *dst, const u8 *src); +EXPORT_SYMBOL_GPL(aria_aesni_avx_gfni_decrypt_16way); asmlinkage void aria_aesni_avx_gfni_ctr_crypt_16way(const void *ctx, u8 *dst, const u8 *src, u8 *keystream, u8 *iv); +EXPORT_SYMBOL_GPL(aria_aesni_avx_gfni_ctr_crypt_16way); +#endif /* CONFIG_AS_GFNI */ static struct aria_avx_ops aria_ops; +struct aria_avx_request_ctx { + u8 keystream[ARIA_AESNI_PARALLEL_BLOCK_SIZE]; +}; + static int ecb_do_encrypt(struct skcipher_request *req, const u32 *rkey) { ECB_WALK_START(req, ARIA_BLOCK_SIZE, ARIA_AESNI_PARALLEL_BLOCKS); @@ -73,6 +85,7 @@ static int aria_avx_set_key(struct crypto_skcipher *tfm, const u8 *key, static int aria_avx_ctr_encrypt(struct skcipher_request *req) { + struct aria_avx_request_ctx *req_ctx = skcipher_request_ctx(req); struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct aria_ctx *ctx = crypto_skcipher_ctx(tfm); struct skcipher_walk walk; @@ -86,10 +99,9 @@ static int aria_avx_ctr_encrypt(struct skcipher_request *req) u8 *dst = walk.dst.virt.addr; while (nbytes >= ARIA_AESNI_PARALLEL_BLOCK_SIZE) { - u8 keystream[ARIA_AESNI_PARALLEL_BLOCK_SIZE]; - kernel_fpu_begin(); - aria_ops.aria_ctr_crypt_16way(ctx, dst, src, keystream, + aria_ops.aria_ctr_crypt_16way(ctx, dst, src, + &req_ctx->keystream[0], walk.iv); kernel_fpu_end(); dst += ARIA_AESNI_PARALLEL_BLOCK_SIZE; @@ -98,28 +110,29 @@ static int aria_avx_ctr_encrypt(struct skcipher_request *req) } while (nbytes >= ARIA_BLOCK_SIZE) { - u8 keystream[ARIA_BLOCK_SIZE]; - - memcpy(keystream, walk.iv, ARIA_BLOCK_SIZE); + memcpy(&req_ctx->keystream[0], walk.iv, ARIA_BLOCK_SIZE); crypto_inc(walk.iv, ARIA_BLOCK_SIZE); - aria_encrypt(ctx, keystream, keystream); + aria_encrypt(ctx, &req_ctx->keystream[0], + &req_ctx->keystream[0]); - crypto_xor_cpy(dst, src, keystream, ARIA_BLOCK_SIZE); + crypto_xor_cpy(dst, src, &req_ctx->keystream[0], + ARIA_BLOCK_SIZE); dst += ARIA_BLOCK_SIZE; src += ARIA_BLOCK_SIZE; nbytes -= ARIA_BLOCK_SIZE; } if (walk.nbytes == walk.total && nbytes > 0) { - u8 keystream[ARIA_BLOCK_SIZE]; - - memcpy(keystream, walk.iv, ARIA_BLOCK_SIZE); + memcpy(&req_ctx->keystream[0], walk.iv, + ARIA_BLOCK_SIZE); crypto_inc(walk.iv, ARIA_BLOCK_SIZE); - aria_encrypt(ctx, keystream, keystream); + aria_encrypt(ctx, &req_ctx->keystream[0], + &req_ctx->keystream[0]); - crypto_xor_cpy(dst, src, keystream, nbytes); + crypto_xor_cpy(dst, src, &req_ctx->keystream[0], + nbytes); dst += nbytes; src += nbytes; nbytes = 0; @@ -130,6 +143,13 @@ static int aria_avx_ctr_encrypt(struct skcipher_request *req) return err; } +static int aria_avx_init_tfm(struct crypto_skcipher *tfm) +{ + crypto_skcipher_set_reqsize(tfm, sizeof(struct aria_avx_request_ctx)); + + return 0; +} + static struct skcipher_alg aria_algs[] = { { .base.cra_name = "__ecb(aria)", @@ -160,6 +180,7 @@ static struct skcipher_alg aria_algs[] = { .setkey = aria_avx_set_key, .encrypt = aria_avx_ctr_encrypt, .decrypt = aria_avx_ctr_encrypt, + .init = aria_avx_init_tfm, } }; @@ -182,7 +203,7 @@ static int __init aria_avx_init(void) return -ENODEV; } - if (boot_cpu_has(X86_FEATURE_GFNI)) { + if 
(boot_cpu_has(X86_FEATURE_GFNI) && IS_ENABLED(CONFIG_AS_GFNI)) { aria_ops.aria_encrypt_16way = aria_aesni_avx_gfni_encrypt_16way; aria_ops.aria_decrypt_16way = aria_aesni_avx_gfni_decrypt_16way; aria_ops.aria_ctr_crypt_16way = aria_aesni_avx_gfni_ctr_crypt_16way; diff --git a/arch/x86/crypto/aria_gfni_avx512_glue.c b/arch/x86/crypto/aria_gfni_avx512_glue.c new file mode 100644 index 000000000000..f4a2208d2638 --- /dev/null +++ b/arch/x86/crypto/aria_gfni_avx512_glue.c @@ -0,0 +1,250 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Glue Code for the AVX512/GFNI assembler implementation of the ARIA Cipher + * + * Copyright (c) 2022 Taehee Yoo <ap420073@gmail.com> + */ + +#include <crypto/algapi.h> +#include <crypto/internal/simd.h> +#include <crypto/aria.h> +#include <linux/crypto.h> +#include <linux/err.h> +#include <linux/module.h> +#include <linux/types.h> + +#include "ecb_cbc_helpers.h" +#include "aria-avx.h" + +asmlinkage void aria_gfni_avx512_encrypt_64way(const void *ctx, u8 *dst, + const u8 *src); +asmlinkage void aria_gfni_avx512_decrypt_64way(const void *ctx, u8 *dst, + const u8 *src); +asmlinkage void aria_gfni_avx512_ctr_crypt_64way(const void *ctx, u8 *dst, + const u8 *src, + u8 *keystream, u8 *iv); + +static struct aria_avx_ops aria_ops; + +struct aria_avx512_request_ctx { + u8 keystream[ARIA_GFNI_AVX512_PARALLEL_BLOCK_SIZE]; +}; + +static int ecb_do_encrypt(struct skcipher_request *req, const u32 *rkey) +{ + ECB_WALK_START(req, ARIA_BLOCK_SIZE, ARIA_AESNI_PARALLEL_BLOCKS); + ECB_BLOCK(ARIA_GFNI_AVX512_PARALLEL_BLOCKS, aria_ops.aria_encrypt_64way); + ECB_BLOCK(ARIA_AESNI_AVX2_PARALLEL_BLOCKS, aria_ops.aria_encrypt_32way); + ECB_BLOCK(ARIA_AESNI_PARALLEL_BLOCKS, aria_ops.aria_encrypt_16way); + ECB_BLOCK(1, aria_encrypt); + ECB_WALK_END(); +} + +static int ecb_do_decrypt(struct skcipher_request *req, const u32 *rkey) +{ + ECB_WALK_START(req, ARIA_BLOCK_SIZE, ARIA_AESNI_PARALLEL_BLOCKS); + ECB_BLOCK(ARIA_GFNI_AVX512_PARALLEL_BLOCKS, aria_ops.aria_decrypt_64way); + ECB_BLOCK(ARIA_AESNI_AVX2_PARALLEL_BLOCKS, aria_ops.aria_decrypt_32way); + ECB_BLOCK(ARIA_AESNI_PARALLEL_BLOCKS, aria_ops.aria_decrypt_16way); + ECB_BLOCK(1, aria_decrypt); + ECB_WALK_END(); +} + +static int aria_avx512_ecb_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aria_ctx *ctx = crypto_skcipher_ctx(tfm); + + return ecb_do_encrypt(req, ctx->enc_key[0]); +} + +static int aria_avx512_ecb_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aria_ctx *ctx = crypto_skcipher_ctx(tfm); + + return ecb_do_decrypt(req, ctx->dec_key[0]); +} + +static int aria_avx512_set_key(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) +{ + return aria_set_key(&tfm->base, key, keylen); +} + +static int aria_avx512_ctr_encrypt(struct skcipher_request *req) +{ + struct aria_avx512_request_ctx *req_ctx = skcipher_request_ctx(req); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aria_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while ((nbytes = walk.nbytes) > 0) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + + while (nbytes >= ARIA_GFNI_AVX512_PARALLEL_BLOCK_SIZE) { + kernel_fpu_begin(); + aria_ops.aria_ctr_crypt_64way(ctx, dst, src, + &req_ctx->keystream[0], + walk.iv); + kernel_fpu_end(); + dst += ARIA_GFNI_AVX512_PARALLEL_BLOCK_SIZE; + 
src += ARIA_GFNI_AVX512_PARALLEL_BLOCK_SIZE; + nbytes -= ARIA_GFNI_AVX512_PARALLEL_BLOCK_SIZE; + } + + while (nbytes >= ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE) { + kernel_fpu_begin(); + aria_ops.aria_ctr_crypt_32way(ctx, dst, src, + &req_ctx->keystream[0], + walk.iv); + kernel_fpu_end(); + dst += ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE; + src += ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE; + nbytes -= ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE; + } + + while (nbytes >= ARIA_AESNI_PARALLEL_BLOCK_SIZE) { + kernel_fpu_begin(); + aria_ops.aria_ctr_crypt_16way(ctx, dst, src, + &req_ctx->keystream[0], + walk.iv); + kernel_fpu_end(); + dst += ARIA_AESNI_PARALLEL_BLOCK_SIZE; + src += ARIA_AESNI_PARALLEL_BLOCK_SIZE; + nbytes -= ARIA_AESNI_PARALLEL_BLOCK_SIZE; + } + + while (nbytes >= ARIA_BLOCK_SIZE) { + memcpy(&req_ctx->keystream[0], walk.iv, + ARIA_BLOCK_SIZE); + crypto_inc(walk.iv, ARIA_BLOCK_SIZE); + + aria_encrypt(ctx, &req_ctx->keystream[0], + &req_ctx->keystream[0]); + + crypto_xor_cpy(dst, src, &req_ctx->keystream[0], + ARIA_BLOCK_SIZE); + dst += ARIA_BLOCK_SIZE; + src += ARIA_BLOCK_SIZE; + nbytes -= ARIA_BLOCK_SIZE; + } + + if (walk.nbytes == walk.total && nbytes > 0) { + memcpy(&req_ctx->keystream[0], walk.iv, + ARIA_BLOCK_SIZE); + crypto_inc(walk.iv, ARIA_BLOCK_SIZE); + + aria_encrypt(ctx, &req_ctx->keystream[0], + &req_ctx->keystream[0]); + + crypto_xor_cpy(dst, src, &req_ctx->keystream[0], + nbytes); + dst += nbytes; + src += nbytes; + nbytes = 0; + } + err = skcipher_walk_done(&walk, nbytes); + } + + return err; +} + +static int aria_avx512_init_tfm(struct crypto_skcipher *tfm) +{ + crypto_skcipher_set_reqsize(tfm, + sizeof(struct aria_avx512_request_ctx)); + + return 0; +} + +static struct skcipher_alg aria_algs[] = { + { + .base.cra_name = "__ecb(aria)", + .base.cra_driver_name = "__ecb-aria-avx512", + .base.cra_priority = 600, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = ARIA_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct aria_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = ARIA_MIN_KEY_SIZE, + .max_keysize = ARIA_MAX_KEY_SIZE, + .setkey = aria_avx512_set_key, + .encrypt = aria_avx512_ecb_encrypt, + .decrypt = aria_avx512_ecb_decrypt, + }, { + .base.cra_name = "__ctr(aria)", + .base.cra_driver_name = "__ctr-aria-avx512", + .base.cra_priority = 600, + .base.cra_flags = CRYPTO_ALG_INTERNAL | + CRYPTO_ALG_SKCIPHER_REQSIZE_LARGE, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct aria_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = ARIA_MIN_KEY_SIZE, + .max_keysize = ARIA_MAX_KEY_SIZE, + .ivsize = ARIA_BLOCK_SIZE, + .chunksize = ARIA_BLOCK_SIZE, + .setkey = aria_avx512_set_key, + .encrypt = aria_avx512_ctr_encrypt, + .decrypt = aria_avx512_ctr_encrypt, + .init = aria_avx512_init_tfm, + } +}; + +static struct simd_skcipher_alg *aria_simd_algs[ARRAY_SIZE(aria_algs)]; + +static int __init aria_avx512_init(void) +{ + const char *feature_name; + + if (!boot_cpu_has(X86_FEATURE_AVX) || + !boot_cpu_has(X86_FEATURE_AVX2) || + !boot_cpu_has(X86_FEATURE_AVX512F) || + !boot_cpu_has(X86_FEATURE_AVX512VL) || + !boot_cpu_has(X86_FEATURE_GFNI) || + !boot_cpu_has(X86_FEATURE_OSXSAVE)) { + pr_info("AVX512/GFNI instructions are not detected.\n"); + return -ENODEV; + } + + if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | + XFEATURE_MASK_AVX512, &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); + return -ENODEV; + } + + aria_ops.aria_encrypt_16way = aria_aesni_avx_gfni_encrypt_16way; + aria_ops.aria_decrypt_16way = 
aria_aesni_avx_gfni_decrypt_16way; + aria_ops.aria_ctr_crypt_16way = aria_aesni_avx_gfni_ctr_crypt_16way; + aria_ops.aria_encrypt_32way = aria_aesni_avx2_gfni_encrypt_32way; + aria_ops.aria_decrypt_32way = aria_aesni_avx2_gfni_decrypt_32way; + aria_ops.aria_ctr_crypt_32way = aria_aesni_avx2_gfni_ctr_crypt_32way; + aria_ops.aria_encrypt_64way = aria_gfni_avx512_encrypt_64way; + aria_ops.aria_decrypt_64way = aria_gfni_avx512_decrypt_64way; + aria_ops.aria_ctr_crypt_64way = aria_gfni_avx512_ctr_crypt_64way; + + return simd_register_skciphers_compat(aria_algs, + ARRAY_SIZE(aria_algs), + aria_simd_algs); +} + +static void __exit aria_avx512_exit(void) +{ + simd_unregister_skciphers(aria_algs, ARRAY_SIZE(aria_algs), + aria_simd_algs); +} + +module_init(aria_avx512_init); +module_exit(aria_avx512_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Taehee Yoo <ap420073@gmail.com>"); +MODULE_DESCRIPTION("ARIA Cipher Algorithm, AVX512/GFNI optimized"); +MODULE_ALIAS_CRYPTO("aria"); +MODULE_ALIAS_CRYPTO("aria-gfni-avx512"); diff --git a/arch/x86/crypto/blowfish-x86_64-asm_64.S b/arch/x86/crypto/blowfish-x86_64-asm_64.S index 4a43e072d2d1..e88c8e4f013c 100644 --- a/arch/x86/crypto/blowfish-x86_64-asm_64.S +++ b/arch/x86/crypto/blowfish-x86_64-asm_64.S @@ -6,7 +6,6 @@ */ #include <linux/linkage.h> -#include <linux/cfi_types.h> .file "blowfish-x86_64-asm.S" .text @@ -100,16 +99,11 @@ bswapq RX0; \ movq RX0, (RIO); -#define xor_block() \ - bswapq RX0; \ - xorq RX0, (RIO); - -SYM_FUNC_START(__blowfish_enc_blk) +SYM_FUNC_START(blowfish_enc_blk) /* input: * %rdi: ctx * %rsi: dst * %rdx: src - * %rcx: bool, if true: xor output */ movq %r12, %r11; @@ -130,19 +124,13 @@ SYM_FUNC_START(__blowfish_enc_blk) add_roundkey_enc(16); movq %r11, %r12; - movq %r10, RIO; - test %cl, %cl; - jnz .L__enc_xor; write_block(); RET; -.L__enc_xor: - xor_block(); - RET; -SYM_FUNC_END(__blowfish_enc_blk) +SYM_FUNC_END(blowfish_enc_blk) -SYM_TYPED_FUNC_START(blowfish_dec_blk) +SYM_FUNC_START(blowfish_dec_blk) /* input: * %rdi: ctx * %rsi: dst @@ -272,28 +260,26 @@ SYM_FUNC_END(blowfish_dec_blk) movq RX3, 24(RIO); #define xor_block4() \ - bswapq RX0; \ - xorq RX0, (RIO); \ + movq (RIO), RT0; \ + bswapq RT0; \ + xorq RT0, RX1; \ \ - bswapq RX1; \ - xorq RX1, 8(RIO); \ + movq 8(RIO), RT2; \ + bswapq RT2; \ + xorq RT2, RX2; \ \ - bswapq RX2; \ - xorq RX2, 16(RIO); \ - \ - bswapq RX3; \ - xorq RX3, 24(RIO); + movq 16(RIO), RT3; \ + bswapq RT3; \ + xorq RT3, RX3; -SYM_FUNC_START(__blowfish_enc_blk_4way) +SYM_FUNC_START(blowfish_enc_blk_4way) /* input: * %rdi: ctx * %rsi: dst * %rdx: src - * %rcx: bool, if true: xor output */ pushq %r12; pushq %rbx; - pushq %rcx; movq %rdi, CTX movq %rsi, %r11; @@ -313,37 +299,28 @@ SYM_FUNC_START(__blowfish_enc_blk_4way) round_enc4(14); add_preloaded_roundkey4(); - popq %r12; movq %r11, RIO; - - test %r12b, %r12b; - jnz .L__enc_xor4; - write_block4(); popq %rbx; popq %r12; RET; +SYM_FUNC_END(blowfish_enc_blk_4way) -.L__enc_xor4: - xor_block4(); - - popq %rbx; - popq %r12; - RET; -SYM_FUNC_END(__blowfish_enc_blk_4way) - -SYM_TYPED_FUNC_START(blowfish_dec_blk_4way) +SYM_FUNC_START(__blowfish_dec_blk_4way) /* input: * %rdi: ctx * %rsi: dst * %rdx: src + * %rcx: cbc (bool) */ pushq %r12; pushq %rbx; + pushq %rcx; + pushq %rdx; movq %rdi, CTX; - movq %rsi, %r11 + movq %rsi, %r11; movq %rdx, RIO; preload_roundkey_dec(17); @@ -359,6 +336,14 @@ SYM_TYPED_FUNC_START(blowfish_dec_blk_4way) round_dec4(3); add_preloaded_roundkey4(); + popq RIO; + popq %r12; + testq %r12, %r12; + jz .L_no_cbc_xor; + + xor_block4(); + 
+.L_no_cbc_xor: movq %r11, RIO; write_block4(); @@ -366,4 +351,4 @@ SYM_TYPED_FUNC_START(blowfish_dec_blk_4way) popq %r12; RET; -SYM_FUNC_END(blowfish_dec_blk_4way) +SYM_FUNC_END(__blowfish_dec_blk_4way) diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c index 019c64c1340a..552f2df0643f 100644 --- a/arch/x86/crypto/blowfish_glue.c +++ b/arch/x86/crypto/blowfish_glue.c @@ -16,26 +16,28 @@ #include <linux/module.h> #include <linux/types.h> +#include "ecb_cbc_helpers.h" + /* regular block cipher functions */ -asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src, - bool xor); +asmlinkage void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src); asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src); /* 4-way parallel cipher functions */ -asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, - const u8 *src, bool xor); -asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst, +asmlinkage void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, const u8 *src); +asmlinkage void __blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst, + const u8 *src, bool cbc); -static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src) +static inline void blowfish_dec_ecb_4way(struct bf_ctx *ctx, u8 *dst, + const u8 *src) { - __blowfish_enc_blk(ctx, dst, src, false); + return __blowfish_dec_blk_4way(ctx, dst, src, false); } -static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, - const u8 *src) +static inline void blowfish_dec_cbc_4way(struct bf_ctx *ctx, u8 *dst, + const u8 *src) { - __blowfish_enc_blk_4way(ctx, dst, src, false); + return __blowfish_dec_blk_4way(ctx, dst, src, true); } static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) @@ -54,183 +56,35 @@ static int blowfish_setkey_skcipher(struct crypto_skcipher *tfm, return blowfish_setkey(&tfm->base, key, keylen); } -static int ecb_crypt(struct skcipher_request *req, - void (*fn)(struct bf_ctx *, u8 *, const u8 *), - void (*fn_4way)(struct bf_ctx *, u8 *, const u8 *)) -{ - unsigned int bsize = BF_BLOCK_SIZE; - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct bf_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - unsigned int nbytes; - int err; - - err = skcipher_walk_virt(&walk, req, false); - - while ((nbytes = walk.nbytes)) { - u8 *wsrc = walk.src.virt.addr; - u8 *wdst = walk.dst.virt.addr; - - /* Process four block batch */ - if (nbytes >= bsize * 4) { - do { - fn_4way(ctx, wdst, wsrc); - - wsrc += bsize * 4; - wdst += bsize * 4; - nbytes -= bsize * 4; - } while (nbytes >= bsize * 4); - - if (nbytes < bsize) - goto done; - } - - /* Handle leftovers */ - do { - fn(ctx, wdst, wsrc); - - wsrc += bsize; - wdst += bsize; - nbytes -= bsize; - } while (nbytes >= bsize); - -done: - err = skcipher_walk_done(&walk, nbytes); - } - - return err; -} - static int ecb_encrypt(struct skcipher_request *req) { - return ecb_crypt(req, blowfish_enc_blk, blowfish_enc_blk_4way); + ECB_WALK_START(req, BF_BLOCK_SIZE, -1); + ECB_BLOCK(4, blowfish_enc_blk_4way); + ECB_BLOCK(1, blowfish_enc_blk); + ECB_WALK_END(); } static int ecb_decrypt(struct skcipher_request *req) { - return ecb_crypt(req, blowfish_dec_blk, blowfish_dec_blk_4way); -} - -static unsigned int __cbc_encrypt(struct bf_ctx *ctx, - struct skcipher_walk *walk) -{ - unsigned int bsize = BF_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - u64 *src = (u64 *)walk->src.virt.addr; - u64 *dst = (u64 *)walk->dst.virt.addr; - 
u64 *iv = (u64 *)walk->iv; - - do { - *dst = *src ^ *iv; - blowfish_enc_blk(ctx, (u8 *)dst, (u8 *)dst); - iv = dst; - - src += 1; - dst += 1; - nbytes -= bsize; - } while (nbytes >= bsize); - - *(u64 *)walk->iv = *iv; - return nbytes; + ECB_WALK_START(req, BF_BLOCK_SIZE, -1); + ECB_BLOCK(4, blowfish_dec_ecb_4way); + ECB_BLOCK(1, blowfish_dec_blk); + ECB_WALK_END(); } static int cbc_encrypt(struct skcipher_request *req) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct bf_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - unsigned int nbytes; - int err; - - err = skcipher_walk_virt(&walk, req, false); - - while (walk.nbytes) { - nbytes = __cbc_encrypt(ctx, &walk); - err = skcipher_walk_done(&walk, nbytes); - } - - return err; -} - -static unsigned int __cbc_decrypt(struct bf_ctx *ctx, - struct skcipher_walk *walk) -{ - unsigned int bsize = BF_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - u64 *src = (u64 *)walk->src.virt.addr; - u64 *dst = (u64 *)walk->dst.virt.addr; - u64 ivs[4 - 1]; - u64 last_iv; - - /* Start of the last block. */ - src += nbytes / bsize - 1; - dst += nbytes / bsize - 1; - - last_iv = *src; - - /* Process four block batch */ - if (nbytes >= bsize * 4) { - do { - nbytes -= bsize * 4 - bsize; - src -= 4 - 1; - dst -= 4 - 1; - - ivs[0] = src[0]; - ivs[1] = src[1]; - ivs[2] = src[2]; - - blowfish_dec_blk_4way(ctx, (u8 *)dst, (u8 *)src); - - dst[1] ^= ivs[0]; - dst[2] ^= ivs[1]; - dst[3] ^= ivs[2]; - - nbytes -= bsize; - if (nbytes < bsize) - goto done; - - *dst ^= *(src - 1); - src -= 1; - dst -= 1; - } while (nbytes >= bsize * 4); - } - - /* Handle leftovers */ - for (;;) { - blowfish_dec_blk(ctx, (u8 *)dst, (u8 *)src); - - nbytes -= bsize; - if (nbytes < bsize) - break; - - *dst ^= *(src - 1); - src -= 1; - dst -= 1; - } - -done: - *dst ^= *(u64 *)walk->iv; - *(u64 *)walk->iv = last_iv; - - return nbytes; + CBC_WALK_START(req, BF_BLOCK_SIZE, -1); + CBC_ENC_BLOCK(blowfish_enc_blk); + CBC_WALK_END(); } static int cbc_decrypt(struct skcipher_request *req) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct bf_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - unsigned int nbytes; - int err; - - err = skcipher_walk_virt(&walk, req, false); - - while (walk.nbytes) { - nbytes = __cbc_decrypt(ctx, &walk); - err = skcipher_walk_done(&walk, nbytes); - } - - return err; + CBC_WALK_START(req, BF_BLOCK_SIZE, -1); + CBC_DEC_BLOCK(4, blowfish_dec_cbc_4way); + CBC_DEC_BLOCK(1, blowfish_dec_blk); + CBC_WALK_END(); } static struct crypto_alg bf_cipher_alg = { diff --git a/arch/x86/crypto/ecb_cbc_helpers.h b/arch/x86/crypto/ecb_cbc_helpers.h index eaa15c7b29d6..11955bd01af1 100644 --- a/arch/x86/crypto/ecb_cbc_helpers.h +++ b/arch/x86/crypto/ecb_cbc_helpers.h @@ -13,13 +13,14 @@ #define ECB_WALK_START(req, bsize, fpu_blocks) do { \ void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); \ + const int __fpu_blocks = (fpu_blocks); \ const int __bsize = (bsize); \ struct skcipher_walk walk; \ int err = skcipher_walk_virt(&walk, (req), false); \ while (walk.nbytes > 0) { \ unsigned int nbytes = walk.nbytes; \ - bool do_fpu = (fpu_blocks) != -1 && \ - nbytes >= (fpu_blocks) * __bsize; \ + bool do_fpu = __fpu_blocks != -1 && \ + nbytes >= __fpu_blocks * __bsize; \ const u8 *src = walk.src.virt.addr; \ u8 *dst = walk.dst.virt.addr; \ u8 __maybe_unused buf[(bsize)]; \ @@ -35,7 +36,12 @@ } while (0) #define ECB_BLOCK(blocks, func) do { \ - while (nbytes >= (blocks) * __bsize) { \ + const int __blocks = (blocks); \ 
+ if (do_fpu && __blocks < __fpu_blocks) { \ + kernel_fpu_end(); \ + do_fpu = false; \ + } \ + while (nbytes >= __blocks * __bsize) { \ (func)(ctx, dst, src); \ ECB_WALK_ADVANCE(blocks); \ } \ @@ -53,7 +59,12 @@ } while (0) #define CBC_DEC_BLOCK(blocks, func) do { \ - while (nbytes >= (blocks) * __bsize) { \ + const int __blocks = (blocks); \ + if (do_fpu && __blocks < __fpu_blocks) { \ + kernel_fpu_end(); \ + do_fpu = false; \ + } \ + while (nbytes >= __blocks * __bsize) { \ const u8 *__iv = src + ((blocks) - 1) * __bsize; \ if (dst == src) \ __iv = memcpy(buf, __iv, __bsize); \ diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S index 2bf871899920..257ed9446f3e 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_asm.S +++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S @@ -4,7 +4,7 @@ * instructions. This file contains accelerated part of ghash * implementation. More information about PCLMULQDQ can be found at: * - * http://software.intel.com/en-us/articles/carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/ + * https://www.intel.com/content/dam/develop/external/us/en/documents/clmul-wp-rev-2-02-2014-04-20.pdf * * Copyright (c) 2009 Intel Corp. * Author: Huang Ying <ying.huang@intel.com> @@ -88,7 +88,7 @@ SYM_FUNC_START_LOCAL(__clmul_gf128mul_ble) RET SYM_FUNC_END(__clmul_gf128mul_ble) -/* void clmul_ghash_mul(char *dst, const u128 *shash) */ +/* void clmul_ghash_mul(char *dst, const le128 *shash) */ SYM_FUNC_START(clmul_ghash_mul) FRAME_BEGIN movups (%rdi), DATA @@ -104,7 +104,7 @@ SYM_FUNC_END(clmul_ghash_mul) /* * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen, - * const u128 *shash); + * const le128 *shash); */ SYM_FUNC_START(clmul_ghash_update) FRAME_BEGIN diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index 1f1a95f3dd0c..700ecaee9a08 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -19,21 +19,22 @@ #include <crypto/internal/simd.h> #include <asm/cpu_device_id.h> #include <asm/simd.h> +#include <asm/unaligned.h> #define GHASH_BLOCK_SIZE 16 #define GHASH_DIGEST_SIZE 16 -void clmul_ghash_mul(char *dst, const u128 *shash); +void clmul_ghash_mul(char *dst, const le128 *shash); void clmul_ghash_update(char *dst, const char *src, unsigned int srclen, - const u128 *shash); + const le128 *shash); struct ghash_async_ctx { struct cryptd_ahash *cryptd_tfm; }; struct ghash_ctx { - u128 shash; + le128 shash; }; struct ghash_desc_ctx { @@ -54,22 +55,40 @@ static int ghash_setkey(struct crypto_shash *tfm, const u8 *key, unsigned int keylen) { struct ghash_ctx *ctx = crypto_shash_ctx(tfm); - be128 *x = (be128 *)key; u64 a, b; if (keylen != GHASH_BLOCK_SIZE) return -EINVAL; - /* perform multiplication by 'x' in GF(2^128) */ - a = be64_to_cpu(x->a); - b = be64_to_cpu(x->b); - - ctx->shash.a = (b << 1) | (a >> 63); - ctx->shash.b = (a << 1) | (b >> 63); - + /* + * GHASH maps bits to polynomial coefficients backwards, which makes it + * hard to implement. But it can be shown that the GHASH multiplication + * + * D * K (mod x^128 + x^7 + x^2 + x + 1) + * + * (where D is a data block and K is the key) is equivalent to: + * + * bitreflect(D) * bitreflect(K) * x^(-127) + * (mod x^128 + x^127 + x^126 + x^121 + 1) + * + * So, the code below precomputes: + * + * bitreflect(K) * x^(-127) (mod x^128 + x^127 + x^126 + x^121 + 1) + * + * ... but in Montgomery form (so that Montgomery multiplication can be + * used), i.e. 
with an extra x^128 factor, which means actually: + * + * bitreflect(K) * x (mod x^128 + x^127 + x^126 + x^121 + 1) + * + * The within-a-byte part of bitreflect() cancels out GHASH's built-in + * reflection, and thus bitreflect() is actually a byteswap. + */ + a = get_unaligned_be64(key); + b = get_unaligned_be64(key + 8); + ctx->shash.a = cpu_to_le64((a << 1) | (b >> 63)); + ctx->shash.b = cpu_to_le64((b << 1) | (a >> 63)); if (a >> 63) - ctx->shash.b ^= ((u64)0xc2) << 56; - + ctx->shash.a ^= cpu_to_le64((u64)0xc2 << 56); return 0; } diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 82c783da16a8..ef9e951415c5 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -7,6 +7,7 @@ #define COMPILE_OFFSETS #include <linux/crypto.h> +#include <crypto/aria.h> #include <linux/sched.h> #include <linux/stddef.h> #include <linux/hardirq.h> @@ -111,5 +112,12 @@ static void __used common(void) #ifdef CONFIG_CALL_DEPTH_TRACKING OFFSET(X86_call_depth, pcpu_hot, call_depth); #endif +#if IS_ENABLED(CONFIG_CRYPTO_ARIA_AESNI_AVX_X86_64) + /* Offset for fields in aria_ctx */ + BLANK(); + OFFSET(ARIA_CTX_enc_key, aria_ctx, enc_key); + OFFSET(ARIA_CTX_dec_key, aria_ctx, dec_key); + OFFSET(ARIA_CTX_rounds, aria_ctx, rounds); +#endif } diff --git a/crypto/adiantum.c b/crypto/adiantum.c index 84450130cb6b..c33ba22a6638 100644 --- a/crypto/adiantum.c +++ b/crypto/adiantum.c @@ -308,10 +308,9 @@ static int adiantum_finish(struct skcipher_request *req) return 0; } -static void adiantum_streamcipher_done(struct crypto_async_request *areq, - int err) +static void adiantum_streamcipher_done(void *data, int err) { - struct skcipher_request *req = areq->data; + struct skcipher_request *req = data; if (!err) err = adiantum_finish(req); diff --git a/crypto/af_alg.c b/crypto/af_alg.c index 0a4fa2a429e2..5f7252a5b7b4 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -1186,7 +1186,7 @@ EXPORT_SYMBOL_GPL(af_alg_free_resources); /** * af_alg_async_cb - AIO callback handler - * @_req: async request info + * @data: async request completion data * @err: if non-zero, error result to be returned via ki_complete(); * otherwise return the AIO output length via ki_complete(). * @@ -1196,9 +1196,9 @@ EXPORT_SYMBOL_GPL(af_alg_free_resources); * The number of bytes to be generated with the AIO operation must be set * in areq->outlen before the AIO callback handler is invoked. 
*/ -void af_alg_async_cb(struct crypto_async_request *_req, int err) +void af_alg_async_cb(void *data, int err) { - struct af_alg_async_req *areq = _req->data; + struct af_alg_async_req *areq = data; struct sock *sk = areq->sk; struct kiocb *iocb = areq->iocb; unsigned int resultlen; diff --git a/crypto/ahash.c b/crypto/ahash.c index c2ca631a111f..ff8c79d975c1 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -45,7 +45,7 @@ static int hash_walk_next(struct crypto_hash_walk *walk) unsigned int nbytes = min(walk->entrylen, ((unsigned int)(PAGE_SIZE)) - offset); - walk->data = kmap_atomic(walk->pg); + walk->data = kmap_local_page(walk->pg); walk->data += offset; if (offset & alignmask) { @@ -95,7 +95,7 @@ int crypto_hash_walk_done(struct crypto_hash_walk *walk, int err) } } - kunmap_atomic(walk->data); + kunmap_local(walk->data); crypto_yield(walk->flags); if (err) @@ -190,133 +190,98 @@ int crypto_ahash_setkey(struct crypto_ahash *tfm, const u8 *key, } EXPORT_SYMBOL_GPL(crypto_ahash_setkey); -static inline unsigned int ahash_align_buffer_size(unsigned len, - unsigned long mask) -{ - return len + (mask & ~(crypto_tfm_ctx_alignment() - 1)); -} - -static int ahash_save_req(struct ahash_request *req, crypto_completion_t cplt) +static int ahash_save_req(struct ahash_request *req, crypto_completion_t cplt, + bool has_state) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); unsigned long alignmask = crypto_ahash_alignmask(tfm); unsigned int ds = crypto_ahash_digestsize(tfm); - struct ahash_request_priv *priv; + struct ahash_request *subreq; + unsigned int subreq_size; + unsigned int reqsize; + u8 *result; + gfp_t gfp; + u32 flags; - priv = kmalloc(sizeof(*priv) + ahash_align_buffer_size(ds, alignmask), - (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC); - if (!priv) + subreq_size = sizeof(*subreq); + reqsize = crypto_ahash_reqsize(tfm); + reqsize = ALIGN(reqsize, crypto_tfm_ctx_alignment()); + subreq_size += reqsize; + subreq_size += ds; + subreq_size += alignmask & ~(crypto_tfm_ctx_alignment() - 1); + + flags = ahash_request_flags(req); + gfp = (flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL : GFP_ATOMIC; + subreq = kmalloc(subreq_size, gfp); + if (!subreq) return -ENOMEM; - /* - * WARNING: Voodoo programming below! - * - * The code below is obscure and hard to understand, thus explanation - * is necessary. See include/crypto/hash.h and include/linux/crypto.h - * to understand the layout of structures used here! - * - * The code here will replace portions of the ORIGINAL request with - * pointers to new code and buffers so the hashing operation can store - * the result in aligned buffer. We will call the modified request - * an ADJUSTED request. - * - * The newly mangled request will look as such: - * - * req { - * .result = ADJUSTED[new aligned buffer] - * .base.complete = ADJUSTED[pointer to completion function] - * .base.data = ADJUSTED[*req (pointer to self)] - * .priv = ADJUSTED[new priv] { - * .result = ORIGINAL(result) - * .complete = ORIGINAL(base.complete) - * .data = ORIGINAL(base.data) - * } - */ - - priv->result = req->result; - priv->complete = req->base.complete; - priv->data = req->base.data; - priv->flags = req->base.flags; - - /* - * WARNING: We do not backup req->priv here! The req->priv - * is for internal use of the Crypto API and the - * user must _NOT_ _EVER_ depend on it's content! 
- */ - - req->result = PTR_ALIGN((u8 *)priv->ubuf, alignmask + 1); - req->base.complete = cplt; - req->base.data = req; - req->priv = priv; + ahash_request_set_tfm(subreq, tfm); + ahash_request_set_callback(subreq, flags, cplt, req); + + result = (u8 *)(subreq + 1) + reqsize; + result = PTR_ALIGN(result, alignmask + 1); + + ahash_request_set_crypt(subreq, req->src, result, req->nbytes); + + if (has_state) { + void *state; + + state = kmalloc(crypto_ahash_statesize(tfm), gfp); + if (!state) { + kfree(subreq); + return -ENOMEM; + } + + crypto_ahash_export(req, state); + crypto_ahash_import(subreq, state); + kfree_sensitive(state); + } + + req->priv = subreq; return 0; } static void ahash_restore_req(struct ahash_request *req, int err) { - struct ahash_request_priv *priv = req->priv; + struct ahash_request *subreq = req->priv; if (!err) - memcpy(priv->result, req->result, + memcpy(req->result, subreq->result, crypto_ahash_digestsize(crypto_ahash_reqtfm(req))); - /* Restore the original crypto request. */ - req->result = priv->result; - - ahash_request_set_callback(req, priv->flags, - priv->complete, priv->data); req->priv = NULL; - /* Free the req->priv.priv from the ADJUSTED request. */ - kfree_sensitive(priv); -} - -static void ahash_notify_einprogress(struct ahash_request *req) -{ - struct ahash_request_priv *priv = req->priv; - struct crypto_async_request oreq; - - oreq.data = priv->data; - - priv->complete(&oreq, -EINPROGRESS); + kfree_sensitive(subreq); } -static void ahash_op_unaligned_done(struct crypto_async_request *req, int err) +static void ahash_op_unaligned_done(void *data, int err) { - struct ahash_request *areq = req->data; - - if (err == -EINPROGRESS) { - ahash_notify_einprogress(areq); - return; - } + struct ahash_request *areq = data; - /* - * Restore the original request, see ahash_op_unaligned() for what - * goes where. - * - * The "struct ahash_request *req" here is in fact the "req.base" - * from the ADJUSTED request from ahash_op_unaligned(), thus as it - * is a pointer to self, it is also the ADJUSTED "req" . - */ + if (err == -EINPROGRESS) + goto out; /* First copy req->result into req->priv.result */ ahash_restore_req(areq, err); +out: /* Complete the ORIGINAL request. 
*/ - areq->base.complete(&areq->base, err); + ahash_request_complete(areq, err); } static int ahash_op_unaligned(struct ahash_request *req, - int (*op)(struct ahash_request *)) + int (*op)(struct ahash_request *), + bool has_state) { int err; - err = ahash_save_req(req, ahash_op_unaligned_done); + err = ahash_save_req(req, ahash_op_unaligned_done, has_state); if (err) return err; - err = op(req); + err = op(req->priv); if (err == -EINPROGRESS || err == -EBUSY) return err; @@ -326,13 +291,14 @@ static int ahash_op_unaligned(struct ahash_request *req, } static int crypto_ahash_op(struct ahash_request *req, - int (*op)(struct ahash_request *)) + int (*op)(struct ahash_request *), + bool has_state) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); unsigned long alignmask = crypto_ahash_alignmask(tfm); if ((unsigned long)req->result & alignmask) - return ahash_op_unaligned(req, op); + return ahash_op_unaligned(req, op, has_state); return op(req); } @@ -345,7 +311,7 @@ int crypto_ahash_final(struct ahash_request *req) int ret; crypto_stats_get(alg); - ret = crypto_ahash_op(req, crypto_ahash_reqtfm(req)->final); + ret = crypto_ahash_op(req, crypto_ahash_reqtfm(req)->final, true); crypto_stats_ahash_final(nbytes, ret, alg); return ret; } @@ -359,7 +325,7 @@ int crypto_ahash_finup(struct ahash_request *req) int ret; crypto_stats_get(alg); - ret = crypto_ahash_op(req, crypto_ahash_reqtfm(req)->finup); + ret = crypto_ahash_op(req, crypto_ahash_reqtfm(req)->finup, true); crypto_stats_ahash_final(nbytes, ret, alg); return ret; } @@ -376,32 +342,34 @@ int crypto_ahash_digest(struct ahash_request *req) if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) ret = -ENOKEY; else - ret = crypto_ahash_op(req, tfm->digest); + ret = crypto_ahash_op(req, tfm->digest, false); crypto_stats_ahash_final(nbytes, ret, alg); return ret; } EXPORT_SYMBOL_GPL(crypto_ahash_digest); -static void ahash_def_finup_done2(struct crypto_async_request *req, int err) +static void ahash_def_finup_done2(void *data, int err) { - struct ahash_request *areq = req->data; + struct ahash_request *areq = data; if (err == -EINPROGRESS) return; ahash_restore_req(areq, err); - areq->base.complete(&areq->base, err); + ahash_request_complete(areq, err); } static int ahash_def_finup_finish1(struct ahash_request *req, int err) { + struct ahash_request *subreq = req->priv; + if (err) goto out; - req->base.complete = ahash_def_finup_done2; + subreq->base.complete = ahash_def_finup_done2; - err = crypto_ahash_reqtfm(req)->final(req); + err = crypto_ahash_reqtfm(req)->final(subreq); if (err == -EINPROGRESS || err == -EBUSY) return err; @@ -410,22 +378,23 @@ out: return err; } -static void ahash_def_finup_done1(struct crypto_async_request *req, int err) +static void ahash_def_finup_done1(void *data, int err) { - struct ahash_request *areq = req->data; + struct ahash_request *areq = data; + struct ahash_request *subreq; - if (err == -EINPROGRESS) { - ahash_notify_einprogress(areq); - return; - } + if (err == -EINPROGRESS) + goto out; - areq->base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + subreq = areq->priv; + subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG; err = ahash_def_finup_finish1(areq, err); - if (areq->priv) + if (err == -EINPROGRESS || err == -EBUSY) return; - areq->base.complete(&areq->base, err); +out: + ahash_request_complete(areq, err); } static int ahash_def_finup(struct ahash_request *req) @@ -433,11 +402,11 @@ static int ahash_def_finup(struct ahash_request *req) struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); int err; - err = 
ahash_save_req(req, ahash_def_finup_done1); + err = ahash_save_req(req, ahash_def_finup_done1, true); if (err) return err; - err = tfm->update(req); + err = tfm->update(req->priv); if (err == -EINPROGRESS || err == -EBUSY) return err; diff --git a/crypto/api.c b/crypto/api.c index b022702f6436..e67cc63368ed 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -643,9 +643,9 @@ int crypto_has_alg(const char *name, u32 type, u32 mask) } EXPORT_SYMBOL_GPL(crypto_has_alg); -void crypto_req_done(struct crypto_async_request *req, int err) +void crypto_req_done(void *data, int err) { - struct crypto_wait *wait = req->data; + struct crypto_wait *wait = data; if (err == -EINPROGRESS) return; diff --git a/crypto/aria_generic.c b/crypto/aria_generic.c index 4cc29b82b99d..d96dfc4fdde6 100644 --- a/crypto/aria_generic.c +++ b/crypto/aria_generic.c @@ -178,6 +178,10 @@ int aria_set_key(struct crypto_tfm *tfm, const u8 *in_key, unsigned int key_len) if (key_len != 16 && key_len != 24 && key_len != 32) return -EINVAL; + BUILD_BUG_ON(sizeof(ctx->enc_key) != 272); + BUILD_BUG_ON(sizeof(ctx->dec_key) != 272); + BUILD_BUG_ON(sizeof(int) != sizeof(ctx->rounds)); + ctx->key_length = key_len; ctx->rounds = (key_len + 32) / 4; diff --git a/crypto/authenc.c b/crypto/authenc.c index 17f674a7cdff..3326c7343e86 100644 --- a/crypto/authenc.c +++ b/crypto/authenc.c @@ -109,9 +109,9 @@ out: return err; } -static void authenc_geniv_ahash_done(struct crypto_async_request *areq, int err) +static void authenc_geniv_ahash_done(void *data, int err) { - struct aead_request *req = areq->data; + struct aead_request *req = data; struct crypto_aead *authenc = crypto_aead_reqtfm(req); struct aead_instance *inst = aead_alg_instance(authenc); struct authenc_instance_ctx *ictx = aead_instance_ctx(inst); @@ -160,10 +160,9 @@ static int crypto_authenc_genicv(struct aead_request *req, unsigned int flags) return 0; } -static void crypto_authenc_encrypt_done(struct crypto_async_request *req, - int err) +static void crypto_authenc_encrypt_done(void *data, int err) { - struct aead_request *areq = req->data; + struct aead_request *areq = data; if (err) goto out; @@ -261,10 +260,9 @@ static int crypto_authenc_decrypt_tail(struct aead_request *req, return crypto_skcipher_decrypt(skreq); } -static void authenc_verify_ahash_done(struct crypto_async_request *areq, - int err) +static void authenc_verify_ahash_done(void *data, int err) { - struct aead_request *req = areq->data; + struct aead_request *req = data; if (err) goto out; diff --git a/crypto/authencesn.c b/crypto/authencesn.c index b60e61b1904c..91424e791d5c 100644 --- a/crypto/authencesn.c +++ b/crypto/authencesn.c @@ -107,10 +107,9 @@ static int crypto_authenc_esn_genicv_tail(struct aead_request *req, return 0; } -static void authenc_esn_geniv_ahash_done(struct crypto_async_request *areq, - int err) +static void authenc_esn_geniv_ahash_done(void *data, int err) { - struct aead_request *req = areq->data; + struct aead_request *req = data; err = err ?: crypto_authenc_esn_genicv_tail(req, 0); aead_request_complete(req, err); @@ -153,10 +152,9 @@ static int crypto_authenc_esn_genicv(struct aead_request *req, } -static void crypto_authenc_esn_encrypt_done(struct crypto_async_request *req, - int err) +static void crypto_authenc_esn_encrypt_done(void *data, int err) { - struct aead_request *areq = req->data; + struct aead_request *areq = data; if (!err) err = crypto_authenc_esn_genicv(areq, 0); @@ -258,10 +256,9 @@ decrypt: return crypto_skcipher_decrypt(skreq); } -static void 
authenc_esn_verify_ahash_done(struct crypto_async_request *areq, - int err) +static void authenc_esn_verify_ahash_done(void *data, int err) { - struct aead_request *req = areq->data; + struct aead_request *req = data; err = err ?: crypto_authenc_esn_decrypt_tail(req, 0); authenc_esn_request_complete(req, err); diff --git a/crypto/ccm.c b/crypto/ccm.c index 30dbae72728f..a9453129c51c 100644 --- a/crypto/ccm.c +++ b/crypto/ccm.c @@ -224,9 +224,9 @@ out: return err; } -static void crypto_ccm_encrypt_done(struct crypto_async_request *areq, int err) +static void crypto_ccm_encrypt_done(void *data, int err) { - struct aead_request *req = areq->data; + struct aead_request *req = data; struct crypto_aead *aead = crypto_aead_reqtfm(req); struct crypto_ccm_req_priv_ctx *pctx = crypto_ccm_reqctx(req); u8 *odata = pctx->odata; @@ -320,10 +320,9 @@ static int crypto_ccm_encrypt(struct aead_request *req) return err; } -static void crypto_ccm_decrypt_done(struct crypto_async_request *areq, - int err) +static void crypto_ccm_decrypt_done(void *data, int err) { - struct aead_request *req = areq->data; + struct aead_request *req = data; struct crypto_ccm_req_priv_ctx *pctx = crypto_ccm_reqctx(req); struct crypto_aead *aead = crypto_aead_reqtfm(req); unsigned int authsize = crypto_aead_authsize(aead); diff --git a/crypto/chacha20poly1305.c b/crypto/chacha20poly1305.c index 97bbb135e9a6..3a905c5d8f53 100644 --- a/crypto/chacha20poly1305.c +++ b/crypto/chacha20poly1305.c @@ -115,9 +115,9 @@ static int poly_copy_tag(struct aead_request *req) return 0; } -static void chacha_decrypt_done(struct crypto_async_request *areq, int err) +static void chacha_decrypt_done(void *data, int err) { - async_done_continue(areq->data, err, poly_verify_tag); + async_done_continue(data, err, poly_verify_tag); } static int chacha_decrypt(struct aead_request *req) @@ -161,9 +161,9 @@ static int poly_tail_continue(struct aead_request *req) return chacha_decrypt(req); } -static void poly_tail_done(struct crypto_async_request *areq, int err) +static void poly_tail_done(void *data, int err) { - async_done_continue(areq->data, err, poly_tail_continue); + async_done_continue(data, err, poly_tail_continue); } static int poly_tail(struct aead_request *req) @@ -191,9 +191,9 @@ static int poly_tail(struct aead_request *req) return poly_tail_continue(req); } -static void poly_cipherpad_done(struct crypto_async_request *areq, int err) +static void poly_cipherpad_done(void *data, int err) { - async_done_continue(areq->data, err, poly_tail); + async_done_continue(data, err, poly_tail); } static int poly_cipherpad(struct aead_request *req) @@ -220,9 +220,9 @@ static int poly_cipherpad(struct aead_request *req) return poly_tail(req); } -static void poly_cipher_done(struct crypto_async_request *areq, int err) +static void poly_cipher_done(void *data, int err) { - async_done_continue(areq->data, err, poly_cipherpad); + async_done_continue(data, err, poly_cipherpad); } static int poly_cipher(struct aead_request *req) @@ -250,9 +250,9 @@ static int poly_cipher(struct aead_request *req) return poly_cipherpad(req); } -static void poly_adpad_done(struct crypto_async_request *areq, int err) +static void poly_adpad_done(void *data, int err) { - async_done_continue(areq->data, err, poly_cipher); + async_done_continue(data, err, poly_cipher); } static int poly_adpad(struct aead_request *req) @@ -279,9 +279,9 @@ static int poly_adpad(struct aead_request *req) return poly_cipher(req); } -static void poly_ad_done(struct crypto_async_request *areq, int err) 
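[Aside on the callback conversions in the hunks around here] Every one of these changes has the same shape: the completion callback now receives the opaque data pointer that was registered with *_request_set_callback() directly, instead of pulling it out of crypto_async_request::data. A minimal sketch of a new-style callback, with a hypothetical name that is not in the patch:

	#include <crypto/internal/skcipher.h>

	/* Illustration only: "example_done" does not appear in this series. */
	static void example_done(void *data, int err)
	{
		struct skcipher_request *req = data;	/* was: areq->data */

		if (err == -EINPROGRESS)
			return;		/* backlog notification, request still queued */

		/* ... finish any post-processing for req here ... */
		skcipher_request_complete(req, err);
	}

Handing the data pointer straight to the callback removes one level of indirection and is what allows the "completion function scaffolding" removals elsewhere in this merge.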
+static void poly_ad_done(void *data, int err) { - async_done_continue(areq->data, err, poly_adpad); + async_done_continue(data, err, poly_adpad); } static int poly_ad(struct aead_request *req) @@ -303,9 +303,9 @@ static int poly_ad(struct aead_request *req) return poly_adpad(req); } -static void poly_setkey_done(struct crypto_async_request *areq, int err) +static void poly_setkey_done(void *data, int err) { - async_done_continue(areq->data, err, poly_ad); + async_done_continue(data, err, poly_ad); } static int poly_setkey(struct aead_request *req) @@ -329,9 +329,9 @@ static int poly_setkey(struct aead_request *req) return poly_ad(req); } -static void poly_init_done(struct crypto_async_request *areq, int err) +static void poly_init_done(void *data, int err) { - async_done_continue(areq->data, err, poly_setkey); + async_done_continue(data, err, poly_setkey); } static int poly_init(struct aead_request *req) @@ -352,9 +352,9 @@ static int poly_init(struct aead_request *req) return poly_setkey(req); } -static void poly_genkey_done(struct crypto_async_request *areq, int err) +static void poly_genkey_done(void *data, int err) { - async_done_continue(areq->data, err, poly_init); + async_done_continue(data, err, poly_init); } static int poly_genkey(struct aead_request *req) @@ -391,9 +391,9 @@ static int poly_genkey(struct aead_request *req) return poly_init(req); } -static void chacha_encrypt_done(struct crypto_async_request *areq, int err) +static void chacha_encrypt_done(void *data, int err) { - async_done_continue(areq->data, err, poly_genkey); + async_done_continue(data, err, poly_genkey); } static int chacha_encrypt(struct aead_request *req) diff --git a/crypto/cryptd.c b/crypto/cryptd.c index ca3a40fc7da9..37365ed30b38 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -72,7 +72,6 @@ struct cryptd_skcipher_ctx { }; struct cryptd_skcipher_request_ctx { - crypto_completion_t complete; struct skcipher_request req; }; @@ -83,6 +82,7 @@ struct cryptd_hash_ctx { struct cryptd_hash_request_ctx { crypto_completion_t complete; + void *data; struct shash_desc desc; }; @@ -92,7 +92,7 @@ struct cryptd_aead_ctx { }; struct cryptd_aead_request_ctx { - crypto_completion_t complete; + struct aead_request req; }; static void cryptd_queue_worker(struct work_struct *work); @@ -177,8 +177,8 @@ static void cryptd_queue_worker(struct work_struct *work) return; if (backlog) - backlog->complete(backlog, -EINPROGRESS); - req->complete(req, 0); + crypto_request_complete(backlog, -EINPROGRESS); + crypto_request_complete(req, 0); if (cpu_queue->queue.qlen) queue_work(cryptd_wq, &cpu_queue->work); @@ -237,33 +237,22 @@ static int cryptd_skcipher_setkey(struct crypto_skcipher *parent, return crypto_skcipher_setkey(child, key, keylen); } -static void cryptd_skcipher_complete(struct skcipher_request *req, int err) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); - struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req); - int refcnt = refcount_read(&ctx->refcnt); - - local_bh_disable(); - rctx->complete(&req->base, err); - local_bh_enable(); - - if (err != -EINPROGRESS && refcnt && refcount_dec_and_test(&ctx->refcnt)) - crypto_free_skcipher(tfm); -} - -static void cryptd_skcipher_encrypt(struct crypto_async_request *base, - int err) +static struct skcipher_request *cryptd_skcipher_prepare( + struct skcipher_request *req, int err) { - struct skcipher_request *req = skcipher_request_cast(base); struct cryptd_skcipher_request_ctx 
*rctx = skcipher_request_ctx(req); - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); struct skcipher_request *subreq = &rctx->req; - struct crypto_skcipher *child = ctx->child; + struct cryptd_skcipher_ctx *ctx; + struct crypto_skcipher *child; + + req->base.complete = subreq->base.complete; + req->base.data = subreq->base.data; if (unlikely(err == -EINPROGRESS)) - goto out; + return NULL; + + ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); + child = ctx->child; skcipher_request_set_tfm(subreq, child); skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP, @@ -271,41 +260,53 @@ static void cryptd_skcipher_encrypt(struct crypto_async_request *base, skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen, req->iv); - err = crypto_skcipher_encrypt(subreq); - skcipher_request_zero(subreq); - - req->base.complete = rctx->complete; - -out: - cryptd_skcipher_complete(req, err); + return subreq; } -static void cryptd_skcipher_decrypt(struct crypto_async_request *base, - int err) +static void cryptd_skcipher_complete(struct skcipher_request *req, int err, + crypto_completion_t complete) { - struct skcipher_request *req = skcipher_request_cast(base); struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req); struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); struct skcipher_request *subreq = &rctx->req; - struct crypto_skcipher *child = ctx->child; + int refcnt = refcount_read(&ctx->refcnt); - if (unlikely(err == -EINPROGRESS)) - goto out; + local_bh_disable(); + skcipher_request_complete(req, err); + local_bh_enable(); - skcipher_request_set_tfm(subreq, child); - skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP, - NULL, NULL); - skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen, - req->iv); + if (unlikely(err == -EINPROGRESS)) { + subreq->base.complete = req->base.complete; + subreq->base.data = req->base.data; + req->base.complete = complete; + req->base.data = req; + } else if (refcnt && refcount_dec_and_test(&ctx->refcnt)) + crypto_free_skcipher(tfm); +} - err = crypto_skcipher_decrypt(subreq); - skcipher_request_zero(subreq); +static void cryptd_skcipher_encrypt(void *data, int err) +{ + struct skcipher_request *req = data; + struct skcipher_request *subreq; - req->base.complete = rctx->complete; + subreq = cryptd_skcipher_prepare(req, err); + if (likely(subreq)) + err = crypto_skcipher_encrypt(subreq); -out: - cryptd_skcipher_complete(req, err); + cryptd_skcipher_complete(req, err, cryptd_skcipher_encrypt); +} + +static void cryptd_skcipher_decrypt(void *data, int err) +{ + struct skcipher_request *req = data; + struct skcipher_request *subreq; + + subreq = cryptd_skcipher_prepare(req, err); + if (likely(subreq)) + err = crypto_skcipher_decrypt(subreq); + + cryptd_skcipher_complete(req, err, cryptd_skcipher_decrypt); } static int cryptd_skcipher_enqueue(struct skcipher_request *req, @@ -313,11 +314,14 @@ static int cryptd_skcipher_enqueue(struct skcipher_request *req, { struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req); struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct skcipher_request *subreq = &rctx->req; struct cryptd_queue *queue; queue = cryptd_get_queue(crypto_skcipher_tfm(tfm)); - rctx->complete = req->base.complete; + subreq->base.complete = req->base.complete; + subreq->base.data = req->base.data; req->base.complete = compl; + 
req->base.data = req; return cryptd_enqueue_request(queue, &req->base); } @@ -470,45 +474,63 @@ static int cryptd_hash_enqueue(struct ahash_request *req, cryptd_get_queue(crypto_ahash_tfm(tfm)); rctx->complete = req->base.complete; + rctx->data = req->base.data; req->base.complete = compl; + req->base.data = req; return cryptd_enqueue_request(queue, &req->base); } -static void cryptd_hash_complete(struct ahash_request *req, int err) +static struct shash_desc *cryptd_hash_prepare(struct ahash_request *req, + int err) +{ + struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req); + + req->base.complete = rctx->complete; + req->base.data = rctx->data; + + if (unlikely(err == -EINPROGRESS)) + return NULL; + + return &rctx->desc; +} + +static void cryptd_hash_complete(struct ahash_request *req, int err, + crypto_completion_t complete) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(tfm); - struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req); int refcnt = refcount_read(&ctx->refcnt); local_bh_disable(); - rctx->complete(&req->base, err); + ahash_request_complete(req, err); local_bh_enable(); - if (err != -EINPROGRESS && refcnt && refcount_dec_and_test(&ctx->refcnt)) + if (err == -EINPROGRESS) { + req->base.complete = complete; + req->base.data = req; + } else if (refcnt && refcount_dec_and_test(&ctx->refcnt)) crypto_free_ahash(tfm); } -static void cryptd_hash_init(struct crypto_async_request *req_async, int err) +static void cryptd_hash_init(void *data, int err) { - struct cryptd_hash_ctx *ctx = crypto_tfm_ctx(req_async->tfm); + struct ahash_request *req = data; + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(tfm); struct crypto_shash *child = ctx->child; - struct ahash_request *req = ahash_request_cast(req_async); - struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req); - struct shash_desc *desc = &rctx->desc; + struct shash_desc *desc; - if (unlikely(err == -EINPROGRESS)) + desc = cryptd_hash_prepare(req, err); + if (unlikely(!desc)) goto out; desc->tfm = child; err = crypto_shash_init(desc); - req->base.complete = rctx->complete; - out: - cryptd_hash_complete(req, err); + cryptd_hash_complete(req, err, cryptd_hash_init); } static int cryptd_hash_init_enqueue(struct ahash_request *req) @@ -516,22 +538,16 @@ static int cryptd_hash_init_enqueue(struct ahash_request *req) return cryptd_hash_enqueue(req, cryptd_hash_init); } -static void cryptd_hash_update(struct crypto_async_request *req_async, int err) +static void cryptd_hash_update(void *data, int err) { - struct ahash_request *req = ahash_request_cast(req_async); - struct cryptd_hash_request_ctx *rctx; + struct ahash_request *req = data; + struct shash_desc *desc; - rctx = ahash_request_ctx(req); + desc = cryptd_hash_prepare(req, err); + if (likely(desc)) + err = shash_ahash_update(req, desc); - if (unlikely(err == -EINPROGRESS)) - goto out; - - err = shash_ahash_update(req, &rctx->desc); - - req->base.complete = rctx->complete; - -out: - cryptd_hash_complete(req, err); + cryptd_hash_complete(req, err, cryptd_hash_update); } static int cryptd_hash_update_enqueue(struct ahash_request *req) @@ -539,20 +555,16 @@ static int cryptd_hash_update_enqueue(struct ahash_request *req) return cryptd_hash_enqueue(req, cryptd_hash_update); } -static void cryptd_hash_final(struct crypto_async_request *req_async, int err) +static void cryptd_hash_final(void *data, int err) { - struct ahash_request *req = 
ahash_request_cast(req_async); - struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req); - - if (unlikely(err == -EINPROGRESS)) - goto out; + struct ahash_request *req = data; + struct shash_desc *desc; - err = crypto_shash_final(&rctx->desc, req->result); + desc = cryptd_hash_prepare(req, err); + if (likely(desc)) + err = crypto_shash_final(desc, req->result); - req->base.complete = rctx->complete; - -out: - cryptd_hash_complete(req, err); + cryptd_hash_complete(req, err, cryptd_hash_final); } static int cryptd_hash_final_enqueue(struct ahash_request *req) @@ -560,20 +572,16 @@ static int cryptd_hash_final_enqueue(struct ahash_request *req) return cryptd_hash_enqueue(req, cryptd_hash_final); } -static void cryptd_hash_finup(struct crypto_async_request *req_async, int err) +static void cryptd_hash_finup(void *data, int err) { - struct ahash_request *req = ahash_request_cast(req_async); - struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req); - - if (unlikely(err == -EINPROGRESS)) - goto out; + struct ahash_request *req = data; + struct shash_desc *desc; - err = shash_ahash_finup(req, &rctx->desc); + desc = cryptd_hash_prepare(req, err); + if (likely(desc)) + err = shash_ahash_finup(req, desc); - req->base.complete = rctx->complete; - -out: - cryptd_hash_complete(req, err); + cryptd_hash_complete(req, err, cryptd_hash_finup); } static int cryptd_hash_finup_enqueue(struct ahash_request *req) @@ -581,25 +589,24 @@ static int cryptd_hash_finup_enqueue(struct ahash_request *req) return cryptd_hash_enqueue(req, cryptd_hash_finup); } -static void cryptd_hash_digest(struct crypto_async_request *req_async, int err) +static void cryptd_hash_digest(void *data, int err) { - struct cryptd_hash_ctx *ctx = crypto_tfm_ctx(req_async->tfm); + struct ahash_request *req = data; + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(tfm); struct crypto_shash *child = ctx->child; - struct ahash_request *req = ahash_request_cast(req_async); - struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req); - struct shash_desc *desc = &rctx->desc; + struct shash_desc *desc; - if (unlikely(err == -EINPROGRESS)) + desc = cryptd_hash_prepare(req, err); + if (unlikely(!desc)) goto out; desc->tfm = child; err = shash_ahash_digest(req, desc); - req->base.complete = rctx->complete; - out: - cryptd_hash_complete(req, err); + cryptd_hash_complete(req, err, cryptd_hash_digest); } static int cryptd_hash_digest_enqueue(struct ahash_request *req) @@ -712,56 +719,74 @@ static int cryptd_aead_setauthsize(struct crypto_aead *parent, } static void cryptd_aead_crypt(struct aead_request *req, - struct crypto_aead *child, - int err, - int (*crypt)(struct aead_request *req)) + struct crypto_aead *child, int err, + int (*crypt)(struct aead_request *req), + crypto_completion_t compl) { struct cryptd_aead_request_ctx *rctx; + struct aead_request *subreq; struct cryptd_aead_ctx *ctx; - crypto_completion_t compl; struct crypto_aead *tfm; int refcnt; rctx = aead_request_ctx(req); - compl = rctx->complete; + subreq = &rctx->req; + req->base.complete = subreq->base.complete; + req->base.data = subreq->base.data; tfm = crypto_aead_reqtfm(req); if (unlikely(err == -EINPROGRESS)) goto out; - aead_request_set_tfm(req, child); - err = crypt( req ); + + aead_request_set_tfm(subreq, child); + aead_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP, + NULL, NULL); + aead_request_set_crypt(subreq, req->src, req->dst, req->cryptlen, + req->iv); + aead_request_set_ad(subreq, 
req->assoclen); + + err = crypt(subreq); out: ctx = crypto_aead_ctx(tfm); refcnt = refcount_read(&ctx->refcnt); local_bh_disable(); - compl(&req->base, err); + aead_request_complete(req, err); local_bh_enable(); - if (err != -EINPROGRESS && refcnt && refcount_dec_and_test(&ctx->refcnt)) + if (err == -EINPROGRESS) { + subreq->base.complete = req->base.complete; + subreq->base.data = req->base.data; + req->base.complete = compl; + req->base.data = req; + } else if (refcnt && refcount_dec_and_test(&ctx->refcnt)) crypto_free_aead(tfm); } -static void cryptd_aead_encrypt(struct crypto_async_request *areq, int err) +static void cryptd_aead_encrypt(void *data, int err) { - struct cryptd_aead_ctx *ctx = crypto_tfm_ctx(areq->tfm); - struct crypto_aead *child = ctx->child; - struct aead_request *req; + struct aead_request *req = data; + struct cryptd_aead_ctx *ctx; + struct crypto_aead *child; - req = container_of(areq, struct aead_request, base); - cryptd_aead_crypt(req, child, err, crypto_aead_alg(child)->encrypt); + ctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); + child = ctx->child; + cryptd_aead_crypt(req, child, err, crypto_aead_alg(child)->encrypt, + cryptd_aead_encrypt); } -static void cryptd_aead_decrypt(struct crypto_async_request *areq, int err) +static void cryptd_aead_decrypt(void *data, int err) { - struct cryptd_aead_ctx *ctx = crypto_tfm_ctx(areq->tfm); - struct crypto_aead *child = ctx->child; - struct aead_request *req; + struct aead_request *req = data; + struct cryptd_aead_ctx *ctx; + struct crypto_aead *child; - req = container_of(areq, struct aead_request, base); - cryptd_aead_crypt(req, child, err, crypto_aead_alg(child)->decrypt); + ctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); + child = ctx->child; + cryptd_aead_crypt(req, child, err, crypto_aead_alg(child)->decrypt, + cryptd_aead_decrypt); } static int cryptd_aead_enqueue(struct aead_request *req, @@ -770,9 +795,12 @@ static int cryptd_aead_enqueue(struct aead_request *req, struct cryptd_aead_request_ctx *rctx = aead_request_ctx(req); struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct cryptd_queue *queue = cryptd_get_queue(crypto_aead_tfm(tfm)); + struct aead_request *subreq = &rctx->req; - rctx->complete = req->base.complete; + subreq->base.complete = req->base.complete; + subreq->base.data = req->base.data; req->base.complete = compl; + req->base.data = req; return cryptd_enqueue_request(queue, &req->base); } @@ -800,8 +828,8 @@ static int cryptd_aead_init_tfm(struct crypto_aead *tfm) ctx->child = cipher; crypto_aead_set_reqsize( - tfm, max((unsigned)sizeof(struct cryptd_aead_request_ctx), - crypto_aead_reqsize(cipher))); + tfm, sizeof(struct cryptd_aead_request_ctx) + + crypto_aead_reqsize(cipher)); return 0; } diff --git a/crypto/crypto_engine.c b/crypto/crypto_engine.c index bb8e77077f02..21f791615114 100644 --- a/crypto/crypto_engine.c +++ b/crypto/crypto_engine.c @@ -54,7 +54,7 @@ static void crypto_finalize_request(struct crypto_engine *engine, } } lockdep_assert_in_softirq(); - req->complete(req, err); + crypto_request_complete(req, err); kthread_queue_work(engine->kworker, &engine->pump_requests); } @@ -130,7 +130,7 @@ start_request: engine->cur_req = async_req; if (backlog) - backlog->complete(backlog, -EINPROGRESS); + crypto_request_complete(backlog, -EINPROGRESS); if (engine->busy) was_busy = true; @@ -214,7 +214,7 @@ req_err_1: } req_err_2: - async_req->complete(async_req, ret); + crypto_request_complete(async_req, ret); retry: /* If retry mechanism is supported, send new requests to engine */ @@ 
-499,7 +499,7 @@ EXPORT_SYMBOL_GPL(crypto_engine_stop); * This has the form: * callback(struct crypto_engine *engine) * where: - * @engine: the crypto engine structure. + * engine: the crypto engine structure. * @rt: whether this queue is set to run as a realtime task * @qlen: maximum size of the crypto-engine queue * diff --git a/crypto/cts.c b/crypto/cts.c index 3766d47ebcc0..8f604f6554b1 100644 --- a/crypto/cts.c +++ b/crypto/cts.c @@ -85,9 +85,9 @@ static int crypto_cts_setkey(struct crypto_skcipher *parent, const u8 *key, return crypto_skcipher_setkey(child, key, keylen); } -static void cts_cbc_crypt_done(struct crypto_async_request *areq, int err) +static void cts_cbc_crypt_done(void *data, int err) { - struct skcipher_request *req = areq->data; + struct skcipher_request *req = data; if (err == -EINPROGRESS) return; @@ -125,9 +125,9 @@ static int cts_cbc_encrypt(struct skcipher_request *req) return crypto_skcipher_encrypt(subreq); } -static void crypto_cts_encrypt_done(struct crypto_async_request *areq, int err) +static void crypto_cts_encrypt_done(void *data, int err) { - struct skcipher_request *req = areq->data; + struct skcipher_request *req = data; if (err) goto out; @@ -219,9 +219,9 @@ static int cts_cbc_decrypt(struct skcipher_request *req) return crypto_skcipher_decrypt(subreq); } -static void crypto_cts_decrypt_done(struct crypto_async_request *areq, int err) +static void crypto_cts_decrypt_done(void *data, int err) { - struct skcipher_request *req = areq->data; + struct skcipher_request *req = data; if (err) goto out; diff --git a/crypto/dh.c b/crypto/dh.c index e39c1bde1ac0..0fcad279e6fe 100644 --- a/crypto/dh.c +++ b/crypto/dh.c @@ -503,10 +503,9 @@ out: return err; } -static void dh_safe_prime_complete_req(struct crypto_async_request *dh_req, - int err) +static void dh_safe_prime_complete_req(void *data, int err) { - struct kpp_request *req = dh_req->data; + struct kpp_request *req = data; kpp_request_complete(req, err); } diff --git a/crypto/ecc.c b/crypto/ecc.c index 7315217c8f73..f53fb4d6af99 100644 --- a/crypto/ecc.c +++ b/crypto/ecc.c @@ -1384,7 +1384,8 @@ void ecc_point_mult_shamir(const struct ecc_point *result, num_bits = max(vli_num_bits(u1, ndigits), vli_num_bits(u2, ndigits)); i = num_bits - 1; - idx = (!!vli_test_bit(u1, i)) | ((!!vli_test_bit(u2, i)) << 1); + idx = !!vli_test_bit(u1, i); + idx |= (!!vli_test_bit(u2, i)) << 1; point = points[idx]; vli_set(rx, point->x, ndigits); @@ -1394,7 +1395,8 @@ void ecc_point_mult_shamir(const struct ecc_point *result, for (--i; i >= 0; i--) { ecc_point_double_jacobian(rx, ry, z, curve); - idx = (!!vli_test_bit(u1, i)) | ((!!vli_test_bit(u2, i)) << 1); + idx = !!vli_test_bit(u1, i); + idx |= (!!vli_test_bit(u2, i)) << 1; point = points[idx]; if (point) { u64 tx[ECC_MAX_DIGITS]; diff --git a/crypto/essiv.c b/crypto/essiv.c index e33369df9034..f7d4ef4837e5 100644 --- a/crypto/essiv.c +++ b/crypto/essiv.c @@ -131,9 +131,9 @@ static int essiv_aead_setauthsize(struct crypto_aead *tfm, return crypto_aead_setauthsize(tctx->u.aead, authsize); } -static void essiv_skcipher_done(struct crypto_async_request *areq, int err) +static void essiv_skcipher_done(void *data, int err) { - struct skcipher_request *req = areq->data; + struct skcipher_request *req = data; skcipher_request_complete(req, err); } @@ -166,12 +166,17 @@ static int essiv_skcipher_decrypt(struct skcipher_request *req) return essiv_skcipher_crypt(req, false); } -static void essiv_aead_done(struct crypto_async_request *areq, int err) +static void essiv_aead_done(void 
*data, int err) { - struct aead_request *req = areq->data; + struct aead_request *req = data; struct essiv_aead_request_ctx *rctx = aead_request_ctx(req); + if (err == -EINPROGRESS) + goto out; + kfree(rctx->assoc); + +out: aead_request_complete(req, err); } @@ -247,7 +252,7 @@ static int essiv_aead_crypt(struct aead_request *req, bool enc) err = enc ? crypto_aead_encrypt(subreq) : crypto_aead_decrypt(subreq); - if (rctx->assoc && err != -EINPROGRESS) + if (rctx->assoc && err != -EINPROGRESS && err != -EBUSY) kfree(rctx->assoc); return err; } diff --git a/crypto/gcm.c b/crypto/gcm.c index 338ee0769747..4ba624450c3f 100644 --- a/crypto/gcm.c +++ b/crypto/gcm.c @@ -197,7 +197,7 @@ static inline unsigned int gcm_remain(unsigned int len) return len ? 16 - len : 0; } -static void gcm_hash_len_done(struct crypto_async_request *areq, int err); +static void gcm_hash_len_done(void *data, int err); static int gcm_hash_update(struct aead_request *req, crypto_completion_t compl, @@ -246,9 +246,9 @@ static int gcm_hash_len_continue(struct aead_request *req, u32 flags) return gctx->complete(req, flags); } -static void gcm_hash_len_done(struct crypto_async_request *areq, int err) +static void gcm_hash_len_done(void *data, int err) { - struct aead_request *req = areq->data; + struct aead_request *req = data; if (err) goto out; @@ -267,10 +267,9 @@ static int gcm_hash_crypt_remain_continue(struct aead_request *req, u32 flags) gcm_hash_len_continue(req, flags); } -static void gcm_hash_crypt_remain_done(struct crypto_async_request *areq, - int err) +static void gcm_hash_crypt_remain_done(void *data, int err) { - struct aead_request *req = areq->data; + struct aead_request *req = data; if (err) goto out; @@ -298,9 +297,9 @@ static int gcm_hash_crypt_continue(struct aead_request *req, u32 flags) return gcm_hash_crypt_remain_continue(req, flags); } -static void gcm_hash_crypt_done(struct crypto_async_request *areq, int err) +static void gcm_hash_crypt_done(void *data, int err) { - struct aead_request *req = areq->data; + struct aead_request *req = data; if (err) goto out; @@ -326,10 +325,9 @@ static int gcm_hash_assoc_remain_continue(struct aead_request *req, u32 flags) return gcm_hash_crypt_remain_continue(req, flags); } -static void gcm_hash_assoc_remain_done(struct crypto_async_request *areq, - int err) +static void gcm_hash_assoc_remain_done(void *data, int err) { - struct aead_request *req = areq->data; + struct aead_request *req = data; if (err) goto out; @@ -355,9 +353,9 @@ static int gcm_hash_assoc_continue(struct aead_request *req, u32 flags) return gcm_hash_assoc_remain_continue(req, flags); } -static void gcm_hash_assoc_done(struct crypto_async_request *areq, int err) +static void gcm_hash_assoc_done(void *data, int err) { - struct aead_request *req = areq->data; + struct aead_request *req = data; if (err) goto out; @@ -380,9 +378,9 @@ static int gcm_hash_init_continue(struct aead_request *req, u32 flags) return gcm_hash_assoc_remain_continue(req, flags); } -static void gcm_hash_init_done(struct crypto_async_request *areq, int err) +static void gcm_hash_init_done(void *data, int err) { - struct aead_request *req = areq->data; + struct aead_request *req = data; if (err) goto out; @@ -433,9 +431,9 @@ static int gcm_encrypt_continue(struct aead_request *req, u32 flags) return gcm_hash(req, flags); } -static void gcm_encrypt_done(struct crypto_async_request *areq, int err) +static void gcm_encrypt_done(void *data, int err) { - struct aead_request *req = areq->data; + struct aead_request *req = data; if 
(err) goto out; @@ -477,9 +475,9 @@ static int crypto_gcm_verify(struct aead_request *req) return crypto_memneq(iauth_tag, auth_tag, authsize) ? -EBADMSG : 0; } -static void gcm_decrypt_done(struct crypto_async_request *areq, int err) +static void gcm_decrypt_done(void *data, int err) { - struct aead_request *req = areq->data; + struct aead_request *req = data; if (!err) err = crypto_gcm_verify(req); diff --git a/crypto/hctr2.c b/crypto/hctr2.c index 7d00a3bcb667..6f4c1884d0e9 100644 --- a/crypto/hctr2.c +++ b/crypto/hctr2.c @@ -252,10 +252,9 @@ static int hctr2_finish(struct skcipher_request *req) return 0; } -static void hctr2_xctr_done(struct crypto_async_request *areq, - int err) +static void hctr2_xctr_done(void *data, int err) { - struct skcipher_request *req = areq->data; + struct skcipher_request *req = data; if (!err) err = hctr2_finish(req); diff --git a/crypto/lrw.c b/crypto/lrw.c index 8d59a66b6525..1b0f76ba3eb5 100644 --- a/crypto/lrw.c +++ b/crypto/lrw.c @@ -205,9 +205,9 @@ static int lrw_xor_tweak_post(struct skcipher_request *req) return lrw_xor_tweak(req, true); } -static void lrw_crypt_done(struct crypto_async_request *areq, int err) +static void lrw_crypt_done(void *data, int err) { - struct skcipher_request *req = areq->data; + struct skcipher_request *req = data; if (!err) { struct lrw_request_ctx *rctx = skcipher_request_ctx(req); diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c index 9d10b846ccf7..8c1d0ca41213 100644 --- a/crypto/pcrypt.c +++ b/crypto/pcrypt.c @@ -63,9 +63,9 @@ static void pcrypt_aead_serial(struct padata_priv *padata) aead_request_complete(req->base.data, padata->info); } -static void pcrypt_aead_done(struct crypto_async_request *areq, int err) +static void pcrypt_aead_done(void *data, int err) { - struct aead_request *req = areq->data; + struct aead_request *req = data; struct pcrypt_request *preq = aead_request_ctx(req); struct padata_priv *padata = pcrypt_request_padata(preq); diff --git a/crypto/proc.c b/crypto/proc.c index 12fccb9c5205..56c7c78df297 100644 --- a/crypto/proc.c +++ b/crypto/proc.c @@ -11,6 +11,7 @@ #include <linux/atomic.h> #include <linux/init.h> #include <linux/crypto.h> +#include <linux/fips.h> #include <linux/module.h> /* for module_name() */ #include <linux/rwsem.h> #include <linux/proc_fs.h> @@ -48,6 +49,11 @@ static int c_show(struct seq_file *m, void *p) seq_printf(m, "internal : %s\n", (alg->cra_flags & CRYPTO_ALG_INTERNAL) ? "yes" : "no"); + if (fips_enabled) { + seq_printf(m, "fips : %s\n", + (alg->cra_flags & CRYPTO_ALG_FIPS_INTERNAL) ? 
+ "no" : "yes"); + } if (alg->cra_flags & CRYPTO_ALG_LARVAL) { seq_printf(m, "type : larval\n"); diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c index 6ee5b8a060c0..d2e5e104f8cf 100644 --- a/crypto/rsa-pkcs1pad.c +++ b/crypto/rsa-pkcs1pad.c @@ -190,7 +190,7 @@ static int pkcs1pad_encrypt_sign_complete(struct akcipher_request *req, int err) if (likely(!pad_len)) goto out; - out_buf = kzalloc(ctx->key_size, GFP_KERNEL); + out_buf = kzalloc(ctx->key_size, GFP_ATOMIC); err = -ENOMEM; if (!out_buf) goto out; @@ -210,20 +210,17 @@ out: return err; } -static void pkcs1pad_encrypt_sign_complete_cb( - struct crypto_async_request *child_async_req, int err) +static void pkcs1pad_encrypt_sign_complete_cb(void *data, int err) { - struct akcipher_request *req = child_async_req->data; - struct crypto_async_request async_req; + struct akcipher_request *req = data; if (err == -EINPROGRESS) - return; + goto out; + + err = pkcs1pad_encrypt_sign_complete(req, err); - async_req.data = req->base.data; - async_req.tfm = crypto_akcipher_tfm(crypto_akcipher_reqtfm(req)); - async_req.flags = child_async_req->flags; - req->base.complete(&async_req, - pkcs1pad_encrypt_sign_complete(req, err)); +out: + akcipher_request_complete(req, err); } static int pkcs1pad_encrypt(struct akcipher_request *req) @@ -328,19 +325,17 @@ done: return err; } -static void pkcs1pad_decrypt_complete_cb( - struct crypto_async_request *child_async_req, int err) +static void pkcs1pad_decrypt_complete_cb(void *data, int err) { - struct akcipher_request *req = child_async_req->data; - struct crypto_async_request async_req; + struct akcipher_request *req = data; if (err == -EINPROGRESS) - return; + goto out; + + err = pkcs1pad_decrypt_complete(req, err); - async_req.data = req->base.data; - async_req.tfm = crypto_akcipher_tfm(crypto_akcipher_reqtfm(req)); - async_req.flags = child_async_req->flags; - req->base.complete(&async_req, pkcs1pad_decrypt_complete(req, err)); +out: + akcipher_request_complete(req, err); } static int pkcs1pad_decrypt(struct akcipher_request *req) @@ -509,19 +504,17 @@ done: return err; } -static void pkcs1pad_verify_complete_cb( - struct crypto_async_request *child_async_req, int err) +static void pkcs1pad_verify_complete_cb(void *data, int err) { - struct akcipher_request *req = child_async_req->data; - struct crypto_async_request async_req; + struct akcipher_request *req = data; if (err == -EINPROGRESS) - return; + goto out; - async_req.data = req->base.data; - async_req.tfm = crypto_akcipher_tfm(crypto_akcipher_reqtfm(req)); - async_req.flags = child_async_req->flags; - req->base.complete(&async_req, pkcs1pad_verify_complete(req, err)); + err = pkcs1pad_verify_complete(req, err); + +out: + akcipher_request_complete(req, err); } /* diff --git a/crypto/seqiv.c b/crypto/seqiv.c index 0899d527c284..17e11d51ddc3 100644 --- a/crypto/seqiv.c +++ b/crypto/seqiv.c @@ -23,7 +23,7 @@ static void seqiv_aead_encrypt_complete2(struct aead_request *req, int err) struct aead_request *subreq = aead_request_ctx(req); struct crypto_aead *geniv; - if (err == -EINPROGRESS) + if (err == -EINPROGRESS || err == -EBUSY) return; if (err) @@ -36,10 +36,9 @@ out: kfree_sensitive(subreq->iv); } -static void seqiv_aead_encrypt_complete(struct crypto_async_request *base, - int err) +static void seqiv_aead_encrypt_complete(void *data, int err) { - struct aead_request *req = base->data; + struct aead_request *req = data; seqiv_aead_encrypt_complete2(req, err); aead_request_complete(req, err); diff --git a/crypto/shash.c b/crypto/shash.c 
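The essiv hunks above, the seqiv hunk just before this point, and the xts hunks further down all add -EBUSY next to -EINPROGRESS. A request submitted with CRYPTO_TFM_REQ_MAY_BACKLOG can return -EBUSY and still complete asynchronously later, and the callback is first invoked with -EINPROGRESS when the backlogged request starts running; in both cases per-request state has to survive until the final completion. A condensed sketch of that ownership rule, modelled on the essiv_aead_done()/essiv_aead_crypt() change above, with hypothetical example_* names:

#include <crypto/internal/aead.h>
#include <linux/slab.h>

struct example_reqctx {
        u8 *assoc;                      /* per-request allocation */
        struct aead_request subreq;     /* assumed initialised elsewhere with
                                           example_done as its callback */
};

static void example_done(void *data, int err)
{
        struct aead_request *req = data;
        struct example_reqctx *rctx = aead_request_ctx(req);

        if (err == -EINPROGRESS)        /* backlog notification, not final */
                goto out;

        kfree(rctx->assoc);             /* final completion owns the cleanup */
out:
        aead_request_complete(req, err);
}

static int example_crypt(struct aead_request *req)
{
        struct example_reqctx *rctx = aead_request_ctx(req);
        int err = crypto_aead_encrypt(&rctx->subreq);

        /* only a synchronous return may free here */
        if (err != -EINPROGRESS && err != -EBUSY)
                kfree(rctx->assoc);
        return err;
}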
index 868b6ba2b3b7..58b46f198449 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -320,10 +320,10 @@ int shash_ahash_digest(struct ahash_request *req, struct shash_desc *desc) nbytes <= min(sg->length, ((unsigned int)(PAGE_SIZE)) - offset))) { void *data; - data = kmap_atomic(sg_page(sg)); + data = kmap_local_page(sg_page(sg)); err = crypto_shash_digest(desc, data + offset, nbytes, req->result); - kunmap_atomic(data); + kunmap_local(data); } else err = crypto_shash_init(desc) ?: shash_ahash_finup(req, desc); diff --git a/crypto/skcipher.c b/crypto/skcipher.c index 0ecab31cfe79..7bf4871fec80 100644 --- a/crypto/skcipher.c +++ b/crypto/skcipher.c @@ -42,38 +42,24 @@ struct skcipher_walk_buffer { static int skcipher_walk_next(struct skcipher_walk *walk); -static inline void skcipher_unmap(struct scatter_walk *walk, void *vaddr) -{ - if (PageHighMem(scatterwalk_page(walk))) - kunmap_atomic(vaddr); -} - -static inline void *skcipher_map(struct scatter_walk *walk) -{ - struct page *page = scatterwalk_page(walk); - - return (PageHighMem(page) ? kmap_atomic(page) : page_address(page)) + - offset_in_page(walk->offset); -} - static inline void skcipher_map_src(struct skcipher_walk *walk) { - walk->src.virt.addr = skcipher_map(&walk->in); + walk->src.virt.addr = scatterwalk_map(&walk->in); } static inline void skcipher_map_dst(struct skcipher_walk *walk) { - walk->dst.virt.addr = skcipher_map(&walk->out); + walk->dst.virt.addr = scatterwalk_map(&walk->out); } static inline void skcipher_unmap_src(struct skcipher_walk *walk) { - skcipher_unmap(&walk->in, walk->src.virt.addr); + scatterwalk_unmap(walk->src.virt.addr); } static inline void skcipher_unmap_dst(struct skcipher_walk *walk) { - skcipher_unmap(&walk->out, walk->dst.virt.addr); + scatterwalk_unmap(walk->dst.virt.addr); } static inline gfp_t skcipher_walk_gfp(struct skcipher_walk *walk) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index a0833654ce94..6521feec7756 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -2044,11 +2044,11 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) case 211: test_aead_speed("rfc4106(gcm(aes))", ENCRYPT, sec, - NULL, 0, 16, 16, aead_speed_template_20); + NULL, 0, 16, 16, aead_speed_template_20_28_36); test_aead_speed("gcm(aes)", ENCRYPT, sec, NULL, 0, 16, 8, speed_template_16_24_32); test_aead_speed("rfc4106(gcm(aes))", DECRYPT, sec, - NULL, 0, 16, 16, aead_speed_template_20); + NULL, 0, 16, 16, aead_speed_template_20_28_36); test_aead_speed("gcm(aes)", DECRYPT, sec, NULL, 0, 16, 8, speed_template_16_24_32); break; @@ -2074,11 +2074,11 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) case 215: test_mb_aead_speed("rfc4106(gcm(aes))", ENCRYPT, sec, NULL, - 0, 16, 16, aead_speed_template_20, num_mb); + 0, 16, 16, aead_speed_template_20_28_36, num_mb); test_mb_aead_speed("gcm(aes)", ENCRYPT, sec, NULL, 0, 16, 8, speed_template_16_24_32, num_mb); test_mb_aead_speed("rfc4106(gcm(aes))", DECRYPT, sec, NULL, - 0, 16, 16, aead_speed_template_20, num_mb); + 0, 16, 16, aead_speed_template_20_28_36, num_mb); test_mb_aead_speed("gcm(aes)", DECRYPT, sec, NULL, 0, 16, 8, speed_template_16_24_32, num_mb); break; diff --git a/crypto/tcrypt.h b/crypto/tcrypt.h index 9f654677172a..96c843a24607 100644 --- a/crypto/tcrypt.h +++ b/crypto/tcrypt.h @@ -62,7 +62,7 @@ static u8 speed_template_32[] = {32, 0}; * AEAD speed tests */ static u8 aead_speed_template_19[] = {19, 0}; -static u8 aead_speed_template_20[] = {20, 0}; +static u8 aead_speed_template_20_28_36[] = {20, 28, 
36, 0}; static u8 aead_speed_template_36[] = {36, 0}; /* diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 4476ac97baa5..c91e93ece20b 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -357,6 +357,14 @@ static const struct testvec_config default_cipher_testvec_configs[] = { { .proportion_of_total = 5000 }, }, }, { + .name = "one src, two even splits dst", + .inplace_mode = OUT_OF_PLACE, + .src_divs = { { .proportion_of_total = 10000 } }, + .dst_divs = { + { .proportion_of_total = 5000 }, + { .proportion_of_total = 5000 }, + }, + }, { .name = "uneven misaligned splits, may sleep", .req_flags = CRYPTO_TFM_REQ_MAY_SLEEP, .src_divs = { @@ -4501,7 +4509,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { #endif .alg = "cbcmac(aes)", - .fips_allowed = 1, .test = alg_test_hash, .suite = { .hash = __VECS(aes_cbcmac_tv_template) @@ -4782,7 +4789,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { /* covered by drbg_nopr_hmac_sha256 test */ .alg = "drbg_nopr_hmac_sha384", - .fips_allowed = 1, .test = alg_test_null, }, { .alg = "drbg_nopr_hmac_sha512", @@ -4805,7 +4811,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { /* covered by drbg_nopr_sha256 test */ .alg = "drbg_nopr_sha384", - .fips_allowed = 1, .test = alg_test_null, }, { .alg = "drbg_nopr_sha512", @@ -4841,7 +4846,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { /* covered by drbg_pr_hmac_sha256 test */ .alg = "drbg_pr_hmac_sha384", - .fips_allowed = 1, .test = alg_test_null, }, { .alg = "drbg_pr_hmac_sha512", @@ -4861,7 +4865,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { /* covered by drbg_pr_sha256 test */ .alg = "drbg_pr_sha384", - .fips_allowed = 1, .test = alg_test_null, }, { .alg = "drbg_pr_sha512", @@ -5035,12 +5038,14 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "ecdsa-nist-p256", .test = alg_test_akcipher, + .fips_allowed = 1, .suite = { .akcipher = __VECS(ecdsa_nist_p256_tv_template) } }, { .alg = "ecdsa-nist-p384", .test = alg_test_akcipher, + .fips_allowed = 1, .suite = { .akcipher = __VECS(ecdsa_nist_p384_tv_template) } @@ -5126,7 +5131,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "ghash", .test = alg_test_hash, - .fips_allowed = 1, .suite = { .hash = __VECS(ghash_tv_template) } diff --git a/crypto/wp512.c b/crypto/wp512.c index 5e820afa3c78..07994e5ebf4e 100644 --- a/crypto/wp512.c +++ b/crypto/wp512.c @@ -779,7 +779,7 @@ static const u64 rc[WHIRLPOOL_ROUNDS] = { * The core Whirlpool transform. 
*/ -static void wp512_process_buffer(struct wp512_ctx *wctx) { +static __no_kmsan_checks void wp512_process_buffer(struct wp512_ctx *wctx) { int i, r; u64 K[8]; /* the round key */ u64 block[8]; /* mu(buffer) */ diff --git a/crypto/xts.c b/crypto/xts.c index 63c85b9e64e0..09be909a6a1a 100644 --- a/crypto/xts.c +++ b/crypto/xts.c @@ -140,9 +140,9 @@ static int xts_xor_tweak_post(struct skcipher_request *req, bool enc) return xts_xor_tweak(req, true, enc); } -static void xts_cts_done(struct crypto_async_request *areq, int err) +static void xts_cts_done(void *data, int err) { - struct skcipher_request *req = areq->data; + struct skcipher_request *req = data; le128 b; if (!err) { @@ -196,19 +196,19 @@ static int xts_cts_final(struct skcipher_request *req, return 0; } -static void xts_encrypt_done(struct crypto_async_request *areq, int err) +static void xts_encrypt_done(void *data, int err) { - struct skcipher_request *req = areq->data; + struct skcipher_request *req = data; if (!err) { struct xts_request_ctx *rctx = skcipher_request_ctx(req); - rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + rctx->subreq.base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG; err = xts_xor_tweak_post(req, true); if (!err && unlikely(req->cryptlen % XTS_BLOCK_SIZE)) { err = xts_cts_final(req, crypto_skcipher_encrypt); - if (err == -EINPROGRESS) + if (err == -EINPROGRESS || err == -EBUSY) return; } } @@ -216,19 +216,19 @@ static void xts_encrypt_done(struct crypto_async_request *areq, int err) skcipher_request_complete(req, err); } -static void xts_decrypt_done(struct crypto_async_request *areq, int err) +static void xts_decrypt_done(void *data, int err) { - struct skcipher_request *req = areq->data; + struct skcipher_request *req = data; if (!err) { struct xts_request_ctx *rctx = skcipher_request_ctx(req); - rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + rctx->subreq.base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG; err = xts_xor_tweak_post(req, false); if (!err && unlikely(req->cryptlen % XTS_BLOCK_SIZE)) { err = xts_cts_final(req, crypto_skcipher_decrypt); - if (err == -EINPROGRESS) + if (err == -EINPROGRESS || err == -EBUSY) return; } } diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig index 3da8e85f8aae..4fdf07ae3c54 100644 --- a/drivers/char/hw_random/Kconfig +++ b/drivers/char/hw_random/Kconfig @@ -549,6 +549,16 @@ config HW_RANDOM_CN10K To compile this driver as a module, choose M here. The module will be called cn10k_rng. If unsure, say Y. +config HW_RANDOM_JH7110 + tristate "StarFive JH7110 Random Number Generator support" + depends on SOC_STARFIVE || COMPILE_TEST + help + This driver provides support for the True Random Number + Generator in StarFive JH7110 SoCs. + + To compile this driver as a module, choose M here. + The module will be called jh7110-trng. 
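As the Kconfig help text above notes, the driver plugs into the hw_random core, so once it is the active provider the usual interfaces apply (the kernel entropy pool, /sys/class/misc/hw_random/rng_current, and /dev/hwrng). An illustrative userspace smoke test, not part of the patch:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        unsigned char buf[16];
        ssize_t n;
        int fd = open("/dev/hwrng", O_RDONLY);

        if (fd < 0)
                return 1;
        n = read(fd, buf, sizeof(buf));
        close(fd);
        if (n <= 0)
                return 1;
        for (ssize_t i = 0; i < n; i++)
                printf("%02x", buf[i]);
        putchar('\n');
        return 0;
}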
+ endif # HW_RANDOM config UML_RANDOM diff --git a/drivers/char/hw_random/Makefile b/drivers/char/hw_random/Makefile index 3e948cf04476..09bde4a0f971 100644 --- a/drivers/char/hw_random/Makefile +++ b/drivers/char/hw_random/Makefile @@ -47,3 +47,4 @@ obj-$(CONFIG_HW_RANDOM_XIPHERA) += xiphera-trng.o obj-$(CONFIG_HW_RANDOM_ARM_SMCCC_TRNG) += arm_smccc_trng.o obj-$(CONFIG_HW_RANDOM_CN10K) += cn10k-rng.o obj-$(CONFIG_HW_RANDOM_POLARFIRE_SOC) += mpfs-rng.o +obj-$(CONFIG_HW_RANDOM_JH7110) += jh7110-trng.o diff --git a/drivers/char/hw_random/jh7110-trng.c b/drivers/char/hw_random/jh7110-trng.c new file mode 100644 index 000000000000..38474d48a25e --- /dev/null +++ b/drivers/char/hw_random/jh7110-trng.c @@ -0,0 +1,393 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * TRNG driver for the StarFive JH7110 SoC + * + * Copyright (C) 2022 StarFive Technology Co. + */ + +#include <linux/clk.h> +#include <linux/completion.h> +#include <linux/delay.h> +#include <linux/err.h> +#include <linux/hw_random.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/iopoll.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> +#include <linux/random.h> +#include <linux/reset.h> + +/* trng register offset */ +#define STARFIVE_CTRL 0x00 +#define STARFIVE_STAT 0x04 +#define STARFIVE_MODE 0x08 +#define STARFIVE_SMODE 0x0C +#define STARFIVE_IE 0x10 +#define STARFIVE_ISTAT 0x14 +#define STARFIVE_RAND0 0x20 +#define STARFIVE_RAND1 0x24 +#define STARFIVE_RAND2 0x28 +#define STARFIVE_RAND3 0x2C +#define STARFIVE_RAND4 0x30 +#define STARFIVE_RAND5 0x34 +#define STARFIVE_RAND6 0x38 +#define STARFIVE_RAND7 0x3C +#define STARFIVE_AUTO_RQSTS 0x60 +#define STARFIVE_AUTO_AGE 0x64 + +/* CTRL CMD */ +#define STARFIVE_CTRL_EXEC_NOP 0x0 +#define STARFIVE_CTRL_GENE_RANDNUM 0x1 +#define STARFIVE_CTRL_EXEC_RANDRESEED 0x2 + +/* STAT */ +#define STARFIVE_STAT_NONCE_MODE BIT(2) +#define STARFIVE_STAT_R256 BIT(3) +#define STARFIVE_STAT_MISSION_MODE BIT(8) +#define STARFIVE_STAT_SEEDED BIT(9) +#define STARFIVE_STAT_LAST_RESEED(x) ((x) << 16) +#define STARFIVE_STAT_SRVC_RQST BIT(27) +#define STARFIVE_STAT_RAND_GENERATING BIT(30) +#define STARFIVE_STAT_RAND_SEEDING BIT(31) + +/* MODE */ +#define STARFIVE_MODE_R256 BIT(3) + +/* SMODE */ +#define STARFIVE_SMODE_NONCE_MODE BIT(2) +#define STARFIVE_SMODE_MISSION_MODE BIT(8) +#define STARFIVE_SMODE_MAX_REJECTS(x) ((x) << 16) + +/* IE */ +#define STARFIVE_IE_RAND_RDY_EN BIT(0) +#define STARFIVE_IE_SEED_DONE_EN BIT(1) +#define STARFIVE_IE_LFSR_LOCKUP_EN BIT(4) +#define STARFIVE_IE_GLBL_EN BIT(31) + +#define STARFIVE_IE_ALL (STARFIVE_IE_GLBL_EN | \ + STARFIVE_IE_RAND_RDY_EN | \ + STARFIVE_IE_SEED_DONE_EN | \ + STARFIVE_IE_LFSR_LOCKUP_EN) + +/* ISTAT */ +#define STARFIVE_ISTAT_RAND_RDY BIT(0) +#define STARFIVE_ISTAT_SEED_DONE BIT(1) +#define STARFIVE_ISTAT_LFSR_LOCKUP BIT(4) + +#define STARFIVE_RAND_LEN sizeof(u32) + +#define to_trng(p) container_of(p, struct starfive_trng, rng) + +enum reseed { + RANDOM_RESEED, + NONCE_RESEED, +}; + +enum mode { + PRNG_128BIT, + PRNG_256BIT, +}; + +struct starfive_trng { + struct device *dev; + void __iomem *base; + struct clk *hclk; + struct clk *ahb; + struct reset_control *rst; + struct hwrng rng; + struct completion random_done; + struct completion reseed_done; + u32 mode; + u32 mission; + u32 reseed; + /* protects against concurrent write to ctrl register */ + spinlock_t write_lock; +}; + +static u16 autoreq; +module_param(autoreq, ushort, 0); 
+MODULE_PARM_DESC(autoreq, "Auto-reseeding after random number requests by host reaches specified counter:\n" + " 0 - disable counter\n" + " other - reload value for internal counter"); + +static u16 autoage; +module_param(autoage, ushort, 0); +MODULE_PARM_DESC(autoage, "Auto-reseeding after specified timer countdowns to 0:\n" + " 0 - disable timer\n" + " other - reload value for internal timer"); + +static inline int starfive_trng_wait_idle(struct starfive_trng *trng) +{ + u32 stat; + + return readl_relaxed_poll_timeout(trng->base + STARFIVE_STAT, stat, + !(stat & (STARFIVE_STAT_RAND_GENERATING | + STARFIVE_STAT_RAND_SEEDING)), + 10, 100000); +} + +static inline void starfive_trng_irq_mask_clear(struct starfive_trng *trng) +{ + /* clear register: ISTAT */ + u32 data = readl(trng->base + STARFIVE_ISTAT); + + writel(data, trng->base + STARFIVE_ISTAT); +} + +static int starfive_trng_cmd(struct starfive_trng *trng, u32 cmd, bool wait) +{ + int wait_time = 1000; + + /* allow up to 40 us for wait == 0 */ + if (!wait) + wait_time = 40; + + switch (cmd) { + case STARFIVE_CTRL_GENE_RANDNUM: + reinit_completion(&trng->random_done); + spin_lock_irq(&trng->write_lock); + writel(cmd, trng->base + STARFIVE_CTRL); + spin_unlock_irq(&trng->write_lock); + if (!wait_for_completion_timeout(&trng->random_done, usecs_to_jiffies(wait_time))) + return -ETIMEDOUT; + break; + case STARFIVE_CTRL_EXEC_RANDRESEED: + reinit_completion(&trng->reseed_done); + spin_lock_irq(&trng->write_lock); + writel(cmd, trng->base + STARFIVE_CTRL); + spin_unlock_irq(&trng->write_lock); + if (!wait_for_completion_timeout(&trng->reseed_done, usecs_to_jiffies(wait_time))) + return -ETIMEDOUT; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int starfive_trng_init(struct hwrng *rng) +{ + struct starfive_trng *trng = to_trng(rng); + u32 mode, intr = 0; + + /* setup Auto Request/Age register */ + writel(autoage, trng->base + STARFIVE_AUTO_AGE); + writel(autoreq, trng->base + STARFIVE_AUTO_RQSTS); + + /* clear register: ISTAT */ + starfive_trng_irq_mask_clear(trng); + + intr |= STARFIVE_IE_ALL; + writel(intr, trng->base + STARFIVE_IE); + + mode = readl(trng->base + STARFIVE_MODE); + + switch (trng->mode) { + case PRNG_128BIT: + mode &= ~STARFIVE_MODE_R256; + break; + case PRNG_256BIT: + mode |= STARFIVE_MODE_R256; + break; + default: + mode |= STARFIVE_MODE_R256; + break; + } + + writel(mode, trng->base + STARFIVE_MODE); + + return starfive_trng_cmd(trng, STARFIVE_CTRL_EXEC_RANDRESEED, 1); +} + +static irqreturn_t starfive_trng_irq(int irq, void *priv) +{ + u32 status; + struct starfive_trng *trng = (struct starfive_trng *)priv; + + status = readl(trng->base + STARFIVE_ISTAT); + if (status & STARFIVE_ISTAT_RAND_RDY) { + writel(STARFIVE_ISTAT_RAND_RDY, trng->base + STARFIVE_ISTAT); + complete(&trng->random_done); + } + + if (status & STARFIVE_ISTAT_SEED_DONE) { + writel(STARFIVE_ISTAT_SEED_DONE, trng->base + STARFIVE_ISTAT); + complete(&trng->reseed_done); + } + + if (status & STARFIVE_ISTAT_LFSR_LOCKUP) { + writel(STARFIVE_ISTAT_LFSR_LOCKUP, trng->base + STARFIVE_ISTAT); + /* SEU occurred, reseeding required*/ + spin_lock(&trng->write_lock); + writel(STARFIVE_CTRL_EXEC_RANDRESEED, trng->base + STARFIVE_CTRL); + spin_unlock(&trng->write_lock); + } + + return IRQ_HANDLED; +} + +static void starfive_trng_cleanup(struct hwrng *rng) +{ + struct starfive_trng *trng = to_trng(rng); + + writel(0, trng->base + STARFIVE_CTRL); + + reset_control_assert(trng->rst); + clk_disable_unprepare(trng->hclk); + 
clk_disable_unprepare(trng->ahb); +} + +static int starfive_trng_read(struct hwrng *rng, void *buf, size_t max, bool wait) +{ + struct starfive_trng *trng = to_trng(rng); + int ret; + + pm_runtime_get_sync(trng->dev); + + if (trng->mode == PRNG_256BIT) + max = min_t(size_t, max, (STARFIVE_RAND_LEN * 8)); + else + max = min_t(size_t, max, (STARFIVE_RAND_LEN * 4)); + + if (wait) { + ret = starfive_trng_wait_idle(trng); + if (ret) + return -ETIMEDOUT; + } + + ret = starfive_trng_cmd(trng, STARFIVE_CTRL_GENE_RANDNUM, wait); + if (ret) + return ret; + + memcpy_fromio(buf, trng->base + STARFIVE_RAND0, max); + + pm_runtime_put_sync_autosuspend(trng->dev); + + return max; +} + +static int starfive_trng_probe(struct platform_device *pdev) +{ + int ret; + int irq; + struct starfive_trng *trng; + + trng = devm_kzalloc(&pdev->dev, sizeof(*trng), GFP_KERNEL); + if (!trng) + return -ENOMEM; + + platform_set_drvdata(pdev, trng); + trng->dev = &pdev->dev; + + trng->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(trng->base)) + return dev_err_probe(&pdev->dev, PTR_ERR(trng->base), + "Error remapping memory for platform device.\n"); + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + init_completion(&trng->random_done); + init_completion(&trng->reseed_done); + spin_lock_init(&trng->write_lock); + + ret = devm_request_irq(&pdev->dev, irq, starfive_trng_irq, 0, pdev->name, + (void *)trng); + if (ret) + return dev_err_probe(&pdev->dev, irq, + "Failed to register interrupt handler\n"); + + trng->hclk = devm_clk_get(&pdev->dev, "hclk"); + if (IS_ERR(trng->hclk)) + return dev_err_probe(&pdev->dev, PTR_ERR(trng->hclk), + "Error getting hardware reference clock\n"); + + trng->ahb = devm_clk_get(&pdev->dev, "ahb"); + if (IS_ERR(trng->ahb)) + return dev_err_probe(&pdev->dev, PTR_ERR(trng->ahb), + "Error getting ahb reference clock\n"); + + trng->rst = devm_reset_control_get_shared(&pdev->dev, NULL); + if (IS_ERR(trng->rst)) + return dev_err_probe(&pdev->dev, PTR_ERR(trng->rst), + "Error getting hardware reset line\n"); + + clk_prepare_enable(trng->hclk); + clk_prepare_enable(trng->ahb); + reset_control_deassert(trng->rst); + + trng->rng.name = dev_driver_string(&pdev->dev); + trng->rng.init = starfive_trng_init; + trng->rng.cleanup = starfive_trng_cleanup; + trng->rng.read = starfive_trng_read; + + trng->mode = PRNG_256BIT; + trng->mission = 1; + trng->reseed = RANDOM_RESEED; + + pm_runtime_use_autosuspend(&pdev->dev); + pm_runtime_set_autosuspend_delay(&pdev->dev, 100); + pm_runtime_enable(&pdev->dev); + + ret = devm_hwrng_register(&pdev->dev, &trng->rng); + if (ret) { + pm_runtime_disable(&pdev->dev); + + reset_control_assert(trng->rst); + clk_disable_unprepare(trng->ahb); + clk_disable_unprepare(trng->hclk); + + return dev_err_probe(&pdev->dev, ret, "Failed to register hwrng\n"); + } + + return 0; +} + +static int __maybe_unused starfive_trng_suspend(struct device *dev) +{ + struct starfive_trng *trng = dev_get_drvdata(dev); + + clk_disable_unprepare(trng->hclk); + clk_disable_unprepare(trng->ahb); + + return 0; +} + +static int __maybe_unused starfive_trng_resume(struct device *dev) +{ + struct starfive_trng *trng = dev_get_drvdata(dev); + + clk_prepare_enable(trng->hclk); + clk_prepare_enable(trng->ahb); + + return 0; +} + +static DEFINE_SIMPLE_DEV_PM_OPS(starfive_trng_pm_ops, starfive_trng_suspend, + starfive_trng_resume); + +static const struct of_device_id trng_dt_ids[] __maybe_unused = { + { .compatible = "starfive,jh7110-trng" }, + { } +}; +MODULE_DEVICE_TABLE(of, 
trng_dt_ids); + +static struct platform_driver starfive_trng_driver = { + .probe = starfive_trng_probe, + .driver = { + .name = "jh7110-trng", + .pm = &starfive_trng_pm_ops, + .of_match_table = of_match_ptr(trng_dt_ids), + }, +}; + +module_platform_driver(starfive_trng_driver); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("StarFive True Random Number Generator"); diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index dfb103f81a64..3b2516d1433f 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -390,16 +390,6 @@ if CRYPTO_DEV_NX source "drivers/crypto/nx/Kconfig" endif -config CRYPTO_DEV_UX500 - tristate "Driver for ST-Ericsson UX500 crypto hardware acceleration" - depends on ARCH_U8500 - help - Driver for ST-Ericsson UX500 crypto engine. - -if CRYPTO_DEV_UX500 - source "drivers/crypto/ux500/Kconfig" -endif # if CRYPTO_DEV_UX500 - config CRYPTO_DEV_ATMEL_AUTHENC bool "Support for Atmel IPSEC/SSL hw accelerator" depends on ARCH_AT91 || COMPILE_TEST diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index fa8bf1be1a8c..476f1a25ca32 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -43,7 +43,6 @@ obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o obj-$(CONFIG_CRYPTO_DEV_SL3516) += gemini/ obj-y += stm32/ obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o -obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/ obj-$(CONFIG_CRYPTO_DEV_VIRTIO) += virtio/ obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/ obj-$(CONFIG_CRYPTO_DEV_BCM_SPU) += bcm/ diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c index 9f6594699835..a6865ff4d400 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c @@ -118,6 +118,7 @@ static const struct ce_variant ce_d1_variant = { { "bus", 0, 200000000 }, { "mod", 300000000, 0 }, { "ram", 0, 400000000 }, + { "trng", 0, 0 }, }, .esr = ESR_D1, .prng = CE_ALG_PRNG, diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h index 8177aaba4434..27029fb77e29 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h @@ -105,7 +105,7 @@ #define MAX_SG 8 -#define CE_MAX_CLOCKS 3 +#define CE_MAX_CLOCKS 4 #define MAXFLOW 4 diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c index 902f6be057ec..83c6dfad77e1 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c @@ -452,7 +452,7 @@ int sun8i_ss_aes_setkey(struct crypto_skcipher *tfm, const u8 *key, } kfree_sensitive(op->key); op->keylen = keylen; - op->key = kmemdup(key, keylen, GFP_KERNEL | GFP_DMA); + op->key = kmemdup(key, keylen, GFP_KERNEL); if (!op->key) return -ENOMEM; @@ -475,7 +475,7 @@ int sun8i_ss_des3_setkey(struct crypto_skcipher *tfm, const u8 *key, kfree_sensitive(op->key); op->keylen = keylen; - op->key = kmemdup(key, keylen, GFP_KERNEL | GFP_DMA); + op->key = kmemdup(key, keylen, GFP_KERNEL); if (!op->key) return -ENOMEM; diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c index ac2329e2b0e5..c9dc06f97857 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c @@ -16,6 +16,7 @@ #include <linux/interrupt.h> #include <linux/io.h> #include <linux/irq.h> +#include <linux/kernel.h> #include <linux/module.h> #include <linux/of.h> 
#include <linux/of_device.h> @@ -527,7 +528,7 @@ static int allocate_flows(struct sun8i_ss_dev *ss) init_completion(&ss->flows[i].complete); ss->flows[i].biv = devm_kmalloc(ss->dev, AES_BLOCK_SIZE, - GFP_KERNEL | GFP_DMA); + GFP_KERNEL); if (!ss->flows[i].biv) { err = -ENOMEM; goto error_engine; @@ -535,7 +536,7 @@ static int allocate_flows(struct sun8i_ss_dev *ss) for (j = 0; j < MAX_SG; j++) { ss->flows[i].iv[j] = devm_kmalloc(ss->dev, AES_BLOCK_SIZE, - GFP_KERNEL | GFP_DMA); + GFP_KERNEL); if (!ss->flows[i].iv[j]) { err = -ENOMEM; goto error_engine; @@ -544,13 +545,15 @@ static int allocate_flows(struct sun8i_ss_dev *ss) /* the padding could be up to two block. */ ss->flows[i].pad = devm_kmalloc(ss->dev, MAX_PAD_SIZE, - GFP_KERNEL | GFP_DMA); + GFP_KERNEL); if (!ss->flows[i].pad) { err = -ENOMEM; goto error_engine; } - ss->flows[i].result = devm_kmalloc(ss->dev, SHA256_DIGEST_SIZE, - GFP_KERNEL | GFP_DMA); + ss->flows[i].result = + devm_kmalloc(ss->dev, max(SHA256_DIGEST_SIZE, + dma_get_cache_alignment()), + GFP_KERNEL); if (!ss->flows[i].result) { err = -ENOMEM; goto error_engine; diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c index 36a82b22953c..577bf636f7fb 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c @@ -79,10 +79,10 @@ int sun8i_ss_hmac_setkey(struct crypto_ahash *ahash, const u8 *key, memcpy(tfmctx->key, key, keylen); } - tfmctx->ipad = kzalloc(bs, GFP_KERNEL | GFP_DMA); + tfmctx->ipad = kzalloc(bs, GFP_KERNEL); if (!tfmctx->ipad) return -ENOMEM; - tfmctx->opad = kzalloc(bs, GFP_KERNEL | GFP_DMA); + tfmctx->opad = kzalloc(bs, GFP_KERNEL); if (!tfmctx->opad) { ret = -ENOMEM; goto err_opad; diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-prng.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-prng.c index dd677e9ed06f..70c7b5d571b8 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-prng.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-prng.c @@ -11,6 +11,8 @@ */ #include "sun8i-ss.h" #include <linux/dma-mapping.h> +#include <linux/kernel.h> +#include <linux/mm.h> #include <linux/pm_runtime.h> #include <crypto/internal/rng.h> @@ -25,7 +27,7 @@ int sun8i_ss_prng_seed(struct crypto_rng *tfm, const u8 *seed, ctx->seed = NULL; } if (!ctx->seed) - ctx->seed = kmalloc(slen, GFP_KERNEL | GFP_DMA); + ctx->seed = kmalloc(slen, GFP_KERNEL); if (!ctx->seed) return -ENOMEM; @@ -58,6 +60,7 @@ int sun8i_ss_prng_generate(struct crypto_rng *tfm, const u8 *src, struct sun8i_ss_rng_tfm_ctx *ctx = crypto_rng_ctx(tfm); struct rng_alg *alg = crypto_rng_alg(tfm); struct sun8i_ss_alg_template *algt; + unsigned int todo_with_padding; struct sun8i_ss_dev *ss; dma_addr_t dma_iv, dma_dst; unsigned int todo; @@ -81,7 +84,11 @@ int sun8i_ss_prng_generate(struct crypto_rng *tfm, const u8 *src, todo = dlen + PRNG_SEED_SIZE + PRNG_DATA_SIZE; todo -= todo % PRNG_DATA_SIZE; - d = kzalloc(todo, GFP_KERNEL | GFP_DMA); + todo_with_padding = ALIGN(todo, dma_get_cache_alignment()); + if (todo_with_padding < todo || todo < dlen) + return -EOVERFLOW; + + d = kzalloc(todo_with_padding, GFP_KERNEL); if (!d) return -ENOMEM; diff --git a/drivers/crypto/amcc/crypto4xx_core.c b/drivers/crypto/amcc/crypto4xx_core.c index 280f4b0e7133..50dc783821b6 100644 --- a/drivers/crypto/amcc/crypto4xx_core.c +++ b/drivers/crypto/amcc/crypto4xx_core.c @@ -522,7 +522,6 @@ static void crypto4xx_cipher_done(struct crypto4xx_device *dev, { struct skcipher_request *req; struct scatterlist *dst; - 
dma_addr_t addr; req = skcipher_request_cast(pd_uinfo->async_req); @@ -531,8 +530,8 @@ static void crypto4xx_cipher_done(struct crypto4xx_device *dev, req->cryptlen, req->dst); } else { dst = pd_uinfo->dest_va; - addr = dma_map_page(dev->core_dev->device, sg_page(dst), - dst->offset, dst->length, DMA_FROM_DEVICE); + dma_unmap_page(dev->core_dev->device, pd->dest, dst->length, + DMA_FROM_DEVICE); } if (pd_uinfo->sa_va->sa_command_0.bf.save_iv == SA_SAVE_IV) { @@ -557,10 +556,9 @@ static void crypto4xx_ahash_done(struct crypto4xx_device *dev, struct ahash_request *ahash_req; ahash_req = ahash_request_cast(pd_uinfo->async_req); - ctx = crypto_tfm_ctx(ahash_req->base.tfm); + ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(ahash_req)); - crypto4xx_copy_digest_to_dst(ahash_req->result, pd_uinfo, - crypto_tfm_ctx(ahash_req->base.tfm)); + crypto4xx_copy_digest_to_dst(ahash_req->result, pd_uinfo, ctx); crypto4xx_ret_sg_desc(dev, pd_uinfo); if (pd_uinfo->state & PD_ENTRY_BUSY) diff --git a/drivers/crypto/aspeed/Kconfig b/drivers/crypto/aspeed/Kconfig index ae2710ae8d8f..db6c5b4cdc40 100644 --- a/drivers/crypto/aspeed/Kconfig +++ b/drivers/crypto/aspeed/Kconfig @@ -46,3 +46,14 @@ config CRYPTO_DEV_ASPEED_HACE_CRYPTO crypto driver. Supports AES/DES symmetric-key encryption and decryption with ECB/CBC/CFB/OFB/CTR options. + +config CRYPTO_DEV_ASPEED_ACRY + bool "Enable Aspeed ACRY RSA Engine" + depends on CRYPTO_DEV_ASPEED + select CRYPTO_ENGINE + select CRYPTO_RSA + help + Select here to enable Aspeed ECC/RSA Engine (ACRY) + RSA driver. + Supports 256 bits to 4096 bits RSA encryption/decryption + and signature/verification. diff --git a/drivers/crypto/aspeed/Makefile b/drivers/crypto/aspeed/Makefile index a0ed40ddaad1..15862752c053 100644 --- a/drivers/crypto/aspeed/Makefile +++ b/drivers/crypto/aspeed/Makefile @@ -5,3 +5,7 @@ obj-$(CONFIG_CRYPTO_DEV_ASPEED) += aspeed_crypto.o aspeed_crypto-objs := aspeed-hace.o \ $(hace-hash-y) \ $(hace-crypto-y) + +aspeed_acry-$(CONFIG_CRYPTO_DEV_ASPEED_ACRY) += aspeed-acry.o + +obj-$(CONFIG_CRYPTO_DEV_ASPEED) += $(aspeed_acry-y) diff --git a/drivers/crypto/aspeed/aspeed-acry.c b/drivers/crypto/aspeed/aspeed-acry.c new file mode 100644 index 000000000000..1f77ebd73489 --- /dev/null +++ b/drivers/crypto/aspeed/aspeed-acry.c @@ -0,0 +1,828 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright 2021 Aspeed Technology Inc. + */ +#include <crypto/akcipher.h> +#include <crypto/algapi.h> +#include <crypto/engine.h> +#include <crypto/internal/akcipher.h> +#include <crypto/internal/rsa.h> +#include <crypto/scatterwalk.h> +#include <linux/clk.h> +#include <linux/platform_device.h> +#include <linux/module.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/mfd/syscon.h> +#include <linux/interrupt.h> +#include <linux/count_zeros.h> +#include <linux/err.h> +#include <linux/dma-mapping.h> +#include <linux/regmap.h> + +#ifdef CONFIG_CRYPTO_DEV_ASPEED_DEBUG +#define ACRY_DBG(d, fmt, ...) \ + dev_info((d)->dev, "%s() " fmt, __func__, ##__VA_ARGS__) +#else +#define ACRY_DBG(d, fmt, ...) 
\ + dev_dbg((d)->dev, "%s() " fmt, __func__, ##__VA_ARGS__) +#endif + +/***************************** + * * + * ACRY register definitions * + * * + * ***************************/ +#define ASPEED_ACRY_TRIGGER 0x000 /* ACRY Engine Control: trigger */ +#define ASPEED_ACRY_DMA_CMD 0x048 /* ACRY Engine Control: Command */ +#define ASPEED_ACRY_DMA_SRC_BASE 0x04C /* ACRY DRAM base address for DMA */ +#define ASPEED_ACRY_DMA_LEN 0x050 /* ACRY Data Length of DMA */ +#define ASPEED_ACRY_RSA_KEY_LEN 0x058 /* ACRY RSA Exp/Mod Key Length (Bits) */ +#define ASPEED_ACRY_INT_MASK 0x3F8 /* ACRY Interrupt Mask */ +#define ASPEED_ACRY_STATUS 0x3FC /* ACRY Interrupt Status */ + +/* rsa trigger */ +#define ACRY_CMD_RSA_TRIGGER BIT(0) +#define ACRY_CMD_DMA_RSA_TRIGGER BIT(1) + +/* rsa dma cmd */ +#define ACRY_CMD_DMA_SRAM_MODE_RSA (0x3 << 4) +#define ACRY_CMD_DMEM_AHB BIT(8) +#define ACRY_CMD_DMA_SRAM_AHB_ENGINE 0 + +/* rsa key len */ +#define RSA_E_BITS_LEN(x) ((x) << 16) +#define RSA_M_BITS_LEN(x) (x) + +/* acry isr */ +#define ACRY_RSA_ISR BIT(1) + +#define ASPEED_ACRY_BUFF_SIZE 0x1800 /* DMA buffer size */ +#define ASPEED_ACRY_SRAM_MAX_LEN 2048 /* ACRY SRAM maximum length (Bytes) */ +#define ASPEED_ACRY_RSA_MAX_KEY_LEN 512 /* ACRY RSA maximum key length (Bytes) */ + +#define CRYPTO_FLAGS_BUSY BIT(1) +#define BYTES_PER_DWORD 4 + +/***************************** + * * + * AHBC register definitions * + * * + * ***************************/ +#define AHBC_REGION_PROT 0x240 +#define REGION_ACRYM BIT(23) + +#define ast_acry_write(acry, val, offset) \ + writel((val), (acry)->regs + (offset)) + +#define ast_acry_read(acry, offset) \ + readl((acry)->regs + (offset)) + +struct aspeed_acry_dev; + +typedef int (*aspeed_acry_fn_t)(struct aspeed_acry_dev *); + +struct aspeed_acry_dev { + void __iomem *regs; + struct device *dev; + int irq; + struct clk *clk; + struct regmap *ahbc; + + struct akcipher_request *req; + struct tasklet_struct done_task; + aspeed_acry_fn_t resume; + unsigned long flags; + + /* ACRY output SRAM buffer */ + void __iomem *acry_sram; + + /* ACRY input DMA buffer */ + void *buf_addr; + dma_addr_t buf_dma_addr; + + struct crypto_engine *crypt_engine_rsa; + + /* ACRY SRAM memory mapped */ + int exp_dw_mapping[ASPEED_ACRY_RSA_MAX_KEY_LEN]; + int mod_dw_mapping[ASPEED_ACRY_RSA_MAX_KEY_LEN]; + int data_byte_mapping[ASPEED_ACRY_SRAM_MAX_LEN]; +}; + +struct aspeed_acry_ctx { + struct crypto_engine_ctx enginectx; + struct aspeed_acry_dev *acry_dev; + + struct rsa_key key; + int enc; + u8 *n; + u8 *e; + u8 *d; + size_t n_sz; + size_t e_sz; + size_t d_sz; + + aspeed_acry_fn_t trigger; + + struct crypto_akcipher *fallback_tfm; +}; + +struct aspeed_acry_alg { + struct aspeed_acry_dev *acry_dev; + struct akcipher_alg akcipher; +}; + +enum aspeed_rsa_key_mode { + ASPEED_RSA_EXP_MODE = 0, + ASPEED_RSA_MOD_MODE, + ASPEED_RSA_DATA_MODE, +}; + +static inline struct akcipher_request * + akcipher_request_cast(struct crypto_async_request *req) +{ + return container_of(req, struct akcipher_request, base); +} + +static int aspeed_acry_do_fallback(struct akcipher_request *req) +{ + struct crypto_akcipher *cipher = crypto_akcipher_reqtfm(req); + struct aspeed_acry_ctx *ctx = akcipher_tfm_ctx(cipher); + int err; + + akcipher_request_set_tfm(req, ctx->fallback_tfm); + + if (ctx->enc) + err = crypto_akcipher_encrypt(req); + else + err = crypto_akcipher_decrypt(req); + + akcipher_request_set_tfm(req, cipher); + + return err; +} + +static bool aspeed_acry_need_fallback(struct akcipher_request *req) +{ + struct crypto_akcipher 
*cipher = crypto_akcipher_reqtfm(req); + struct aspeed_acry_ctx *ctx = akcipher_tfm_ctx(cipher); + + return ctx->key.n_sz > ASPEED_ACRY_RSA_MAX_KEY_LEN; +} + +static int aspeed_acry_handle_queue(struct aspeed_acry_dev *acry_dev, + struct akcipher_request *req) +{ + if (aspeed_acry_need_fallback(req)) { + ACRY_DBG(acry_dev, "SW fallback\n"); + return aspeed_acry_do_fallback(req); + } + + return crypto_transfer_akcipher_request_to_engine(acry_dev->crypt_engine_rsa, req); +} + +static int aspeed_acry_do_request(struct crypto_engine *engine, void *areq) +{ + struct akcipher_request *req = akcipher_request_cast(areq); + struct crypto_akcipher *cipher = crypto_akcipher_reqtfm(req); + struct aspeed_acry_ctx *ctx = akcipher_tfm_ctx(cipher); + struct aspeed_acry_dev *acry_dev = ctx->acry_dev; + + acry_dev->req = req; + acry_dev->flags |= CRYPTO_FLAGS_BUSY; + + return ctx->trigger(acry_dev); +} + +static int aspeed_acry_complete(struct aspeed_acry_dev *acry_dev, int err) +{ + struct akcipher_request *req = acry_dev->req; + + acry_dev->flags &= ~CRYPTO_FLAGS_BUSY; + + crypto_finalize_akcipher_request(acry_dev->crypt_engine_rsa, req, err); + + return err; +} + +/* + * Copy Data to DMA buffer for engine used. + */ +static void aspeed_acry_rsa_sg_copy_to_buffer(struct aspeed_acry_dev *acry_dev, + u8 *buf, struct scatterlist *src, + size_t nbytes) +{ + static u8 dram_buffer[ASPEED_ACRY_SRAM_MAX_LEN]; + int i = 0, j; + int data_idx; + + ACRY_DBG(acry_dev, "\n"); + + scatterwalk_map_and_copy(dram_buffer, src, 0, nbytes, 0); + + for (j = nbytes - 1; j >= 0; j--) { + data_idx = acry_dev->data_byte_mapping[i]; + buf[data_idx] = dram_buffer[j]; + i++; + } + + for (; i < ASPEED_ACRY_SRAM_MAX_LEN; i++) { + data_idx = acry_dev->data_byte_mapping[i]; + buf[data_idx] = 0; + } +} + +/* + * Copy Exp/Mod to DMA buffer for engine used. 
+ * + * Params: + * - mode 0 : Exponential + * - mode 1 : Modulus + * + * Example: + * - DRAM memory layout: + * D[0], D[4], D[8], D[12] + * - ACRY SRAM memory layout should reverse the order of source data: + * D[12], D[8], D[4], D[0] + */ +static int aspeed_acry_rsa_ctx_copy(struct aspeed_acry_dev *acry_dev, void *buf, + const void *xbuf, size_t nbytes, + enum aspeed_rsa_key_mode mode) +{ + const u8 *src = xbuf; + __le32 *dw_buf = buf; + int nbits, ndw; + int i, j, idx; + u32 data = 0; + + ACRY_DBG(acry_dev, "nbytes:%zu, mode:%d\n", nbytes, mode); + + if (nbytes > ASPEED_ACRY_RSA_MAX_KEY_LEN) + return -ENOMEM; + + /* Remove the leading zeros */ + while (nbytes > 0 && src[0] == 0) { + src++; + nbytes--; + } + + nbits = nbytes * 8; + if (nbytes > 0) + nbits -= count_leading_zeros(src[0]) - (BITS_PER_LONG - 8); + + /* double-world alignment */ + ndw = DIV_ROUND_UP(nbytes, BYTES_PER_DWORD); + + if (nbytes > 0) { + i = BYTES_PER_DWORD - nbytes % BYTES_PER_DWORD; + i %= BYTES_PER_DWORD; + + for (j = ndw; j > 0; j--) { + for (; i < BYTES_PER_DWORD; i++) { + data <<= 8; + data |= *src++; + } + + i = 0; + + if (mode == ASPEED_RSA_EXP_MODE) + idx = acry_dev->exp_dw_mapping[j - 1]; + else if (mode == ASPEED_RSA_MOD_MODE) + idx = acry_dev->mod_dw_mapping[j - 1]; + + dw_buf[idx] = cpu_to_le32(data); + } + } + + return nbits; +} + +static int aspeed_acry_rsa_transfer(struct aspeed_acry_dev *acry_dev) +{ + struct akcipher_request *req = acry_dev->req; + u8 __iomem *sram_buffer = acry_dev->acry_sram; + struct scatterlist *out_sg = req->dst; + static u8 dram_buffer[ASPEED_ACRY_SRAM_MAX_LEN]; + int leading_zero = 1; + int result_nbytes; + int i = 0, j; + int data_idx; + + /* Set Data Memory to AHB(CPU) Access Mode */ + ast_acry_write(acry_dev, ACRY_CMD_DMEM_AHB, ASPEED_ACRY_DMA_CMD); + + /* Disable ACRY SRAM protection */ + regmap_update_bits(acry_dev->ahbc, AHBC_REGION_PROT, + REGION_ACRYM, 0); + + result_nbytes = ASPEED_ACRY_SRAM_MAX_LEN; + + for (j = ASPEED_ACRY_SRAM_MAX_LEN - 1; j >= 0; j--) { + data_idx = acry_dev->data_byte_mapping[j]; + if (readb(sram_buffer + data_idx) == 0 && leading_zero) { + result_nbytes--; + } else { + leading_zero = 0; + dram_buffer[i] = readb(sram_buffer + data_idx); + i++; + } + } + + ACRY_DBG(acry_dev, "result_nbytes:%d, req->dst_len:%d\n", + result_nbytes, req->dst_len); + + if (result_nbytes <= req->dst_len) { + scatterwalk_map_and_copy(dram_buffer, out_sg, 0, result_nbytes, + 1); + req->dst_len = result_nbytes; + + } else { + dev_err(acry_dev->dev, "RSA engine error!\n"); + } + + memzero_explicit(acry_dev->buf_addr, ASPEED_ACRY_BUFF_SIZE); + + return aspeed_acry_complete(acry_dev, 0); +} + +static int aspeed_acry_rsa_trigger(struct aspeed_acry_dev *acry_dev) +{ + struct akcipher_request *req = acry_dev->req; + struct crypto_akcipher *cipher = crypto_akcipher_reqtfm(req); + struct aspeed_acry_ctx *ctx = akcipher_tfm_ctx(cipher); + int ne, nm; + + if (!ctx->n || !ctx->n_sz) { + dev_err(acry_dev->dev, "%s: key n is not set\n", __func__); + return -EINVAL; + } + + memzero_explicit(acry_dev->buf_addr, ASPEED_ACRY_BUFF_SIZE); + + /* Copy source data to DMA buffer */ + aspeed_acry_rsa_sg_copy_to_buffer(acry_dev, acry_dev->buf_addr, + req->src, req->src_len); + + nm = aspeed_acry_rsa_ctx_copy(acry_dev, acry_dev->buf_addr, ctx->n, + ctx->n_sz, ASPEED_RSA_MOD_MODE); + if (ctx->enc) { + if (!ctx->e || !ctx->e_sz) { + dev_err(acry_dev->dev, "%s: key e is not set\n", + __func__); + return -EINVAL; + } + /* Copy key e to DMA buffer */ + ne = aspeed_acry_rsa_ctx_copy(acry_dev, 
acry_dev->buf_addr, + ctx->e, ctx->e_sz, + ASPEED_RSA_EXP_MODE); + } else { + if (!ctx->d || !ctx->d_sz) { + dev_err(acry_dev->dev, "%s: key d is not set\n", + __func__); + return -EINVAL; + } + /* Copy key d to DMA buffer */ + ne = aspeed_acry_rsa_ctx_copy(acry_dev, acry_dev->buf_addr, + ctx->key.d, ctx->key.d_sz, + ASPEED_RSA_EXP_MODE); + } + + ast_acry_write(acry_dev, acry_dev->buf_dma_addr, + ASPEED_ACRY_DMA_SRC_BASE); + ast_acry_write(acry_dev, (ne << 16) + nm, + ASPEED_ACRY_RSA_KEY_LEN); + ast_acry_write(acry_dev, ASPEED_ACRY_BUFF_SIZE, + ASPEED_ACRY_DMA_LEN); + + acry_dev->resume = aspeed_acry_rsa_transfer; + + /* Enable ACRY SRAM protection */ + regmap_update_bits(acry_dev->ahbc, AHBC_REGION_PROT, + REGION_ACRYM, REGION_ACRYM); + + ast_acry_write(acry_dev, ACRY_RSA_ISR, ASPEED_ACRY_INT_MASK); + ast_acry_write(acry_dev, ACRY_CMD_DMA_SRAM_MODE_RSA | + ACRY_CMD_DMA_SRAM_AHB_ENGINE, ASPEED_ACRY_DMA_CMD); + + /* Trigger RSA engines */ + ast_acry_write(acry_dev, ACRY_CMD_RSA_TRIGGER | + ACRY_CMD_DMA_RSA_TRIGGER, ASPEED_ACRY_TRIGGER); + + return 0; +} + +static int aspeed_acry_rsa_enc(struct akcipher_request *req) +{ + struct crypto_akcipher *cipher = crypto_akcipher_reqtfm(req); + struct aspeed_acry_ctx *ctx = akcipher_tfm_ctx(cipher); + struct aspeed_acry_dev *acry_dev = ctx->acry_dev; + + ctx->trigger = aspeed_acry_rsa_trigger; + ctx->enc = 1; + + return aspeed_acry_handle_queue(acry_dev, req); +} + +static int aspeed_acry_rsa_dec(struct akcipher_request *req) +{ + struct crypto_akcipher *cipher = crypto_akcipher_reqtfm(req); + struct aspeed_acry_ctx *ctx = akcipher_tfm_ctx(cipher); + struct aspeed_acry_dev *acry_dev = ctx->acry_dev; + + ctx->trigger = aspeed_acry_rsa_trigger; + ctx->enc = 0; + + return aspeed_acry_handle_queue(acry_dev, req); +} + +static u8 *aspeed_rsa_key_copy(u8 *src, size_t len) +{ + return kmemdup(src, len, GFP_KERNEL); +} + +static int aspeed_rsa_set_n(struct aspeed_acry_ctx *ctx, u8 *value, + size_t len) +{ + ctx->n_sz = len; + ctx->n = aspeed_rsa_key_copy(value, len); + if (!ctx->n) + return -ENOMEM; + + return 0; +} + +static int aspeed_rsa_set_e(struct aspeed_acry_ctx *ctx, u8 *value, + size_t len) +{ + ctx->e_sz = len; + ctx->e = aspeed_rsa_key_copy(value, len); + if (!ctx->e) + return -ENOMEM; + + return 0; +} + +static int aspeed_rsa_set_d(struct aspeed_acry_ctx *ctx, u8 *value, + size_t len) +{ + ctx->d_sz = len; + ctx->d = aspeed_rsa_key_copy(value, len); + if (!ctx->d) + return -ENOMEM; + + return 0; +} + +static void aspeed_rsa_key_free(struct aspeed_acry_ctx *ctx) +{ + kfree_sensitive(ctx->n); + kfree_sensitive(ctx->e); + kfree_sensitive(ctx->d); + ctx->n_sz = 0; + ctx->e_sz = 0; + ctx->d_sz = 0; +} + +static int aspeed_acry_rsa_setkey(struct crypto_akcipher *tfm, const void *key, + unsigned int keylen, int priv) +{ + struct aspeed_acry_ctx *ctx = akcipher_tfm_ctx(tfm); + struct aspeed_acry_dev *acry_dev = ctx->acry_dev; + int ret; + + if (priv) + ret = rsa_parse_priv_key(&ctx->key, key, keylen); + else + ret = rsa_parse_pub_key(&ctx->key, key, keylen); + + if (ret) { + dev_err(acry_dev->dev, "rsa parse key failed, ret:0x%x\n", + ret); + return ret; + } + + /* Aspeed engine supports up to 4096 bits, + * Use software fallback instead. 
+ */ + if (ctx->key.n_sz > ASPEED_ACRY_RSA_MAX_KEY_LEN) + return 0; + + ret = aspeed_rsa_set_n(ctx, (u8 *)ctx->key.n, ctx->key.n_sz); + if (ret) + goto err; + + ret = aspeed_rsa_set_e(ctx, (u8 *)ctx->key.e, ctx->key.e_sz); + if (ret) + goto err; + + if (priv) { + ret = aspeed_rsa_set_d(ctx, (u8 *)ctx->key.d, ctx->key.d_sz); + if (ret) + goto err; + } + + return 0; + +err: + dev_err(acry_dev->dev, "rsa set key failed\n"); + aspeed_rsa_key_free(ctx); + + return ret; +} + +static int aspeed_acry_rsa_set_pub_key(struct crypto_akcipher *tfm, + const void *key, + unsigned int keylen) +{ + struct aspeed_acry_ctx *ctx = akcipher_tfm_ctx(tfm); + int ret; + + ret = crypto_akcipher_set_pub_key(ctx->fallback_tfm, key, keylen); + if (ret) + return ret; + + return aspeed_acry_rsa_setkey(tfm, key, keylen, 0); +} + +static int aspeed_acry_rsa_set_priv_key(struct crypto_akcipher *tfm, + const void *key, + unsigned int keylen) +{ + struct aspeed_acry_ctx *ctx = akcipher_tfm_ctx(tfm); + int ret; + + ret = crypto_akcipher_set_priv_key(ctx->fallback_tfm, key, keylen); + if (ret) + return ret; + + return aspeed_acry_rsa_setkey(tfm, key, keylen, 1); +} + +static unsigned int aspeed_acry_rsa_max_size(struct crypto_akcipher *tfm) +{ + struct aspeed_acry_ctx *ctx = akcipher_tfm_ctx(tfm); + + if (ctx->key.n_sz > ASPEED_ACRY_RSA_MAX_KEY_LEN) + return crypto_akcipher_maxsize(ctx->fallback_tfm); + + return ctx->n_sz; +} + +static int aspeed_acry_rsa_init_tfm(struct crypto_akcipher *tfm) +{ + struct aspeed_acry_ctx *ctx = akcipher_tfm_ctx(tfm); + struct akcipher_alg *alg = crypto_akcipher_alg(tfm); + const char *name = crypto_tfm_alg_name(&tfm->base); + struct aspeed_acry_alg *acry_alg; + + acry_alg = container_of(alg, struct aspeed_acry_alg, akcipher); + + ctx->acry_dev = acry_alg->acry_dev; + + ctx->fallback_tfm = crypto_alloc_akcipher(name, 0, CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(ctx->fallback_tfm)) { + dev_err(ctx->acry_dev->dev, "ERROR: Cannot allocate fallback for %s %ld\n", + name, PTR_ERR(ctx->fallback_tfm)); + return PTR_ERR(ctx->fallback_tfm); + } + + ctx->enginectx.op.do_one_request = aspeed_acry_do_request; + ctx->enginectx.op.prepare_request = NULL; + ctx->enginectx.op.unprepare_request = NULL; + + return 0; +} + +static void aspeed_acry_rsa_exit_tfm(struct crypto_akcipher *tfm) +{ + struct aspeed_acry_ctx *ctx = akcipher_tfm_ctx(tfm); + + crypto_free_akcipher(ctx->fallback_tfm); +} + +static struct aspeed_acry_alg aspeed_acry_akcipher_algs[] = { + { + .akcipher = { + .encrypt = aspeed_acry_rsa_enc, + .decrypt = aspeed_acry_rsa_dec, + .sign = aspeed_acry_rsa_dec, + .verify = aspeed_acry_rsa_enc, + .set_pub_key = aspeed_acry_rsa_set_pub_key, + .set_priv_key = aspeed_acry_rsa_set_priv_key, + .max_size = aspeed_acry_rsa_max_size, + .init = aspeed_acry_rsa_init_tfm, + .exit = aspeed_acry_rsa_exit_tfm, + .base = { + .cra_name = "rsa", + .cra_driver_name = "aspeed-rsa", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AKCIPHER | + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_KERN_DRIVER_ONLY | + CRYPTO_ALG_NEED_FALLBACK, + .cra_module = THIS_MODULE, + .cra_ctxsize = sizeof(struct aspeed_acry_ctx), + }, + }, + }, +}; + +static void aspeed_acry_register(struct aspeed_acry_dev *acry_dev) +{ + int i, rc; + + for (i = 0; i < ARRAY_SIZE(aspeed_acry_akcipher_algs); i++) { + aspeed_acry_akcipher_algs[i].acry_dev = acry_dev; + rc = crypto_register_akcipher(&aspeed_acry_akcipher_algs[i].akcipher); + if (rc) { + ACRY_DBG(acry_dev, "Failed to register %s\n", + 
aspeed_acry_akcipher_algs[i].akcipher.base.cra_name); + } + } +} + +static void aspeed_acry_unregister(struct aspeed_acry_dev *acry_dev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(aspeed_acry_akcipher_algs); i++) + crypto_unregister_akcipher(&aspeed_acry_akcipher_algs[i].akcipher); +} + +/* ACRY interrupt service routine. */ +static irqreturn_t aspeed_acry_irq(int irq, void *dev) +{ + struct aspeed_acry_dev *acry_dev = (struct aspeed_acry_dev *)dev; + u32 sts; + + sts = ast_acry_read(acry_dev, ASPEED_ACRY_STATUS); + ast_acry_write(acry_dev, sts, ASPEED_ACRY_STATUS); + + ACRY_DBG(acry_dev, "irq sts:0x%x\n", sts); + + if (sts & ACRY_RSA_ISR) { + /* Stop RSA engine */ + ast_acry_write(acry_dev, 0, ASPEED_ACRY_TRIGGER); + + if (acry_dev->flags & CRYPTO_FLAGS_BUSY) + tasklet_schedule(&acry_dev->done_task); + else + dev_err(acry_dev->dev, "RSA no active requests.\n"); + } + + return IRQ_HANDLED; +} + +/* + * ACRY SRAM has its own memory layout. + * Set the DRAM to SRAM indexing for future used. + */ +static void aspeed_acry_sram_mapping(struct aspeed_acry_dev *acry_dev) +{ + int i, j = 0; + + for (i = 0; i < (ASPEED_ACRY_SRAM_MAX_LEN / BYTES_PER_DWORD); i++) { + acry_dev->exp_dw_mapping[i] = j; + acry_dev->mod_dw_mapping[i] = j + 4; + acry_dev->data_byte_mapping[(i * 4)] = (j + 8) * 4; + acry_dev->data_byte_mapping[(i * 4) + 1] = (j + 8) * 4 + 1; + acry_dev->data_byte_mapping[(i * 4) + 2] = (j + 8) * 4 + 2; + acry_dev->data_byte_mapping[(i * 4) + 3] = (j + 8) * 4 + 3; + j++; + j = j % 4 ? j : j + 8; + } +} + +static void aspeed_acry_done_task(unsigned long data) +{ + struct aspeed_acry_dev *acry_dev = (struct aspeed_acry_dev *)data; + + (void)acry_dev->resume(acry_dev); +} + +static const struct of_device_id aspeed_acry_of_matches[] = { + { .compatible = "aspeed,ast2600-acry", }, + {}, +}; + +static int aspeed_acry_probe(struct platform_device *pdev) +{ + struct aspeed_acry_dev *acry_dev; + struct device *dev = &pdev->dev; + struct resource *res; + int rc; + + acry_dev = devm_kzalloc(dev, sizeof(struct aspeed_acry_dev), + GFP_KERNEL); + if (!acry_dev) + return -ENOMEM; + + acry_dev->dev = dev; + + platform_set_drvdata(pdev, acry_dev); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + acry_dev->regs = devm_ioremap_resource(dev, res); + if (IS_ERR(acry_dev->regs)) + return PTR_ERR(acry_dev->regs); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + acry_dev->acry_sram = devm_ioremap_resource(dev, res); + if (IS_ERR(acry_dev->acry_sram)) + return PTR_ERR(acry_dev->acry_sram); + + /* Get irq number and register it */ + acry_dev->irq = platform_get_irq(pdev, 0); + if (acry_dev->irq < 0) + return -ENXIO; + + rc = devm_request_irq(dev, acry_dev->irq, aspeed_acry_irq, 0, + dev_name(dev), acry_dev); + if (rc) { + dev_err(dev, "Failed to request irq.\n"); + return rc; + } + + acry_dev->clk = devm_clk_get_enabled(dev, NULL); + if (IS_ERR(acry_dev->clk)) { + dev_err(dev, "Failed to get acry clk\n"); + return PTR_ERR(acry_dev->clk); + } + + acry_dev->ahbc = syscon_regmap_lookup_by_phandle(dev->of_node, + "aspeed,ahbc"); + if (IS_ERR(acry_dev->ahbc)) { + dev_err(dev, "Failed to get AHBC regmap\n"); + return -ENODEV; + } + + /* Initialize crypto hardware engine structure for RSA */ + acry_dev->crypt_engine_rsa = crypto_engine_alloc_init(dev, true); + if (!acry_dev->crypt_engine_rsa) { + rc = -ENOMEM; + goto clk_exit; + } + + rc = crypto_engine_start(acry_dev->crypt_engine_rsa); + if (rc) + goto err_engine_rsa_start; + + tasklet_init(&acry_dev->done_task, aspeed_acry_done_task, + (unsigned 
long)acry_dev); + + /* Set Data Memory to AHB(CPU) Access Mode */ + ast_acry_write(acry_dev, ACRY_CMD_DMEM_AHB, ASPEED_ACRY_DMA_CMD); + + /* Initialize ACRY SRAM index */ + aspeed_acry_sram_mapping(acry_dev); + + acry_dev->buf_addr = dmam_alloc_coherent(dev, ASPEED_ACRY_BUFF_SIZE, + &acry_dev->buf_dma_addr, + GFP_KERNEL); + memzero_explicit(acry_dev->buf_addr, ASPEED_ACRY_BUFF_SIZE); + + aspeed_acry_register(acry_dev); + + dev_info(dev, "Aspeed ACRY Accelerator successfully registered\n"); + + return 0; + +err_engine_rsa_start: + crypto_engine_exit(acry_dev->crypt_engine_rsa); +clk_exit: + clk_disable_unprepare(acry_dev->clk); + + return rc; +} + +static int aspeed_acry_remove(struct platform_device *pdev) +{ + struct aspeed_acry_dev *acry_dev = platform_get_drvdata(pdev); + + aspeed_acry_unregister(acry_dev); + crypto_engine_exit(acry_dev->crypt_engine_rsa); + tasklet_kill(&acry_dev->done_task); + clk_disable_unprepare(acry_dev->clk); + + return 0; +} + +MODULE_DEVICE_TABLE(of, aspeed_acry_of_matches); + +static struct platform_driver aspeed_acry_driver = { + .probe = aspeed_acry_probe, + .remove = aspeed_acry_remove, + .driver = { + .name = KBUILD_MODNAME, + .of_match_table = aspeed_acry_of_matches, + }, +}; + +module_platform_driver(aspeed_acry_driver); + +MODULE_AUTHOR("Neal Liu <neal_liu@aspeedtech.com>"); +MODULE_DESCRIPTION("ASPEED ACRY driver for hardware RSA Engine"); +MODULE_LICENSE("GPL"); diff --git a/drivers/crypto/aspeed/aspeed-hace.c b/drivers/crypto/aspeed/aspeed-hace.c index 656cb92c8bb6..d2871e1de9c2 100644 --- a/drivers/crypto/aspeed/aspeed-hace.c +++ b/drivers/crypto/aspeed/aspeed-hace.c @@ -99,7 +99,6 @@ static int aspeed_hace_probe(struct platform_device *pdev) const struct of_device_id *hace_dev_id; struct aspeed_engine_hash *hash_engine; struct aspeed_hace_dev *hace_dev; - struct resource *res; int rc; hace_dev = devm_kzalloc(&pdev->dev, sizeof(struct aspeed_hace_dev), @@ -118,11 +117,9 @@ static int aspeed_hace_probe(struct platform_device *pdev) hash_engine = &hace_dev->hash_engine; crypto_engine = &hace_dev->crypto_engine; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - platform_set_drvdata(pdev, hace_dev); - hace_dev->regs = devm_ioremap_resource(&pdev->dev, res); + hace_dev->regs = devm_platform_get_and_ioremap_resource(pdev, 0, NULL); if (IS_ERR(hace_dev->regs)) return PTR_ERR(hace_dev->regs); diff --git a/drivers/crypto/aspeed/aspeed-hace.h b/drivers/crypto/aspeed/aspeed-hace.h index f2cde23b56ae..05d0a15d546d 100644 --- a/drivers/crypto/aspeed/aspeed-hace.h +++ b/drivers/crypto/aspeed/aspeed-hace.h @@ -183,7 +183,7 @@ struct aspeed_sham_ctx { struct aspeed_hace_dev *hace_dev; unsigned long flags; /* hmac flag */ - struct aspeed_sha_hmac_ctx base[0]; + struct aspeed_sha_hmac_ctx base[]; }; struct aspeed_sham_reqctx { diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c index 886bf258544c..ed10f2ae4523 100644 --- a/drivers/crypto/atmel-aes.c +++ b/drivers/crypto/atmel-aes.c @@ -554,7 +554,7 @@ static inline int atmel_aes_complete(struct atmel_aes_dev *dd, int err) } if (dd->is_async) - dd->areq->complete(dd->areq, err); + crypto_request_complete(dd->areq, err); tasklet_schedule(&dd->queue_task); @@ -955,7 +955,7 @@ static int atmel_aes_handle_queue(struct atmel_aes_dev *dd, return ret; if (backlog) - backlog->complete(backlog, -EINPROGRESS); + crypto_request_complete(backlog, -EINPROGRESS); ctx = crypto_tfm_ctx(areq->tfm); @@ -1879,7 +1879,7 @@ static int atmel_aes_xts_setkey(struct crypto_skcipher *tfm, const u8 *key, struct 
atmel_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm); int err; - err = xts_check_key(crypto_skcipher_tfm(tfm), key, keylen); + err = xts_verify_key(tfm, key, keylen); if (err) return err; @@ -2510,6 +2510,7 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd) /* keep only major version number */ switch (dd->hw_version & 0xff0) { case 0x700: + case 0x600: case 0x500: dd->caps.has_dualbuff = 1; dd->caps.has_cfb64 = 1; diff --git a/drivers/crypto/atmel-ecc.c b/drivers/crypto/atmel-ecc.c index 12205e2b53b4..aac64b555204 100644 --- a/drivers/crypto/atmel-ecc.c +++ b/drivers/crypto/atmel-ecc.c @@ -313,11 +313,10 @@ static struct kpp_alg atmel_ecdh_nist_p256 = { static int atmel_ecc_probe(struct i2c_client *client) { - const struct i2c_device_id *id = i2c_client_get_device_id(client); struct atmel_i2c_client_priv *i2c_priv; int ret; - ret = atmel_i2c_probe(client, id); + ret = atmel_i2c_probe(client); if (ret) return ret; diff --git a/drivers/crypto/atmel-i2c.c b/drivers/crypto/atmel-i2c.c index 55bff1e13142..83a9093eff25 100644 --- a/drivers/crypto/atmel-i2c.c +++ b/drivers/crypto/atmel-i2c.c @@ -59,7 +59,7 @@ void atmel_i2c_init_read_cmd(struct atmel_i2c_cmd *cmd) * Read the word from Configuration zone that contains the lock bytes * (UserExtra, Selector, LockValue, LockConfig). */ - cmd->param1 = CONFIG_ZONE; + cmd->param1 = CONFIGURATION_ZONE; cmd->param2 = cpu_to_le16(DEVICE_LOCK_ADDR); cmd->count = READ_COUNT; @@ -324,7 +324,7 @@ free_cmd: return ret; } -int atmel_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) +int atmel_i2c_probe(struct i2c_client *client) { struct atmel_i2c_client_priv *i2c_priv; struct device *dev = &client->dev; diff --git a/drivers/crypto/atmel-i2c.h b/drivers/crypto/atmel-i2c.h index 35f7857a7f7c..c0bd429ee2c7 100644 --- a/drivers/crypto/atmel-i2c.h +++ b/drivers/crypto/atmel-i2c.h @@ -63,7 +63,7 @@ struct atmel_i2c_cmd { #define STATUS_WAKE_SUCCESSFUL 0x11 /* Definitions for eeprom organization */ -#define CONFIG_ZONE 0 +#define CONFIGURATION_ZONE 0 /* Definitions for Indexes common to all commands */ #define RSP_DATA_IDX 1 /* buffer index of data in response */ @@ -167,7 +167,7 @@ struct atmel_i2c_work_data { struct atmel_i2c_cmd cmd; }; -int atmel_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id); +int atmel_i2c_probe(struct i2c_client *client); void atmel_i2c_enqueue(struct atmel_i2c_work_data *work_data, void (*cbk)(struct atmel_i2c_work_data *work_data, diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c index ca4b01926d1b..e7c1db2739ec 100644 --- a/drivers/crypto/atmel-sha.c +++ b/drivers/crypto/atmel-sha.c @@ -292,7 +292,7 @@ static inline int atmel_sha_complete(struct atmel_sha_dev *dd, int err) clk_disable(dd->iclk); if ((dd->is_async || dd->force_complete) && req->base.complete) - req->base.complete(&req->base, err); + ahash_request_complete(req, err); /* handle new request */ tasklet_schedule(&dd->queue_task); @@ -1080,7 +1080,7 @@ static int atmel_sha_handle_queue(struct atmel_sha_dev *dd, return ret; if (backlog) - backlog->complete(backlog, -EINPROGRESS); + crypto_request_complete(backlog, -EINPROGRESS); ctx = crypto_tfm_ctx(async_req->tfm); @@ -2099,10 +2099,9 @@ struct atmel_sha_authenc_reqctx { unsigned int digestlen; }; -static void atmel_sha_authenc_complete(struct crypto_async_request *areq, - int err) +static void atmel_sha_authenc_complete(void *data, int err) { - struct ahash_request *req = areq->data; + struct ahash_request *req = data; struct atmel_sha_authenc_reqctx *authctx = 
ahash_request_ctx(req); authctx->cb(authctx->aes_dev, err, authctx->base.dd->is_async); @@ -2509,6 +2508,7 @@ static void atmel_sha_get_cap(struct atmel_sha_dev *dd) /* keep only major version number */ switch (dd->hw_version & 0xff0) { case 0x700: + case 0x600: case 0x510: dd->caps.has_dma = 1; dd->caps.has_dualbuff = 1; diff --git a/drivers/crypto/atmel-sha204a.c b/drivers/crypto/atmel-sha204a.c index 272a06f0b588..4403dbb0f0b1 100644 --- a/drivers/crypto/atmel-sha204a.c +++ b/drivers/crypto/atmel-sha204a.c @@ -93,11 +93,10 @@ static int atmel_sha204a_rng_read(struct hwrng *rng, void *data, size_t max, static int atmel_sha204a_probe(struct i2c_client *client) { - const struct i2c_device_id *id = i2c_client_get_device_id(client); struct atmel_i2c_client_priv *i2c_priv; int ret; - ret = atmel_i2c_probe(client, id); + ret = atmel_i2c_probe(client); if (ret) return ret; diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c index 8b7bc1076e0d..b2d48c1649b9 100644 --- a/drivers/crypto/atmel-tdes.c +++ b/drivers/crypto/atmel-tdes.c @@ -590,7 +590,7 @@ static void atmel_tdes_finish_req(struct atmel_tdes_dev *dd, int err) if (!err && (rctx->mode & TDES_FLAGS_OPMODE_MASK) != TDES_FLAGS_ECB) atmel_tdes_set_iv_as_last_ciphertext_block(dd); - req->base.complete(&req->base, err); + skcipher_request_complete(req, err); } static int atmel_tdes_handle_queue(struct atmel_tdes_dev *dd, @@ -619,7 +619,7 @@ static int atmel_tdes_handle_queue(struct atmel_tdes_dev *dd, return ret; if (backlog) - backlog->complete(backlog, -EINPROGRESS); + crypto_request_complete(backlog, -EINPROGRESS); req = skcipher_request_cast(async_req); diff --git a/drivers/crypto/axis/artpec6_crypto.c b/drivers/crypto/axis/artpec6_crypto.c index 51c66afbe677..8493a45e1bd4 100644 --- a/drivers/crypto/axis/artpec6_crypto.c +++ b/drivers/crypto/axis/artpec6_crypto.c @@ -1621,7 +1621,7 @@ artpec6_crypto_xts_set_key(struct crypto_skcipher *cipher, const u8 *key, crypto_skcipher_ctx(cipher); int ret; - ret = xts_check_key(&cipher->base, key, keylen); + ret = xts_verify_key(cipher, key, keylen); if (ret) return ret; @@ -2143,13 +2143,13 @@ static void artpec6_crypto_task(unsigned long data) list_for_each_entry_safe(req, n, &complete_in_progress, complete_in_progress) { - req->req->complete(req->req, -EINPROGRESS); + crypto_request_complete(req->req, -EINPROGRESS); } } static void artpec6_crypto_complete_crypto(struct crypto_async_request *req) { - req->complete(req, 0); + crypto_request_complete(req, 0); } static void @@ -2161,7 +2161,7 @@ artpec6_crypto_complete_cbc_decrypt(struct crypto_async_request *req) scatterwalk_map_and_copy(cipher_req->iv, cipher_req->src, cipher_req->cryptlen - AES_BLOCK_SIZE, AES_BLOCK_SIZE, 0); - req->complete(req, 0); + skcipher_request_complete(cipher_req, 0); } static void @@ -2173,7 +2173,7 @@ artpec6_crypto_complete_cbc_encrypt(struct crypto_async_request *req) scatterwalk_map_and_copy(cipher_req->iv, cipher_req->dst, cipher_req->cryptlen - AES_BLOCK_SIZE, AES_BLOCK_SIZE, 0); - req->complete(req, 0); + skcipher_request_complete(cipher_req, 0); } static void artpec6_crypto_complete_aead(struct crypto_async_request *req) @@ -2211,12 +2211,12 @@ static void artpec6_crypto_complete_aead(struct crypto_async_request *req) } } - req->complete(req, result); + aead_request_complete(areq, result); } static void artpec6_crypto_complete_hash(struct crypto_async_request *req) { - req->complete(req, 0); + crypto_request_complete(req, 0); } diff --git a/drivers/crypto/bcm/cipher.c 
b/drivers/crypto/bcm/cipher.c index c8c799428fe0..70b911baab26 100644 --- a/drivers/crypto/bcm/cipher.c +++ b/drivers/crypto/bcm/cipher.c @@ -1614,7 +1614,7 @@ static void finish_req(struct iproc_reqctx_s *rctx, int err) spu_chunk_cleanup(rctx); if (areq) - areq->complete(areq, err); + crypto_request_complete(areq, err); } /** @@ -2570,66 +2570,29 @@ static int aead_need_fallback(struct aead_request *req) return payload_len > ctx->max_payload; } -static void aead_complete(struct crypto_async_request *areq, int err) -{ - struct aead_request *req = - container_of(areq, struct aead_request, base); - struct iproc_reqctx_s *rctx = aead_request_ctx(req); - struct crypto_aead *aead = crypto_aead_reqtfm(req); - - flow_log("%s() err:%d\n", __func__, err); - - areq->tfm = crypto_aead_tfm(aead); - - areq->complete = rctx->old_complete; - areq->data = rctx->old_data; - - areq->complete(areq, err); -} - static int aead_do_fallback(struct aead_request *req, bool is_encrypt) { struct crypto_aead *aead = crypto_aead_reqtfm(req); struct crypto_tfm *tfm = crypto_aead_tfm(aead); struct iproc_reqctx_s *rctx = aead_request_ctx(req); struct iproc_ctx_s *ctx = crypto_tfm_ctx(tfm); - int err; - u32 req_flags; + struct aead_request *subreq; flow_log("%s() enc:%u\n", __func__, is_encrypt); - if (ctx->fallback_cipher) { - /* Store the cipher tfm and then use the fallback tfm */ - rctx->old_tfm = tfm; - aead_request_set_tfm(req, ctx->fallback_cipher); - /* - * Save the callback and chain ourselves in, so we can restore - * the tfm - */ - rctx->old_complete = req->base.complete; - rctx->old_data = req->base.data; - req_flags = aead_request_flags(req); - aead_request_set_callback(req, req_flags, aead_complete, req); - err = is_encrypt ? crypto_aead_encrypt(req) : - crypto_aead_decrypt(req); - - if (err == 0) { - /* - * fallback was synchronous (did not return - * -EINPROGRESS). So restore request state here. - */ - aead_request_set_callback(req, req_flags, - rctx->old_complete, req); - req->base.data = rctx->old_data; - aead_request_set_tfm(req, aead); - flow_log("%s() fallback completed successfully\n\n", - __func__); - } - } else { - err = -EINVAL; - } + if (!ctx->fallback_cipher) + return -EINVAL; - return err; + subreq = &rctx->req; + aead_request_set_tfm(subreq, ctx->fallback_cipher); + aead_request_set_callback(subreq, aead_request_flags(req), + req->base.complete, req->base.data); + aead_request_set_crypt(subreq, req->src, req->dst, req->cryptlen, + req->iv); + aead_request_set_ad(subreq, req->assoclen); + + return is_encrypt ? 
crypto_aead_encrypt(req) : + crypto_aead_decrypt(req); } static int aead_enqueue(struct aead_request *req, bool is_encrypt) @@ -4243,6 +4206,7 @@ static int ahash_cra_init(struct crypto_tfm *tfm) static int aead_cra_init(struct crypto_aead *aead) { + unsigned int reqsize = sizeof(struct iproc_reqctx_s); struct crypto_tfm *tfm = crypto_aead_tfm(aead); struct iproc_ctx_s *ctx = crypto_tfm_ctx(tfm); struct crypto_alg *alg = tfm->__crt_alg; @@ -4254,7 +4218,6 @@ static int aead_cra_init(struct crypto_aead *aead) flow_log("%s()\n", __func__); - crypto_aead_set_reqsize(aead, sizeof(struct iproc_reqctx_s)); ctx->is_esp = false; ctx->salt_len = 0; ctx->salt_offset = 0; @@ -4263,22 +4226,29 @@ static int aead_cra_init(struct crypto_aead *aead) get_random_bytes(ctx->iv, MAX_IV_SIZE); flow_dump(" iv: ", ctx->iv, MAX_IV_SIZE); - if (!err) { - if (alg->cra_flags & CRYPTO_ALG_NEED_FALLBACK) { - flow_log("%s() creating fallback cipher\n", __func__); - - ctx->fallback_cipher = - crypto_alloc_aead(alg->cra_name, 0, - CRYPTO_ALG_ASYNC | - CRYPTO_ALG_NEED_FALLBACK); - if (IS_ERR(ctx->fallback_cipher)) { - pr_err("%s() Error: failed to allocate fallback for %s\n", - __func__, alg->cra_name); - return PTR_ERR(ctx->fallback_cipher); - } - } + if (err) + goto out; + + if (!(alg->cra_flags & CRYPTO_ALG_NEED_FALLBACK)) + goto reqsize; + + flow_log("%s() creating fallback cipher\n", __func__); + + ctx->fallback_cipher = crypto_alloc_aead(alg->cra_name, 0, + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(ctx->fallback_cipher)) { + pr_err("%s() Error: failed to allocate fallback for %s\n", + __func__, alg->cra_name); + return PTR_ERR(ctx->fallback_cipher); } + reqsize += crypto_aead_reqsize(ctx->fallback_cipher); + +reqsize: + crypto_aead_set_reqsize(aead, reqsize); + +out: return err; } diff --git a/drivers/crypto/bcm/cipher.h b/drivers/crypto/bcm/cipher.h index d6d87332140a..e36881c983cf 100644 --- a/drivers/crypto/bcm/cipher.h +++ b/drivers/crypto/bcm/cipher.h @@ -339,15 +339,12 @@ struct iproc_reqctx_s { /* hmac context */ bool is_sw_hmac; - /* aead context */ - struct crypto_tfm *old_tfm; - crypto_completion_t old_complete; - void *old_data; - gfp_t gfp; /* Buffers used to build SPU request and response messages */ struct spu_msg_buf msg_buf; + + struct aead_request req; }; /* diff --git a/drivers/crypto/caam/blob_gen.c b/drivers/crypto/caam/blob_gen.c index f46b161d2cda..87781c1534ee 100644 --- a/drivers/crypto/caam/blob_gen.c +++ b/drivers/crypto/caam/blob_gen.c @@ -83,7 +83,7 @@ int caam_process_blob(struct caam_blob_priv *priv, output_len = info->input_len - CAAM_BLOB_OVERHEAD; } - desc = kzalloc(CAAM_BLOB_DESC_BYTES_MAX, GFP_KERNEL | GFP_DMA); + desc = kzalloc(CAAM_BLOB_DESC_BYTES_MAX, GFP_KERNEL); if (!desc) return -ENOMEM; diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index ecc15bc521db..4a9b998a8d26 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -59,6 +59,8 @@ #include <crypto/engine.h> #include <crypto/xts.h> #include <asm/unaligned.h> +#include <linux/dma-mapping.h> +#include <linux/kernel.h> /* * crypto alg @@ -1379,8 +1381,7 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, sec4_sg_bytes = sec4_sg_len * sizeof(struct sec4_sg_entry); /* allocate space for base edesc and hw desc commands, link tables */ - edesc = kzalloc(sizeof(*edesc) + desc_bytes + sec4_sg_bytes, - GFP_DMA | flags); + edesc = kzalloc(sizeof(*edesc) + desc_bytes + sec4_sg_bytes, flags); if (!edesc) { caam_unmap(jrdev, req->src, 
req->dst, src_nents, dst_nents, 0, 0, 0, 0); @@ -1608,6 +1609,7 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req, u8 *iv; int ivsize = crypto_skcipher_ivsize(skcipher); int dst_sg_idx, sec4_sg_ents, sec4_sg_bytes; + unsigned int aligned_size; src_nents = sg_nents_for_len(req->src, req->cryptlen); if (unlikely(src_nents < 0)) { @@ -1681,15 +1683,18 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req, /* * allocate space for base edesc and hw desc commands, link tables, IV */ - edesc = kzalloc(sizeof(*edesc) + desc_bytes + sec4_sg_bytes + ivsize, - GFP_DMA | flags); - if (!edesc) { + aligned_size = ALIGN(ivsize, __alignof__(*edesc)); + aligned_size += sizeof(*edesc) + desc_bytes + sec4_sg_bytes; + aligned_size = ALIGN(aligned_size, dma_get_cache_alignment()); + iv = kzalloc(aligned_size, flags); + if (!iv) { dev_err(jrdev, "could not allocate extended descriptor\n"); caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0, 0, 0, 0); return ERR_PTR(-ENOMEM); } + edesc = (void *)(iv + ALIGN(ivsize, __alignof__(*edesc))); edesc->src_nents = src_nents; edesc->dst_nents = dst_nents; edesc->mapped_src_nents = mapped_src_nents; @@ -1701,7 +1706,6 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req, /* Make sure IV is located in a DMAable area */ if (ivsize) { - iv = (u8 *)edesc->sec4_sg + sec4_sg_bytes; memcpy(iv, req->iv, ivsize); iv_dma = dma_map_single(jrdev, iv, ivsize, DMA_BIDIRECTIONAL); diff --git a/drivers/crypto/caam/caamalg_qi.c b/drivers/crypto/caam/caamalg_qi.c index c37b67be0492..5e218bf20d5b 100644 --- a/drivers/crypto/caam/caamalg_qi.c +++ b/drivers/crypto/caam/caamalg_qi.c @@ -20,6 +20,8 @@ #include "caamalg_desc.h" #include <crypto/xts.h> #include <asm/unaligned.h> +#include <linux/dma-mapping.h> +#include <linux/kernel.h> /* * crypto alg @@ -959,7 +961,7 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, return (struct aead_edesc *)drv_ctx; /* allocate space for base edesc and hw desc commands, link tables */ - edesc = qi_cache_alloc(GFP_DMA | flags); + edesc = qi_cache_alloc(flags); if (unlikely(!edesc)) { dev_err(qidev, "could not allocate extended descriptor\n"); return ERR_PTR(-ENOMEM); @@ -1317,8 +1319,9 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req, qm_sg_ents = 1 + pad_sg_nents(qm_sg_ents); qm_sg_bytes = qm_sg_ents * sizeof(struct qm_sg_entry); - if (unlikely(offsetof(struct skcipher_edesc, sgt) + qm_sg_bytes + - ivsize > CAAM_QI_MEMCACHE_SIZE)) { + if (unlikely(ALIGN(ivsize, __alignof__(*edesc)) + + offsetof(struct skcipher_edesc, sgt) + qm_sg_bytes > + CAAM_QI_MEMCACHE_SIZE)) { dev_err(qidev, "No space for %d S/G entries and/or %dB IV\n", qm_sg_ents, ivsize); caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0, @@ -1327,17 +1330,18 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req, } /* allocate space for base edesc, link tables and IV */ - edesc = qi_cache_alloc(GFP_DMA | flags); - if (unlikely(!edesc)) { + iv = qi_cache_alloc(flags); + if (unlikely(!iv)) { dev_err(qidev, "could not allocate extended descriptor\n"); caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0, 0, DMA_NONE, 0, 0); return ERR_PTR(-ENOMEM); } + edesc = (void *)(iv + ALIGN(ivsize, __alignof__(*edesc))); + /* Make sure IV is located in a DMAable area */ sg_table = &edesc->sgt[0]; - iv = (u8 *)(sg_table + qm_sg_ents); memcpy(iv, req->iv, ivsize); iv_dma = dma_map_single(qidev, iv, ivsize, 
DMA_BIDIRECTIONAL); diff --git a/drivers/crypto/caam/caamalg_qi2.c b/drivers/crypto/caam/caamalg_qi2.c index 1b0dd742c53f..5c8d35edaa1c 100644 --- a/drivers/crypto/caam/caamalg_qi2.c +++ b/drivers/crypto/caam/caamalg_qi2.c @@ -16,7 +16,9 @@ #include "caamalg_desc.h" #include "caamhash_desc.h" #include "dpseci-debugfs.h" +#include <linux/dma-mapping.h> #include <linux/fsl/mc.h> +#include <linux/kernel.h> #include <soc/fsl/dpaa2-io.h> #include <soc/fsl/dpaa2-fd.h> #include <crypto/xts.h> @@ -370,7 +372,7 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, struct dpaa2_sg_entry *sg_table; /* allocate space for base edesc, link tables and IV */ - edesc = qi_cache_zalloc(GFP_DMA | flags); + edesc = qi_cache_zalloc(flags); if (unlikely(!edesc)) { dev_err(dev, "could not allocate extended descriptor\n"); return ERR_PTR(-ENOMEM); @@ -1189,7 +1191,7 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req) } /* allocate space for base edesc, link tables and IV */ - edesc = qi_cache_zalloc(GFP_DMA | flags); + edesc = qi_cache_zalloc(flags); if (unlikely(!edesc)) { dev_err(dev, "could not allocate extended descriptor\n"); caam_unmap(dev, req->src, req->dst, src_nents, dst_nents, 0, @@ -3220,14 +3222,14 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, u32 *keylen, u8 *key, int ret = -ENOMEM; struct dpaa2_fl_entry *in_fle, *out_fle; - req_ctx = kzalloc(sizeof(*req_ctx), GFP_KERNEL | GFP_DMA); + req_ctx = kzalloc(sizeof(*req_ctx), GFP_KERNEL); if (!req_ctx) return -ENOMEM; in_fle = &req_ctx->fd_flt[1]; out_fle = &req_ctx->fd_flt[0]; - flc = kzalloc(sizeof(*flc), GFP_KERNEL | GFP_DMA); + flc = kzalloc(sizeof(*flc), GFP_KERNEL); if (!flc) goto err_flc; @@ -3316,7 +3318,13 @@ static int ahash_setkey(struct crypto_ahash *ahash, const u8 *key, dev_dbg(ctx->dev, "keylen %d blocksize %d\n", keylen, blocksize); if (keylen > blocksize) { - hashed_key = kmemdup(key, keylen, GFP_KERNEL | GFP_DMA); + unsigned int aligned_len = + ALIGN(keylen, dma_get_cache_alignment()); + + if (aligned_len < keylen) + return -EOVERFLOW; + + hashed_key = kmemdup(key, aligned_len, GFP_KERNEL); if (!hashed_key) return -ENOMEM; ret = hash_digest_key(ctx, &keylen, hashed_key, digestsize); @@ -3411,7 +3419,7 @@ static void ahash_done(void *cbk_ctx, u32 status) DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx, ctx->ctx_len, 1); - req->base.complete(&req->base, ecode); + ahash_request_complete(req, ecode); } static void ahash_done_bi(void *cbk_ctx, u32 status) @@ -3449,7 +3457,7 @@ static void ahash_done_bi(void *cbk_ctx, u32 status) DUMP_PREFIX_ADDRESS, 16, 4, req->result, crypto_ahash_digestsize(ahash), 1); - req->base.complete(&req->base, ecode); + ahash_request_complete(req, ecode); } static void ahash_done_ctx_src(void *cbk_ctx, u32 status) @@ -3476,7 +3484,7 @@ static void ahash_done_ctx_src(void *cbk_ctx, u32 status) DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx, ctx->ctx_len, 1); - req->base.complete(&req->base, ecode); + ahash_request_complete(req, ecode); } static void ahash_done_ctx_dst(void *cbk_ctx, u32 status) @@ -3514,7 +3522,7 @@ static void ahash_done_ctx_dst(void *cbk_ctx, u32 status) DUMP_PREFIX_ADDRESS, 16, 4, req->result, crypto_ahash_digestsize(ahash), 1); - req->base.complete(&req->base, ecode); + ahash_request_complete(req, ecode); } static int ahash_update_ctx(struct ahash_request *req) @@ -3560,7 +3568,7 @@ static int ahash_update_ctx(struct ahash_request *req) } /* allocate space for base edesc and link tables */ - edesc = qi_cache_zalloc(GFP_DMA | flags); + edesc = 
qi_cache_zalloc(flags); if (!edesc) { dma_unmap_sg(ctx->dev, req->src, src_nents, DMA_TO_DEVICE); @@ -3654,7 +3662,7 @@ static int ahash_final_ctx(struct ahash_request *req) int ret; /* allocate space for base edesc and link tables */ - edesc = qi_cache_zalloc(GFP_DMA | flags); + edesc = qi_cache_zalloc(flags); if (!edesc) return -ENOMEM; @@ -3743,7 +3751,7 @@ static int ahash_finup_ctx(struct ahash_request *req) } /* allocate space for base edesc and link tables */ - edesc = qi_cache_zalloc(GFP_DMA | flags); + edesc = qi_cache_zalloc(flags); if (!edesc) { dma_unmap_sg(ctx->dev, req->src, src_nents, DMA_TO_DEVICE); return -ENOMEM; @@ -3836,7 +3844,7 @@ static int ahash_digest(struct ahash_request *req) } /* allocate space for base edesc and link tables */ - edesc = qi_cache_zalloc(GFP_DMA | flags); + edesc = qi_cache_zalloc(flags); if (!edesc) { dma_unmap_sg(ctx->dev, req->src, src_nents, DMA_TO_DEVICE); return ret; @@ -3913,7 +3921,7 @@ static int ahash_final_no_ctx(struct ahash_request *req) int ret = -ENOMEM; /* allocate space for base edesc and link tables */ - edesc = qi_cache_zalloc(GFP_DMA | flags); + edesc = qi_cache_zalloc(flags); if (!edesc) return ret; @@ -4012,7 +4020,7 @@ static int ahash_update_no_ctx(struct ahash_request *req) } /* allocate space for base edesc and link tables */ - edesc = qi_cache_zalloc(GFP_DMA | flags); + edesc = qi_cache_zalloc(flags); if (!edesc) { dma_unmap_sg(ctx->dev, req->src, src_nents, DMA_TO_DEVICE); @@ -4125,7 +4133,7 @@ static int ahash_finup_no_ctx(struct ahash_request *req) } /* allocate space for base edesc and link tables */ - edesc = qi_cache_zalloc(GFP_DMA | flags); + edesc = qi_cache_zalloc(flags); if (!edesc) { dma_unmap_sg(ctx->dev, req->src, src_nents, DMA_TO_DEVICE); return ret; @@ -4230,7 +4238,7 @@ static int ahash_update_first(struct ahash_request *req) } /* allocate space for base edesc and link tables */ - edesc = qi_cache_zalloc(GFP_DMA | flags); + edesc = qi_cache_zalloc(flags); if (!edesc) { dma_unmap_sg(ctx->dev, req->src, src_nents, DMA_TO_DEVICE); @@ -4926,6 +4934,7 @@ static int dpaa2_dpseci_congestion_setup(struct dpaa2_caam_priv *priv, { struct dpseci_congestion_notification_cfg cong_notif_cfg = { 0 }; struct device *dev = priv->dev; + unsigned int alignmask; int err; /* @@ -4936,13 +4945,14 @@ static int dpaa2_dpseci_congestion_setup(struct dpaa2_caam_priv *priv, !(priv->dpseci_attr.options & DPSECI_OPT_HAS_CG)) return 0; - priv->cscn_mem = kzalloc(DPAA2_CSCN_SIZE + DPAA2_CSCN_ALIGN, - GFP_KERNEL | GFP_DMA); + alignmask = DPAA2_CSCN_ALIGN - 1; + alignmask |= dma_get_cache_alignment() - 1; + priv->cscn_mem = kzalloc(ALIGN(DPAA2_CSCN_SIZE, alignmask + 1), + GFP_KERNEL); if (!priv->cscn_mem) return -ENOMEM; - priv->cscn_mem_aligned = PTR_ALIGN(priv->cscn_mem, DPAA2_CSCN_ALIGN); - priv->cscn_dma = dma_map_single(dev, priv->cscn_mem_aligned, + priv->cscn_dma = dma_map_single(dev, priv->cscn_mem, DPAA2_CSCN_SIZE, DMA_FROM_DEVICE); if (dma_mapping_error(dev, priv->cscn_dma)) { dev_err(dev, "Error mapping CSCN memory area\n"); @@ -5174,7 +5184,7 @@ static int dpaa2_caam_probe(struct fsl_mc_device *dpseci_dev) priv->domain = iommu_get_domain_for_dev(dev); qi_cache = kmem_cache_create("dpaa2_caamqicache", CAAM_QI_MEMCACHE_SIZE, - 0, SLAB_CACHE_DMA, NULL); + 0, 0, NULL); if (!qi_cache) { dev_err(dev, "Can't allocate SEC cache\n"); return -ENOMEM; @@ -5451,7 +5461,7 @@ int dpaa2_caam_enqueue(struct device *dev, struct caam_request *req) dma_sync_single_for_cpu(priv->dev, priv->cscn_dma, DPAA2_CSCN_SIZE, DMA_FROM_DEVICE); - if 
(unlikely(dpaa2_cscn_state_congested(priv->cscn_mem_aligned))) { + if (unlikely(dpaa2_cscn_state_congested(priv->cscn_mem))) { dev_dbg_ratelimited(dev, "Dropping request\n"); return -EBUSY; } diff --git a/drivers/crypto/caam/caamalg_qi2.h b/drivers/crypto/caam/caamalg_qi2.h index d35253407ade..abb502bb675c 100644 --- a/drivers/crypto/caam/caamalg_qi2.h +++ b/drivers/crypto/caam/caamalg_qi2.h @@ -7,13 +7,14 @@ #ifndef _CAAMALG_QI2_H_ #define _CAAMALG_QI2_H_ +#include <crypto/internal/skcipher.h> +#include <linux/compiler_attributes.h> #include <soc/fsl/dpaa2-io.h> #include <soc/fsl/dpaa2-fd.h> #include <linux/threads.h> #include <linux/netdevice.h> #include "dpseci.h" #include "desc_constr.h" -#include <crypto/skcipher.h> #define DPAA2_CAAM_STORE_SIZE 16 /* NAPI weight *must* be a multiple of the store size. */ @@ -36,8 +37,6 @@ * @tx_queue_attr: array of Tx queue attributes * @cscn_mem: pointer to memory region containing the congestion SCN * it's size is larger than to accommodate alignment - * @cscn_mem_aligned: pointer to congestion SCN; it is computed as - * PTR_ALIGN(cscn_mem, DPAA2_CSCN_ALIGN) * @cscn_dma: dma address used by the QMAN to write CSCN messages * @dev: device associated with the DPSECI object * @mc_io: pointer to MC portal's I/O object @@ -58,7 +57,6 @@ struct dpaa2_caam_priv { /* congestion */ void *cscn_mem; - void *cscn_mem_aligned; dma_addr_t cscn_dma; struct device *dev; @@ -158,7 +156,7 @@ struct ahash_edesc { struct caam_flc { u32 flc[16]; u32 sh_desc[MAX_SDLEN]; -} ____cacheline_aligned; +} __aligned(CRYPTO_DMA_ALIGN); enum optype { ENCRYPT = 0, @@ -180,7 +178,7 @@ enum optype { * @edesc: extended descriptor; points to one of {skcipher,aead}_edesc */ struct caam_request { - struct dpaa2_fl_entry fd_flt[2]; + struct dpaa2_fl_entry fd_flt[2] __aligned(CRYPTO_DMA_ALIGN); dma_addr_t fd_flt_dma; struct caam_flc *flc; dma_addr_t flc_dma; diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 1050e965a438..82d3c730a502 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -66,6 +66,8 @@ #include "key_gen.h" #include "caamhash_desc.h" #include <crypto/engine.h> +#include <linux/dma-mapping.h> +#include <linux/kernel.h> #define CAAM_CRA_PRIORITY 3000 @@ -365,7 +367,7 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, u32 *keylen, u8 *key, dma_addr_t key_dma; int ret; - desc = kmalloc(CAAM_CMD_SZ * 8 + CAAM_PTR_SZ * 2, GFP_KERNEL | GFP_DMA); + desc = kmalloc(CAAM_CMD_SZ * 8 + CAAM_PTR_SZ * 2, GFP_KERNEL); if (!desc) { dev_err(jrdev, "unable to allocate key input memory\n"); return -ENOMEM; @@ -432,7 +434,13 @@ static int ahash_setkey(struct crypto_ahash *ahash, dev_dbg(jrdev, "keylen %d\n", keylen); if (keylen > blocksize) { - hashed_key = kmemdup(key, keylen, GFP_KERNEL | GFP_DMA); + unsigned int aligned_len = + ALIGN(keylen, dma_get_cache_alignment()); + + if (aligned_len < keylen) + return -EOVERFLOW; + + hashed_key = kmemdup(key, keylen, GFP_KERNEL); if (!hashed_key) return -ENOMEM; ret = hash_digest_key(ctx, &keylen, hashed_key, digestsize); @@ -606,7 +614,7 @@ static inline void ahash_done_cpy(struct device *jrdev, u32 *desc, u32 err, * by CAAM, not crypto engine. */ if (!has_bklog) - req->base.complete(&req->base, ecode); + ahash_request_complete(req, ecode); else crypto_finalize_hash_request(jrp->engine, req, ecode); } @@ -668,7 +676,7 @@ static inline void ahash_done_switch(struct device *jrdev, u32 *desc, u32 err, * by CAAM, not crypto engine. 
*/ if (!has_bklog) - req->base.complete(&req->base, ecode); + ahash_request_complete(req, ecode); else crypto_finalize_hash_request(jrp->engine, req, ecode); @@ -702,7 +710,7 @@ static struct ahash_edesc *ahash_edesc_alloc(struct ahash_request *req, struct ahash_edesc *edesc; unsigned int sg_size = sg_num * sizeof(struct sec4_sg_entry); - edesc = kzalloc(sizeof(*edesc) + sg_size, GFP_DMA | flags); + edesc = kzalloc(sizeof(*edesc) + sg_size, flags); if (!edesc) { dev_err(ctx->jrdev, "could not allocate extended descriptor\n"); return NULL; diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c index aef031946f33..e40614fef39d 100644 --- a/drivers/crypto/caam/caampkc.c +++ b/drivers/crypto/caam/caampkc.c @@ -16,6 +16,8 @@ #include "desc_constr.h" #include "sg_sw_sec4.h" #include "caampkc.h" +#include <linux/dma-mapping.h> +#include <linux/kernel.h> #define DESC_RSA_PUB_LEN (2 * CAAM_CMD_SZ + SIZEOF_RSA_PUB_PDB) #define DESC_RSA_PRIV_F1_LEN (2 * CAAM_CMD_SZ + \ @@ -310,8 +312,7 @@ static struct rsa_edesc *rsa_edesc_alloc(struct akcipher_request *req, sec4_sg_bytes = sec4_sg_len * sizeof(struct sec4_sg_entry); /* allocate space for base edesc, hw desc commands and link tables */ - edesc = kzalloc(sizeof(*edesc) + desclen + sec4_sg_bytes, - GFP_DMA | flags); + edesc = kzalloc(sizeof(*edesc) + desclen + sec4_sg_bytes, flags); if (!edesc) goto dst_fail; @@ -898,7 +899,7 @@ static u8 *caam_read_rsa_crt(const u8 *ptr, size_t nbytes, size_t dstlen) if (!nbytes) return NULL; - dst = kzalloc(dstlen, GFP_DMA | GFP_KERNEL); + dst = kzalloc(dstlen, GFP_KERNEL); if (!dst) return NULL; @@ -910,7 +911,7 @@ static u8 *caam_read_rsa_crt(const u8 *ptr, size_t nbytes, size_t dstlen) /** * caam_read_raw_data - Read a raw byte stream as a positive integer. * The function skips buffer's leading zeros, copies the remained data - * to a buffer allocated in the GFP_DMA | GFP_KERNEL zone and returns + * to a buffer allocated in the GFP_KERNEL zone and returns * the address of the new buffer. * * @buf : The data to read @@ -923,7 +924,7 @@ static inline u8 *caam_read_raw_data(const u8 *buf, size_t *nbytes) if (!*nbytes) return NULL; - return kmemdup(buf, *nbytes, GFP_DMA | GFP_KERNEL); + return kmemdup(buf, *nbytes, GFP_KERNEL); } static int caam_rsa_check_key_length(unsigned int len) @@ -949,13 +950,13 @@ static int caam_rsa_set_pub_key(struct crypto_akcipher *tfm, const void *key, return ret; /* Copy key in DMA zone */ - rsa_key->e = kmemdup(raw_key.e, raw_key.e_sz, GFP_DMA | GFP_KERNEL); + rsa_key->e = kmemdup(raw_key.e, raw_key.e_sz, GFP_KERNEL); if (!rsa_key->e) goto err; /* * Skip leading zeros and copy the positive integer to a buffer - * allocated in the GFP_DMA | GFP_KERNEL zone. The decryption descriptor + * allocated in the GFP_KERNEL zone. The decryption descriptor * expects a positive integer for the RSA modulus and uses its length as * decryption output length. 
*/ @@ -983,6 +984,7 @@ static void caam_rsa_set_priv_key_form(struct caam_rsa_ctx *ctx, struct caam_rsa_key *rsa_key = &ctx->key; size_t p_sz = raw_key->p_sz; size_t q_sz = raw_key->q_sz; + unsigned aligned_size; rsa_key->p = caam_read_raw_data(raw_key->p, &p_sz); if (!rsa_key->p) @@ -994,11 +996,13 @@ static void caam_rsa_set_priv_key_form(struct caam_rsa_ctx *ctx, goto free_p; rsa_key->q_sz = q_sz; - rsa_key->tmp1 = kzalloc(raw_key->p_sz, GFP_DMA | GFP_KERNEL); + aligned_size = ALIGN(raw_key->p_sz, dma_get_cache_alignment()); + rsa_key->tmp1 = kzalloc(aligned_size, GFP_KERNEL); if (!rsa_key->tmp1) goto free_q; - rsa_key->tmp2 = kzalloc(raw_key->q_sz, GFP_DMA | GFP_KERNEL); + aligned_size = ALIGN(raw_key->q_sz, dma_get_cache_alignment()); + rsa_key->tmp2 = kzalloc(aligned_size, GFP_KERNEL); if (!rsa_key->tmp2) goto free_tmp1; @@ -1051,17 +1055,17 @@ static int caam_rsa_set_priv_key(struct crypto_akcipher *tfm, const void *key, return ret; /* Copy key in DMA zone */ - rsa_key->d = kmemdup(raw_key.d, raw_key.d_sz, GFP_DMA | GFP_KERNEL); + rsa_key->d = kmemdup(raw_key.d, raw_key.d_sz, GFP_KERNEL); if (!rsa_key->d) goto err; - rsa_key->e = kmemdup(raw_key.e, raw_key.e_sz, GFP_DMA | GFP_KERNEL); + rsa_key->e = kmemdup(raw_key.e, raw_key.e_sz, GFP_KERNEL); if (!rsa_key->e) goto err; /* * Skip leading zeros and copy the positive integer to a buffer - * allocated in the GFP_DMA | GFP_KERNEL zone. The decryption descriptor + * allocated in the GFP_KERNEL zone. The decryption descriptor * expects a positive integer for the RSA modulus and uses its length as * decryption output length. */ @@ -1185,8 +1189,7 @@ int caam_pkc_init(struct device *ctrldev) return 0; /* allocate zero buffer, used for padding input */ - zero_buffer = kzalloc(CAAM_RSA_MAX_INPUT_SIZE - 1, GFP_DMA | - GFP_KERNEL); + zero_buffer = kzalloc(CAAM_RSA_MAX_INPUT_SIZE - 1, GFP_KERNEL); if (!zero_buffer) return -ENOMEM; diff --git a/drivers/crypto/caam/caamprng.c b/drivers/crypto/caam/caamprng.c index 4839e66300a2..6e4c1191cb28 100644 --- a/drivers/crypto/caam/caamprng.c +++ b/drivers/crypto/caam/caamprng.c @@ -8,6 +8,8 @@ #include <linux/completion.h> #include <crypto/internal/rng.h> +#include <linux/dma-mapping.h> +#include <linux/kernel.h> #include "compat.h" #include "regs.h" #include "intern.h" @@ -75,6 +77,7 @@ static int caam_prng_generate(struct crypto_rng *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int dlen) { + unsigned int aligned_dlen = ALIGN(dlen, dma_get_cache_alignment()); struct caam_prng_ctx ctx; struct device *jrdev; dma_addr_t dst_dma; @@ -82,7 +85,10 @@ static int caam_prng_generate(struct crypto_rng *tfm, u8 *buf; int ret; - buf = kzalloc(dlen, GFP_KERNEL); + if (aligned_dlen < dlen) + return -EOVERFLOW; + + buf = kzalloc(aligned_dlen, GFP_KERNEL); if (!buf) return -ENOMEM; @@ -94,7 +100,7 @@ static int caam_prng_generate(struct crypto_rng *tfm, return ret; } - desc = kzalloc(CAAM_PRNG_MAX_DESC_LEN, GFP_KERNEL | GFP_DMA); + desc = kzalloc(CAAM_PRNG_MAX_DESC_LEN, GFP_KERNEL); if (!desc) { ret = -ENOMEM; goto out1; @@ -156,7 +162,7 @@ static int caam_prng_seed(struct crypto_rng *tfm, return ret; } - desc = kzalloc(CAAM_PRNG_MAX_DESC_LEN, GFP_KERNEL | GFP_DMA); + desc = kzalloc(CAAM_PRNG_MAX_DESC_LEN, GFP_KERNEL); if (!desc) { caam_jr_free(jrdev); return -ENOMEM; diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c index 1f0e82050976..1fd8ff965006 100644 --- a/drivers/crypto/caam/caamrng.c +++ b/drivers/crypto/caam/caamrng.c @@ -12,6 +12,8 @@ #include <linux/hw_random.h> #include 
<linux/completion.h> #include <linux/atomic.h> +#include <linux/dma-mapping.h> +#include <linux/kernel.h> #include <linux/kfifo.h> #include "compat.h" @@ -176,17 +178,18 @@ static int caam_init(struct hwrng *rng) int err; ctx->desc_sync = devm_kzalloc(ctx->ctrldev, CAAM_RNG_DESC_LEN, - GFP_DMA | GFP_KERNEL); + GFP_KERNEL); if (!ctx->desc_sync) return -ENOMEM; ctx->desc_async = devm_kzalloc(ctx->ctrldev, CAAM_RNG_DESC_LEN, - GFP_DMA | GFP_KERNEL); + GFP_KERNEL); if (!ctx->desc_async) return -ENOMEM; - if (kfifo_alloc(&ctx->fifo, CAAM_RNG_MAX_FIFO_STORE_SIZE, - GFP_DMA | GFP_KERNEL)) + if (kfifo_alloc(&ctx->fifo, ALIGN(CAAM_RNG_MAX_FIFO_STORE_SIZE, + dma_get_cache_alignment()), + GFP_KERNEL)) return -ENOMEM; INIT_WORK(&ctx->worker, caam_rng_worker); diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index 32253a064d0f..6278afb951c3 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -199,7 +199,7 @@ static int deinstantiate_rng(struct device *ctrldev, int state_handle_mask) u32 *desc, status; int sh_idx, ret = 0; - desc = kmalloc(CAAM_CMD_SZ * 3, GFP_KERNEL | GFP_DMA); + desc = kmalloc(CAAM_CMD_SZ * 3, GFP_KERNEL); if (!desc) return -ENOMEM; @@ -276,7 +276,7 @@ static int instantiate_rng(struct device *ctrldev, int state_handle_mask, int ret = 0, sh_idx; ctrl = (struct caam_ctrl __iomem *)ctrlpriv->ctrl; - desc = kmalloc(CAAM_CMD_SZ * 7, GFP_KERNEL | GFP_DMA); + desc = kmalloc(CAAM_CMD_SZ * 7, GFP_KERNEL); if (!desc) return -ENOMEM; diff --git a/drivers/crypto/caam/desc_constr.h b/drivers/crypto/caam/desc_constr.h index 62ce6421bb3f..824c94d44f94 100644 --- a/drivers/crypto/caam/desc_constr.h +++ b/drivers/crypto/caam/desc_constr.h @@ -163,7 +163,8 @@ static inline void append_data(u32 * const desc, const void *data, int len) { u32 *offset = desc_end(desc); - if (len) /* avoid sparse warning: memcpy with byte count of 0 */ + /* Avoid gcc warning: memcpy with data == NULL */ + if (!IS_ENABLED(CONFIG_CRYPTO_DEV_FSL_CAAM_DEBUG) || data) memcpy(offset, data, len); (*desc) = cpu_to_caam32(caam32_to_cpu(*desc) + diff --git a/drivers/crypto/caam/key_gen.c b/drivers/crypto/caam/key_gen.c index b0e8a4939b4f..88cc4fe2a585 100644 --- a/drivers/crypto/caam/key_gen.c +++ b/drivers/crypto/caam/key_gen.c @@ -64,7 +64,7 @@ int gen_split_key(struct device *jrdev, u8 *key_out, if (local_max > max_keylen) return -EINVAL; - desc = kmalloc(CAAM_CMD_SZ * 6 + CAAM_PTR_SZ * 2, GFP_KERNEL | GFP_DMA); + desc = kmalloc(CAAM_CMD_SZ * 6 + CAAM_PTR_SZ * 2, GFP_KERNEL); if (!desc) { dev_err(jrdev, "unable to allocate key input memory\n"); return ret; diff --git a/drivers/crypto/caam/qi.c b/drivers/crypto/caam/qi.c index c36f27376d7e..4c52c9365558 100644 --- a/drivers/crypto/caam/qi.c +++ b/drivers/crypto/caam/qi.c @@ -614,7 +614,7 @@ static int alloc_rsp_fq_cpu(struct device *qidev, unsigned int cpu) struct qman_fq *fq; int ret; - fq = kzalloc(sizeof(*fq), GFP_KERNEL | GFP_DMA); + fq = kzalloc(sizeof(*fq), GFP_KERNEL); if (!fq) return -ENOMEM; @@ -756,7 +756,7 @@ int caam_qi_init(struct platform_device *caam_pdev) } qi_cache = kmem_cache_create("caamqicache", CAAM_QI_MEMCACHE_SIZE, 0, - SLAB_CACHE_DMA, NULL); + 0, NULL); if (!qi_cache) { dev_err(qidev, "Can't allocate CAAM cache\n"); free_rsp_fqs(); diff --git a/drivers/crypto/caam/qi.h b/drivers/crypto/caam/qi.h index 5894f16f8fe3..a96e3d213c06 100644 --- a/drivers/crypto/caam/qi.h +++ b/drivers/crypto/caam/qi.h @@ -9,6 +9,8 @@ #ifndef __QI_H__ #define __QI_H__ +#include <crypto/algapi.h> +#include <linux/compiler_attributes.h> 
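
The caam hunks above (caamalg, caamhash, caampkc, caamprng, caamrng, qi) all follow the same conversion from this series: drop GFP_DMA and SLAB_CACHE_DMA and instead round DMA-bound buffer sizes up to the cache alignment, leaving placement to the DMA API rather than a special allocation zone. A minimal sketch of that pattern follows; the helper name is hypothetical and not part of the patch.

#include <linux/dma-mapping.h>
#include <linux/kernel.h>
#include <linux/slab.h>

/*
 * Hypothetical helper (illustration only): size a buffer for streaming DMA
 * the way the caam conversions above do, by rounding the length up to
 * dma_get_cache_alignment() instead of allocating with GFP_DMA.
 */
static void *dma_safe_zalloc(size_t len, gfp_t flags)
{
	size_t aligned_len = ALIGN(len, dma_get_cache_alignment());

	if (aligned_len < len)		/* overflow check, as in ahash_setkey() */
		return NULL;

	return kzalloc(aligned_len, flags);
}
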
#include <soc/fsl/qman.h> #include "compat.h" #include "desc.h" @@ -58,8 +60,10 @@ enum optype { * @qidev: device pointer for CAAM/QI backend */ struct caam_drv_ctx { - u32 prehdr[2]; - u32 sh_desc[MAX_SDLEN]; + struct { + u32 prehdr[2]; + u32 sh_desc[MAX_SDLEN]; + } __aligned(CRYPTO_DMA_ALIGN); dma_addr_t context_a; struct qman_fq *req_fq; struct qman_fq *rsp_fq; @@ -67,7 +71,7 @@ struct caam_drv_ctx { int cpu; enum optype op_type; struct device *qidev; -} ____cacheline_aligned; +}; /** * caam_drv_req - The request structure the driver application should fill while @@ -88,7 +92,7 @@ struct caam_drv_req { struct caam_drv_ctx *drv_ctx; caam_qi_cbk cbk; void *app_ctx; -} ____cacheline_aligned; +} __aligned(CRYPTO_DMA_ALIGN); /** * caam_drv_ctx_init - Initialise a CAAM/QI driver context diff --git a/drivers/crypto/cavium/cpt/cptvf_algs.c b/drivers/crypto/cavium/cpt/cptvf_algs.c index 9eca0c302186..ee476c6c7f82 100644 --- a/drivers/crypto/cavium/cpt/cptvf_algs.c +++ b/drivers/crypto/cavium/cpt/cptvf_algs.c @@ -28,7 +28,7 @@ static void cvm_callback(u32 status, void *arg) { struct crypto_async_request *req = (struct crypto_async_request *)arg; - req->complete(req, !status); + crypto_request_complete(req, !status); } static inline void update_input_iv(struct cpt_request_info *req_info, @@ -232,13 +232,12 @@ static int cvm_decrypt(struct skcipher_request *req) static int cvm_xts_setkey(struct crypto_skcipher *cipher, const u8 *key, u32 keylen) { - struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher); - struct cvm_enc_ctx *ctx = crypto_tfm_ctx(tfm); + struct cvm_enc_ctx *ctx = crypto_skcipher_ctx(cipher); int err; const u8 *key1 = key; const u8 *key2 = key + (keylen / 2); - err = xts_check_key(tfm, key, keylen); + err = xts_verify_key(cipher, key, keylen); if (err) return err; ctx->key_len = keylen; @@ -289,8 +288,7 @@ static int cvm_validate_keylen(struct cvm_enc_ctx *ctx, u32 keylen) static int cvm_setkey(struct crypto_skcipher *cipher, const u8 *key, u32 keylen, u8 cipher_type) { - struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher); - struct cvm_enc_ctx *ctx = crypto_tfm_ctx(tfm); + struct cvm_enc_ctx *ctx = crypto_skcipher_ctx(cipher); ctx->cipher_type = cipher_type; if (!cvm_validate_keylen(ctx, keylen)) { diff --git a/drivers/crypto/cavium/nitrox/nitrox_aead.c b/drivers/crypto/cavium/nitrox/nitrox_aead.c index 0653484df23f..b0e53034164a 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_aead.c +++ b/drivers/crypto/cavium/nitrox/nitrox_aead.c @@ -199,7 +199,7 @@ static void nitrox_aead_callback(void *arg, int err) err = -EINVAL; } - areq->base.complete(&areq->base, err); + aead_request_complete(areq, err); } static inline bool nitrox_aes_gcm_assoclen_supported(unsigned int assoclen) @@ -434,7 +434,7 @@ static void nitrox_rfc4106_callback(void *arg, int err) err = -EINVAL; } - areq->base.complete(&areq->base, err); + aead_request_complete(areq, err); } static int nitrox_rfc4106_enc(struct aead_request *areq) diff --git a/drivers/crypto/cavium/nitrox/nitrox_skcipher.c b/drivers/crypto/cavium/nitrox/nitrox_skcipher.c index 248b4fff1c72..138261dcd032 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_skcipher.c +++ b/drivers/crypto/cavium/nitrox/nitrox_skcipher.c @@ -337,12 +337,11 @@ static int nitrox_3des_decrypt(struct skcipher_request *skreq) static int nitrox_aes_xts_setkey(struct crypto_skcipher *cipher, const u8 *key, unsigned int keylen) { - struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher); - struct nitrox_crypto_ctx *nctx = crypto_tfm_ctx(tfm); + struct nitrox_crypto_ctx *nctx = 
crypto_skcipher_ctx(cipher); struct flexi_crypto_context *fctx; int aes_keylen, ret; - ret = xts_check_key(tfm, key, keylen); + ret = xts_verify_key(cipher, key, keylen); if (ret) return ret; @@ -362,8 +361,7 @@ static int nitrox_aes_xts_setkey(struct crypto_skcipher *cipher, static int nitrox_aes_ctr_rfc3686_setkey(struct crypto_skcipher *cipher, const u8 *key, unsigned int keylen) { - struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher); - struct nitrox_crypto_ctx *nctx = crypto_tfm_ctx(tfm); + struct nitrox_crypto_ctx *nctx = crypto_skcipher_ctx(cipher); struct flexi_crypto_context *fctx; int aes_keylen; diff --git a/drivers/crypto/ccp/ccp-crypto-main.c b/drivers/crypto/ccp/ccp-crypto-main.c index 73442a382f68..ecd58b38c46e 100644 --- a/drivers/crypto/ccp/ccp-crypto-main.c +++ b/drivers/crypto/ccp/ccp-crypto-main.c @@ -146,7 +146,7 @@ static void ccp_crypto_complete(void *data, int err) /* Only propagate the -EINPROGRESS if necessary */ if (crypto_cmd->ret == -EBUSY) { crypto_cmd->ret = -EINPROGRESS; - req->complete(req, -EINPROGRESS); + crypto_request_complete(req, -EINPROGRESS); } return; @@ -159,18 +159,18 @@ static void ccp_crypto_complete(void *data, int err) held = ccp_crypto_cmd_complete(crypto_cmd, &backlog); if (backlog) { backlog->ret = -EINPROGRESS; - backlog->req->complete(backlog->req, -EINPROGRESS); + crypto_request_complete(backlog->req, -EINPROGRESS); } /* Transition the state from -EBUSY to -EINPROGRESS first */ if (crypto_cmd->ret == -EBUSY) - req->complete(req, -EINPROGRESS); + crypto_request_complete(req, -EINPROGRESS); /* Completion callbacks */ ret = err; if (ctx->complete) ret = ctx->complete(req, ret); - req->complete(req, ret); + crypto_request_complete(req, ret); /* Submit the next cmd */ while (held) { @@ -186,12 +186,12 @@ static void ccp_crypto_complete(void *data, int err) ctx = crypto_tfm_ctx_dma(held->req->tfm); if (ctx->complete) ret = ctx->complete(held->req, ret); - held->req->complete(held->req, ret); + crypto_request_complete(held->req, ret); next = ccp_crypto_cmd_complete(held, &backlog); if (backlog) { backlog->ret = -EINPROGRESS; - backlog->req->complete(backlog->req, -EINPROGRESS); + crypto_request_complete(backlog->req, -EINPROGRESS); } kfree(held); diff --git a/drivers/crypto/ccp/ccp-dmaengine.c b/drivers/crypto/ccp/ccp-dmaengine.c index 9f753cb4f5f1..b386a7063818 100644 --- a/drivers/crypto/ccp/ccp-dmaengine.c +++ b/drivers/crypto/ccp/ccp-dmaengine.c @@ -642,14 +642,26 @@ static void ccp_dma_release(struct ccp_device *ccp) chan = ccp->ccp_dma_chan + i; dma_chan = &chan->dma_chan; - if (dma_chan->client_count) - dma_release_channel(dma_chan); - tasklet_kill(&chan->cleanup_tasklet); list_del_rcu(&dma_chan->device_node); } } +static void ccp_dma_release_channels(struct ccp_device *ccp) +{ + struct ccp_dma_chan *chan; + struct dma_chan *dma_chan; + unsigned int i; + + for (i = 0; i < ccp->cmd_q_count; i++) { + chan = ccp->ccp_dma_chan + i; + dma_chan = &chan->dma_chan; + + if (dma_chan->client_count) + dma_release_channel(dma_chan); + } +} + int ccp_dmaengine_register(struct ccp_device *ccp) { struct ccp_dma_chan *chan; @@ -770,8 +782,9 @@ void ccp_dmaengine_unregister(struct ccp_device *ccp) if (!dmaengine) return; - ccp_dma_release(ccp); + ccp_dma_release_channels(ccp); dma_async_device_unregister(dma_dev); + ccp_dma_release(ccp); kmem_cache_destroy(ccp->dma_desc_cache); kmem_cache_destroy(ccp->dma_cmd_cache); diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index 06fc7156c04f..e2f25926eb51 100644 --- 
a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -26,6 +26,7 @@ #include <linux/fs_struct.h> #include <asm/smp.h> +#include <asm/cacheflush.h> #include "psp-dev.h" #include "sev-dev.h" @@ -56,6 +57,7 @@ MODULE_PARM_DESC(psp_init_on_probe, " if true, the PSP will be initialized on m MODULE_FIRMWARE("amd/amd_sev_fam17h_model0xh.sbin"); /* 1st gen EPYC */ MODULE_FIRMWARE("amd/amd_sev_fam17h_model3xh.sbin"); /* 2nd gen EPYC */ MODULE_FIRMWARE("amd/amd_sev_fam19h_model0xh.sbin"); /* 3rd gen EPYC */ +MODULE_FIRMWARE("amd/amd_sev_fam19h_model1xh.sbin"); /* 4th gen EPYC */ static bool psp_dead; static int psp_timeout; @@ -881,7 +883,14 @@ static int sev_ioctl_do_get_id2(struct sev_issue_cmd *argp) input_address = (void __user *)input.address; if (input.address && input.length) { - id_blob = kzalloc(input.length, GFP_KERNEL); + /* + * The length of the ID shouldn't be assumed by software since + * it may change in the future. The allocation size is limited + * to 1 << (PAGE_SHIFT + MAX_ORDER - 1) by the page allocator. + * If the allocation fails, simply return ENOMEM rather than + * warning in the kernel log. + */ + id_blob = kzalloc(input.length, GFP_KERNEL | __GFP_NOWARN); if (!id_blob) return -ENOMEM; @@ -1327,7 +1336,10 @@ void sev_pci_init(void) /* Obtain the TMR memory area for SEV-ES use */ sev_es_tmr = sev_fw_alloc(SEV_ES_TMR_SIZE); - if (!sev_es_tmr) + if (sev_es_tmr) + /* Must flush the cache before giving it to the firmware */ + clflush_cache_range(sev_es_tmr, SEV_ES_TMR_SIZE); + else dev_warn(sev->dev, "SEV: TMR allocation failed, SEV-ES support unavailable\n"); diff --git a/drivers/crypto/ccp/sp-pci.c b/drivers/crypto/ccp/sp-pci.c index 084d052fddcc..cde33b2ac71b 100644 --- a/drivers/crypto/ccp/sp-pci.c +++ b/drivers/crypto/ccp/sp-pci.c @@ -342,52 +342,52 @@ static int __maybe_unused sp_pci_resume(struct device *dev) #ifdef CONFIG_CRYPTO_DEV_SP_PSP static const struct sev_vdata sevv1 = { - .cmdresp_reg = 0x10580, - .cmdbuff_addr_lo_reg = 0x105e0, - .cmdbuff_addr_hi_reg = 0x105e4, + .cmdresp_reg = 0x10580, /* C2PMSG_32 */ + .cmdbuff_addr_lo_reg = 0x105e0, /* C2PMSG_56 */ + .cmdbuff_addr_hi_reg = 0x105e4, /* C2PMSG_57 */ }; static const struct sev_vdata sevv2 = { - .cmdresp_reg = 0x10980, - .cmdbuff_addr_lo_reg = 0x109e0, - .cmdbuff_addr_hi_reg = 0x109e4, + .cmdresp_reg = 0x10980, /* C2PMSG_32 */ + .cmdbuff_addr_lo_reg = 0x109e0, /* C2PMSG_56 */ + .cmdbuff_addr_hi_reg = 0x109e4, /* C2PMSG_57 */ }; static const struct tee_vdata teev1 = { - .cmdresp_reg = 0x10544, - .cmdbuff_addr_lo_reg = 0x10548, - .cmdbuff_addr_hi_reg = 0x1054c, - .ring_wptr_reg = 0x10550, - .ring_rptr_reg = 0x10554, + .cmdresp_reg = 0x10544, /* C2PMSG_17 */ + .cmdbuff_addr_lo_reg = 0x10548, /* C2PMSG_18 */ + .cmdbuff_addr_hi_reg = 0x1054c, /* C2PMSG_19 */ + .ring_wptr_reg = 0x10550, /* C2PMSG_20 */ + .ring_rptr_reg = 0x10554, /* C2PMSG_21 */ }; static const struct psp_vdata pspv1 = { .sev = &sevv1, - .feature_reg = 0x105fc, - .inten_reg = 0x10610, - .intsts_reg = 0x10614, + .feature_reg = 0x105fc, /* C2PMSG_63 */ + .inten_reg = 0x10610, /* P2CMSG_INTEN */ + .intsts_reg = 0x10614, /* P2CMSG_INTSTS */ }; static const struct psp_vdata pspv2 = { .sev = &sevv2, - .feature_reg = 0x109fc, - .inten_reg = 0x10690, - .intsts_reg = 0x10694, + .feature_reg = 0x109fc, /* C2PMSG_63 */ + .inten_reg = 0x10690, /* P2CMSG_INTEN */ + .intsts_reg = 0x10694, /* P2CMSG_INTSTS */ }; static const struct psp_vdata pspv3 = { .tee = &teev1, - .feature_reg = 0x109fc, - .inten_reg = 0x10690, - .intsts_reg = 0x10694, + 
.feature_reg = 0x109fc, /* C2PMSG_63 */ + .inten_reg = 0x10690, /* P2CMSG_INTEN */ + .intsts_reg = 0x10694, /* P2CMSG_INTSTS */ }; static const struct psp_vdata pspv4 = { .sev = &sevv2, .tee = &teev1, - .feature_reg = 0x109fc, - .inten_reg = 0x10690, - .intsts_reg = 0x10694, + .feature_reg = 0x109fc, /* C2PMSG_63 */ + .inten_reg = 0x10690, /* P2CMSG_INTEN */ + .intsts_reg = 0x10694, /* P2CMSG_INTSTS */ }; #endif diff --git a/drivers/crypto/ccree/cc_cipher.c b/drivers/crypto/ccree/cc_cipher.c index 309da6334a0a..2cd44d7457a4 100644 --- a/drivers/crypto/ccree/cc_cipher.c +++ b/drivers/crypto/ccree/cc_cipher.c @@ -460,7 +460,7 @@ static int cc_cipher_setkey(struct crypto_skcipher *sktfm, const u8 *key, } if (ctx_p->cipher_mode == DRV_CIPHER_XTS && - xts_check_key(tfm, key, keylen)) { + xts_verify_key(sktfm, key, keylen)) { dev_dbg(dev, "weak XTS key"); return -EINVAL; } diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c index 68d65773ef2b..0eade4fa6695 100644 --- a/drivers/crypto/chelsio/chcr_algo.c +++ b/drivers/crypto/chelsio/chcr_algo.c @@ -220,7 +220,7 @@ static inline int chcr_handle_aead_resp(struct aead_request *req, reqctx->verify = VERIFY_HW; } chcr_dec_wrcount(dev); - req->base.complete(&req->base, err); + aead_request_complete(req, err); return err; } @@ -1235,7 +1235,7 @@ complete: complete(&ctx->cbc_aes_aio_done); } chcr_dec_wrcount(dev); - req->base.complete(&req->base, err); + skcipher_request_complete(req, err); return err; } @@ -2132,7 +2132,7 @@ unmap: out: chcr_dec_wrcount(dev); - req->base.complete(&req->base, err); + ahash_request_complete(req, err); } /* diff --git a/drivers/crypto/hifn_795x.c b/drivers/crypto/hifn_795x.c index 7e7a8f01ea6b..5a7f6611803c 100644 --- a/drivers/crypto/hifn_795x.c +++ b/drivers/crypto/hifn_795x.c @@ -1705,7 +1705,7 @@ static void hifn_process_ready(struct skcipher_request *req, int error) hifn_cipher_walk_exit(&rctx->walk); } - req->base.complete(&req->base, error); + skcipher_request_complete(req, error); } static void hifn_clear_rings(struct hifn_device *dev, int error) @@ -2054,7 +2054,7 @@ static int hifn_process_queue(struct hifn_device *dev) break; if (backlog) - backlog->complete(backlog, -EINPROGRESS); + crypto_request_complete(backlog, -EINPROGRESS); req = skcipher_request_cast(async_req); diff --git a/drivers/crypto/hisilicon/Kconfig b/drivers/crypto/hisilicon/Kconfig index 743ce4fc3158..4137a8bf131f 100644 --- a/drivers/crypto/hisilicon/Kconfig +++ b/drivers/crypto/hisilicon/Kconfig @@ -27,7 +27,7 @@ config CRYPTO_DEV_HISI_SEC2 select CRYPTO_SHA256 select CRYPTO_SHA512 select CRYPTO_SM4_GENERIC - depends on PCI && PCI_MSI + depends on PCI_MSI depends on UACCE || UACCE=n depends on ARM64 || (COMPILE_TEST && 64BIT) depends on ACPI @@ -42,7 +42,7 @@ config CRYPTO_DEV_HISI_SEC2 config CRYPTO_DEV_HISI_QM tristate depends on ARM64 || COMPILE_TEST - depends on PCI && PCI_MSI + depends on PCI_MSI depends on UACCE || UACCE=n depends on ACPI help @@ -51,7 +51,7 @@ config CRYPTO_DEV_HISI_QM config CRYPTO_DEV_HISI_ZIP tristate "Support for HiSilicon ZIP accelerator" - depends on PCI && PCI_MSI + depends on PCI_MSI depends on ARM64 || (COMPILE_TEST && 64BIT) depends on !CPU_BIG_ENDIAN || COMPILE_TEST depends on UACCE || UACCE=n @@ -62,7 +62,7 @@ config CRYPTO_DEV_HISI_ZIP config CRYPTO_DEV_HISI_HPRE tristate "Support for HISI HPRE accelerator" - depends on PCI && PCI_MSI + depends on PCI_MSI depends on UACCE || UACCE=n depends on ARM64 || (COMPILE_TEST && 64BIT) depends on ACPI diff --git 
a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 007ac7a69ce7..59823ad1d9ae 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -95,8 +95,6 @@ #define QM_VFT_CFG_RDY 0x10006c #define QM_VFT_CFG_OP_WR 0x100058 #define QM_VFT_CFG_TYPE 0x10005c -#define QM_SQC_VFT 0x0 -#define QM_CQC_VFT 0x1 #define QM_VFT_CFG 0x100060 #define QM_VFT_CFG_OP_ENABLE 0x100054 #define QM_PM_CTRL 0x100148 @@ -118,7 +116,7 @@ #define QM_SQC_VFT_BASE_SHIFT_V2 28 #define QM_SQC_VFT_BASE_MASK_V2 GENMASK(15, 0) #define QM_SQC_VFT_NUM_SHIFT_V2 45 -#define QM_SQC_VFT_NUM_MASK_v2 GENMASK(9, 0) +#define QM_SQC_VFT_NUM_MASK_V2 GENMASK(9, 0) #define QM_ABNORMAL_INT_SOURCE 0x100000 #define QM_ABNORMAL_INT_MASK 0x100004 @@ -164,7 +162,6 @@ /* interfunction communication */ #define QM_IFC_READY_STATUS 0x100128 -#define QM_IFC_C_STS_M 0x10012C #define QM_IFC_INT_SET_P 0x100130 #define QM_IFC_INT_CFG 0x100134 #define QM_IFC_INT_SOURCE_P 0x100138 @@ -198,7 +195,6 @@ #define PCI_BAR_2 2 #define PCI_BAR_4 4 -#define QM_SQE_DATA_ALIGN_MASK GENMASK(6, 0) #define QMC_ALIGN(sz) ALIGN(sz, 32) #define QM_DBG_READ_LEN 256 @@ -212,8 +208,6 @@ #define QM_DRIVER_REMOVING 0 #define QM_RST_SCHED 1 #define QM_QOS_PARAM_NUM 2 -#define QM_QOS_VAL_NUM 1 -#define QM_QOS_BDF_PARAM_NUM 4 #define QM_QOS_MAX_VAL 1000 #define QM_QOS_RATE 100 #define QM_QOS_EXPAND_RATE 1000 @@ -225,38 +219,34 @@ #define QM_SHAPER_FACTOR_CBS_B_SHIFT 15 #define QM_SHAPER_FACTOR_CBS_S_SHIFT 19 #define QM_SHAPER_CBS_B 1 -#define QM_SHAPER_CBS_S 16 #define QM_SHAPER_VFT_OFFSET 6 -#define WAIT_FOR_QOS_VF 100 #define QM_QOS_MIN_ERROR_RATE 5 -#define QM_QOS_TYPICAL_NUM 8 #define QM_SHAPER_MIN_CBS_S 8 #define QM_QOS_TICK 0x300U #define QM_QOS_DIVISOR_CLK 0x1f40U #define QM_QOS_MAX_CIR_B 200 #define QM_QOS_MIN_CIR_B 100 #define QM_QOS_MAX_CIR_U 6 -#define QM_QOS_MAX_CIR_S 11 #define QM_AUTOSUSPEND_DELAY 3000 #define QM_MK_CQC_DW3_V1(hop_num, pg_sz, buf_sz, cqe_sz) \ - (((hop_num) << QM_CQ_HOP_NUM_SHIFT) | \ - ((pg_sz) << QM_CQ_PAGE_SIZE_SHIFT) | \ - ((buf_sz) << QM_CQ_BUF_SIZE_SHIFT) | \ + (((hop_num) << QM_CQ_HOP_NUM_SHIFT) | \ + ((pg_sz) << QM_CQ_PAGE_SIZE_SHIFT) | \ + ((buf_sz) << QM_CQ_BUF_SIZE_SHIFT) | \ ((cqe_sz) << QM_CQ_CQE_SIZE_SHIFT)) #define QM_MK_CQC_DW3_V2(cqe_sz, cq_depth) \ ((((u32)cq_depth) - 1) | ((cqe_sz) << QM_CQ_CQE_SIZE_SHIFT)) #define QM_MK_SQC_W13(priority, orders, alg_type) \ - (((priority) << QM_SQ_PRIORITY_SHIFT) | \ - ((orders) << QM_SQ_ORDERS_SHIFT) | \ + (((priority) << QM_SQ_PRIORITY_SHIFT) | \ + ((orders) << QM_SQ_ORDERS_SHIFT) | \ (((alg_type) & QM_SQ_TYPE_MASK) << QM_SQ_TYPE_SHIFT)) #define QM_MK_SQC_DW3_V1(hop_num, pg_sz, buf_sz, sqe_sz) \ - (((hop_num) << QM_SQ_HOP_NUM_SHIFT) | \ - ((pg_sz) << QM_SQ_PAGE_SIZE_SHIFT) | \ - ((buf_sz) << QM_SQ_BUF_SIZE_SHIFT) | \ + (((hop_num) << QM_SQ_HOP_NUM_SHIFT) | \ + ((pg_sz) << QM_SQ_PAGE_SIZE_SHIFT) | \ + ((buf_sz) << QM_SQ_BUF_SIZE_SHIFT) | \ ((u32)ilog2(sqe_sz) << QM_SQ_SQE_SIZE_SHIFT)) #define QM_MK_SQC_DW3_V2(sqe_sz, sq_depth) \ @@ -706,7 +696,7 @@ static void qm_db_v2(struct hisi_qm *qm, u16 qn, u8 cmd, u16 index, u8 priority) doorbell = qn | ((u64)cmd << QM_DB_CMD_SHIFT_V2) | ((u64)randata << QM_DB_RAND_SHIFT_V2) | - ((u64)index << QM_DB_INDEX_SHIFT_V2) | + ((u64)index << QM_DB_INDEX_SHIFT_V2) | ((u64)priority << QM_DB_PRIORITY_SHIFT_V2); writeq(doorbell, io_base); @@ -905,7 +895,7 @@ static void qm_work_process(struct work_struct *work) } } -static bool do_qm_irq(struct hisi_qm *qm) +static bool do_qm_eq_irq(struct hisi_qm *qm) { struct qm_eqe *eqe = 
qm->eqe + qm->status.eq_head; struct hisi_qm_poll_data *poll_data; @@ -925,12 +915,12 @@ static bool do_qm_irq(struct hisi_qm *qm) return false; } -static irqreturn_t qm_irq(int irq, void *data) +static irqreturn_t qm_eq_irq(int irq, void *data) { struct hisi_qm *qm = data; bool ret; - ret = do_qm_irq(qm); + ret = do_qm_eq_irq(qm); if (ret) return IRQ_HANDLED; @@ -1304,7 +1294,7 @@ static int qm_get_vft_v2(struct hisi_qm *qm, u32 *base, u32 *number) sqc_vft = readl(qm->io_base + QM_MB_CMD_DATA_ADDR_L) | ((u64)readl(qm->io_base + QM_MB_CMD_DATA_ADDR_H) << 32); *base = QM_SQC_VFT_BASE_MASK_V2 & (sqc_vft >> QM_SQC_VFT_BASE_SHIFT_V2); - *number = (QM_SQC_VFT_NUM_MASK_v2 & + *number = (QM_SQC_VFT_NUM_MASK_V2 & (sqc_vft >> QM_SQC_VFT_NUM_SHIFT_V2)) + 1; return 0; @@ -1892,8 +1882,7 @@ static struct hisi_qp *qm_create_qp_nolock(struct hisi_qm *qm, u8 alg_type) * @qm: The qm we create a qp from. * @alg_type: Accelerator specific algorithm type in sqc. * - * return created qp, -EBUSY if all qps in qm allocated, -ENOMEM if allocating - * qp memory fails. + * Return created qp, negative error code if failed. */ static struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type) { @@ -2062,7 +2051,7 @@ static int qm_start_qp_nolock(struct hisi_qp *qp, unsigned long arg) * @arg: Accelerator specific argument. * * After this function, qp can receive request from user. Return 0 if - * successful, Return -EBUSY if failed. + * successful, negative error code if failed. */ int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long arg) { @@ -3074,7 +3063,6 @@ static int qm_stop_started_qp(struct hisi_qm *qm) return 0; } - /** * qm_clear_queues() - Clear all queues memory in a qm. * @qm: The qm in which the queues will be cleared. @@ -3371,7 +3359,7 @@ static int qm_vf_q_assign(struct hisi_qm *qm, u32 num_vfs) act_q_num = q_num; } - act_q_num = min_t(int, act_q_num, max_qp_num); + act_q_num = min(act_q_num, max_qp_num); ret = hisi_qm_set_vft(qm, i, q_base, act_q_num); if (ret) { for (j = num_vfs; j > i; j--) @@ -3558,7 +3546,7 @@ static ssize_t qm_algqos_read(struct file *filp, char __user *buf, qos_val = ir / QM_QOS_RATE; ret = scnprintf(tbuf, QM_DBG_READ_LEN, "%u\n", qos_val); - ret = simple_read_from_buffer(buf, count, pos, tbuf, ret); + ret = simple_read_from_buffer(buf, count, pos, tbuf, ret); err_get_status: clear_bit(QM_RESETTING, &qm->misc_ctl); @@ -4049,13 +4037,10 @@ static void qm_dev_ecc_mbit_handle(struct hisi_qm *qm) if (!qm->err_status.is_dev_ecc_mbit && qm->err_status.is_qm_ecc_mbit && qm->err_ini->close_axi_master_ooo) { - qm->err_ini->close_axi_master_ooo(qm); - } else if (qm->err_status.is_dev_ecc_mbit && !qm->err_status.is_qm_ecc_mbit && !qm->err_ini->close_axi_master_ooo) { - nfe_enb = readl(qm->io_base + QM_RAS_NFE_ENABLE); writel(nfe_enb & QM_RAS_NFE_MBIT_DISABLE, qm->io_base + QM_RAS_NFE_ENABLE); @@ -4499,7 +4484,6 @@ static irqreturn_t qm_abnormal_irq(int irq, void *data) return IRQ_HANDLED; } - /** * hisi_qm_dev_shutdown() - Shutdown device. * @pdev: The device will be shutdown. 
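/*
 * Editor's note (illustrative, not part of the diff): the qm_vf_q_assign()
 * hunk above switches from min_t() to min(). min_t() force-casts both
 * operands, which can silently truncate a wider value, while min() keeps
 * the compiler's type check when the operands already share a type. A
 * stand-alone sketch with made-up names:
 */
#include <linux/minmax.h>
#include <linux/types.h>

static u32 example_clamp_queues(u32 requested, u32 hw_limit)
{
        /* Both operands are already u32, so no cast is needed or wanted. */
        return min(requested, hw_limit);
}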
@@ -4903,7 +4887,7 @@ static int qm_register_eq_irq(struct hisi_qm *qm) return 0; irq_vector = val & QM_IRQ_VECTOR_MASK; - ret = request_irq(pci_irq_vector(pdev, irq_vector), qm_irq, 0, qm->dev_name, qm); + ret = request_irq(pci_irq_vector(pdev, irq_vector), qm_eq_irq, 0, qm->dev_name, qm); if (ret) dev_err(&pdev->dev, "failed to request eq irq, ret = %d", ret); diff --git a/drivers/crypto/hisilicon/sec/sec_algs.c b/drivers/crypto/hisilicon/sec/sec_algs.c index 490e1542305e..1189effcdad0 100644 --- a/drivers/crypto/hisilicon/sec/sec_algs.c +++ b/drivers/crypto/hisilicon/sec/sec_algs.c @@ -504,8 +504,8 @@ static void sec_skcipher_alg_callback(struct sec_bd_info *sec_resp, kfifo_avail(&ctx->queue->softqueue) > backlog_req->num_elements)) { sec_send_request(backlog_req, ctx->queue); - backlog_req->req_base->complete(backlog_req->req_base, - -EINPROGRESS); + crypto_request_complete(backlog_req->req_base, + -EINPROGRESS); list_del(&backlog_req->backlog_head); } } @@ -534,7 +534,7 @@ static void sec_skcipher_alg_callback(struct sec_bd_info *sec_resp, if (skreq->src != skreq->dst) dma_unmap_sg(dev, skreq->dst, sec_req->len_out, DMA_BIDIRECTIONAL); - skreq->base.complete(&skreq->base, sec_req->err); + skcipher_request_complete(skreq, sec_req->err); } } diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index f5bfc9755a4a..074e50ef512c 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -1459,12 +1459,11 @@ static void sec_skcipher_callback(struct sec_ctx *ctx, struct sec_req *req, break; backlog_sk_req = backlog_req->c_req.sk_req; - backlog_sk_req->base.complete(&backlog_sk_req->base, - -EINPROGRESS); + skcipher_request_complete(backlog_sk_req, -EINPROGRESS); atomic64_inc(&ctx->sec->debug.dfx.recv_busy_cnt); } - sk_req->base.complete(&sk_req->base, err); + skcipher_request_complete(sk_req, err); } static void set_aead_auth_iv(struct sec_ctx *ctx, struct sec_req *req) @@ -1736,12 +1735,11 @@ static void sec_aead_callback(struct sec_ctx *c, struct sec_req *req, int err) break; backlog_aead_req = backlog_req->aead_req.aead_req; - backlog_aead_req->base.complete(&backlog_aead_req->base, - -EINPROGRESS); + aead_request_complete(backlog_aead_req, -EINPROGRESS); atomic64_inc(&c->sec->debug.dfx.recv_busy_cnt); } - a_req->base.complete(&a_req->base, err); + aead_request_complete(a_req, err); } static void sec_request_uninit(struct sec_ctx *ctx, struct sec_req *req) diff --git a/drivers/crypto/hisilicon/sgl.c b/drivers/crypto/hisilicon/sgl.c index 0974b0041405..09586a837b1e 100644 --- a/drivers/crypto/hisilicon/sgl.c +++ b/drivers/crypto/hisilicon/sgl.c @@ -249,7 +249,6 @@ hisi_acc_sg_buf_map_to_hw_sgl(struct device *dev, dev_err(dev, "Get SGL error!\n"); dma_unmap_sg(dev, sgl, sg_n, DMA_BIDIRECTIONAL); return ERR_PTR(-ENOMEM); - } curr_hw_sgl->entry_length_in_sgl = cpu_to_le16(pool->sge_nr); curr_hw_sge = curr_hw_sgl->sge_entries; diff --git a/drivers/crypto/img-hash.c b/drivers/crypto/img-hash.c index 9629e98bd68b..fe93d19e3044 100644 --- a/drivers/crypto/img-hash.c +++ b/drivers/crypto/img-hash.c @@ -157,9 +157,9 @@ static inline void img_hash_write(struct img_hash_dev *hdev, writel_relaxed(value, hdev->io_base + offset); } -static inline u32 img_hash_read_result_queue(struct img_hash_dev *hdev) +static inline __be32 img_hash_read_result_queue(struct img_hash_dev *hdev) { - return be32_to_cpu(img_hash_read(hdev, CR_RESULT_QUEUE)); + return cpu_to_be32(img_hash_read(hdev, CR_RESULT_QUEUE)); } static 
void img_hash_start(struct img_hash_dev *hdev, bool dma) @@ -283,10 +283,10 @@ static int img_hash_finish(struct ahash_request *req) static void img_hash_copy_hash(struct ahash_request *req) { struct img_hash_request_ctx *ctx = ahash_request_ctx(req); - u32 *hash = (u32 *)ctx->digest; + __be32 *hash = (__be32 *)ctx->digest; int i; - for (i = (ctx->digsize / sizeof(u32)) - 1; i >= 0; i--) + for (i = (ctx->digsize / sizeof(*hash)) - 1; i >= 0; i--) hash[i] = img_hash_read_result_queue(ctx->hdev); } @@ -308,7 +308,7 @@ static void img_hash_finish_req(struct ahash_request *req, int err) DRIVER_FLAGS_CPU | DRIVER_FLAGS_BUSY | DRIVER_FLAGS_FINAL); if (req->base.complete) - req->base.complete(&req->base, err); + ahash_request_complete(req, err); } static int img_hash_write_via_dma(struct img_hash_dev *hdev) @@ -526,7 +526,7 @@ static int img_hash_handle_queue(struct img_hash_dev *hdev, return res; if (backlog) - backlog->complete(backlog, -EINPROGRESS); + crypto_request_complete(backlog, -EINPROGRESS); req = ahash_request_cast(async_req); hdev->req = req; diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c index ae6110376e21..6858753af6b3 100644 --- a/drivers/crypto/inside-secure/safexcel.c +++ b/drivers/crypto/inside-secure/safexcel.c @@ -850,7 +850,7 @@ handle_req: goto request_failed; if (backlog) - backlog->complete(backlog, -EINPROGRESS); + crypto_request_complete(backlog, -EINPROGRESS); /* In case the send() helper did not issue any command to push * to the engine because the input data was cached, continue to @@ -970,17 +970,6 @@ void safexcel_complete(struct safexcel_crypto_priv *priv, int ring) } while (!cdesc->last_seg); } -void safexcel_inv_complete(struct crypto_async_request *req, int error) -{ - struct safexcel_inv_result *result = req->data; - - if (error == -EINPROGRESS) - return; - - result->error = error; - complete(&result->completion); -} - int safexcel_invalidate_cache(struct crypto_async_request *async, struct safexcel_crypto_priv *priv, dma_addr_t ctxr_dma, int ring) @@ -1050,7 +1039,7 @@ handle_results: if (should_complete) { local_bh_disable(); - req->complete(req, ret); + crypto_request_complete(req, ret); local_bh_enable(); } diff --git a/drivers/crypto/inside-secure/safexcel.h b/drivers/crypto/inside-secure/safexcel.h index 6c2fc662f64f..47ef6c7cd02c 100644 --- a/drivers/crypto/inside-secure/safexcel.h +++ b/drivers/crypto/inside-secure/safexcel.h @@ -884,11 +884,6 @@ struct safexcel_alg_template { } alg; }; -struct safexcel_inv_result { - struct completion completion; - int error; -}; - void safexcel_dequeue(struct safexcel_crypto_priv *priv, int ring); int safexcel_rdesc_check_errors(struct safexcel_crypto_priv *priv, void *rdp); @@ -927,7 +922,6 @@ void safexcel_rdr_req_set(struct safexcel_crypto_priv *priv, struct crypto_async_request *req); inline struct crypto_async_request * safexcel_rdr_req_get(struct safexcel_crypto_priv *priv, int ring); -void safexcel_inv_complete(struct crypto_async_request *req, int error); int safexcel_hmac_setkey(struct safexcel_context *base, const u8 *key, unsigned int keylen, const char *alg, unsigned int state_sz); diff --git a/drivers/crypto/inside-secure/safexcel_cipher.c b/drivers/crypto/inside-secure/safexcel_cipher.c index 32a37e3850c5..272c28b5a088 100644 --- a/drivers/crypto/inside-secure/safexcel_cipher.c +++ b/drivers/crypto/inside-secure/safexcel_cipher.c @@ -1091,13 +1091,12 @@ static int safexcel_aead_send(struct crypto_async_request *async, int ring, static int 
safexcel_cipher_exit_inv(struct crypto_tfm *tfm, struct crypto_async_request *base, struct safexcel_cipher_req *sreq, - struct safexcel_inv_result *result) + struct crypto_wait *result) { struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm); struct safexcel_crypto_priv *priv = ctx->base.priv; int ring = ctx->base.ring; - - init_completion(&result->completion); + int err; ctx = crypto_tfm_ctx(base->tfm); ctx->base.exit_inv = true; @@ -1110,13 +1109,13 @@ static int safexcel_cipher_exit_inv(struct crypto_tfm *tfm, queue_work(priv->ring[ring].workqueue, &priv->ring[ring].work_data.work); - wait_for_completion(&result->completion); + err = crypto_wait_req(-EINPROGRESS, result); - if (result->error) { + if (err) { dev_warn(priv->dev, "cipher: sync: invalidate: completion error %d\n", - result->error); - return result->error; + err); + return err; } return 0; @@ -1126,12 +1125,12 @@ static int safexcel_skcipher_exit_inv(struct crypto_tfm *tfm) { EIP197_REQUEST_ON_STACK(req, skcipher, EIP197_SKCIPHER_REQ_SIZE); struct safexcel_cipher_req *sreq = skcipher_request_ctx(req); - struct safexcel_inv_result result = {}; + DECLARE_CRYPTO_WAIT(result); memset(req, 0, sizeof(struct skcipher_request)); skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, - safexcel_inv_complete, &result); + crypto_req_done, &result); skcipher_request_set_tfm(req, __crypto_skcipher_cast(tfm)); return safexcel_cipher_exit_inv(tfm, &req->base, sreq, &result); @@ -1141,12 +1140,12 @@ static int safexcel_aead_exit_inv(struct crypto_tfm *tfm) { EIP197_REQUEST_ON_STACK(req, aead, EIP197_AEAD_REQ_SIZE); struct safexcel_cipher_req *sreq = aead_request_ctx(req); - struct safexcel_inv_result result = {}; + DECLARE_CRYPTO_WAIT(result); memset(req, 0, sizeof(struct aead_request)); aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, - safexcel_inv_complete, &result); + crypto_req_done, &result); aead_request_set_tfm(req, __crypto_aead_cast(tfm)); return safexcel_cipher_exit_inv(tfm, &req->base, sreq, &result); diff --git a/drivers/crypto/inside-secure/safexcel_hash.c b/drivers/crypto/inside-secure/safexcel_hash.c index ca46328472d4..e17577b785c3 100644 --- a/drivers/crypto/inside-secure/safexcel_hash.c +++ b/drivers/crypto/inside-secure/safexcel_hash.c @@ -625,15 +625,16 @@ static int safexcel_ahash_exit_inv(struct crypto_tfm *tfm) struct safexcel_crypto_priv *priv = ctx->base.priv; EIP197_REQUEST_ON_STACK(req, ahash, EIP197_AHASH_REQ_SIZE); struct safexcel_ahash_req *rctx = ahash_request_ctx_dma(req); - struct safexcel_inv_result result = {}; + DECLARE_CRYPTO_WAIT(result); int ring = ctx->base.ring; + int err; memset(req, 0, EIP197_AHASH_REQ_SIZE); /* create invalidation request */ init_completion(&result.completion); ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, - safexcel_inv_complete, &result); + crypto_req_done, &result); ahash_request_set_tfm(req, __crypto_ahash_cast(tfm)); ctx = crypto_tfm_ctx(req->base.tfm); @@ -647,12 +648,11 @@ static int safexcel_ahash_exit_inv(struct crypto_tfm *tfm) queue_work(priv->ring[ring].workqueue, &priv->ring[ring].work_data.work); - wait_for_completion(&result.completion); + err = crypto_wait_req(-EINPROGRESS, &result); - if (result.error) { - dev_warn(priv->dev, "hash: completion error (%d)\n", - result.error); - return result.error; + if (err) { + dev_warn(priv->dev, "hash: completion error (%d)\n", err); + return err; } return 0; @@ -1042,27 +1042,11 @@ static int safexcel_hmac_sha1_digest(struct ahash_request *areq) return safexcel_ahash_finup(areq); } -struct 
safexcel_ahash_result { - struct completion completion; - int error; -}; - -static void safexcel_ahash_complete(struct crypto_async_request *req, int error) -{ - struct safexcel_ahash_result *result = req->data; - - if (error == -EINPROGRESS) - return; - - result->error = error; - complete(&result->completion); -} - static int safexcel_hmac_init_pad(struct ahash_request *areq, unsigned int blocksize, const u8 *key, unsigned int keylen, u8 *ipad, u8 *opad) { - struct safexcel_ahash_result result; + DECLARE_CRYPTO_WAIT(result); struct scatterlist sg; int ret, i; u8 *keydup; @@ -1075,16 +1059,12 @@ static int safexcel_hmac_init_pad(struct ahash_request *areq, return -ENOMEM; ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_BACKLOG, - safexcel_ahash_complete, &result); + crypto_req_done, &result); sg_init_one(&sg, keydup, keylen); ahash_request_set_crypt(areq, &sg, ipad, keylen); - init_completion(&result.completion); ret = crypto_ahash_digest(areq); - if (ret == -EINPROGRESS || ret == -EBUSY) { - wait_for_completion_interruptible(&result.completion); - ret = result.error; - } + ret = crypto_wait_req(ret, &result); /* Avoid leaking */ kfree_sensitive(keydup); @@ -1109,16 +1089,15 @@ static int safexcel_hmac_init_pad(struct ahash_request *areq, static int safexcel_hmac_init_iv(struct ahash_request *areq, unsigned int blocksize, u8 *pad, void *state) { - struct safexcel_ahash_result result; struct safexcel_ahash_req *req; + DECLARE_CRYPTO_WAIT(result); struct scatterlist sg; int ret; ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_BACKLOG, - safexcel_ahash_complete, &result); + crypto_req_done, &result); sg_init_one(&sg, pad, blocksize); ahash_request_set_crypt(areq, &sg, pad, blocksize); - init_completion(&result.completion); ret = crypto_ahash_init(areq); if (ret) @@ -1129,14 +1108,9 @@ static int safexcel_hmac_init_iv(struct ahash_request *areq, req->last_req = true; ret = crypto_ahash_update(areq); - if (ret && ret != -EINPROGRESS && ret != -EBUSY) - return ret; - - wait_for_completion_interruptible(&result.completion); - if (result.error) - return result.error; + ret = crypto_wait_req(ret, &result); - return crypto_ahash_export(areq, state); + return ret ?: crypto_ahash_export(areq, state); } static int __safexcel_hmac_setkey(const char *alg, const u8 *key, diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c index 984b3cc0237c..b63e2359a133 100644 --- a/drivers/crypto/ixp4xx_crypto.c +++ b/drivers/crypto/ixp4xx_crypto.c @@ -382,7 +382,7 @@ static void one_packet(dma_addr_t phys) if (req_ctx->hmac_virt) finish_scattered_hmac(crypt); - req->base.complete(&req->base, failed); + aead_request_complete(req, failed); break; } case CTL_FLAG_PERFORM_ABLK: { @@ -407,7 +407,7 @@ static void one_packet(dma_addr_t phys) free_buf_chain(dev, req_ctx->dst, crypt->dst_buf); free_buf_chain(dev, req_ctx->src, crypt->src_buf); - req->base.complete(&req->base, failed); + skcipher_request_complete(req, failed); break; } case CTL_FLAG_GEN_ICV: diff --git a/drivers/crypto/marvell/cesa/cesa.c b/drivers/crypto/marvell/cesa/cesa.c index 5cd332880653..b61e35b932e5 100644 --- a/drivers/crypto/marvell/cesa/cesa.c +++ b/drivers/crypto/marvell/cesa/cesa.c @@ -66,7 +66,7 @@ static void mv_cesa_rearm_engine(struct mv_cesa_engine *engine) return; if (backlog) - backlog->complete(backlog, -EINPROGRESS); + crypto_request_complete(backlog, -EINPROGRESS); ctx = crypto_tfm_ctx(req->tfm); ctx->ops->step(req); @@ -106,7 +106,7 @@ mv_cesa_complete_req(struct mv_cesa_ctx *ctx, struct 
crypto_async_request *req, { ctx->ops->cleanup(req); local_bh_disable(); - req->complete(req, res); + crypto_request_complete(req, res); local_bh_enable(); } diff --git a/drivers/crypto/marvell/cesa/hash.c b/drivers/crypto/marvell/cesa/hash.c index c72b0672fc71..8d84ad45571c 100644 --- a/drivers/crypto/marvell/cesa/hash.c +++ b/drivers/crypto/marvell/cesa/hash.c @@ -1104,47 +1104,27 @@ struct ahash_alg mv_sha256_alg = { } }; -struct mv_cesa_ahash_result { - struct completion completion; - int error; -}; - -static void mv_cesa_hmac_ahash_complete(struct crypto_async_request *req, - int error) -{ - struct mv_cesa_ahash_result *result = req->data; - - if (error == -EINPROGRESS) - return; - - result->error = error; - complete(&result->completion); -} - static int mv_cesa_ahmac_iv_state_init(struct ahash_request *req, u8 *pad, void *state, unsigned int blocksize) { - struct mv_cesa_ahash_result result; + DECLARE_CRYPTO_WAIT(result); struct scatterlist sg; int ret; ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, - mv_cesa_hmac_ahash_complete, &result); + crypto_req_done, &result); sg_init_one(&sg, pad, blocksize); ahash_request_set_crypt(req, &sg, pad, blocksize); - init_completion(&result.completion); ret = crypto_ahash_init(req); if (ret) return ret; ret = crypto_ahash_update(req); - if (ret && ret != -EINPROGRESS) - return ret; + ret = crypto_wait_req(ret, &result); - wait_for_completion_interruptible(&result.completion); - if (result.error) - return result.error; + if (ret) + return ret; ret = crypto_ahash_export(req, state); if (ret) @@ -1158,7 +1138,7 @@ static int mv_cesa_ahmac_pad_init(struct ahash_request *req, u8 *ipad, u8 *opad, unsigned int blocksize) { - struct mv_cesa_ahash_result result; + DECLARE_CRYPTO_WAIT(result); struct scatterlist sg; int ret; int i; @@ -1172,17 +1152,12 @@ static int mv_cesa_ahmac_pad_init(struct ahash_request *req, return -ENOMEM; ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, - mv_cesa_hmac_ahash_complete, - &result); + crypto_req_done, &result); sg_init_one(&sg, keydup, keylen); ahash_request_set_crypt(req, &sg, ipad, keylen); - init_completion(&result.completion); ret = crypto_ahash_digest(req); - if (ret == -EINPROGRESS) { - wait_for_completion_interruptible(&result.completion); - ret = result.error; - } + ret = crypto_wait_req(ret, &result); /* Set the memory region to 0 to avoid any leak. 
*/ kfree_sensitive(keydup); diff --git a/drivers/crypto/marvell/cesa/tdma.c b/drivers/crypto/marvell/cesa/tdma.c index f0b5537038c2..388a06e180d6 100644 --- a/drivers/crypto/marvell/cesa/tdma.c +++ b/drivers/crypto/marvell/cesa/tdma.c @@ -168,7 +168,7 @@ int mv_cesa_tdma_process(struct mv_cesa_engine *engine, u32 status) req); if (backlog) - backlog->complete(backlog, -EINPROGRESS); + crypto_request_complete(backlog, -EINPROGRESS); } if (res || tdma->cur_dma == tdma_cur) diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c index 80ba77c793a7..1c2c870e887a 100644 --- a/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c +++ b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c @@ -138,7 +138,7 @@ static void otx_cpt_aead_callback(int status, void *arg1, void *arg2) complete: if (areq) - areq->complete(areq, status); + crypto_request_complete(areq, status); } static void output_iv_copyback(struct crypto_async_request *areq) @@ -188,7 +188,7 @@ static void otx_cpt_skcipher_callback(int status, void *arg1, void *arg2) pdev = cpt_info->pdev; do_request_cleanup(pdev, cpt_info); } - areq->complete(areq, status); + crypto_request_complete(areq, status); } } @@ -398,7 +398,7 @@ static int otx_cpt_skcipher_xts_setkey(struct crypto_skcipher *tfm, const u8 *key1 = key; int ret; - ret = xts_check_key(crypto_skcipher_tfm(tfm), key, keylen); + ret = xts_verify_key(tfm, key, keylen); if (ret) return ret; ctx->key_len = keylen; diff --git a/drivers/crypto/marvell/octeontx2/Makefile b/drivers/crypto/marvell/octeontx2/Makefile index 965297e96954..f0f2942c1d27 100644 --- a/drivers/crypto/marvell/octeontx2/Makefile +++ b/drivers/crypto/marvell/octeontx2/Makefile @@ -1,11 +1,10 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_CRYPTO_DEV_OCTEONTX2_CPT) += rvu_cptpf.o rvu_cptvf.o +obj-$(CONFIG_CRYPTO_DEV_OCTEONTX2_CPT) += rvu_cptcommon.o rvu_cptpf.o rvu_cptvf.o +rvu_cptcommon-objs := cn10k_cpt.o otx2_cptlf.o otx2_cpt_mbox_common.o rvu_cptpf-objs := otx2_cptpf_main.o otx2_cptpf_mbox.o \ - otx2_cpt_mbox_common.o otx2_cptpf_ucode.o otx2_cptlf.o \ - cn10k_cpt.o otx2_cpt_devlink.o -rvu_cptvf-objs := otx2_cptvf_main.o otx2_cptvf_mbox.o otx2_cptlf.o \ - otx2_cpt_mbox_common.o otx2_cptvf_reqmgr.o \ - otx2_cptvf_algs.o cn10k_cpt.o + otx2_cptpf_ucode.o otx2_cpt_devlink.o +rvu_cptvf-objs := otx2_cptvf_main.o otx2_cptvf_mbox.o \ + otx2_cptvf_reqmgr.o otx2_cptvf_algs.o ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af diff --git a/drivers/crypto/marvell/octeontx2/cn10k_cpt.c b/drivers/crypto/marvell/octeontx2/cn10k_cpt.c index 1499ef75b5c2..93d22b328991 100644 --- a/drivers/crypto/marvell/octeontx2/cn10k_cpt.c +++ b/drivers/crypto/marvell/octeontx2/cn10k_cpt.c @@ -7,6 +7,9 @@ #include "otx2_cptlf.h" #include "cn10k_cpt.h" +static void cn10k_cpt_send_cmd(union otx2_cpt_inst_s *cptinst, u32 insts_num, + struct otx2_cptlf_info *lf); + static struct cpt_hw_ops otx2_hw_ops = { .send_cmd = otx2_cpt_send_cmd, .cpt_get_compcode = otx2_cpt_get_compcode, @@ -19,8 +22,8 @@ static struct cpt_hw_ops cn10k_hw_ops = { .cpt_get_uc_compcode = cn10k_cpt_get_uc_compcode, }; -void cn10k_cpt_send_cmd(union otx2_cpt_inst_s *cptinst, u32 insts_num, - struct otx2_cptlf_info *lf) +static void cn10k_cpt_send_cmd(union otx2_cpt_inst_s *cptinst, u32 insts_num, + struct otx2_cptlf_info *lf) { void __iomem *lmtline = lf->lmtline; u64 val = (lf->slot & 0x7FF); @@ -68,6 +71,7 @@ int cn10k_cptpf_lmtst_init(struct otx2_cptpf_dev *cptpf) return 0; } 
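/*
 * Editor's illustrative sketch, not part of the diff: the safexcel and CESA
 * hunks above replace driver-private completion structs with the generic
 * crypto_wait helpers. The synchronous-wait pattern they now use looks
 * roughly like this; the function and its arguments are hypothetical and
 * the request setup is abbreviated.
 */
#include <crypto/hash.h>
#include <linux/crypto.h>

static int example_sync_digest(struct ahash_request *req,
                               struct scatterlist *sg, u8 *out,
                               unsigned int len)
{
        DECLARE_CRYPTO_WAIT(wait);

        /* crypto_req_done() completes 'wait' once the request finishes. */
        ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
                                   crypto_req_done, &wait);
        ahash_request_set_crypt(req, sg, out, len);

        /* Folds the -EINPROGRESS/-EBUSY handling and the wait into one call. */
        return crypto_wait_req(crypto_ahash_digest(req), &wait);
}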
+EXPORT_SYMBOL_NS_GPL(cn10k_cptpf_lmtst_init, CRYPTO_DEV_OCTEONTX2_CPT); int cn10k_cptvf_lmtst_init(struct otx2_cptvf_dev *cptvf) { @@ -91,3 +95,4 @@ int cn10k_cptvf_lmtst_init(struct otx2_cptvf_dev *cptvf) return 0; } +EXPORT_SYMBOL_NS_GPL(cn10k_cptvf_lmtst_init, CRYPTO_DEV_OCTEONTX2_CPT); diff --git a/drivers/crypto/marvell/octeontx2/cn10k_cpt.h b/drivers/crypto/marvell/octeontx2/cn10k_cpt.h index c091392b47e0..aaefc7e38e06 100644 --- a/drivers/crypto/marvell/octeontx2/cn10k_cpt.h +++ b/drivers/crypto/marvell/octeontx2/cn10k_cpt.h @@ -28,8 +28,6 @@ static inline u8 otx2_cpt_get_uc_compcode(union otx2_cpt_res_s *result) return ((struct cn9k_cpt_res_s *)result)->uc_compcode; } -void cn10k_cpt_send_cmd(union otx2_cpt_inst_s *cptinst, u32 insts_num, - struct otx2_cptlf_info *lf); int cn10k_cptpf_lmtst_init(struct otx2_cptpf_dev *cptpf); int cn10k_cptvf_lmtst_init(struct otx2_cptvf_dev *cptvf); diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h index 5012b7e669f0..6019066a6451 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h @@ -145,8 +145,6 @@ int otx2_cpt_send_mbox_msg(struct otx2_mbox *mbox, struct pci_dev *pdev); int otx2_cpt_send_af_reg_requests(struct otx2_mbox *mbox, struct pci_dev *pdev); -int otx2_cpt_add_read_af_reg(struct otx2_mbox *mbox, struct pci_dev *pdev, - u64 reg, u64 *val, int blkaddr); int otx2_cpt_add_write_af_reg(struct otx2_mbox *mbox, struct pci_dev *pdev, u64 reg, u64 val, int blkaddr); int otx2_cpt_read_af_reg(struct otx2_mbox *mbox, struct pci_dev *pdev, diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c b/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c index a317319696ef..115997475beb 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c @@ -19,6 +19,7 @@ int otx2_cpt_send_mbox_msg(struct otx2_mbox *mbox, struct pci_dev *pdev) } return ret; } +EXPORT_SYMBOL_NS_GPL(otx2_cpt_send_mbox_msg, CRYPTO_DEV_OCTEONTX2_CPT); int otx2_cpt_send_ready_msg(struct otx2_mbox *mbox, struct pci_dev *pdev) { @@ -36,14 +37,17 @@ int otx2_cpt_send_ready_msg(struct otx2_mbox *mbox, struct pci_dev *pdev) return otx2_cpt_send_mbox_msg(mbox, pdev); } +EXPORT_SYMBOL_NS_GPL(otx2_cpt_send_ready_msg, CRYPTO_DEV_OCTEONTX2_CPT); int otx2_cpt_send_af_reg_requests(struct otx2_mbox *mbox, struct pci_dev *pdev) { return otx2_cpt_send_mbox_msg(mbox, pdev); } +EXPORT_SYMBOL_NS_GPL(otx2_cpt_send_af_reg_requests, CRYPTO_DEV_OCTEONTX2_CPT); -int otx2_cpt_add_read_af_reg(struct otx2_mbox *mbox, struct pci_dev *pdev, - u64 reg, u64 *val, int blkaddr) +static int otx2_cpt_add_read_af_reg(struct otx2_mbox *mbox, + struct pci_dev *pdev, u64 reg, + u64 *val, int blkaddr) { struct cpt_rd_wr_reg_msg *reg_msg; @@ -91,6 +95,7 @@ int otx2_cpt_add_write_af_reg(struct otx2_mbox *mbox, struct pci_dev *pdev, return 0; } +EXPORT_SYMBOL_NS_GPL(otx2_cpt_add_write_af_reg, CRYPTO_DEV_OCTEONTX2_CPT); int otx2_cpt_read_af_reg(struct otx2_mbox *mbox, struct pci_dev *pdev, u64 reg, u64 *val, int blkaddr) @@ -103,6 +108,7 @@ int otx2_cpt_read_af_reg(struct otx2_mbox *mbox, struct pci_dev *pdev, return otx2_cpt_send_mbox_msg(mbox, pdev); } +EXPORT_SYMBOL_NS_GPL(otx2_cpt_read_af_reg, CRYPTO_DEV_OCTEONTX2_CPT); int otx2_cpt_write_af_reg(struct otx2_mbox *mbox, struct pci_dev *pdev, u64 reg, u64 val, int blkaddr) @@ -115,6 +121,7 @@ int otx2_cpt_write_af_reg(struct otx2_mbox *mbox, struct pci_dev 
*pdev, return otx2_cpt_send_mbox_msg(mbox, pdev); } +EXPORT_SYMBOL_NS_GPL(otx2_cpt_write_af_reg, CRYPTO_DEV_OCTEONTX2_CPT); int otx2_cpt_attach_rscrs_msg(struct otx2_cptlfs_info *lfs) { @@ -170,6 +177,7 @@ int otx2_cpt_detach_rsrcs_msg(struct otx2_cptlfs_info *lfs) return ret; } +EXPORT_SYMBOL_NS_GPL(otx2_cpt_detach_rsrcs_msg, CRYPTO_DEV_OCTEONTX2_CPT); int otx2_cpt_msix_offset_msg(struct otx2_cptlfs_info *lfs) { @@ -202,6 +210,7 @@ int otx2_cpt_msix_offset_msg(struct otx2_cptlfs_info *lfs) } return ret; } +EXPORT_SYMBOL_NS_GPL(otx2_cpt_msix_offset_msg, CRYPTO_DEV_OCTEONTX2_CPT); int otx2_cpt_sync_mbox_msg(struct otx2_mbox *mbox) { @@ -216,3 +225,4 @@ int otx2_cpt_sync_mbox_msg(struct otx2_mbox *mbox) return otx2_mbox_check_rsp_msgs(mbox, 0); } +EXPORT_SYMBOL_NS_GPL(otx2_cpt_sync_mbox_msg, CRYPTO_DEV_OCTEONTX2_CPT); diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptlf.c b/drivers/crypto/marvell/octeontx2/otx2_cptlf.c index c8350fcd60fa..71e5f79431af 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptlf.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptlf.c @@ -274,6 +274,8 @@ void otx2_cptlf_unregister_interrupts(struct otx2_cptlfs_info *lfs) } cptlf_disable_intrs(lfs); } +EXPORT_SYMBOL_NS_GPL(otx2_cptlf_unregister_interrupts, + CRYPTO_DEV_OCTEONTX2_CPT); static int cptlf_do_register_interrrupts(struct otx2_cptlfs_info *lfs, int lf_num, int irq_offset, @@ -321,6 +323,7 @@ free_irq: otx2_cptlf_unregister_interrupts(lfs); return ret; } +EXPORT_SYMBOL_NS_GPL(otx2_cptlf_register_interrupts, CRYPTO_DEV_OCTEONTX2_CPT); void otx2_cptlf_free_irqs_affinity(struct otx2_cptlfs_info *lfs) { @@ -334,6 +337,7 @@ void otx2_cptlf_free_irqs_affinity(struct otx2_cptlfs_info *lfs) free_cpumask_var(lfs->lf[slot].affinity_mask); } } +EXPORT_SYMBOL_NS_GPL(otx2_cptlf_free_irqs_affinity, CRYPTO_DEV_OCTEONTX2_CPT); int otx2_cptlf_set_irqs_affinity(struct otx2_cptlfs_info *lfs) { @@ -366,6 +370,7 @@ free_affinity_mask: otx2_cptlf_free_irqs_affinity(lfs); return ret; } +EXPORT_SYMBOL_NS_GPL(otx2_cptlf_set_irqs_affinity, CRYPTO_DEV_OCTEONTX2_CPT); int otx2_cptlf_init(struct otx2_cptlfs_info *lfs, u8 eng_grp_mask, int pri, int lfs_num) @@ -422,6 +427,7 @@ clear_lfs_num: lfs->lfs_num = 0; return ret; } +EXPORT_SYMBOL_NS_GPL(otx2_cptlf_init, CRYPTO_DEV_OCTEONTX2_CPT); void otx2_cptlf_shutdown(struct otx2_cptlfs_info *lfs) { @@ -431,3 +437,8 @@ void otx2_cptlf_shutdown(struct otx2_cptlfs_info *lfs) /* Send request to detach LFs */ otx2_cpt_detach_rsrcs_msg(lfs); } +EXPORT_SYMBOL_NS_GPL(otx2_cptlf_shutdown, CRYPTO_DEV_OCTEONTX2_CPT); + +MODULE_AUTHOR("Marvell"); +MODULE_DESCRIPTION("Marvell RVU CPT Common module"); +MODULE_LICENSE("GPL"); diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c index a402ccfac557..ddf6e913c1c4 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c @@ -831,6 +831,8 @@ static struct pci_driver otx2_cpt_pci_driver = { module_pci_driver(otx2_cpt_pci_driver); +MODULE_IMPORT_NS(CRYPTO_DEV_OCTEONTX2_CPT); + MODULE_AUTHOR("Marvell"); MODULE_DESCRIPTION(OTX2_CPT_DRV_STRING); MODULE_LICENSE("GPL v2"); diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c index 30b423605c9c..e27ddd3c4e55 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c @@ -120,7 +120,7 @@ static void otx2_cpt_aead_callback(int status, void *arg1, void *arg2) 
otx2_cpt_info_destroy(pdev, inst_info); } if (areq) - areq->complete(areq, status); + crypto_request_complete(areq, status); } static void output_iv_copyback(struct crypto_async_request *areq) @@ -170,7 +170,7 @@ static void otx2_cpt_skcipher_callback(int status, void *arg1, void *arg2) pdev = inst_info->pdev; otx2_cpt_info_destroy(pdev, inst_info); } - areq->complete(areq, status); + crypto_request_complete(areq, status); } } @@ -412,7 +412,7 @@ static int otx2_cpt_skcipher_xts_setkey(struct crypto_skcipher *tfm, const u8 *key1 = key; int ret; - ret = xts_check_key(crypto_skcipher_tfm(tfm), key, keylen); + ret = xts_verify_key(tfm, key, keylen); if (ret) return ret; ctx->key_len = keylen; diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c index 3411e664cf50..392e9fee05e8 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c @@ -429,6 +429,8 @@ static struct pci_driver otx2_cptvf_pci_driver = { module_pci_driver(otx2_cptvf_pci_driver); +MODULE_IMPORT_NS(CRYPTO_DEV_OCTEONTX2_CPT); + MODULE_AUTHOR("Marvell"); MODULE_DESCRIPTION("Marvell RVU CPT Virtual Function Driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c index d6f9e2fe863d..1c11946a4f0b 100644 --- a/drivers/crypto/mxs-dcp.c +++ b/drivers/crypto/mxs-dcp.c @@ -413,11 +413,11 @@ static int dcp_chan_thread_aes(void *data) set_current_state(TASK_RUNNING); if (backlog) - backlog->complete(backlog, -EINPROGRESS); + crypto_request_complete(backlog, -EINPROGRESS); if (arq) { ret = mxs_dcp_aes_block_crypt(arq); - arq->complete(arq, ret); + crypto_request_complete(arq, ret); } } @@ -709,11 +709,11 @@ static int dcp_chan_thread_sha(void *data) set_current_state(TASK_RUNNING); if (backlog) - backlog->complete(backlog, -EINPROGRESS); + crypto_request_complete(backlog, -EINPROGRESS); if (arq) { ret = dcp_sha_req_to_buf(arq); - arq->complete(arq, ret); + crypto_request_complete(arq, ret); } } diff --git a/drivers/crypto/nx/nx-common-powernv.c b/drivers/crypto/nx/nx-common-powernv.c index f34c75a862f2..8c859872c183 100644 --- a/drivers/crypto/nx/nx-common-powernv.c +++ b/drivers/crypto/nx/nx-common-powernv.c @@ -72,7 +72,7 @@ static int (*nx842_powernv_exec)(const unsigned char *in, unsigned int inlen, unsigned char *out, unsigned int *outlenp, void *workmem, int fc); -/** +/* * setup_indirect_dde - Setup an indirect DDE * * The DDE is setup with the DDE count, byte count, and address of @@ -89,7 +89,7 @@ static void setup_indirect_dde(struct data_descriptor_entry *dde, dde->address = cpu_to_be64(nx842_get_pa(ddl)); } -/** +/* * setup_direct_dde - Setup single DDE from buffer * * The DDE is setup with the buffer and length. 
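/*
 * Editor's illustrative sketch, not part of the diff: the OcteonTX2 hunks
 * above split shared code into an rvu_cptcommon module and gate its exports
 * behind a symbol namespace, which the PF/VF drivers must import
 * explicitly. The general pattern, with a made-up symbol and namespace:
 */
#include <linux/export.h>
#include <linux/module.h>

/* In the provider module: export into a namespace, not the global space. */
int example_common_helper(void)
{
        return 0;
}
EXPORT_SYMBOL_NS_GPL(example_common_helper, EXAMPLE_COMMON_NS);

/* In each consumer module: declare the namespace dependency explicitly. */
MODULE_IMPORT_NS(EXAMPLE_COMMON_NS);
MODULE_LICENSE("GPL");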
The buffer must be properly @@ -111,7 +111,7 @@ static unsigned int setup_direct_dde(struct data_descriptor_entry *dde, return l; } -/** +/* * setup_ddl - Setup DDL from buffer * * Returns: @@ -181,9 +181,6 @@ static int setup_ddl(struct data_descriptor_entry *dde, CSB_ERR(csb, msg " at %lx", ##__VA_ARGS__, \ (unsigned long)be64_to_cpu((csb)->address)) -/** - * wait_for_csb - */ static int wait_for_csb(struct nx842_workmem *wmem, struct coprocessor_status_block *csb) { @@ -632,8 +629,8 @@ static int nx842_exec_vas(const unsigned char *in, unsigned int inlen, * @inlen: input buffer size * @out: output buffer pointer * @outlenp: output buffer size pointer - * @workmem: working memory buffer pointer, size determined by - * nx842_powernv_driver.workmem_size + * @wmem: working memory buffer pointer, size determined by + * nx842_powernv_driver.workmem_size * * Returns: see @nx842_powernv_exec() */ diff --git a/drivers/crypto/nx/nx-common-pseries.c b/drivers/crypto/nx/nx-common-pseries.c index 3ea334b7f820..35f2d0d8507e 100644 --- a/drivers/crypto/nx/nx-common-pseries.c +++ b/drivers/crypto/nx/nx-common-pseries.c @@ -123,14 +123,16 @@ struct ibm_nx842_counters { atomic64_t decomp_times[32]; }; -static struct nx842_devdata { +struct nx842_devdata { struct vio_dev *vdev; struct device *dev; struct ibm_nx842_counters *counters; unsigned int max_sg_len; unsigned int max_sync_size; unsigned int max_sync_sg; -} __rcu *devdata; +}; + +static struct nx842_devdata __rcu *devdata; static DEFINE_SPINLOCK(devdata_mutex); #define NX842_COUNTER_INC(_x) \ diff --git a/drivers/crypto/qat/qat_common/adf_transport_access_macros.h b/drivers/crypto/qat/qat_common/adf_transport_access_macros.h index 3b6b0267bbec..d3667dbd9826 100644 --- a/drivers/crypto/qat/qat_common/adf_transport_access_macros.h +++ b/drivers/crypto/qat/qat_common/adf_transport_access_macros.h @@ -37,7 +37,7 @@ #define ADF_SIZE_TO_RING_SIZE_IN_BYTES(SIZE) ((1 << (SIZE - 1)) << 7) #define ADF_RING_SIZE_IN_BYTES_TO_SIZE(SIZE) ((1 << (SIZE - 1)) >> 7) -/* Minimum ring bufer size for memory allocation */ +/* Minimum ring buffer size for memory allocation */ #define ADF_RING_SIZE_BYTES_MIN(SIZE) \ ((SIZE < ADF_SIZE_TO_RING_SIZE_IN_BYTES(ADF_RING_SIZE_4K)) ? 
\ ADF_SIZE_TO_RING_SIZE_IN_BYTES(ADF_RING_SIZE_4K) : SIZE) diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c index b4b9f0aa59b9..538dcbfbcd26 100644 --- a/drivers/crypto/qat/qat_common/qat_algs.c +++ b/drivers/crypto/qat/qat_common/qat_algs.c @@ -435,8 +435,8 @@ static void qat_alg_skcipher_init_com(struct qat_alg_skcipher_ctx *ctx, } else if (aes_v2_capable && mode == ICP_QAT_HW_CIPHER_CTR_MODE) { ICP_QAT_FW_LA_SLICE_TYPE_SET(header->serv_specif_flags, ICP_QAT_FW_LA_USE_UCS_SLICE_TYPE); - keylen = round_up(keylen, 16); memcpy(cd->ucs_aes.key, key, keylen); + keylen = round_up(keylen, 16); } else { memcpy(cd->aes.key, key, keylen); } @@ -676,7 +676,7 @@ static void qat_aead_alg_callback(struct icp_qat_fw_la_resp *qat_resp, qat_bl_free_bufl(inst->accel_dev, &qat_req->buf); if (unlikely(qat_res != ICP_QAT_FW_COMN_STATUS_FLAG_OK)) res = -EBADMSG; - areq->base.complete(&areq->base, res); + aead_request_complete(areq, res); } static void qat_alg_update_iv_ctr_mode(struct qat_crypto_request *qat_req) @@ -752,7 +752,7 @@ static void qat_skcipher_alg_callback(struct icp_qat_fw_la_resp *qat_resp, memcpy(sreq->iv, qat_req->iv, AES_BLOCK_SIZE); - sreq->base.complete(&sreq->base, res); + skcipher_request_complete(sreq, res); } void qat_alg_callback(void *resp) diff --git a/drivers/crypto/qat/qat_common/qat_algs_send.c b/drivers/crypto/qat/qat_common/qat_algs_send.c index ff5b4347f783..bb80455b3e81 100644 --- a/drivers/crypto/qat/qat_common/qat_algs_send.c +++ b/drivers/crypto/qat/qat_common/qat_algs_send.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) /* Copyright(c) 2022 Intel Corporation */ +#include <crypto/algapi.h> #include "adf_transport.h" #include "qat_algs_send.h" #include "qat_crypto.h" @@ -34,7 +35,7 @@ void qat_alg_send_backlog(struct qat_instance_backlog *backlog) break; } list_del(&req->list); - req->base->complete(req->base, -EINPROGRESS); + crypto_request_complete(req->base, -EINPROGRESS); } spin_unlock_bh(&backlog->lock); } diff --git a/drivers/crypto/qat/qat_common/qat_bl.c b/drivers/crypto/qat/qat_common/qat_bl.c index 2e89ff08041b..76baed0a76c0 100644 --- a/drivers/crypto/qat/qat_common/qat_bl.c +++ b/drivers/crypto/qat/qat_common/qat_bl.c @@ -26,8 +26,8 @@ void qat_bl_free_bufl(struct adf_accel_dev *accel_dev, bl_dma_dir = blp != blpout ? 
DMA_TO_DEVICE : DMA_BIDIRECTIONAL; for (i = 0; i < bl->num_bufs; i++) - dma_unmap_single(dev, bl->bufers[i].addr, - bl->bufers[i].len, bl_dma_dir); + dma_unmap_single(dev, bl->buffers[i].addr, + bl->buffers[i].len, bl_dma_dir); dma_unmap_single(dev, blp, sz, DMA_TO_DEVICE); @@ -36,8 +36,8 @@ void qat_bl_free_bufl(struct adf_accel_dev *accel_dev, if (blp != blpout) { for (i = 0; i < blout->num_mapped_bufs; i++) { - dma_unmap_single(dev, blout->bufers[i].addr, - blout->bufers[i].len, + dma_unmap_single(dev, blout->buffers[i].addr, + blout->buffers[i].len, DMA_FROM_DEVICE); } dma_unmap_single(dev, blpout, sz_out, DMA_TO_DEVICE); @@ -53,6 +53,8 @@ static int __qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev, struct qat_request_buffs *buf, dma_addr_t extra_dst_buff, size_t sz_extra_dst_buff, + unsigned int sskip, + unsigned int dskip, gfp_t flags) { struct device *dev = &GET_DEV(accel_dev); @@ -63,8 +65,9 @@ static int __qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev, dma_addr_t blp = DMA_MAPPING_ERROR; dma_addr_t bloutp = DMA_MAPPING_ERROR; struct scatterlist *sg; - size_t sz_out, sz = struct_size(bufl, bufers, n); + size_t sz_out, sz = struct_size(bufl, buffers, n); int node = dev_to_node(&GET_DEV(accel_dev)); + unsigned int left; int bufl_dma_dir; if (unlikely(!n)) @@ -86,7 +89,9 @@ static int __qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev, bufl_dma_dir = sgl != sglout ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL; for (i = 0; i < n; i++) - bufl->bufers[i].addr = DMA_MAPPING_ERROR; + bufl->buffers[i].addr = DMA_MAPPING_ERROR; + + left = sskip; for_each_sg(sgl, sg, n, i) { int y = sg_nctr; @@ -94,13 +99,21 @@ static int __qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev, if (!sg->length) continue; - bufl->bufers[y].addr = dma_map_single(dev, sg_virt(sg), - sg->length, - bufl_dma_dir); - bufl->bufers[y].len = sg->length; - if (unlikely(dma_mapping_error(dev, bufl->bufers[y].addr))) + if (left >= sg->length) { + left -= sg->length; + continue; + } + bufl->buffers[y].addr = dma_map_single(dev, sg_virt(sg) + left, + sg->length - left, + bufl_dma_dir); + bufl->buffers[y].len = sg->length; + if (unlikely(dma_mapping_error(dev, bufl->buffers[y].addr))) goto err_in; sg_nctr++; + if (left) { + bufl->buffers[y].len -= left; + left = 0; + } } bufl->num_bufs = sg_nctr; blp = dma_map_single(dev, bufl, sz, DMA_TO_DEVICE); @@ -111,12 +124,14 @@ static int __qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev, buf->sz = sz; /* Handle out of place operation */ if (sgl != sglout) { - struct qat_alg_buf *bufers; + struct qat_alg_buf *buffers; int extra_buff = extra_dst_buff ? 
1 : 0; int n_sglout = sg_nents(sglout); n = n_sglout + extra_buff; - sz_out = struct_size(buflout, bufers, n); + sz_out = struct_size(buflout, buffers, n); + left = dskip; + sg_nctr = 0; if (n > QAT_MAX_BUFF_DESC) { @@ -129,9 +144,9 @@ static int __qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev, buf->sgl_dst_valid = true; } - bufers = buflout->bufers; + buffers = buflout->buffers; for (i = 0; i < n; i++) - bufers[i].addr = DMA_MAPPING_ERROR; + buffers[i].addr = DMA_MAPPING_ERROR; for_each_sg(sglout, sg, n_sglout, i) { int y = sg_nctr; @@ -139,17 +154,25 @@ static int __qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev, if (!sg->length) continue; - bufers[y].addr = dma_map_single(dev, sg_virt(sg), - sg->length, - DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(dev, bufers[y].addr))) + if (left >= sg->length) { + left -= sg->length; + continue; + } + buffers[y].addr = dma_map_single(dev, sg_virt(sg) + left, + sg->length - left, + DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(dev, buffers[y].addr))) goto err_out; - bufers[y].len = sg->length; + buffers[y].len = sg->length; sg_nctr++; + if (left) { + buffers[y].len -= left; + left = 0; + } } if (extra_buff) { - bufers[sg_nctr].addr = extra_dst_buff; - bufers[sg_nctr].len = sz_extra_dst_buff; + buffers[sg_nctr].addr = extra_dst_buff; + buffers[sg_nctr].len = sz_extra_dst_buff; } buflout->num_bufs = sg_nctr; @@ -174,11 +197,11 @@ err_out: n = sg_nents(sglout); for (i = 0; i < n; i++) { - if (buflout->bufers[i].addr == extra_dst_buff) + if (buflout->buffers[i].addr == extra_dst_buff) break; - if (!dma_mapping_error(dev, buflout->bufers[i].addr)) - dma_unmap_single(dev, buflout->bufers[i].addr, - buflout->bufers[i].len, + if (!dma_mapping_error(dev, buflout->buffers[i].addr)) + dma_unmap_single(dev, buflout->buffers[i].addr, + buflout->buffers[i].len, DMA_FROM_DEVICE); } @@ -191,9 +214,9 @@ err_in: n = sg_nents(sgl); for (i = 0; i < n; i++) - if (!dma_mapping_error(dev, bufl->bufers[i].addr)) - dma_unmap_single(dev, bufl->bufers[i].addr, - bufl->bufers[i].len, + if (!dma_mapping_error(dev, bufl->buffers[i].addr)) + dma_unmap_single(dev, bufl->buffers[i].addr, + bufl->buffers[i].len, bufl_dma_dir); if (!buf->sgl_src_valid) @@ -212,15 +235,19 @@ int qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev, { dma_addr_t extra_dst_buff = 0; size_t sz_extra_dst_buff = 0; + unsigned int sskip = 0; + unsigned int dskip = 0; if (params) { extra_dst_buff = params->extra_dst_buff; sz_extra_dst_buff = params->sz_extra_dst_buff; + sskip = params->sskip; + dskip = params->dskip; } return __qat_bl_sgl_to_bufl(accel_dev, sgl, sglout, buf, extra_dst_buff, sz_extra_dst_buff, - flags); + sskip, dskip, flags); } static void qat_bl_sgl_unmap(struct adf_accel_dev *accel_dev, @@ -231,9 +258,9 @@ static void qat_bl_sgl_unmap(struct adf_accel_dev *accel_dev, int i; for (i = 0; i < n; i++) - if (!dma_mapping_error(dev, bl->bufers[i].addr)) - dma_unmap_single(dev, bl->bufers[i].addr, - bl->bufers[i].len, DMA_FROM_DEVICE); + if (!dma_mapping_error(dev, bl->buffers[i].addr)) + dma_unmap_single(dev, bl->buffers[i].addr, + bl->buffers[i].len, DMA_FROM_DEVICE); } static int qat_bl_sgl_map(struct adf_accel_dev *accel_dev, @@ -248,13 +275,13 @@ static int qat_bl_sgl_map(struct adf_accel_dev *accel_dev, size_t sz; n = sg_nents(sgl); - sz = struct_size(bufl, bufers, n); + sz = struct_size(bufl, buffers, n); bufl = kzalloc_node(sz, GFP_KERNEL, node); if (unlikely(!bufl)) return -ENOMEM; for (i = 0; i < n; i++) - bufl->bufers[i].addr = DMA_MAPPING_ERROR; + 
bufl->buffers[i].addr = DMA_MAPPING_ERROR; sg_nctr = 0; for_each_sg(sgl, sg, n, i) { @@ -263,11 +290,11 @@ static int qat_bl_sgl_map(struct adf_accel_dev *accel_dev, if (!sg->length) continue; - bufl->bufers[y].addr = dma_map_single(dev, sg_virt(sg), - sg->length, - DMA_FROM_DEVICE); - bufl->bufers[y].len = sg->length; - if (unlikely(dma_mapping_error(dev, bufl->bufers[y].addr))) + bufl->buffers[y].addr = dma_map_single(dev, sg_virt(sg), + sg->length, + DMA_FROM_DEVICE); + bufl->buffers[y].len = sg->length; + if (unlikely(dma_mapping_error(dev, bufl->buffers[y].addr))) goto err_map; sg_nctr++; } @@ -280,9 +307,9 @@ static int qat_bl_sgl_map(struct adf_accel_dev *accel_dev, err_map: for (i = 0; i < n; i++) - if (!dma_mapping_error(dev, bufl->bufers[i].addr)) - dma_unmap_single(dev, bufl->bufers[i].addr, - bufl->bufers[i].len, + if (!dma_mapping_error(dev, bufl->buffers[i].addr)) + dma_unmap_single(dev, bufl->buffers[i].addr, + bufl->buffers[i].len, DMA_FROM_DEVICE); kfree(bufl); *bl = NULL; @@ -351,7 +378,7 @@ int qat_bl_realloc_map_new_dst(struct adf_accel_dev *accel_dev, if (ret) return ret; - new_bl_size = struct_size(new_bl, bufers, new_bl->num_bufs); + new_bl_size = struct_size(new_bl, buffers, new_bl->num_bufs); /* Map new firmware SGL descriptor */ new_blp = dma_map_single(dev, new_bl, new_bl_size, DMA_TO_DEVICE); diff --git a/drivers/crypto/qat/qat_common/qat_bl.h b/drivers/crypto/qat/qat_common/qat_bl.h index 8ca5e52ee9e2..d87e4f35ac39 100644 --- a/drivers/crypto/qat/qat_common/qat_bl.h +++ b/drivers/crypto/qat/qat_common/qat_bl.h @@ -18,7 +18,7 @@ struct qat_alg_buf_list { u64 resrvd; u32 num_bufs; u32 num_mapped_bufs; - struct qat_alg_buf bufers[]; + struct qat_alg_buf buffers[]; } __packed; struct qat_alg_fixed_buf_list { @@ -42,6 +42,8 @@ struct qat_request_buffs { struct qat_sgl_to_bufl_params { dma_addr_t extra_dst_buff; size_t sz_extra_dst_buff; + unsigned int sskip; + unsigned int dskip; }; void qat_bl_free_bufl(struct adf_accel_dev *accel_dev, diff --git a/drivers/crypto/qat/qat_common/qat_comp_algs.c b/drivers/crypto/qat/qat_common/qat_comp_algs.c index 1480d36a8d2b..b533984906ec 100644 --- a/drivers/crypto/qat/qat_common/qat_comp_algs.c +++ b/drivers/crypto/qat/qat_common/qat_comp_algs.c @@ -13,6 +13,15 @@ #include "qat_compression.h" #include "qat_algs_send.h" +#define QAT_RFC_1950_HDR_SIZE 2 +#define QAT_RFC_1950_FOOTER_SIZE 4 +#define QAT_RFC_1950_CM_DEFLATE 8 +#define QAT_RFC_1950_CM_DEFLATE_CINFO_32K 7 +#define QAT_RFC_1950_CM_MASK 0x0f +#define QAT_RFC_1950_CM_OFFSET 4 +#define QAT_RFC_1950_DICT_MASK 0x20 +#define QAT_RFC_1950_COMP_HDR 0x785e + static DEFINE_MUTEX(algs_lock); static unsigned int active_devs; @@ -21,9 +30,12 @@ enum direction { COMPRESSION = 1, }; +struct qat_compression_req; + struct qat_compression_ctx { u8 comp_ctx[QAT_COMP_CTX_SIZE]; struct qat_compression_instance *inst; + int (*qat_comp_callback)(struct qat_compression_req *qat_req, void *resp); }; struct qat_dst { @@ -94,7 +106,70 @@ static void qat_comp_resubmit(struct work_struct *work) err: qat_bl_free_bufl(accel_dev, qat_bufs); - areq->base.complete(&areq->base, ret); + acomp_request_complete(areq, ret); +} + +static int parse_zlib_header(u16 zlib_h) +{ + int ret = -EINVAL; + __be16 header; + u8 *header_p; + u8 cmf, flg; + + header = cpu_to_be16(zlib_h); + header_p = (u8 *)&header; + + flg = header_p[0]; + cmf = header_p[1]; + + if (cmf >> QAT_RFC_1950_CM_OFFSET > QAT_RFC_1950_CM_DEFLATE_CINFO_32K) + return ret; + + if ((cmf & QAT_RFC_1950_CM_MASK) != QAT_RFC_1950_CM_DEFLATE) + return 
ret;
+
+        if (flg & QAT_RFC_1950_DICT_MASK)
+                return ret;
+
+        return 0;
+}
+
+static int qat_comp_rfc1950_callback(struct qat_compression_req *qat_req,
+                                     void *resp)
+{
+        struct acomp_req *areq = qat_req->acompress_req;
+        enum direction dir = qat_req->dir;
+        __be32 qat_produced_adler;
+
+        qat_produced_adler = cpu_to_be32(qat_comp_get_produced_adler32(resp));
+
+        if (dir == COMPRESSION) {
+                __be16 zlib_header;
+
+                zlib_header = cpu_to_be16(QAT_RFC_1950_COMP_HDR);
+                scatterwalk_map_and_copy(&zlib_header, areq->dst, 0, QAT_RFC_1950_HDR_SIZE, 1);
+                areq->dlen += QAT_RFC_1950_HDR_SIZE;
+
+                scatterwalk_map_and_copy(&qat_produced_adler, areq->dst, areq->dlen,
+                                         QAT_RFC_1950_FOOTER_SIZE, 1);
+                areq->dlen += QAT_RFC_1950_FOOTER_SIZE;
+        } else {
+                __be32 decomp_adler;
+                int footer_offset;
+                int consumed;
+
+                consumed = qat_comp_get_consumed_ctr(resp);
+                footer_offset = consumed + QAT_RFC_1950_HDR_SIZE;
+                if (footer_offset + QAT_RFC_1950_FOOTER_SIZE > areq->slen)
+                        return -EBADMSG;
+
+                scatterwalk_map_and_copy(&decomp_adler, areq->src, footer_offset,
+                                         QAT_RFC_1950_FOOTER_SIZE, 0);
+
+                if (qat_produced_adler != decomp_adler)
+                        return -EBADMSG;
+        }
+        return 0;
 }
 
 static void qat_comp_generic_callback(struct qat_compression_req *qat_req,
@@ -167,9 +242,12 @@ static void qat_comp_generic_callback(struct qat_compression_req *qat_req,
         res = 0;
         areq->dlen = produced;
 
+        if (ctx->qat_comp_callback)
+                res = ctx->qat_comp_callback(qat_req, resp);
+
 end:
         qat_bl_free_bufl(accel_dev, &qat_req->buf);
-        areq->base.complete(&areq->base, res);
+        acomp_request_complete(areq, res);
 }
 
 void qat_comp_alg_callback(void *resp)
@@ -215,24 +293,39 @@ static void qat_comp_alg_exit_tfm(struct crypto_acomp *acomp_tfm)
         memset(ctx, 0, sizeof(*ctx));
 }
 
-static int qat_comp_alg_compress_decompress(struct acomp_req *areq,
-                                            enum direction dir)
+static int qat_comp_alg_rfc1950_init_tfm(struct crypto_acomp *acomp_tfm)
+{
+        struct crypto_tfm *tfm = crypto_acomp_tfm(acomp_tfm);
+        struct qat_compression_ctx *ctx = crypto_tfm_ctx(tfm);
+        int ret;
+
+        ret = qat_comp_alg_init_tfm(acomp_tfm);
+        ctx->qat_comp_callback = &qat_comp_rfc1950_callback;
+
+        return ret;
+}
+
+static int qat_comp_alg_compress_decompress(struct acomp_req *areq, enum direction dir,
+                                            unsigned int shdr, unsigned int sftr,
+                                            unsigned int dhdr, unsigned int dftr)
 {
         struct qat_compression_req *qat_req = acomp_request_ctx(areq);
         struct crypto_acomp *acomp_tfm = crypto_acomp_reqtfm(areq);
         struct crypto_tfm *tfm = crypto_acomp_tfm(acomp_tfm);
         struct qat_compression_ctx *ctx = crypto_tfm_ctx(tfm);
         struct qat_compression_instance *inst = ctx->inst;
-        struct qat_sgl_to_bufl_params *p_params = NULL;
         gfp_t f = qat_algs_alloc_flags(&areq->base);
-        struct qat_sgl_to_bufl_params params;
-        unsigned int slen = areq->slen;
-        unsigned int dlen = areq->dlen;
+        struct qat_sgl_to_bufl_params params = {0};
+        int slen = areq->slen - shdr - sftr;
+        int dlen = areq->dlen - dhdr - dftr;
         dma_addr_t sfbuf, dfbuf;
         u8 *req = qat_req->req;
         size_t ovf_buff_sz;
         int ret;
 
+        params.sskip = shdr;
+        params.dskip = dhdr;
+
         if (!areq->src || !slen)
                 return -EINVAL;
 
@@ -254,6 +347,7 @@ static int qat_comp_alg_compress_decompress(struct acomp_req *areq,
                 if (!areq->dst)
                         return -ENOMEM;
 
+                dlen -= dhdr + dftr;
                 areq->dlen = dlen;
                 qat_req->dst.resubmitted = false;
         }
@@ -262,11 +356,10 @@ static int qat_comp_alg_compress_decompress(struct acomp_req *areq,
                 params.extra_dst_buff = inst->dc_data->ovf_buff_p;
                 ovf_buff_sz = inst->dc_data->ovf_buff_sz;
                 params.sz_extra_dst_buff = ovf_buff_sz;
-                p_params = &params;
         }
 
         ret = qat_bl_sgl_to_bufl(ctx->inst->accel_dev, areq->src, areq->dst,
-                                 &qat_req->buf, p_params, f);
+                                 &qat_req->buf, &params, f);
         if (unlikely(ret))
                 return ret;
 
@@ -299,12 +392,49 @@ static int qat_comp_alg_compress(struct acomp_req *req)
 {
-        return qat_comp_alg_compress_decompress(req, COMPRESSION);
+        return qat_comp_alg_compress_decompress(req, COMPRESSION, 0, 0, 0, 0);
 }
 
 static int qat_comp_alg_decompress(struct acomp_req *req)
 {
-        return qat_comp_alg_compress_decompress(req, DECOMPRESSION);
+        return qat_comp_alg_compress_decompress(req, DECOMPRESSION, 0, 0, 0, 0);
+}
+
+static int qat_comp_alg_rfc1950_compress(struct acomp_req *req)
+{
+        if (!req->dst && req->dlen != 0)
+                return -EINVAL;
+
+        if (req->dst && req->dlen <= QAT_RFC_1950_HDR_SIZE + QAT_RFC_1950_FOOTER_SIZE)
+                return -EINVAL;
+
+        return qat_comp_alg_compress_decompress(req, COMPRESSION, 0, 0,
+                                                QAT_RFC_1950_HDR_SIZE,
+                                                QAT_RFC_1950_FOOTER_SIZE);
+}
+
+static int qat_comp_alg_rfc1950_decompress(struct acomp_req *req)
+{
+        struct crypto_acomp *acomp_tfm = crypto_acomp_reqtfm(req);
+        struct crypto_tfm *tfm = crypto_acomp_tfm(acomp_tfm);
+        struct qat_compression_ctx *ctx = crypto_tfm_ctx(tfm);
+        struct adf_accel_dev *accel_dev = ctx->inst->accel_dev;
+        u16 zlib_header;
+        int ret;
+
+        if (req->slen <= QAT_RFC_1950_HDR_SIZE + QAT_RFC_1950_FOOTER_SIZE)
+                return -EBADMSG;
+
+        scatterwalk_map_and_copy(&zlib_header, req->src, 0, QAT_RFC_1950_HDR_SIZE, 0);
+
+        ret = parse_zlib_header(zlib_header);
+        if (ret) {
+                dev_dbg(&GET_DEV(accel_dev), "Error parsing zlib header\n");
+                return ret;
+        }
+
+        return qat_comp_alg_compress_decompress(req, DECOMPRESSION, QAT_RFC_1950_HDR_SIZE,
+                                                QAT_RFC_1950_FOOTER_SIZE, 0, 0);
 }
 
 static struct acomp_alg qat_acomp[] = { {
@@ -322,6 +452,21 @@ static struct acomp_alg qat_acomp[] = { {
         .decompress = qat_comp_alg_decompress,
         .dst_free = sgl_free,
         .reqsize = sizeof(struct qat_compression_req),
+}, {
+        .base = {
+                .cra_name = "zlib-deflate",
+                .cra_driver_name = "qat_zlib_deflate",
+                .cra_priority = 4001,
+                .cra_flags = CRYPTO_ALG_ASYNC,
+                .cra_ctxsize = sizeof(struct qat_compression_ctx),
+                .cra_module = THIS_MODULE,
+        },
+        .init = qat_comp_alg_rfc1950_init_tfm,
+        .exit = qat_comp_alg_exit_tfm,
+        .compress = qat_comp_alg_rfc1950_compress,
+        .decompress = qat_comp_alg_rfc1950_decompress,
+        .dst_free = sgl_free,
+        .reqsize = sizeof(struct qat_compression_req),
 } };
 
 int qat_comp_algs_register(void)
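For reference, the framing that the qat_comp_rfc1950_* helpers above add and strip is the zlib container defined in RFC 1950: a 2-byte header (CMF, then FLG), the raw deflate payload, and a 4-byte big-endian Adler-32 of the uncompressed data. The sketch below is an illustrative aside, not part of the patch; the function name and the userspace stdint types are assumptions. It mirrors the conditions parse_zlib_header() accepts (deflate method, window no larger than 32 KiB, no preset dictionary) and also shows the RFC 1950 FCHECK rule, which the driver itself does not re-verify; the fixed header 0x785e written on compression satisfies all four.

#include <stdint.h>
#include <stdbool.h>

/* Illustrative only: validate a zlib (RFC 1950) header given as its two bytes. */
static bool zlib_header_ok(uint8_t cmf, uint8_t flg)
{
        if ((cmf & 0x0f) != 8)        /* CM: compression method must be 8 (deflate)    */
                return false;
        if ((cmf >> 4) > 7)           /* CINFO: log2(window) - 8; 7 is the 32 KiB max  */
                return false;
        if (flg & 0x20)               /* FDICT: preset dictionaries are not supported  */
                return false;
        /* FCHECK: the 16-bit header must be divisible by 31 (0x785e is: 30814 = 31 * 994). */
        return (((unsigned int)cmf << 8) | flg) % 31 == 0;
}

On decompression the driver performs the complementary steps: it skips these two header bytes via the new sskip parameter and compares the trailing Adler-32 against the value reported back by the firmware.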
diff --git a/drivers/crypto/qat/qat_common/qat_compression.c b/drivers/crypto/qat/qat_common/qat_compression.c
index 9fd10f4242f8..3f1f35283266 100644
--- a/drivers/crypto/qat/qat_common/qat_compression.c
+++ b/drivers/crypto/qat/qat_common/qat_compression.c
@@ -72,7 +72,7 @@ struct qat_compression_instance *qat_compression_get_instance_node(int node)
         }
 
         if (!accel_dev) {
-                pr_info("QAT: Could not find a device on node %d\n", node);
+                pr_debug_ratelimited("QAT: Could not find a device on node %d\n", node);
                 /* Get any started device */
                 list_for_each(itr, adf_devmgr_get_head()) {
                         struct adf_accel_dev *tmp_dev;
diff --git a/drivers/crypto/qat/qat_common/qat_crypto.c b/drivers/crypto/qat/qat_common/qat_crypto.c
index e31199eade5b..40c8e74d1cf9 100644
--- a/drivers/crypto/qat/qat_common/qat_crypto.c
+++ b/drivers/crypto/qat/qat_common/qat_crypto.c
@@ -70,7 +70,7 @@ struct qat_crypto_instance *qat_crypto_get_instance_node(int node)
         }
 
         if (!accel_dev) {
-                pr_info("QAT: Could not find a device on node %d\n", node);
+                pr_debug_ratelimited("QAT: Could not find a device on node %d\n", node);
                 /* Get any started device */
                 list_for_each_entry(tmp_dev, adf_devmgr_get_head(), list) {
                         if (adf_dev_started(tmp_dev) &&
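The qce, s5p-sss and sahara hunks that follow belong to this series' tree-wide completion-callback conversion: the callback registered on a request now receives the caller's data pointer rather than the crypto_async_request itself, so drivers report completion through crypto_request_complete() (or the skcipher/ahash/acomp wrappers seen elsewhere in this diff) instead of invoking req->base.complete(&req->base, err) by hand. A schematic sketch of the pattern is below; the types are deliberately simplified stand-ins, not the kernel's exact definitions.

/* Simplified illustration of the new completion contract (not kernel code). */
typedef void (*crypto_completion_t)(void *req_data, int err);

struct crypto_async_request {
        crypto_completion_t complete;   /* invoked with ->data, not with the request */
        void *data;                     /* opaque pointer supplied by the requester  */
};

/* Roughly what the crypto_request_complete() helper does. */
static inline void crypto_request_complete(struct crypto_async_request *req, int err)
{
        req->complete(req->data, err);
}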
node %d\n", node); /* Get any started device */ list_for_each_entry(tmp_dev, adf_devmgr_get_head(), list) { if (adf_dev_started(tmp_dev) && diff --git a/drivers/crypto/qce/core.c b/drivers/crypto/qce/core.c index d3780be44a76..74deca4f96e0 100644 --- a/drivers/crypto/qce/core.c +++ b/drivers/crypto/qce/core.c @@ -107,7 +107,7 @@ static int qce_handle_queue(struct qce_device *qce, if (backlog) { spin_lock_bh(&qce->lock); - backlog->complete(backlog, -EINPROGRESS); + crypto_request_complete(backlog, -EINPROGRESS); spin_unlock_bh(&qce->lock); } @@ -132,7 +132,7 @@ static void qce_tasklet_req_done(unsigned long data) spin_unlock_irqrestore(&qce->lock, flags); if (req) - req->complete(req, qce->result); + crypto_request_complete(req, qce->result); qce_handle_queue(qce, NULL); } diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c index b79e49aa724f..1c4d5fb05d69 100644 --- a/drivers/crypto/s5p-sss.c +++ b/drivers/crypto/s5p-sss.c @@ -499,7 +499,7 @@ static void s5p_sg_done(struct s5p_aes_dev *dev) /* Calls the completion. Cannot be called with dev->lock hold. */ static void s5p_aes_complete(struct skcipher_request *req, int err) { - req->base.complete(&req->base, err); + skcipher_request_complete(req, err); } static void s5p_unset_outdata(struct s5p_aes_dev *dev) @@ -1355,7 +1355,7 @@ static void s5p_hash_finish_req(struct ahash_request *req, int err) spin_unlock_irqrestore(&dd->hash_lock, flags); if (req->base.complete) - req->base.complete(&req->base, err); + ahash_request_complete(req, err); } /** @@ -1397,7 +1397,7 @@ retry: return ret; if (backlog) - backlog->complete(backlog, -EINPROGRESS); + crypto_request_complete(backlog, -EINPROGRESS); req = ahash_request_cast(async_req); dd->hash_req = req; @@ -1991,7 +1991,7 @@ static void s5p_tasklet_cb(unsigned long data) spin_unlock_irqrestore(&dev->lock, flags); if (backlog) - backlog->complete(backlog, -EINPROGRESS); + crypto_request_complete(backlog, -EINPROGRESS); dev->req = skcipher_request_cast(async_req); dev->ctx = crypto_tfm_ctx(dev->req->base.tfm); diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c index 7ab20fb95166..dd4c703cd855 100644 --- a/drivers/crypto/sahara.c +++ b/drivers/crypto/sahara.c @@ -1049,7 +1049,7 @@ static int sahara_queue_manage(void *data) spin_unlock_bh(&dev->queue_spinlock); if (backlog) - backlog->complete(backlog, -EINPROGRESS); + crypto_request_complete(backlog, -EINPROGRESS); if (async_req) { if (crypto_tfm_alg_type(async_req->tfm) == @@ -1065,7 +1065,7 @@ static int sahara_queue_manage(void *data) ret = sahara_aes_process(req); } - async_req->complete(async_req, ret); + crypto_request_complete(async_req, ret); continue; } diff --git a/drivers/crypto/stm32/stm32-cryp.c b/drivers/crypto/stm32/stm32-cryp.c index 4208338e72b6..6b8d731092a4 100644 --- a/drivers/crypto/stm32/stm32-cryp.c +++ b/drivers/crypto/stm32/stm32-cryp.c @@ -597,7 +597,6 @@ static void stm32_crypt_gcmccm_end_header(struct stm32_cryp *cryp) static void stm32_cryp_write_ccm_first_header(struct stm32_cryp *cryp) { - unsigned int i; size_t written; size_t len; u32 alen = cryp->areq->assoclen; @@ -623,8 +622,8 @@ static void stm32_cryp_write_ccm_first_header(struct stm32_cryp *cryp) written = min_t(size_t, AES_BLOCK_SIZE - len, alen); scatterwalk_copychunks((char *)block + len, &cryp->in_walk, written, 0); - for (i = 0; i < AES_BLOCK_32; i++) - stm32_cryp_write(cryp, cryp->caps->din, block[i]); + + writesl(cryp->regs + cryp->caps->din, block, AES_BLOCK_32); cryp->header_in -= written; @@ -1363,18 +1362,14 @@ static int 
stm32_cryp_read_auth_tag(struct stm32_cryp *cryp) u32 out_tag[AES_BLOCK_32]; /* Get and write tag */ - for (i = 0; i < AES_BLOCK_32; i++) - out_tag[i] = stm32_cryp_read(cryp, cryp->caps->dout); - + readsl(cryp->regs + cryp->caps->dout, out_tag, AES_BLOCK_32); scatterwalk_copychunks(out_tag, &cryp->out_walk, cryp->authsize, 1); } else { /* Get and check tag */ u32 in_tag[AES_BLOCK_32], out_tag[AES_BLOCK_32]; scatterwalk_copychunks(in_tag, &cryp->in_walk, cryp->authsize, 0); - - for (i = 0; i < AES_BLOCK_32; i++) - out_tag[i] = stm32_cryp_read(cryp, cryp->caps->dout); + readsl(cryp->regs + cryp->caps->dout, out_tag, AES_BLOCK_32); if (crypto_memneq(in_tag, out_tag, cryp->authsize)) ret = -EBADMSG; @@ -1415,12 +1410,9 @@ static void stm32_cryp_check_ctr_counter(struct stm32_cryp *cryp) static void stm32_cryp_irq_read_data(struct stm32_cryp *cryp) { - unsigned int i; u32 block[AES_BLOCK_32]; - for (i = 0; i < cryp->hw_blocksize / sizeof(u32); i++) - block[i] = stm32_cryp_read(cryp, cryp->caps->dout); - + readsl(cryp->regs + cryp->caps->dout, block, cryp->hw_blocksize / sizeof(u32)); scatterwalk_copychunks(block, &cryp->out_walk, min_t(size_t, cryp->hw_blocksize, cryp->payload_out), 1); cryp->payload_out -= min_t(size_t, cryp->hw_blocksize, @@ -1429,14 +1421,11 @@ static void stm32_cryp_irq_read_data(struct stm32_cryp *cryp) static void stm32_cryp_irq_write_block(struct stm32_cryp *cryp) { - unsigned int i; u32 block[AES_BLOCK_32] = {0}; scatterwalk_copychunks(block, &cryp->in_walk, min_t(size_t, cryp->hw_blocksize, cryp->payload_in), 0); - for (i = 0; i < cryp->hw_blocksize / sizeof(u32); i++) - stm32_cryp_write(cryp, cryp->caps->din, block[i]); - + writesl(cryp->regs + cryp->caps->din, block, cryp->hw_blocksize / sizeof(u32)); cryp->payload_in -= min_t(size_t, cryp->hw_blocksize, cryp->payload_in); } @@ -1480,8 +1469,7 @@ static void stm32_cryp_irq_write_gcm_padded_data(struct stm32_cryp *cryp) * Same code as stm32_cryp_irq_read_data(), but we want to store * block value */ - for (i = 0; i < cryp->hw_blocksize / sizeof(u32); i++) - block[i] = stm32_cryp_read(cryp, cryp->caps->dout); + readsl(cryp->regs + cryp->caps->dout, block, cryp->hw_blocksize / sizeof(u32)); scatterwalk_copychunks(block, &cryp->out_walk, min_t(size_t, cryp->hw_blocksize, cryp->payload_out), 1); @@ -1499,8 +1487,7 @@ static void stm32_cryp_irq_write_gcm_padded_data(struct stm32_cryp *cryp) stm32_cryp_write(cryp, cryp->caps->cr, cfg); /* f) write padded data */ - for (i = 0; i < AES_BLOCK_32; i++) - stm32_cryp_write(cryp, cryp->caps->din, block[i]); + writesl(cryp->regs + cryp->caps->din, block, AES_BLOCK_32); /* g) Empty fifo out */ err = stm32_cryp_wait_output(cryp); @@ -1580,8 +1567,7 @@ static void stm32_cryp_irq_write_ccm_padded_data(struct stm32_cryp *cryp) * Same code as stm32_cryp_irq_read_data(), but we want to store * block value */ - for (i = 0; i < cryp->hw_blocksize / sizeof(u32); i++) - block[i] = stm32_cryp_read(cryp, cryp->caps->dout); + readsl(cryp->regs + cryp->caps->dout, block, cryp->hw_blocksize / sizeof(u32)); scatterwalk_copychunks(block, &cryp->out_walk, min_t(size_t, cryp->hw_blocksize, cryp->payload_out), 1); @@ -1660,15 +1646,14 @@ static void stm32_cryp_irq_write_data(struct stm32_cryp *cryp) static void stm32_cryp_irq_write_gcmccm_header(struct stm32_cryp *cryp) { - unsigned int i; u32 block[AES_BLOCK_32] = {0}; size_t written; written = min_t(size_t, AES_BLOCK_SIZE, cryp->header_in); scatterwalk_copychunks(block, &cryp->in_walk, written, 0); - for (i = 0; i < AES_BLOCK_32; i++) - 
stm32_cryp_write(cryp, cryp->caps->din, block[i]); + + writesl(cryp->regs + cryp->caps->din, block, AES_BLOCK_32); cryp->header_in -= written; diff --git a/drivers/crypto/stm32/stm32-hash.c b/drivers/crypto/stm32/stm32-hash.c index d33006d43f76..7bf805563ac2 100644 --- a/drivers/crypto/stm32/stm32-hash.c +++ b/drivers/crypto/stm32/stm32-hash.c @@ -32,6 +32,7 @@ #define HASH_CR 0x00 #define HASH_DIN 0x04 #define HASH_STR 0x08 +#define HASH_UX500_HREG(x) (0x0c + ((x) * 0x04)) #define HASH_IMR 0x20 #define HASH_SR 0x24 #define HASH_CSR(x) (0x0F8 + ((x) * 0x04)) @@ -54,6 +55,10 @@ #define HASH_CR_ALGO_SHA224 0x40000 #define HASH_CR_ALGO_SHA256 0x40080 +#define HASH_CR_UX500_EMPTYMSG BIT(20) +#define HASH_CR_UX500_ALGO_SHA1 BIT(7) +#define HASH_CR_UX500_ALGO_SHA256 0x0 + /* Interrupt */ #define HASH_DINIE BIT(0) #define HASH_DCIE BIT(1) @@ -115,6 +120,7 @@ enum stm32_hash_data_format { struct stm32_hash_ctx { struct crypto_engine_ctx enginectx; struct stm32_hash_dev *hdev; + struct crypto_shash *xtfm; unsigned long flags; u8 key[HASH_MAX_KEY_SIZE]; @@ -157,6 +163,10 @@ struct stm32_hash_algs_info { struct stm32_hash_pdata { struct stm32_hash_algs_info *algs_info; size_t algs_info_size; + bool has_sr; + bool has_mdmat; + bool broken_emptymsg; + bool ux500; }; struct stm32_hash_dev { @@ -168,6 +178,7 @@ struct stm32_hash_dev { phys_addr_t phys_base; u32 dma_mode; u32 dma_maxburst; + bool polled; struct ahash_request *req; struct crypto_engine *engine; @@ -208,6 +219,11 @@ static inline int stm32_hash_wait_busy(struct stm32_hash_dev *hdev) { u32 status; + /* The Ux500 lacks the special status register, we poll the DCAL bit instead */ + if (!hdev->pdata->has_sr) + return readl_relaxed_poll_timeout(hdev->io_base + HASH_STR, status, + !(status & HASH_STR_DCAL), 10, 10000); + return readl_relaxed_poll_timeout(hdev->io_base + HASH_SR, status, !(status & HASH_SR_BUSY), 10, 10000); } @@ -249,7 +265,7 @@ static int stm32_hash_write_key(struct stm32_hash_dev *hdev) return 0; } -static void stm32_hash_write_ctrl(struct stm32_hash_dev *hdev) +static void stm32_hash_write_ctrl(struct stm32_hash_dev *hdev, int bufcnt) { struct stm32_hash_request_ctx *rctx = ahash_request_ctx(hdev->req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(hdev->req); @@ -263,13 +279,19 @@ static void stm32_hash_write_ctrl(struct stm32_hash_dev *hdev) reg |= HASH_CR_ALGO_MD5; break; case HASH_FLAGS_SHA1: - reg |= HASH_CR_ALGO_SHA1; + if (hdev->pdata->ux500) + reg |= HASH_CR_UX500_ALGO_SHA1; + else + reg |= HASH_CR_ALGO_SHA1; break; case HASH_FLAGS_SHA224: reg |= HASH_CR_ALGO_SHA224; break; case HASH_FLAGS_SHA256: - reg |= HASH_CR_ALGO_SHA256; + if (hdev->pdata->ux500) + reg |= HASH_CR_UX500_ALGO_SHA256; + else + reg |= HASH_CR_ALGO_SHA256; break; default: reg |= HASH_CR_ALGO_MD5; @@ -284,7 +306,15 @@ static void stm32_hash_write_ctrl(struct stm32_hash_dev *hdev) reg |= HASH_CR_LKEY; } - stm32_hash_write(hdev, HASH_IMR, HASH_DCIE); + /* + * On the Ux500 we need to set a special flag to indicate that + * the message is zero length. 
+ */ + if (hdev->pdata->ux500 && bufcnt == 0) + reg |= HASH_CR_UX500_EMPTYMSG; + + if (!hdev->polled) + stm32_hash_write(hdev, HASH_IMR, HASH_DCIE); stm32_hash_write(hdev, HASH_CR, reg); @@ -345,7 +375,7 @@ static int stm32_hash_xmit_cpu(struct stm32_hash_dev *hdev, hdev->flags |= HASH_FLAGS_CPU; - stm32_hash_write_ctrl(hdev); + stm32_hash_write_ctrl(hdev, length); if (stm32_hash_wait_busy(hdev)) return -ETIMEDOUT; @@ -362,6 +392,9 @@ static int stm32_hash_xmit_cpu(struct stm32_hash_dev *hdev, stm32_hash_write(hdev, HASH_DIN, buffer[count]); if (final) { + if (stm32_hash_wait_busy(hdev)) + return -ETIMEDOUT; + stm32_hash_set_nblw(hdev, length); reg = stm32_hash_read(hdev, HASH_STR); reg |= HASH_STR_DCAL; @@ -399,8 +432,15 @@ static int stm32_hash_update_cpu(struct stm32_hash_dev *hdev) if (final) { bufcnt = rctx->bufcnt; rctx->bufcnt = 0; - err = stm32_hash_xmit_cpu(hdev, rctx->buffer, bufcnt, - (rctx->flags & HASH_FLAGS_FINUP)); + err = stm32_hash_xmit_cpu(hdev, rctx->buffer, bufcnt, 1); + + /* If we have an IRQ, wait for that, else poll for completion */ + if (hdev->polled) { + if (stm32_hash_wait_busy(hdev)) + return -ETIMEDOUT; + hdev->flags |= HASH_FLAGS_OUTPUT_READY; + err = 0; + } } return err; @@ -431,11 +471,12 @@ static int stm32_hash_xmit_dma(struct stm32_hash_dev *hdev, reg = stm32_hash_read(hdev, HASH_CR); - if (mdma) - reg |= HASH_CR_MDMAT; - else - reg &= ~HASH_CR_MDMAT; - + if (!hdev->pdata->has_mdmat) { + if (mdma) + reg |= HASH_CR_MDMAT; + else + reg &= ~HASH_CR_MDMAT; + } reg |= HASH_CR_DMAE; stm32_hash_write(hdev, HASH_CR, reg); @@ -556,7 +597,7 @@ static int stm32_hash_dma_send(struct stm32_hash_dev *hdev) if (rctx->nents < 0) return -EINVAL; - stm32_hash_write_ctrl(hdev); + stm32_hash_write_ctrl(hdev, rctx->total); if (hdev->flags & HASH_FLAGS_HMAC) { err = stm32_hash_hmac_dma_send(hdev); @@ -743,16 +784,57 @@ static int stm32_hash_final_req(struct stm32_hash_dev *hdev) else err = stm32_hash_xmit_cpu(hdev, rctx->buffer, buflen, 1); + /* If we have an IRQ, wait for that, else poll for completion */ + if (hdev->polled) { + if (stm32_hash_wait_busy(hdev)) + return -ETIMEDOUT; + hdev->flags |= HASH_FLAGS_OUTPUT_READY; + /* Caller will call stm32_hash_finish_req() */ + err = 0; + } return err; } +static void stm32_hash_emptymsg_fallback(struct ahash_request *req) +{ + struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); + struct stm32_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req); + struct stm32_hash_dev *hdev = rctx->hdev; + int ret; + + dev_dbg(hdev->dev, "use fallback message size 0 key size %d\n", + ctx->keylen); + + if (!ctx->xtfm) { + dev_err(hdev->dev, "no fallback engine\n"); + return; + } + + if (ctx->keylen) { + ret = crypto_shash_setkey(ctx->xtfm, ctx->key, ctx->keylen); + if (ret) { + dev_err(hdev->dev, "failed to set key ret=%d\n", ret); + return; + } + } + + ret = crypto_shash_tfm_digest(ctx->xtfm, NULL, 0, rctx->digest); + if (ret) + dev_err(hdev->dev, "shash digest error\n"); +} + static void stm32_hash_copy_hash(struct ahash_request *req) { struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req); + struct stm32_hash_dev *hdev = rctx->hdev; __be32 *hash = (void *)rctx->digest; unsigned int i, hashsize; + if (hdev->pdata->broken_emptymsg && !req->nbytes) + return stm32_hash_emptymsg_fallback(req); + switch (rctx->flags & HASH_FLAGS_ALGO_MASK) { case HASH_FLAGS_MD5: hashsize = MD5_DIGEST_SIZE; @@ -770,9 +852,14 @@ static void stm32_hash_copy_hash(struct ahash_request *req) return; } - for (i = 
0; i < hashsize / sizeof(u32); i++) - hash[i] = cpu_to_be32(stm32_hash_read(rctx->hdev, - HASH_HREG(i))); + for (i = 0; i < hashsize / sizeof(u32); i++) { + if (hdev->pdata->ux500) + hash[i] = cpu_to_be32(stm32_hash_read(hdev, + HASH_UX500_HREG(i))); + else + hash[i] = cpu_to_be32(stm32_hash_read(hdev, + HASH_HREG(i))); + } } static int stm32_hash_finish(struct ahash_request *req) @@ -961,11 +1048,13 @@ static int stm32_hash_export(struct ahash_request *req, void *out) struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx); u32 *preg; unsigned int i; + int ret; pm_runtime_get_sync(hdev->dev); - while ((stm32_hash_read(hdev, HASH_SR) & HASH_SR_BUSY)) - cpu_relax(); + ret = stm32_hash_wait_busy(hdev); + if (ret) + return ret; rctx->hw_context = kmalloc_array(3 + HASH_CSR_REGISTER_NUMBER, sizeof(u32), @@ -973,7 +1062,8 @@ static int stm32_hash_export(struct ahash_request *req, void *out) preg = rctx->hw_context; - *preg++ = stm32_hash_read(hdev, HASH_IMR); + if (!hdev->pdata->ux500) + *preg++ = stm32_hash_read(hdev, HASH_IMR); *preg++ = stm32_hash_read(hdev, HASH_STR); *preg++ = stm32_hash_read(hdev, HASH_CR); for (i = 0; i < HASH_CSR_REGISTER_NUMBER; i++) @@ -1002,7 +1092,8 @@ static int stm32_hash_import(struct ahash_request *req, const void *in) pm_runtime_get_sync(hdev->dev); - stm32_hash_write(hdev, HASH_IMR, *preg++); + if (!hdev->pdata->ux500) + stm32_hash_write(hdev, HASH_IMR, *preg++); stm32_hash_write(hdev, HASH_STR, *preg++); stm32_hash_write(hdev, HASH_CR, *preg); reg = *preg++ | HASH_CR_INIT; @@ -1034,6 +1125,29 @@ static int stm32_hash_setkey(struct crypto_ahash *tfm, return 0; } +static int stm32_hash_init_fallback(struct crypto_tfm *tfm) +{ + struct stm32_hash_ctx *ctx = crypto_tfm_ctx(tfm); + struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx); + const char *name = crypto_tfm_alg_name(tfm); + struct crypto_shash *xtfm; + + /* The fallback is only needed on Ux500 */ + if (!hdev->pdata->ux500) + return 0; + + xtfm = crypto_alloc_shash(name, 0, CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(xtfm)) { + dev_err(hdev->dev, "failed to allocate %s fallback\n", + name); + return PTR_ERR(xtfm); + } + dev_info(hdev->dev, "allocated %s fallback\n", name); + ctx->xtfm = xtfm; + + return 0; +} + static int stm32_hash_cra_init_algs(struct crypto_tfm *tfm, const char *algs_hmac_name) { @@ -1050,7 +1164,8 @@ static int stm32_hash_cra_init_algs(struct crypto_tfm *tfm, ctx->enginectx.op.do_one_request = stm32_hash_one_request; ctx->enginectx.op.prepare_request = stm32_hash_prepare_req; ctx->enginectx.op.unprepare_request = NULL; - return 0; + + return stm32_hash_init_fallback(tfm); } static int stm32_hash_cra_init(struct crypto_tfm *tfm) @@ -1078,6 +1193,14 @@ static int stm32_hash_cra_sha256_init(struct crypto_tfm *tfm) return stm32_hash_cra_init_algs(tfm, "sha256"); } +static void stm32_hash_cra_exit(struct crypto_tfm *tfm) +{ + struct stm32_hash_ctx *ctx = crypto_tfm_ctx(tfm); + + if (ctx->xtfm) + crypto_free_shash(ctx->xtfm); +} + static irqreturn_t stm32_hash_irq_thread(int irq, void *dev_id) { struct stm32_hash_dev *hdev = dev_id; @@ -1121,7 +1244,7 @@ static irqreturn_t stm32_hash_irq_handler(int irq, void *dev_id) return IRQ_NONE; } -static struct ahash_alg algs_md5_sha1[] = { +static struct ahash_alg algs_md5[] = { { .init = stm32_hash_init, .update = stm32_hash_update, @@ -1143,6 +1266,7 @@ static struct ahash_alg algs_md5_sha1[] = { .cra_ctxsize = sizeof(struct stm32_hash_ctx), .cra_alignmask = 3, .cra_init = stm32_hash_cra_init, + .cra_exit = stm32_hash_cra_exit, .cra_module = 
THIS_MODULE, } } @@ -1169,10 +1293,14 @@ static struct ahash_alg algs_md5_sha1[] = { .cra_ctxsize = sizeof(struct stm32_hash_ctx), .cra_alignmask = 3, .cra_init = stm32_hash_cra_md5_init, + .cra_exit = stm32_hash_cra_exit, .cra_module = THIS_MODULE, } } }, +}; + +static struct ahash_alg algs_sha1[] = { { .init = stm32_hash_init, .update = stm32_hash_update, @@ -1194,6 +1322,7 @@ static struct ahash_alg algs_md5_sha1[] = { .cra_ctxsize = sizeof(struct stm32_hash_ctx), .cra_alignmask = 3, .cra_init = stm32_hash_cra_init, + .cra_exit = stm32_hash_cra_exit, .cra_module = THIS_MODULE, } } @@ -1220,13 +1349,14 @@ static struct ahash_alg algs_md5_sha1[] = { .cra_ctxsize = sizeof(struct stm32_hash_ctx), .cra_alignmask = 3, .cra_init = stm32_hash_cra_sha1_init, + .cra_exit = stm32_hash_cra_exit, .cra_module = THIS_MODULE, } } }, }; -static struct ahash_alg algs_sha224_sha256[] = { +static struct ahash_alg algs_sha224[] = { { .init = stm32_hash_init, .update = stm32_hash_update, @@ -1248,6 +1378,7 @@ static struct ahash_alg algs_sha224_sha256[] = { .cra_ctxsize = sizeof(struct stm32_hash_ctx), .cra_alignmask = 3, .cra_init = stm32_hash_cra_init, + .cra_exit = stm32_hash_cra_exit, .cra_module = THIS_MODULE, } } @@ -1274,10 +1405,14 @@ static struct ahash_alg algs_sha224_sha256[] = { .cra_ctxsize = sizeof(struct stm32_hash_ctx), .cra_alignmask = 3, .cra_init = stm32_hash_cra_sha224_init, + .cra_exit = stm32_hash_cra_exit, .cra_module = THIS_MODULE, } } }, +}; + +static struct ahash_alg algs_sha256[] = { { .init = stm32_hash_init, .update = stm32_hash_update, @@ -1299,6 +1434,7 @@ static struct ahash_alg algs_sha224_sha256[] = { .cra_ctxsize = sizeof(struct stm32_hash_ctx), .cra_alignmask = 3, .cra_init = stm32_hash_cra_init, + .cra_exit = stm32_hash_cra_exit, .cra_module = THIS_MODULE, } } @@ -1325,6 +1461,7 @@ static struct ahash_alg algs_sha224_sha256[] = { .cra_ctxsize = sizeof(struct stm32_hash_ctx), .cra_alignmask = 3, .cra_init = stm32_hash_cra_sha256_init, + .cra_exit = stm32_hash_cra_exit, .cra_module = THIS_MODULE, } } @@ -1370,36 +1507,74 @@ static int stm32_hash_unregister_algs(struct stm32_hash_dev *hdev) return 0; } +static struct stm32_hash_algs_info stm32_hash_algs_info_ux500[] = { + { + .algs_list = algs_sha1, + .size = ARRAY_SIZE(algs_sha1), + }, + { + .algs_list = algs_sha256, + .size = ARRAY_SIZE(algs_sha256), + }, +}; + +static const struct stm32_hash_pdata stm32_hash_pdata_ux500 = { + .algs_info = stm32_hash_algs_info_ux500, + .algs_info_size = ARRAY_SIZE(stm32_hash_algs_info_ux500), + .broken_emptymsg = true, + .ux500 = true, +}; + static struct stm32_hash_algs_info stm32_hash_algs_info_stm32f4[] = { { - .algs_list = algs_md5_sha1, - .size = ARRAY_SIZE(algs_md5_sha1), + .algs_list = algs_md5, + .size = ARRAY_SIZE(algs_md5), + }, + { + .algs_list = algs_sha1, + .size = ARRAY_SIZE(algs_sha1), }, }; static const struct stm32_hash_pdata stm32_hash_pdata_stm32f4 = { .algs_info = stm32_hash_algs_info_stm32f4, .algs_info_size = ARRAY_SIZE(stm32_hash_algs_info_stm32f4), + .has_sr = true, + .has_mdmat = true, }; static struct stm32_hash_algs_info stm32_hash_algs_info_stm32f7[] = { { - .algs_list = algs_md5_sha1, - .size = ARRAY_SIZE(algs_md5_sha1), + .algs_list = algs_md5, + .size = ARRAY_SIZE(algs_md5), + }, + { + .algs_list = algs_sha1, + .size = ARRAY_SIZE(algs_sha1), + }, + { + .algs_list = algs_sha224, + .size = ARRAY_SIZE(algs_sha224), }, { - .algs_list = algs_sha224_sha256, - .size = ARRAY_SIZE(algs_sha224_sha256), + .algs_list = algs_sha256, + .size = ARRAY_SIZE(algs_sha256), }, 
}; static const struct stm32_hash_pdata stm32_hash_pdata_stm32f7 = { .algs_info = stm32_hash_algs_info_stm32f7, .algs_info_size = ARRAY_SIZE(stm32_hash_algs_info_stm32f7), + .has_sr = true, + .has_mdmat = true, }; static const struct of_device_id stm32_hash_of_match[] = { { + .compatible = "stericsson,ux500-hash", + .data = &stm32_hash_pdata_ux500, + }, + { .compatible = "st,stm32f456-hash", .data = &stm32_hash_pdata_stm32f4, }, @@ -1452,16 +1627,23 @@ static int stm32_hash_probe(struct platform_device *pdev) if (ret) return ret; - irq = platform_get_irq(pdev, 0); - if (irq < 0) + irq = platform_get_irq_optional(pdev, 0); + if (irq < 0 && irq != -ENXIO) return irq; - ret = devm_request_threaded_irq(dev, irq, stm32_hash_irq_handler, - stm32_hash_irq_thread, IRQF_ONESHOT, - dev_name(dev), hdev); - if (ret) { - dev_err(dev, "Cannot grab IRQ\n"); - return ret; + if (irq > 0) { + ret = devm_request_threaded_irq(dev, irq, + stm32_hash_irq_handler, + stm32_hash_irq_thread, + IRQF_ONESHOT, + dev_name(dev), hdev); + if (ret) { + dev_err(dev, "Cannot grab IRQ\n"); + return ret; + } + } else { + dev_info(dev, "No IRQ, use polling mode\n"); + hdev->polled = true; } hdev->clk = devm_clk_get(&pdev->dev, NULL); @@ -1503,9 +1685,11 @@ static int stm32_hash_probe(struct platform_device *pdev) case 0: break; case -ENOENT: - dev_dbg(dev, "DMA mode not available\n"); + case -ENODEV: + dev_info(dev, "DMA mode not available\n"); break; default: + dev_err(dev, "DMA init error %d\n", ret); goto err_dma; } @@ -1524,7 +1708,11 @@ static int stm32_hash_probe(struct platform_device *pdev) if (ret) goto err_engine_start; - hdev->dma_mode = stm32_hash_read(hdev, HASH_HWCFGR); + if (hdev->pdata->ux500) + /* FIXME: implement DMA mode for Ux500 */ + hdev->dma_mode = 0; + else + hdev->dma_mode = stm32_hash_read(hdev, HASH_HWCFGR); /* Register algos */ ret = stm32_hash_register_algs(hdev); diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 71db6450b6aa..bb27f011cf31 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -1393,7 +1393,7 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, alloc_len += sizeof(struct talitos_desc); alloc_len += ivsize; - edesc = kmalloc(alloc_len, GFP_DMA | flags); + edesc = kmalloc(ALIGN(alloc_len, dma_get_cache_alignment()), flags); if (!edesc) return ERR_PTR(-ENOMEM); if (ivsize) { @@ -1560,7 +1560,7 @@ static void skcipher_done(struct device *dev, kfree(edesc); - areq->base.complete(&areq->base, err); + skcipher_request_complete(areq, err); } static int common_nonsnoop(struct talitos_edesc *edesc, @@ -1759,7 +1759,7 @@ static void ahash_done(struct device *dev, kfree(edesc); - areq->base.complete(&areq->base, err); + ahash_request_complete(areq, err); } /* diff --git a/drivers/crypto/ux500/Kconfig b/drivers/crypto/ux500/Kconfig deleted file mode 100644 index dcbd7404768f..000000000000 --- a/drivers/crypto/ux500/Kconfig +++ /dev/null @@ -1,22 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# -# Copyright (C) ST-Ericsson SA 2010 -# Author: Shujuan Chen (shujuan.chen@stericsson.com) -# - -config CRYPTO_DEV_UX500_HASH - tristate "UX500 crypto driver for HASH block" - depends on CRYPTO_DEV_UX500 - select CRYPTO_HASH - select CRYPTO_SHA1 - select CRYPTO_SHA256 - help - This selects the hash driver for the UX500_HASH hardware. - Depends on UX500/STM DMA if running in DMA mode. 
- -config CRYPTO_DEV_UX500_DEBUG - bool "Activate ux500 platform debug-mode for crypto and hash block" - depends on CRYPTO_DEV_UX500_CRYP || CRYPTO_DEV_UX500_HASH - help - Say Y if you want to add debug prints to ux500_hash and - ux500_cryp devices. diff --git a/drivers/crypto/ux500/Makefile b/drivers/crypto/ux500/Makefile deleted file mode 100644 index f1aa4edf66f4..000000000000 --- a/drivers/crypto/ux500/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# -# Copyright (C) ST-Ericsson SA 2010 -# Author: Shujuan Chen (shujuan.chen@stericsson.com) -# - -obj-$(CONFIG_CRYPTO_DEV_UX500_HASH) += hash/ diff --git a/drivers/crypto/ux500/hash/Makefile b/drivers/crypto/ux500/hash/Makefile deleted file mode 100644 index a8f088724772..000000000000 --- a/drivers/crypto/ux500/hash/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# -# Copyright (C) ST-Ericsson SA 2010 -# Author: Shujuan Chen (shujuan.chen@stericsson.com) -# -ifdef CONFIG_CRYPTO_DEV_UX500_DEBUG -CFLAGS_hash_core.o := -DDEBUG -endif - -obj-$(CONFIG_CRYPTO_DEV_UX500_HASH) += ux500_hash.o -ux500_hash-objs := hash_core.o diff --git a/drivers/crypto/ux500/hash/hash_alg.h b/drivers/crypto/ux500/hash/hash_alg.h deleted file mode 100644 index 7c9bcc15125f..000000000000 --- a/drivers/crypto/ux500/hash/hash_alg.h +++ /dev/null @@ -1,398 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) ST-Ericsson SA 2010 - * Author: Shujuan Chen (shujuan.chen@stericsson.com) - * Author: Joakim Bech (joakim.xx.bech@stericsson.com) - * Author: Berne Hebark (berne.hebark@stericsson.com)) - */ -#ifndef _HASH_ALG_H -#define _HASH_ALG_H - -#include <linux/bitops.h> - -#define HASH_BLOCK_SIZE 64 -#define HASH_DMA_FIFO 4 -#define HASH_DMA_ALIGN_SIZE 4 -#define HASH_DMA_PERFORMANCE_MIN_SIZE 1024 -#define HASH_BYTES_PER_WORD 4 - -/* Maximum value of the length's high word */ -#define HASH_HIGH_WORD_MAX_VAL 0xFFFFFFFFUL - -/* Power on Reset values HASH registers */ -#define HASH_RESET_CR_VALUE 0x0 -#define HASH_RESET_STR_VALUE 0x0 - -/* Number of context swap registers */ -#define HASH_CSR_COUNT 52 - -#define HASH_RESET_CSRX_REG_VALUE 0x0 -#define HASH_RESET_CSFULL_REG_VALUE 0x0 -#define HASH_RESET_CSDATAIN_REG_VALUE 0x0 - -#define HASH_RESET_INDEX_VAL 0x0 -#define HASH_RESET_BIT_INDEX_VAL 0x0 -#define HASH_RESET_BUFFER_VAL 0x0 -#define HASH_RESET_LEN_HIGH_VAL 0x0 -#define HASH_RESET_LEN_LOW_VAL 0x0 - -/* Control register bitfields */ -#define HASH_CR_RESUME_MASK 0x11FCF - -#define HASH_CR_SWITCHON_POS 31 -#define HASH_CR_SWITCHON_MASK BIT(31) - -#define HASH_CR_EMPTYMSG_POS 20 -#define HASH_CR_EMPTYMSG_MASK BIT(20) - -#define HASH_CR_DINF_POS 12 -#define HASH_CR_DINF_MASK BIT(12) - -#define HASH_CR_NBW_POS 8 -#define HASH_CR_NBW_MASK 0x00000F00UL - -#define HASH_CR_LKEY_POS 16 -#define HASH_CR_LKEY_MASK BIT(16) - -#define HASH_CR_ALGO_POS 7 -#define HASH_CR_ALGO_MASK BIT(7) - -#define HASH_CR_MODE_POS 6 -#define HASH_CR_MODE_MASK BIT(6) - -#define HASH_CR_DATAFORM_POS 4 -#define HASH_CR_DATAFORM_MASK (BIT(4) | BIT(5)) - -#define HASH_CR_DMAE_POS 3 -#define HASH_CR_DMAE_MASK BIT(3) - -#define HASH_CR_INIT_POS 2 -#define HASH_CR_INIT_MASK BIT(2) - -#define HASH_CR_PRIVN_POS 1 -#define HASH_CR_PRIVN_MASK BIT(1) - -#define HASH_CR_SECN_POS 0 -#define HASH_CR_SECN_MASK BIT(0) - -/* Start register bitfields */ -#define HASH_STR_DCAL_POS 8 -#define HASH_STR_DCAL_MASK BIT(8) -#define HASH_STR_DEFAULT 0x0 - -#define HASH_STR_NBLW_POS 0 -#define HASH_STR_NBLW_MASK 0x0000001FUL - -#define 
HASH_NBLW_MAX_VAL 0x1F - -/* PrimeCell IDs */ -#define HASH_P_ID0 0xE0 -#define HASH_P_ID1 0x05 -#define HASH_P_ID2 0x38 -#define HASH_P_ID3 0x00 -#define HASH_CELL_ID0 0x0D -#define HASH_CELL_ID1 0xF0 -#define HASH_CELL_ID2 0x05 -#define HASH_CELL_ID3 0xB1 - -#define HASH_SET_BITS(reg_name, mask) \ - writel_relaxed((readl_relaxed(reg_name) | mask), reg_name) - -#define HASH_CLEAR_BITS(reg_name, mask) \ - writel_relaxed((readl_relaxed(reg_name) & ~mask), reg_name) - -#define HASH_PUT_BITS(reg, val, shift, mask) \ - writel_relaxed(((readl(reg) & ~(mask)) | \ - (((u32)val << shift) & (mask))), reg) - -#define HASH_SET_DIN(val, len) writesl(&device_data->base->din, (val), (len)) - -#define HASH_INITIALIZE \ - HASH_PUT_BITS( \ - &device_data->base->cr, \ - 0x01, HASH_CR_INIT_POS, \ - HASH_CR_INIT_MASK) - -#define HASH_SET_DATA_FORMAT(data_format) \ - HASH_PUT_BITS( \ - &device_data->base->cr, \ - (u32) (data_format), HASH_CR_DATAFORM_POS, \ - HASH_CR_DATAFORM_MASK) -#define HASH_SET_NBLW(val) \ - HASH_PUT_BITS( \ - &device_data->base->str, \ - (u32) (val), HASH_STR_NBLW_POS, \ - HASH_STR_NBLW_MASK) -#define HASH_SET_DCAL \ - HASH_PUT_BITS( \ - &device_data->base->str, \ - 0x01, HASH_STR_DCAL_POS, \ - HASH_STR_DCAL_MASK) - -/* Hardware access method */ -enum hash_mode { - HASH_MODE_CPU, - HASH_MODE_DMA -}; - -/** - * struct uint64 - Structure to handle 64 bits integers. - * @high_word: Most significant bits. - * @low_word: Least significant bits. - * - * Used to handle 64 bits integers. - */ -struct uint64 { - u32 high_word; - u32 low_word; -}; - -/** - * struct hash_register - Contains all registers in ux500 hash hardware. - * @cr: HASH control register (0x000). - * @din: HASH data input register (0x004). - * @str: HASH start register (0x008). - * @hx: HASH digest register 0..7 (0x00c-0x01C). - * @padding0: Reserved (0x02C). - * @itcr: Integration test control register (0x080). - * @itip: Integration test input register (0x084). - * @itop: Integration test output register (0x088). - * @padding1: Reserved (0x08C). - * @csfull: HASH context full register (0x0F8). - * @csdatain: HASH context swap data input register (0x0FC). - * @csrx: HASH context swap register 0..51 (0x100-0x1CC). - * @padding2: Reserved (0x1D0). - * @periphid0: HASH peripheral identification register 0 (0xFE0). - * @periphid1: HASH peripheral identification register 1 (0xFE4). - * @periphid2: HASH peripheral identification register 2 (0xFE8). - * @periphid3: HASH peripheral identification register 3 (0xFEC). - * @cellid0: HASH PCell identification register 0 (0xFF0). - * @cellid1: HASH PCell identification register 1 (0xFF4). - * @cellid2: HASH PCell identification register 2 (0xFF8). - * @cellid3: HASH PCell identification register 3 (0xFFC). - * - * The device communicates to the HASH via 32-bit-wide control registers - * accessible via the 32-bit width AMBA rev. 2.0 AHB Bus. Below is a structure - * with the registers used. - */ -struct hash_register { - u32 cr; - u32 din; - u32 str; - u32 hx[8]; - - u32 padding0[(0x080 - 0x02C) / sizeof(u32)]; - - u32 itcr; - u32 itip; - u32 itop; - - u32 padding1[(0x0F8 - 0x08C) / sizeof(u32)]; - - u32 csfull; - u32 csdatain; - u32 csrx[HASH_CSR_COUNT]; - - u32 padding2[(0xFE0 - 0x1D0) / sizeof(u32)]; - - u32 periphid0; - u32 periphid1; - u32 periphid2; - u32 periphid3; - - u32 cellid0; - u32 cellid1; - u32 cellid2; - u32 cellid3; -}; - -/** - * struct hash_state - Hash context state. - * @temp_cr: Temporary HASH Control Register. - * @str_reg: HASH Start Register. 
- * @din_reg: HASH Data Input Register. - * @csr[52]: HASH Context Swap Registers 0-39. - * @csfull: HASH Context Swap Registers 40 ie Status flags. - * @csdatain: HASH Context Swap Registers 41 ie Input data. - * @buffer: Working buffer for messages going to the hardware. - * @length: Length of the part of message hashed so far (floor(N/64) * 64). - * @index: Valid number of bytes in buffer (N % 64). - * @bit_index: Valid number of bits in buffer (N % 8). - * - * This structure is used between context switches, i.e. when ongoing jobs are - * interupted with new jobs. When this happens we need to store intermediate - * results in software. - * - * WARNING: "index" is the member of the structure, to be sure that "buffer" - * is aligned on a 4-bytes boundary. This is highly implementation dependent - * and MUST be checked whenever this code is ported on new platforms. - */ -struct hash_state { - u32 temp_cr; - u32 str_reg; - u32 din_reg; - u32 csr[52]; - u32 csfull; - u32 csdatain; - u32 buffer[HASH_BLOCK_SIZE / sizeof(u32)]; - struct uint64 length; - u8 index; - u8 bit_index; -}; - -/** - * enum hash_device_id - HASH device ID. - * @HASH_DEVICE_ID_0: Hash hardware with ID 0 - * @HASH_DEVICE_ID_1: Hash hardware with ID 1 - */ -enum hash_device_id { - HASH_DEVICE_ID_0 = 0, - HASH_DEVICE_ID_1 = 1 -}; - -/** - * enum hash_data_format - HASH data format. - * @HASH_DATA_32_BITS: 32 bits data format - * @HASH_DATA_16_BITS: 16 bits data format - * @HASH_DATA_8_BITS: 8 bits data format. - * @HASH_DATA_1_BITS: 1 bit data format. - */ -enum hash_data_format { - HASH_DATA_32_BITS = 0x0, - HASH_DATA_16_BITS = 0x1, - HASH_DATA_8_BITS = 0x2, - HASH_DATA_1_BIT = 0x3 -}; - -/** - * enum hash_algo - Enumeration for selecting between SHA1 or SHA2 algorithm. - * @HASH_ALGO_SHA1: Indicates that SHA1 is used. - * @HASH_ALGO_SHA2: Indicates that SHA2 (SHA256) is used. - */ -enum hash_algo { - HASH_ALGO_SHA1 = 0x0, - HASH_ALGO_SHA256 = 0x1 -}; - -/** - * enum hash_op - Enumeration for selecting between HASH or HMAC mode. - * @HASH_OPER_MODE_HASH: Indicates usage of normal HASH mode. - * @HASH_OPER_MODE_HMAC: Indicates usage of HMAC. - */ -enum hash_op { - HASH_OPER_MODE_HASH = 0x0, - HASH_OPER_MODE_HMAC = 0x1 -}; - -/** - * struct hash_config - Configuration data for the hardware. - * @data_format: Format of data entered into the hash data in register. - * @algorithm: Algorithm selection bit. - * @oper_mode: Operating mode selection bit. - */ -struct hash_config { - int data_format; - int algorithm; - int oper_mode; -}; - -/** - * struct hash_dma - Structure used for dma. - * @mask: DMA capabilities bitmap mask. - * @complete: Used to maintain state for a "completion". - * @chan_mem2hash: DMA channel. - * @cfg_mem2hash: DMA channel configuration. - * @sg_len: Scatterlist length. - * @sg: Scatterlist. - * @nents: Number of sg entries. - */ -struct hash_dma { - dma_cap_mask_t mask; - struct completion complete; - struct dma_chan *chan_mem2hash; - void *cfg_mem2hash; - int sg_len; - struct scatterlist *sg; - int nents; -}; - -/** - * struct hash_ctx - The context used for hash calculations. - * @key: The key used in the operation. - * @keylen: The length of the key. - * @state: The state of the current calculations. - * @config: The current configuration. - * @digestsize: The size of current digest. - * @device: Pointer to the device structure. 
- */ -struct hash_ctx { - u8 *key; - u32 keylen; - struct hash_config config; - int digestsize; - struct hash_device_data *device; -}; - -/** - * struct hash_ctx - The request context used for hash calculations. - * @state: The state of the current calculations. - * @dma_mode: Used in special cases (workaround), e.g. need to change to - * cpu mode, if not supported/working in dma mode. - * @updated: Indicates if hardware is initialized for new operations. - */ -struct hash_req_ctx { - struct hash_state state; - bool dma_mode; - u8 updated; -}; - -/** - * struct hash_device_data - structure for a hash device. - * @base: Pointer to virtual base address of the hash device. - * @phybase: Pointer to physical memory location of the hash device. - * @list_node: For inclusion in klist. - * @dev: Pointer to the device dev structure. - * @ctx_lock: Spinlock for current_ctx. - * @current_ctx: Pointer to the currently allocated context. - * @power_state: TRUE = power state on, FALSE = power state off. - * @power_state_lock: Spinlock for power_state. - * @regulator: Pointer to the device's power control. - * @clk: Pointer to the device's clock control. - * @restore_dev_state: TRUE = saved state, FALSE = no saved state. - * @dma: Structure used for dma. - */ -struct hash_device_data { - struct hash_register __iomem *base; - phys_addr_t phybase; - struct klist_node list_node; - struct device *dev; - spinlock_t ctx_lock; - struct hash_ctx *current_ctx; - bool power_state; - spinlock_t power_state_lock; - struct regulator *regulator; - struct clk *clk; - bool restore_dev_state; - struct hash_state state; /* Used for saving and resuming state */ - struct hash_dma dma; -}; - -int hash_check_hw(struct hash_device_data *device_data); - -int hash_setconfiguration(struct hash_device_data *device_data, - struct hash_config *config); - -void hash_begin(struct hash_device_data *device_data, struct hash_ctx *ctx); - -void hash_get_digest(struct hash_device_data *device_data, - u8 *digest, int algorithm); - -int hash_hw_update(struct ahash_request *req); - -int hash_save_state(struct hash_device_data *device_data, - struct hash_state *state); - -int hash_resume_state(struct hash_device_data *device_data, - const struct hash_state *state); - -#endif diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c deleted file mode 100644 index f104e8a43036..000000000000 --- a/drivers/crypto/ux500/hash/hash_core.c +++ /dev/null @@ -1,1966 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Cryptographic API. - * Support for Nomadik hardware crypto engine. - - * Copyright (C) ST-Ericsson SA 2010 - * Author: Shujuan Chen <shujuan.chen@stericsson.com> for ST-Ericsson - * Author: Joakim Bech <joakim.xx.bech@stericsson.com> for ST-Ericsson - * Author: Berne Hebark <berne.herbark@stericsson.com> for ST-Ericsson. - * Author: Niklas Hernaeus <niklas.hernaeus@stericsson.com> for ST-Ericsson. - * Author: Andreas Westin <andreas.westin@stericsson.com> for ST-Ericsson. 
- */ - -#define pr_fmt(fmt) "hashX hashX: " fmt - -#include <linux/clk.h> -#include <linux/device.h> -#include <linux/dma-mapping.h> -#include <linux/err.h> -#include <linux/init.h> -#include <linux/io.h> -#include <linux/klist.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/mod_devicetable.h> -#include <linux/platform_device.h> -#include <linux/crypto.h> - -#include <linux/regulator/consumer.h> -#include <linux/dmaengine.h> -#include <linux/bitops.h> - -#include <crypto/internal/hash.h> -#include <crypto/sha1.h> -#include <crypto/sha2.h> -#include <crypto/scatterwalk.h> -#include <crypto/algapi.h> - -#include <linux/platform_data/crypto-ux500.h> - -#include "hash_alg.h" - -static int hash_mode; -module_param(hash_mode, int, 0); -MODULE_PARM_DESC(hash_mode, "CPU or DMA mode. CPU = 0 (default), DMA = 1"); - -/* HMAC-SHA1, no key */ -static const u8 zero_message_hmac_sha1[SHA1_DIGEST_SIZE] = { - 0xfb, 0xdb, 0x1d, 0x1b, 0x18, 0xaa, 0x6c, 0x08, - 0x32, 0x4b, 0x7d, 0x64, 0xb7, 0x1f, 0xb7, 0x63, - 0x70, 0x69, 0x0e, 0x1d -}; - -/* HMAC-SHA256, no key */ -static const u8 zero_message_hmac_sha256[SHA256_DIGEST_SIZE] = { - 0xb6, 0x13, 0x67, 0x9a, 0x08, 0x14, 0xd9, 0xec, - 0x77, 0x2f, 0x95, 0xd7, 0x78, 0xc3, 0x5f, 0xc5, - 0xff, 0x16, 0x97, 0xc4, 0x93, 0x71, 0x56, 0x53, - 0xc6, 0xc7, 0x12, 0x14, 0x42, 0x92, 0xc5, 0xad -}; - -/** - * struct hash_driver_data - data specific to the driver. - * - * @device_list: A list of registered devices to choose from. - * @device_allocation: A semaphore initialized with number of devices. - */ -struct hash_driver_data { - struct klist device_list; - struct semaphore device_allocation; -}; - -static struct hash_driver_data driver_data; - -/* Declaration of functions */ -/** - * hash_messagepad - Pads a message and write the nblw bits. - * @device_data: Structure for the hash device. - * @message: Last word of a message - * @index_bytes: The number of bytes in the last message - * - * This function manages the final part of the digest calculation, when less - * than 512 bits (64 bytes) remain in message. This means index_bytes < 64. - * - */ -static void hash_messagepad(struct hash_device_data *device_data, - const u32 *message, u8 index_bytes); - -/** - * release_hash_device - Releases a previously allocated hash device. - * @device_data: Structure for the hash device. - * - */ -static void release_hash_device(struct hash_device_data *device_data) -{ - spin_lock(&device_data->ctx_lock); - device_data->current_ctx->device = NULL; - device_data->current_ctx = NULL; - spin_unlock(&device_data->ctx_lock); - - /* - * The down_interruptible part for this semaphore is called in - * cryp_get_device_data. 
- */ - up(&driver_data.device_allocation); -} - -static void hash_dma_setup_channel(struct hash_device_data *device_data, - struct device *dev) -{ - struct hash_platform_data *platform_data = dev->platform_data; - struct dma_slave_config conf = { - .direction = DMA_MEM_TO_DEV, - .dst_addr = device_data->phybase + HASH_DMA_FIFO, - .dst_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES, - .dst_maxburst = 16, - }; - - dma_cap_zero(device_data->dma.mask); - dma_cap_set(DMA_SLAVE, device_data->dma.mask); - - device_data->dma.cfg_mem2hash = platform_data->mem_to_engine; - device_data->dma.chan_mem2hash = - dma_request_channel(device_data->dma.mask, - platform_data->dma_filter, - device_data->dma.cfg_mem2hash); - - dmaengine_slave_config(device_data->dma.chan_mem2hash, &conf); - - init_completion(&device_data->dma.complete); -} - -static void hash_dma_callback(void *data) -{ - struct hash_ctx *ctx = data; - - complete(&ctx->device->dma.complete); -} - -static int hash_set_dma_transfer(struct hash_ctx *ctx, struct scatterlist *sg, - int len, enum dma_data_direction direction) -{ - struct dma_async_tx_descriptor *desc = NULL; - struct dma_chan *channel = NULL; - - if (direction != DMA_TO_DEVICE) { - dev_err(ctx->device->dev, "%s: Invalid DMA direction\n", - __func__); - return -EFAULT; - } - - sg->length = ALIGN(sg->length, HASH_DMA_ALIGN_SIZE); - - channel = ctx->device->dma.chan_mem2hash; - ctx->device->dma.sg = sg; - ctx->device->dma.sg_len = dma_map_sg(channel->device->dev, - ctx->device->dma.sg, ctx->device->dma.nents, - direction); - - if (!ctx->device->dma.sg_len) { - dev_err(ctx->device->dev, "%s: Could not map the sg list (TO_DEVICE)\n", - __func__); - return -EFAULT; - } - - dev_dbg(ctx->device->dev, "%s: Setting up DMA for buffer (TO_DEVICE)\n", - __func__); - desc = dmaengine_prep_slave_sg(channel, - ctx->device->dma.sg, ctx->device->dma.sg_len, - DMA_MEM_TO_DEV, DMA_CTRL_ACK | DMA_PREP_INTERRUPT); - if (!desc) { - dev_err(ctx->device->dev, - "%s: dmaengine_prep_slave_sg() failed!\n", __func__); - return -EFAULT; - } - - desc->callback = hash_dma_callback; - desc->callback_param = ctx; - - dmaengine_submit(desc); - dma_async_issue_pending(channel); - - return 0; -} - -static void hash_dma_done(struct hash_ctx *ctx) -{ - struct dma_chan *chan; - - chan = ctx->device->dma.chan_mem2hash; - dmaengine_terminate_all(chan); - dma_unmap_sg(chan->device->dev, ctx->device->dma.sg, - ctx->device->dma.nents, DMA_TO_DEVICE); -} - -static int hash_dma_write(struct hash_ctx *ctx, - struct scatterlist *sg, int len) -{ - int error = hash_set_dma_transfer(ctx, sg, len, DMA_TO_DEVICE); - if (error) { - dev_dbg(ctx->device->dev, - "%s: hash_set_dma_transfer() failed\n", __func__); - return error; - } - - return len; -} - -/** - * get_empty_message_digest - Returns a pre-calculated digest for - * the empty message. - * @device_data: Structure for the hash device. - * @zero_hash: Buffer to return the empty message digest. - * @zero_hash_size: Hash size of the empty message digest. - * @zero_digest: True if zero_digest returned. - */ -static int get_empty_message_digest( - struct hash_device_data *device_data, - u8 *zero_hash, u32 *zero_hash_size, bool *zero_digest) -{ - int ret = 0; - struct hash_ctx *ctx = device_data->current_ctx; - *zero_digest = false; - - /** - * Caller responsible for ctx != NULL. 
- */ - - if (HASH_OPER_MODE_HASH == ctx->config.oper_mode) { - if (HASH_ALGO_SHA1 == ctx->config.algorithm) { - memcpy(zero_hash, &sha1_zero_message_hash[0], - SHA1_DIGEST_SIZE); - *zero_hash_size = SHA1_DIGEST_SIZE; - *zero_digest = true; - } else if (HASH_ALGO_SHA256 == - ctx->config.algorithm) { - memcpy(zero_hash, &sha256_zero_message_hash[0], - SHA256_DIGEST_SIZE); - *zero_hash_size = SHA256_DIGEST_SIZE; - *zero_digest = true; - } else { - dev_err(device_data->dev, "%s: Incorrect algorithm!\n", - __func__); - ret = -EINVAL; - goto out; - } - } else if (HASH_OPER_MODE_HMAC == ctx->config.oper_mode) { - if (!ctx->keylen) { - if (HASH_ALGO_SHA1 == ctx->config.algorithm) { - memcpy(zero_hash, &zero_message_hmac_sha1[0], - SHA1_DIGEST_SIZE); - *zero_hash_size = SHA1_DIGEST_SIZE; - *zero_digest = true; - } else if (HASH_ALGO_SHA256 == ctx->config.algorithm) { - memcpy(zero_hash, &zero_message_hmac_sha256[0], - SHA256_DIGEST_SIZE); - *zero_hash_size = SHA256_DIGEST_SIZE; - *zero_digest = true; - } else { - dev_err(device_data->dev, "%s: Incorrect algorithm!\n", - __func__); - ret = -EINVAL; - goto out; - } - } else { - dev_dbg(device_data->dev, - "%s: Continue hash calculation, since hmac key available\n", - __func__); - } - } -out: - - return ret; -} - -/** - * hash_disable_power - Request to disable power and clock. - * @device_data: Structure for the hash device. - * @save_device_state: If true, saves the current hw state. - * - * This function request for disabling power (regulator) and clock, - * and could also save current hw state. - */ -static int hash_disable_power(struct hash_device_data *device_data, - bool save_device_state) -{ - int ret = 0; - struct device *dev = device_data->dev; - - spin_lock(&device_data->power_state_lock); - if (!device_data->power_state) - goto out; - - if (save_device_state) { - hash_save_state(device_data, - &device_data->state); - device_data->restore_dev_state = true; - } - - clk_disable(device_data->clk); - ret = regulator_disable(device_data->regulator); - if (ret) - dev_err(dev, "%s: regulator_disable() failed!\n", __func__); - - device_data->power_state = false; - -out: - spin_unlock(&device_data->power_state_lock); - - return ret; -} - -/** - * hash_enable_power - Request to enable power and clock. - * @device_data: Structure for the hash device. - * @restore_device_state: If true, restores a previous saved hw state. - * - * This function request for enabling power (regulator) and clock, - * and could also restore a previously saved hw state. - */ -static int hash_enable_power(struct hash_device_data *device_data, - bool restore_device_state) -{ - int ret = 0; - struct device *dev = device_data->dev; - - spin_lock(&device_data->power_state_lock); - if (!device_data->power_state) { - ret = regulator_enable(device_data->regulator); - if (ret) { - dev_err(dev, "%s: regulator_enable() failed!\n", - __func__); - goto out; - } - ret = clk_enable(device_data->clk); - if (ret) { - dev_err(dev, "%s: clk_enable() failed!\n", __func__); - ret = regulator_disable( - device_data->regulator); - goto out; - } - device_data->power_state = true; - } - - if (device_data->restore_dev_state) { - if (restore_device_state) { - device_data->restore_dev_state = false; - hash_resume_state(device_data, &device_data->state); - } - } -out: - spin_unlock(&device_data->power_state_lock); - - return ret; -} - -/** - * hash_get_device_data - Checks for an available hash device and return it. - * @ctx: Structure for the hash context. 
- * @device_data: Structure for the hash device. - * - * This function check for an available hash device and return it to - * the caller. - * Note! Caller need to release the device, calling up(). - */ -static int hash_get_device_data(struct hash_ctx *ctx, - struct hash_device_data **device_data) -{ - int ret; - struct klist_iter device_iterator; - struct klist_node *device_node; - struct hash_device_data *local_device_data = NULL; - - /* Wait until a device is available */ - ret = down_interruptible(&driver_data.device_allocation); - if (ret) - return ret; /* Interrupted */ - - /* Select a device */ - klist_iter_init(&driver_data.device_list, &device_iterator); - device_node = klist_next(&device_iterator); - while (device_node) { - local_device_data = container_of(device_node, - struct hash_device_data, list_node); - spin_lock(&local_device_data->ctx_lock); - /* current_ctx allocates a device, NULL = unallocated */ - if (local_device_data->current_ctx) { - device_node = klist_next(&device_iterator); - } else { - local_device_data->current_ctx = ctx; - ctx->device = local_device_data; - spin_unlock(&local_device_data->ctx_lock); - break; - } - spin_unlock(&local_device_data->ctx_lock); - } - klist_iter_exit(&device_iterator); - - if (!device_node) { - /** - * No free device found. - * Since we allocated a device with down_interruptible, this - * should not be able to happen. - * Number of available devices, which are contained in - * device_allocation, is therefore decremented by not doing - * an up(device_allocation). - */ - return -EBUSY; - } - - *device_data = local_device_data; - - return 0; -} - -/** - * hash_hw_write_key - Writes the key to the hardware registries. - * - * @device_data: Structure for the hash device. - * @key: Key to be written. - * @keylen: The lengt of the key. - * - * Note! This function DOES NOT write to the NBLW registry, even though - * specified in the hw design spec. Either due to incorrect info in the - * spec or due to a bug in the hw. - */ -static void hash_hw_write_key(struct hash_device_data *device_data, - const u8 *key, unsigned int keylen) -{ - u32 word = 0; - int nwords = 1; - - HASH_CLEAR_BITS(&device_data->base->str, HASH_STR_NBLW_MASK); - - while (keylen >= 4) { - u32 *key_word = (u32 *)key; - - HASH_SET_DIN(key_word, nwords); - keylen -= 4; - key += 4; - } - - /* Take care of the remaining bytes in the last word */ - if (keylen) { - word = 0; - while (keylen) { - word |= (key[keylen - 1] << (8 * (keylen - 1))); - keylen--; - } - - HASH_SET_DIN(&word, nwords); - } - - while (readl(&device_data->base->str) & HASH_STR_DCAL_MASK) - cpu_relax(); - - HASH_SET_DCAL; - - while (readl(&device_data->base->str) & HASH_STR_DCAL_MASK) - cpu_relax(); -} - -/** - * init_hash_hw - Initialise the hash hardware for a new calculation. - * @device_data: Structure for the hash device. - * @ctx: The hash context. - * - * This function will enable the bits needed to clear and start a new - * calculation. - */ -static int init_hash_hw(struct hash_device_data *device_data, - struct hash_ctx *ctx) -{ - int ret = 0; - - ret = hash_setconfiguration(device_data, &ctx->config); - if (ret) { - dev_err(device_data->dev, "%s: hash_setconfiguration() failed!\n", - __func__); - return ret; - } - - hash_begin(device_data, ctx); - - if (ctx->config.oper_mode == HASH_OPER_MODE_HMAC) - hash_hw_write_key(device_data, ctx->key, ctx->keylen); - - return ret; -} - -/** - * hash_get_nents - Return number of entries (nents) in scatterlist (sg). - * - * @sg: Scatterlist. 
- * @size: Size in bytes. - * @aligned: True if sg data aligned to work in DMA mode. - * - */ -static int hash_get_nents(struct scatterlist *sg, int size, bool *aligned) -{ - int nents = 0; - bool aligned_data = true; - - while (size > 0 && sg) { - nents++; - size -= sg->length; - - /* hash_set_dma_transfer will align last nent */ - if ((aligned && !IS_ALIGNED(sg->offset, HASH_DMA_ALIGN_SIZE)) || - (!IS_ALIGNED(sg->length, HASH_DMA_ALIGN_SIZE) && size > 0)) - aligned_data = false; - - sg = sg_next(sg); - } - - if (aligned) - *aligned = aligned_data; - - if (size != 0) - return -EFAULT; - - return nents; -} - -/** - * hash_dma_valid_data - checks for dma valid sg data. - * @sg: Scatterlist. - * @datasize: Datasize in bytes. - * - * NOTE! This function checks for dma valid sg data, since dma - * only accept datasizes of even wordsize. - */ -static bool hash_dma_valid_data(struct scatterlist *sg, int datasize) -{ - bool aligned; - - /* Need to include at least one nent, else error */ - if (hash_get_nents(sg, datasize, &aligned) < 1) - return false; - - return aligned; -} - -/** - * ux500_hash_init - Common hash init function for SHA1/SHA2 (SHA256). - * @req: The hash request for the job. - * - * Initialize structures. - */ -static int ux500_hash_init(struct ahash_request *req) -{ - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct hash_ctx *ctx = crypto_ahash_ctx(tfm); - struct hash_req_ctx *req_ctx = ahash_request_ctx(req); - - if (!ctx->key) - ctx->keylen = 0; - - memset(&req_ctx->state, 0, sizeof(struct hash_state)); - req_ctx->updated = 0; - if (hash_mode == HASH_MODE_DMA) { - if (req->nbytes < HASH_DMA_ALIGN_SIZE) { - req_ctx->dma_mode = false; /* Don't use DMA */ - - pr_debug("%s: DMA mode, but direct to CPU mode for data size < %d\n", - __func__, HASH_DMA_ALIGN_SIZE); - } else { - if (req->nbytes >= HASH_DMA_PERFORMANCE_MIN_SIZE && - hash_dma_valid_data(req->src, req->nbytes)) { - req_ctx->dma_mode = true; - } else { - req_ctx->dma_mode = false; - pr_debug("%s: DMA mode, but use CPU mode for datalength < %d or non-aligned data, except in last nent\n", - __func__, - HASH_DMA_PERFORMANCE_MIN_SIZE); - } - } - } - return 0; -} - -/** - * hash_processblock - This function processes a single block of 512 bits (64 - * bytes), word aligned, starting at message. - * @device_data: Structure for the hash device. - * @message: Block (512 bits) of message to be written to - * the HASH hardware. - * @length: Message length - * - */ -static void hash_processblock(struct hash_device_data *device_data, - const u32 *message, int length) -{ - int len = length / HASH_BYTES_PER_WORD; - /* - * NBLW bits. Reset the number of bits in last word (NBLW). - */ - HASH_CLEAR_BITS(&device_data->base->str, HASH_STR_NBLW_MASK); - - /* - * Write message data to the HASH_DIN register. - */ - HASH_SET_DIN(message, len); -} - -/** - * hash_messagepad - Pads a message and write the nblw bits. - * @device_data: Structure for the hash device. - * @message: Last word of a message. - * @index_bytes: The number of bytes in the last message. - * - * This function manages the final part of the digest calculation, when less - * than 512 bits (64 bytes) remain in message. This means index_bytes < 64. - * - */ -static void hash_messagepad(struct hash_device_data *device_data, - const u32 *message, u8 index_bytes) -{ - int nwords = 1; - - /* - * Clear hash str register, only clear NBLW - * since DCAL will be reset by hardware. 
- */ - HASH_CLEAR_BITS(&device_data->base->str, HASH_STR_NBLW_MASK); - - /* Main loop */ - while (index_bytes >= 4) { - HASH_SET_DIN(message, nwords); - index_bytes -= 4; - message++; - } - - if (index_bytes) - HASH_SET_DIN(message, nwords); - - while (readl(&device_data->base->str) & HASH_STR_DCAL_MASK) - cpu_relax(); - - /* num_of_bytes == 0 => NBLW <- 0 (32 bits valid in DATAIN) */ - HASH_SET_NBLW(index_bytes * 8); - dev_dbg(device_data->dev, "%s: DIN=0x%08x NBLW=%lu\n", - __func__, readl_relaxed(&device_data->base->din), - readl_relaxed(&device_data->base->str) & HASH_STR_NBLW_MASK); - HASH_SET_DCAL; - dev_dbg(device_data->dev, "%s: after dcal -> DIN=0x%08x NBLW=%lu\n", - __func__, readl_relaxed(&device_data->base->din), - readl_relaxed(&device_data->base->str) & HASH_STR_NBLW_MASK); - - while (readl(&device_data->base->str) & HASH_STR_DCAL_MASK) - cpu_relax(); -} - -/** - * hash_incrementlength - Increments the length of the current message. - * @ctx: Hash context - * @incr: Length of message processed already - * - * Overflow cannot occur, because conditions for overflow are checked in - * hash_hw_update. - */ -static void hash_incrementlength(struct hash_req_ctx *ctx, u32 incr) -{ - ctx->state.length.low_word += incr; - - /* Check for wrap-around */ - if (ctx->state.length.low_word < incr) - ctx->state.length.high_word++; -} - -/** - * hash_setconfiguration - Sets the required configuration for the hash - * hardware. - * @device_data: Structure for the hash device. - * @config: Pointer to a configuration structure. - */ -int hash_setconfiguration(struct hash_device_data *device_data, - struct hash_config *config) -{ - int ret = 0; - - if (config->algorithm != HASH_ALGO_SHA1 && - config->algorithm != HASH_ALGO_SHA256) - return -EPERM; - - /* - * DATAFORM bits. Set the DATAFORM bits to 0b11, which means the data - * to be written to HASH_DIN is considered as 32 bits. - */ - HASH_SET_DATA_FORMAT(config->data_format); - - /* - * ALGO bit. Set to 0b1 for SHA-1 and 0b0 for SHA-256 - */ - switch (config->algorithm) { - case HASH_ALGO_SHA1: - HASH_SET_BITS(&device_data->base->cr, HASH_CR_ALGO_MASK); - break; - - case HASH_ALGO_SHA256: - HASH_CLEAR_BITS(&device_data->base->cr, HASH_CR_ALGO_MASK); - break; - - default: - dev_err(device_data->dev, "%s: Incorrect algorithm\n", - __func__); - return -EPERM; - } - - /* - * MODE bit. This bit selects between HASH or HMAC mode for the - * selected algorithm. 0b0 = HASH and 0b1 = HMAC. - */ - if (HASH_OPER_MODE_HASH == config->oper_mode) - HASH_CLEAR_BITS(&device_data->base->cr, - HASH_CR_MODE_MASK); - else if (HASH_OPER_MODE_HMAC == config->oper_mode) { - HASH_SET_BITS(&device_data->base->cr, HASH_CR_MODE_MASK); - if (device_data->current_ctx->keylen > HASH_BLOCK_SIZE) { - /* Truncate key to blocksize */ - dev_dbg(device_data->dev, "%s: LKEY set\n", __func__); - HASH_SET_BITS(&device_data->base->cr, - HASH_CR_LKEY_MASK); - } else { - dev_dbg(device_data->dev, "%s: LKEY cleared\n", - __func__); - HASH_CLEAR_BITS(&device_data->base->cr, - HASH_CR_LKEY_MASK); - } - } else { /* Wrong hash mode */ - ret = -EPERM; - dev_err(device_data->dev, "%s: HASH_INVALID_PARAMETER!\n", - __func__); - } - return ret; -} - -/** - * hash_begin - This routine resets some globals and initializes the hash - * hardware. - * @device_data: Structure for the hash device. - * @ctx: Hash context. 
- */ -void hash_begin(struct hash_device_data *device_data, struct hash_ctx *ctx) -{ - /* HW and SW initializations */ - /* Note: there is no need to initialize buffer and digest members */ - - while (readl(&device_data->base->str) & HASH_STR_DCAL_MASK) - cpu_relax(); - - /* - * INIT bit. Set this bit to 0b1 to reset the HASH processor core and - * prepare the initialize the HASH accelerator to compute the message - * digest of a new message. - */ - HASH_INITIALIZE; - - /* - * NBLW bits. Reset the number of bits in last word (NBLW). - */ - HASH_CLEAR_BITS(&device_data->base->str, HASH_STR_NBLW_MASK); -} - -static int hash_process_data(struct hash_device_data *device_data, - struct hash_ctx *ctx, struct hash_req_ctx *req_ctx, - int msg_length, u8 *data_buffer, u8 *buffer, - u8 *index) -{ - int ret = 0; - u32 count; - - do { - if ((*index + msg_length) < HASH_BLOCK_SIZE) { - for (count = 0; count < msg_length; count++) { - buffer[*index + count] = - *(data_buffer + count); - } - *index += msg_length; - msg_length = 0; - } else { - if (req_ctx->updated) { - ret = hash_resume_state(device_data, - &device_data->state); - memmove(req_ctx->state.buffer, - device_data->state.buffer, - HASH_BLOCK_SIZE); - if (ret) { - dev_err(device_data->dev, - "%s: hash_resume_state() failed!\n", - __func__); - goto out; - } - } else { - ret = init_hash_hw(device_data, ctx); - if (ret) { - dev_err(device_data->dev, - "%s: init_hash_hw() failed!\n", - __func__); - goto out; - } - req_ctx->updated = 1; - } - /* - * If 'data_buffer' is four byte aligned and - * local buffer does not have any data, we can - * write data directly from 'data_buffer' to - * HW peripheral, otherwise we first copy data - * to a local buffer - */ - if (IS_ALIGNED((unsigned long)data_buffer, 4) && - (0 == *index)) - hash_processblock(device_data, - (const u32 *)data_buffer, - HASH_BLOCK_SIZE); - else { - for (count = 0; - count < (u32)(HASH_BLOCK_SIZE - *index); - count++) { - buffer[*index + count] = - *(data_buffer + count); - } - hash_processblock(device_data, - (const u32 *)buffer, - HASH_BLOCK_SIZE); - } - hash_incrementlength(req_ctx, HASH_BLOCK_SIZE); - data_buffer += (HASH_BLOCK_SIZE - *index); - - msg_length -= (HASH_BLOCK_SIZE - *index); - *index = 0; - - ret = hash_save_state(device_data, - &device_data->state); - - memmove(device_data->state.buffer, - req_ctx->state.buffer, - HASH_BLOCK_SIZE); - if (ret) { - dev_err(device_data->dev, "%s: hash_save_state() failed!\n", - __func__); - goto out; - } - } - } while (msg_length != 0); -out: - - return ret; -} - -/** - * hash_dma_final - The hash dma final function for SHA1/SHA256. - * @req: The hash request for the job. 
- */ -static int hash_dma_final(struct ahash_request *req) -{ - int ret = 0; - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct hash_ctx *ctx = crypto_ahash_ctx(tfm); - struct hash_req_ctx *req_ctx = ahash_request_ctx(req); - struct hash_device_data *device_data; - u8 digest[SHA256_DIGEST_SIZE]; - int bytes_written = 0; - - ret = hash_get_device_data(ctx, &device_data); - if (ret) - return ret; - - dev_dbg(device_data->dev, "%s: (ctx=0x%lx)!\n", __func__, - (unsigned long)ctx); - - if (req_ctx->updated) { - ret = hash_resume_state(device_data, &device_data->state); - - if (ret) { - dev_err(device_data->dev, "%s: hash_resume_state() failed!\n", - __func__); - goto out; - } - } else { - ret = hash_setconfiguration(device_data, &ctx->config); - if (ret) { - dev_err(device_data->dev, - "%s: hash_setconfiguration() failed!\n", - __func__); - goto out; - } - - /* Enable DMA input */ - if (hash_mode != HASH_MODE_DMA || !req_ctx->dma_mode) { - HASH_CLEAR_BITS(&device_data->base->cr, - HASH_CR_DMAE_MASK); - } else { - HASH_SET_BITS(&device_data->base->cr, - HASH_CR_DMAE_MASK); - HASH_SET_BITS(&device_data->base->cr, - HASH_CR_PRIVN_MASK); - } - - HASH_INITIALIZE; - - if (ctx->config.oper_mode == HASH_OPER_MODE_HMAC) - hash_hw_write_key(device_data, ctx->key, ctx->keylen); - - /* Number of bits in last word = (nbytes * 8) % 32 */ - HASH_SET_NBLW((req->nbytes * 8) % 32); - req_ctx->updated = 1; - } - - /* Store the nents in the dma struct. */ - ctx->device->dma.nents = hash_get_nents(req->src, req->nbytes, NULL); - if (!ctx->device->dma.nents) { - dev_err(device_data->dev, "%s: ctx->device->dma.nents = 0\n", - __func__); - ret = ctx->device->dma.nents; - goto out; - } - - bytes_written = hash_dma_write(ctx, req->src, req->nbytes); - if (bytes_written != req->nbytes) { - dev_err(device_data->dev, "%s: hash_dma_write() failed!\n", - __func__); - ret = bytes_written; - goto out; - } - - wait_for_completion(&ctx->device->dma.complete); - hash_dma_done(ctx); - - while (readl(&device_data->base->str) & HASH_STR_DCAL_MASK) - cpu_relax(); - - if (ctx->config.oper_mode == HASH_OPER_MODE_HMAC && ctx->key) { - unsigned int keylen = ctx->keylen; - u8 *key = ctx->key; - - dev_dbg(device_data->dev, "%s: keylen: %d\n", - __func__, ctx->keylen); - hash_hw_write_key(device_data, key, keylen); - } - - hash_get_digest(device_data, digest, ctx->config.algorithm); - memcpy(req->result, digest, ctx->digestsize); - -out: - release_hash_device(device_data); - - /** - * Allocated in setkey, and only used in HMAC. - */ - kfree(ctx->key); - - return ret; -} - -/** - * hash_hw_final - The final hash calculation function - * @req: The hash request for the job. 
- */ -static int hash_hw_final(struct ahash_request *req) -{ - int ret = 0; - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct hash_ctx *ctx = crypto_ahash_ctx(tfm); - struct hash_req_ctx *req_ctx = ahash_request_ctx(req); - struct hash_device_data *device_data; - u8 digest[SHA256_DIGEST_SIZE]; - - ret = hash_get_device_data(ctx, &device_data); - if (ret) - return ret; - - dev_dbg(device_data->dev, "%s: (ctx=0x%lx)!\n", __func__, - (unsigned long)ctx); - - if (req_ctx->updated) { - ret = hash_resume_state(device_data, &device_data->state); - - if (ret) { - dev_err(device_data->dev, - "%s: hash_resume_state() failed!\n", __func__); - goto out; - } - } else if (req->nbytes == 0 && ctx->keylen == 0) { - u8 zero_hash[SHA256_DIGEST_SIZE]; - u32 zero_hash_size = 0; - bool zero_digest = false; - /** - * Use a pre-calculated empty message digest - * (workaround since hw return zeroes, hw bug!?) - */ - ret = get_empty_message_digest(device_data, &zero_hash[0], - &zero_hash_size, &zero_digest); - if (!ret && likely(zero_hash_size == ctx->digestsize) && - zero_digest) { - memcpy(req->result, &zero_hash[0], ctx->digestsize); - goto out; - } else if (!ret && !zero_digest) { - dev_dbg(device_data->dev, - "%s: HMAC zero msg with key, continue...\n", - __func__); - } else { - dev_err(device_data->dev, - "%s: ret=%d, or wrong digest size? %s\n", - __func__, ret, - zero_hash_size == ctx->digestsize ? - "true" : "false"); - /* Return error */ - goto out; - } - } else if (req->nbytes == 0 && ctx->keylen > 0) { - ret = -EPERM; - dev_err(device_data->dev, "%s: Empty message with keylength > 0, NOT supported\n", - __func__); - goto out; - } - - if (!req_ctx->updated) { - ret = init_hash_hw(device_data, ctx); - if (ret) { - dev_err(device_data->dev, - "%s: init_hash_hw() failed!\n", __func__); - goto out; - } - } - - if (req_ctx->state.index) { - hash_messagepad(device_data, req_ctx->state.buffer, - req_ctx->state.index); - } else { - HASH_SET_DCAL; - while (readl(&device_data->base->str) & HASH_STR_DCAL_MASK) - cpu_relax(); - } - - if (ctx->config.oper_mode == HASH_OPER_MODE_HMAC && ctx->key) { - unsigned int keylen = ctx->keylen; - u8 *key = ctx->key; - - dev_dbg(device_data->dev, "%s: keylen: %d\n", - __func__, ctx->keylen); - hash_hw_write_key(device_data, key, keylen); - } - - hash_get_digest(device_data, digest, ctx->config.algorithm); - memcpy(req->result, digest, ctx->digestsize); - -out: - release_hash_device(device_data); - - /** - * Allocated in setkey, and only used in HMAC. - */ - kfree(ctx->key); - - return ret; -} - -/** - * hash_hw_update - Updates current HASH computation hashing another part of - * the message. - * @req: Byte array containing the message to be hashed (caller - * allocated). 
- */ -int hash_hw_update(struct ahash_request *req) -{ - int ret = 0; - u8 index = 0; - u8 *buffer; - struct hash_device_data *device_data; - u8 *data_buffer; - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct hash_ctx *ctx = crypto_ahash_ctx(tfm); - struct hash_req_ctx *req_ctx = ahash_request_ctx(req); - struct crypto_hash_walk walk; - int msg_length; - - index = req_ctx->state.index; - buffer = (u8 *)req_ctx->state.buffer; - - ret = hash_get_device_data(ctx, &device_data); - if (ret) - return ret; - - msg_length = crypto_hash_walk_first(req, &walk); - - /* Empty message ("") is correct indata */ - if (msg_length == 0) { - ret = 0; - goto release_dev; - } - - /* Check if ctx->state.length + msg_length - overflows */ - if (msg_length > (req_ctx->state.length.low_word + msg_length) && - HASH_HIGH_WORD_MAX_VAL == req_ctx->state.length.high_word) { - pr_err("%s: HASH_MSG_LENGTH_OVERFLOW!\n", __func__); - ret = crypto_hash_walk_done(&walk, -EPERM); - goto release_dev; - } - - /* Main loop */ - while (0 != msg_length) { - data_buffer = walk.data; - ret = hash_process_data(device_data, ctx, req_ctx, msg_length, - data_buffer, buffer, &index); - - if (ret) { - dev_err(device_data->dev, "%s: hash_internal_hw_update() failed!\n", - __func__); - crypto_hash_walk_done(&walk, ret); - goto release_dev; - } - - msg_length = crypto_hash_walk_done(&walk, 0); - } - - req_ctx->state.index = index; - dev_dbg(device_data->dev, "%s: indata length=%d, bin=%d\n", - __func__, req_ctx->state.index, req_ctx->state.bit_index); - -release_dev: - release_hash_device(device_data); - - return ret; -} - -/** - * hash_resume_state - Function that resumes the state of an calculation. - * @device_data: Pointer to the device structure. - * @device_state: The state to be restored in the hash hardware - */ -int hash_resume_state(struct hash_device_data *device_data, - const struct hash_state *device_state) -{ - u32 temp_cr; - s32 count; - int hash_mode = HASH_OPER_MODE_HASH; - - if (NULL == device_state) { - dev_err(device_data->dev, "%s: HASH_INVALID_PARAMETER!\n", - __func__); - return -EPERM; - } - - /* Check correctness of index and length members */ - if (device_state->index > HASH_BLOCK_SIZE || - (device_state->length.low_word % HASH_BLOCK_SIZE) != 0) { - dev_err(device_data->dev, "%s: HASH_INVALID_PARAMETER!\n", - __func__); - return -EPERM; - } - - /* - * INIT bit. Set this bit to 0b1 to reset the HASH processor core and - * prepare the initialize the HASH accelerator to compute the message - * digest of a new message. - */ - HASH_INITIALIZE; - - temp_cr = device_state->temp_cr; - writel_relaxed(temp_cr & HASH_CR_RESUME_MASK, &device_data->base->cr); - - if (readl(&device_data->base->cr) & HASH_CR_MODE_MASK) - hash_mode = HASH_OPER_MODE_HMAC; - else - hash_mode = HASH_OPER_MODE_HASH; - - for (count = 0; count < HASH_CSR_COUNT; count++) { - if ((count >= 36) && (hash_mode == HASH_OPER_MODE_HASH)) - break; - - writel_relaxed(device_state->csr[count], - &device_data->base->csrx[count]); - } - - writel_relaxed(device_state->csfull, &device_data->base->csfull); - writel_relaxed(device_state->csdatain, &device_data->base->csdatain); - - writel_relaxed(device_state->str_reg, &device_data->base->str); - writel_relaxed(temp_cr, &device_data->base->cr); - - return 0; -} - -/** - * hash_save_state - Function that saves the state of hardware. - * @device_data: Pointer to the device structure. - * @device_state: The strucure where the hardware state should be saved. 
- */ -int hash_save_state(struct hash_device_data *device_data, - struct hash_state *device_state) -{ - u32 temp_cr; - u32 count; - int hash_mode = HASH_OPER_MODE_HASH; - - if (NULL == device_state) { - dev_err(device_data->dev, "%s: HASH_INVALID_PARAMETER!\n", - __func__); - return -ENOTSUPP; - } - - /* Write dummy value to force digest intermediate calculation. This - * actually makes sure that there isn't any ongoing calculation in the - * hardware. - */ - while (readl(&device_data->base->str) & HASH_STR_DCAL_MASK) - cpu_relax(); - - temp_cr = readl_relaxed(&device_data->base->cr); - - device_state->str_reg = readl_relaxed(&device_data->base->str); - - device_state->din_reg = readl_relaxed(&device_data->base->din); - - if (readl(&device_data->base->cr) & HASH_CR_MODE_MASK) - hash_mode = HASH_OPER_MODE_HMAC; - else - hash_mode = HASH_OPER_MODE_HASH; - - for (count = 0; count < HASH_CSR_COUNT; count++) { - if ((count >= 36) && (hash_mode == HASH_OPER_MODE_HASH)) - break; - - device_state->csr[count] = - readl_relaxed(&device_data->base->csrx[count]); - } - - device_state->csfull = readl_relaxed(&device_data->base->csfull); - device_state->csdatain = readl_relaxed(&device_data->base->csdatain); - - device_state->temp_cr = temp_cr; - - return 0; -} - -/** - * hash_check_hw - This routine checks for peripheral Ids and PCell Ids. - * @device_data: - * - */ -int hash_check_hw(struct hash_device_data *device_data) -{ - /* Checking Peripheral Ids */ - if (HASH_P_ID0 == readl_relaxed(&device_data->base->periphid0) && - HASH_P_ID1 == readl_relaxed(&device_data->base->periphid1) && - HASH_P_ID2 == readl_relaxed(&device_data->base->periphid2) && - HASH_P_ID3 == readl_relaxed(&device_data->base->periphid3) && - HASH_CELL_ID0 == readl_relaxed(&device_data->base->cellid0) && - HASH_CELL_ID1 == readl_relaxed(&device_data->base->cellid1) && - HASH_CELL_ID2 == readl_relaxed(&device_data->base->cellid2) && - HASH_CELL_ID3 == readl_relaxed(&device_data->base->cellid3)) { - return 0; - } - - dev_err(device_data->dev, "%s: HASH_UNSUPPORTED_HW!\n", __func__); - return -ENOTSUPP; -} - -/** - * hash_get_digest - Gets the digest. - * @device_data: Pointer to the device structure. - * @digest: User allocated byte array for the calculated digest. - * @algorithm: The algorithm in use. - */ -void hash_get_digest(struct hash_device_data *device_data, - u8 *digest, int algorithm) -{ - u32 temp_hx_val, count; - int loop_ctr; - - if (algorithm != HASH_ALGO_SHA1 && algorithm != HASH_ALGO_SHA256) { - dev_err(device_data->dev, "%s: Incorrect algorithm %d\n", - __func__, algorithm); - return; - } - - if (algorithm == HASH_ALGO_SHA1) - loop_ctr = SHA1_DIGEST_SIZE / sizeof(u32); - else - loop_ctr = SHA256_DIGEST_SIZE / sizeof(u32); - - dev_dbg(device_data->dev, "%s: digest array:(0x%lx)\n", - __func__, (unsigned long)digest); - - /* Copy result into digest array */ - for (count = 0; count < loop_ctr; count++) { - temp_hx_val = readl_relaxed(&device_data->base->hx[count]); - digest[count * 4] = (u8) ((temp_hx_val >> 24) & 0xFF); - digest[count * 4 + 1] = (u8) ((temp_hx_val >> 16) & 0xFF); - digest[count * 4 + 2] = (u8) ((temp_hx_val >> 8) & 0xFF); - digest[count * 4 + 3] = (u8) ((temp_hx_val >> 0) & 0xFF); - } -} - -/** - * ahash_update - The hash update function for SHA1/SHA2 (SHA256). - * @req: The hash request for the job. 
- */ -static int ahash_update(struct ahash_request *req) -{ - int ret = 0; - struct hash_req_ctx *req_ctx = ahash_request_ctx(req); - - if (hash_mode != HASH_MODE_DMA || !req_ctx->dma_mode) - ret = hash_hw_update(req); - /* Skip update for DMA, all data will be passed to DMA in final */ - - if (ret) { - pr_err("%s: hash_hw_update() failed!\n", __func__); - } - - return ret; -} - -/** - * ahash_final - The hash final function for SHA1/SHA2 (SHA256). - * @req: The hash request for the job. - */ -static int ahash_final(struct ahash_request *req) -{ - int ret = 0; - struct hash_req_ctx *req_ctx = ahash_request_ctx(req); - - pr_debug("%s: data size: %d\n", __func__, req->nbytes); - - if ((hash_mode == HASH_MODE_DMA) && req_ctx->dma_mode) - ret = hash_dma_final(req); - else - ret = hash_hw_final(req); - - if (ret) { - pr_err("%s: hash_hw/dma_final() failed\n", __func__); - } - - return ret; -} - -static int hash_setkey(struct crypto_ahash *tfm, - const u8 *key, unsigned int keylen, int alg) -{ - int ret = 0; - struct hash_ctx *ctx = crypto_ahash_ctx(tfm); - - /** - * Freed in final. - */ - ctx->key = kmemdup(key, keylen, GFP_KERNEL); - if (!ctx->key) { - pr_err("%s: Failed to allocate ctx->key for %d\n", - __func__, alg); - return -ENOMEM; - } - ctx->keylen = keylen; - - return ret; -} - -static int ahash_sha1_init(struct ahash_request *req) -{ - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct hash_ctx *ctx = crypto_ahash_ctx(tfm); - - ctx->config.data_format = HASH_DATA_8_BITS; - ctx->config.algorithm = HASH_ALGO_SHA1; - ctx->config.oper_mode = HASH_OPER_MODE_HASH; - ctx->digestsize = SHA1_DIGEST_SIZE; - - return ux500_hash_init(req); -} - -static int ahash_sha256_init(struct ahash_request *req) -{ - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct hash_ctx *ctx = crypto_ahash_ctx(tfm); - - ctx->config.data_format = HASH_DATA_8_BITS; - ctx->config.algorithm = HASH_ALGO_SHA256; - ctx->config.oper_mode = HASH_OPER_MODE_HASH; - ctx->digestsize = SHA256_DIGEST_SIZE; - - return ux500_hash_init(req); -} - -static int ahash_sha1_digest(struct ahash_request *req) -{ - int ret2, ret1; - - ret1 = ahash_sha1_init(req); - if (ret1) - goto out; - - ret1 = ahash_update(req); - ret2 = ahash_final(req); - -out: - return ret1 ? ret1 : ret2; -} - -static int ahash_sha256_digest(struct ahash_request *req) -{ - int ret2, ret1; - - ret1 = ahash_sha256_init(req); - if (ret1) - goto out; - - ret1 = ahash_update(req); - ret2 = ahash_final(req); - -out: - return ret1 ? 
ret1 : ret2; -} - -static int ahash_noimport(struct ahash_request *req, const void *in) -{ - return -ENOSYS; -} - -static int ahash_noexport(struct ahash_request *req, void *out) -{ - return -ENOSYS; -} - -static int hmac_sha1_init(struct ahash_request *req) -{ - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct hash_ctx *ctx = crypto_ahash_ctx(tfm); - - ctx->config.data_format = HASH_DATA_8_BITS; - ctx->config.algorithm = HASH_ALGO_SHA1; - ctx->config.oper_mode = HASH_OPER_MODE_HMAC; - ctx->digestsize = SHA1_DIGEST_SIZE; - - return ux500_hash_init(req); -} - -static int hmac_sha256_init(struct ahash_request *req) -{ - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct hash_ctx *ctx = crypto_ahash_ctx(tfm); - - ctx->config.data_format = HASH_DATA_8_BITS; - ctx->config.algorithm = HASH_ALGO_SHA256; - ctx->config.oper_mode = HASH_OPER_MODE_HMAC; - ctx->digestsize = SHA256_DIGEST_SIZE; - - return ux500_hash_init(req); -} - -static int hmac_sha1_digest(struct ahash_request *req) -{ - int ret2, ret1; - - ret1 = hmac_sha1_init(req); - if (ret1) - goto out; - - ret1 = ahash_update(req); - ret2 = ahash_final(req); - -out: - return ret1 ? ret1 : ret2; -} - -static int hmac_sha256_digest(struct ahash_request *req) -{ - int ret2, ret1; - - ret1 = hmac_sha256_init(req); - if (ret1) - goto out; - - ret1 = ahash_update(req); - ret2 = ahash_final(req); - -out: - return ret1 ? ret1 : ret2; -} - -static int hmac_sha1_setkey(struct crypto_ahash *tfm, - const u8 *key, unsigned int keylen) -{ - return hash_setkey(tfm, key, keylen, HASH_ALGO_SHA1); -} - -static int hmac_sha256_setkey(struct crypto_ahash *tfm, - const u8 *key, unsigned int keylen) -{ - return hash_setkey(tfm, key, keylen, HASH_ALGO_SHA256); -} - -struct hash_algo_template { - struct hash_config conf; - struct ahash_alg hash; -}; - -static int hash_cra_init(struct crypto_tfm *tfm) -{ - struct hash_ctx *ctx = crypto_tfm_ctx(tfm); - struct crypto_alg *alg = tfm->__crt_alg; - struct hash_algo_template *hash_alg; - - hash_alg = container_of(__crypto_ahash_alg(alg), - struct hash_algo_template, - hash); - - crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), - sizeof(struct hash_req_ctx)); - - ctx->config.data_format = HASH_DATA_8_BITS; - ctx->config.algorithm = hash_alg->conf.algorithm; - ctx->config.oper_mode = hash_alg->conf.oper_mode; - - ctx->digestsize = hash_alg->hash.halg.digestsize; - - return 0; -} - -static struct hash_algo_template hash_algs[] = { - { - .conf.algorithm = HASH_ALGO_SHA1, - .conf.oper_mode = HASH_OPER_MODE_HASH, - .hash = { - .init = ux500_hash_init, - .update = ahash_update, - .final = ahash_final, - .digest = ahash_sha1_digest, - .export = ahash_noexport, - .import = ahash_noimport, - .halg.digestsize = SHA1_DIGEST_SIZE, - .halg.statesize = sizeof(struct hash_ctx), - .halg.base = { - .cra_name = "sha1", - .cra_driver_name = "sha1-ux500", - .cra_flags = CRYPTO_ALG_ASYNC, - .cra_blocksize = SHA1_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct hash_ctx), - .cra_init = hash_cra_init, - .cra_module = THIS_MODULE, - } - } - }, - { - .conf.algorithm = HASH_ALGO_SHA256, - .conf.oper_mode = HASH_OPER_MODE_HASH, - .hash = { - .init = ux500_hash_init, - .update = ahash_update, - .final = ahash_final, - .digest = ahash_sha256_digest, - .export = ahash_noexport, - .import = ahash_noimport, - .halg.digestsize = SHA256_DIGEST_SIZE, - .halg.statesize = sizeof(struct hash_ctx), - .halg.base = { - .cra_name = "sha256", - .cra_driver_name = "sha256-ux500", - .cra_flags = CRYPTO_ALG_ASYNC, - .cra_blocksize = 
SHA256_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct hash_ctx), - .cra_init = hash_cra_init, - .cra_module = THIS_MODULE, - } - } - }, - { - .conf.algorithm = HASH_ALGO_SHA1, - .conf.oper_mode = HASH_OPER_MODE_HMAC, - .hash = { - .init = ux500_hash_init, - .update = ahash_update, - .final = ahash_final, - .digest = hmac_sha1_digest, - .setkey = hmac_sha1_setkey, - .export = ahash_noexport, - .import = ahash_noimport, - .halg.digestsize = SHA1_DIGEST_SIZE, - .halg.statesize = sizeof(struct hash_ctx), - .halg.base = { - .cra_name = "hmac(sha1)", - .cra_driver_name = "hmac-sha1-ux500", - .cra_flags = CRYPTO_ALG_ASYNC, - .cra_blocksize = SHA1_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct hash_ctx), - .cra_init = hash_cra_init, - .cra_module = THIS_MODULE, - } - } - }, - { - .conf.algorithm = HASH_ALGO_SHA256, - .conf.oper_mode = HASH_OPER_MODE_HMAC, - .hash = { - .init = ux500_hash_init, - .update = ahash_update, - .final = ahash_final, - .digest = hmac_sha256_digest, - .setkey = hmac_sha256_setkey, - .export = ahash_noexport, - .import = ahash_noimport, - .halg.digestsize = SHA256_DIGEST_SIZE, - .halg.statesize = sizeof(struct hash_ctx), - .halg.base = { - .cra_name = "hmac(sha256)", - .cra_driver_name = "hmac-sha256-ux500", - .cra_flags = CRYPTO_ALG_ASYNC, - .cra_blocksize = SHA256_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct hash_ctx), - .cra_init = hash_cra_init, - .cra_module = THIS_MODULE, - } - } - } -}; - -static int ahash_algs_register_all(struct hash_device_data *device_data) -{ - int ret; - int i; - int count; - - for (i = 0; i < ARRAY_SIZE(hash_algs); i++) { - ret = crypto_register_ahash(&hash_algs[i].hash); - if (ret) { - count = i; - dev_err(device_data->dev, "%s: alg registration failed\n", - hash_algs[i].hash.halg.base.cra_driver_name); - goto unreg; - } - } - return 0; -unreg: - for (i = 0; i < count; i++) - crypto_unregister_ahash(&hash_algs[i].hash); - return ret; -} - -static void ahash_algs_unregister_all(struct hash_device_data *device_data) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(hash_algs); i++) - crypto_unregister_ahash(&hash_algs[i].hash); -} - -/** - * ux500_hash_probe - Function that probes the hash hardware. - * @pdev: The platform device. 
- */ -static int ux500_hash_probe(struct platform_device *pdev) -{ - int ret = 0; - struct resource *res = NULL; - struct hash_device_data *device_data; - struct device *dev = &pdev->dev; - - device_data = devm_kzalloc(dev, sizeof(*device_data), GFP_KERNEL); - if (!device_data) { - ret = -ENOMEM; - goto out; - } - - device_data->dev = dev; - device_data->current_ctx = NULL; - - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { - dev_dbg(dev, "%s: platform_get_resource() failed!\n", __func__); - ret = -ENODEV; - goto out; - } - - device_data->phybase = res->start; - device_data->base = devm_ioremap_resource(dev, res); - if (IS_ERR(device_data->base)) { - ret = PTR_ERR(device_data->base); - goto out; - } - spin_lock_init(&device_data->ctx_lock); - spin_lock_init(&device_data->power_state_lock); - - /* Enable power for HASH1 hardware block */ - device_data->regulator = regulator_get(dev, "v-ape"); - if (IS_ERR(device_data->regulator)) { - dev_err(dev, "%s: regulator_get() failed!\n", __func__); - ret = PTR_ERR(device_data->regulator); - device_data->regulator = NULL; - goto out; - } - - /* Enable the clock for HASH1 hardware block */ - device_data->clk = devm_clk_get(dev, NULL); - if (IS_ERR(device_data->clk)) { - dev_err(dev, "%s: clk_get() failed!\n", __func__); - ret = PTR_ERR(device_data->clk); - goto out_regulator; - } - - ret = clk_prepare(device_data->clk); - if (ret) { - dev_err(dev, "%s: clk_prepare() failed!\n", __func__); - goto out_regulator; - } - - /* Enable device power (and clock) */ - ret = hash_enable_power(device_data, false); - if (ret) { - dev_err(dev, "%s: hash_enable_power() failed!\n", __func__); - goto out_clk_unprepare; - } - - ret = hash_check_hw(device_data); - if (ret) { - dev_err(dev, "%s: hash_check_hw() failed!\n", __func__); - goto out_power; - } - - if (hash_mode == HASH_MODE_DMA) - hash_dma_setup_channel(device_data, dev); - - platform_set_drvdata(pdev, device_data); - - /* Put the new device into the device list... */ - klist_add_tail(&device_data->list_node, &driver_data.device_list); - /* ... and signal that a new device is available. */ - up(&driver_data.device_allocation); - - ret = ahash_algs_register_all(device_data); - if (ret) { - dev_err(dev, "%s: ahash_algs_register_all() failed!\n", - __func__); - goto out_power; - } - - dev_info(dev, "successfully registered\n"); - return 0; - -out_power: - hash_disable_power(device_data, false); - -out_clk_unprepare: - clk_unprepare(device_data->clk); - -out_regulator: - regulator_put(device_data->regulator); - -out: - return ret; -} - -/** - * ux500_hash_remove - Function that removes the hash device from the platform. - * @pdev: The platform device. - */ -static int ux500_hash_remove(struct platform_device *pdev) -{ - struct hash_device_data *device_data; - struct device *dev = &pdev->dev; - - device_data = platform_get_drvdata(pdev); - if (!device_data) { - dev_err(dev, "%s: platform_get_drvdata() failed!\n", __func__); - return -ENOMEM; - } - - /* Try to decrease the number of available devices. */ - if (down_trylock(&driver_data.device_allocation)) - return -EBUSY; - - /* Check that the device is free */ - spin_lock(&device_data->ctx_lock); - /* current_ctx allocates a device, NULL = unallocated */ - if (device_data->current_ctx) { - /* The device is busy */ - spin_unlock(&device_data->ctx_lock); - /* Return the device to the pool. 
*/ - up(&driver_data.device_allocation); - return -EBUSY; - } - - spin_unlock(&device_data->ctx_lock); - - /* Remove the device from the list */ - if (klist_node_attached(&device_data->list_node)) - klist_remove(&device_data->list_node); - - /* If this was the last device, remove the services */ - if (list_empty(&driver_data.device_list.k_list)) - ahash_algs_unregister_all(device_data); - - if (hash_disable_power(device_data, false)) - dev_err(dev, "%s: hash_disable_power() failed\n", - __func__); - - clk_unprepare(device_data->clk); - regulator_put(device_data->regulator); - - return 0; -} - -/** - * ux500_hash_shutdown - Function that shutdown the hash device. - * @pdev: The platform device - */ -static void ux500_hash_shutdown(struct platform_device *pdev) -{ - struct hash_device_data *device_data; - - device_data = platform_get_drvdata(pdev); - if (!device_data) { - dev_err(&pdev->dev, "%s: platform_get_drvdata() failed!\n", - __func__); - return; - } - - /* Check that the device is free */ - spin_lock(&device_data->ctx_lock); - /* current_ctx allocates a device, NULL = unallocated */ - if (!device_data->current_ctx) { - if (down_trylock(&driver_data.device_allocation)) - dev_dbg(&pdev->dev, "%s: Cryp still in use! Shutting down anyway...\n", - __func__); - /** - * (Allocate the device) - * Need to set this to non-null (dummy) value, - * to avoid usage if context switching. - */ - device_data->current_ctx++; - } - spin_unlock(&device_data->ctx_lock); - - /* Remove the device from the list */ - if (klist_node_attached(&device_data->list_node)) - klist_remove(&device_data->list_node); - - /* If this was the last device, remove the services */ - if (list_empty(&driver_data.device_list.k_list)) - ahash_algs_unregister_all(device_data); - - if (hash_disable_power(device_data, false)) - dev_err(&pdev->dev, "%s: hash_disable_power() failed\n", - __func__); -} - -#ifdef CONFIG_PM_SLEEP -/** - * ux500_hash_suspend - Function that suspends the hash device. - * @dev: Device to suspend. - */ -static int ux500_hash_suspend(struct device *dev) -{ - int ret; - struct hash_device_data *device_data; - struct hash_ctx *temp_ctx = NULL; - - device_data = dev_get_drvdata(dev); - if (!device_data) { - dev_err(dev, "%s: platform_get_drvdata() failed!\n", __func__); - return -ENOMEM; - } - - spin_lock(&device_data->ctx_lock); - if (!device_data->current_ctx) - device_data->current_ctx++; - spin_unlock(&device_data->ctx_lock); - - if (device_data->current_ctx == ++temp_ctx) { - if (down_interruptible(&driver_data.device_allocation)) - dev_dbg(dev, "%s: down_interruptible() failed\n", - __func__); - ret = hash_disable_power(device_data, false); - - } else { - ret = hash_disable_power(device_data, true); - } - - if (ret) - dev_err(dev, "%s: hash_disable_power()\n", __func__); - - return ret; -} - -/** - * ux500_hash_resume - Function that resume the hash device. - * @dev: Device to resume. 
- */ -static int ux500_hash_resume(struct device *dev) -{ - int ret = 0; - struct hash_device_data *device_data; - struct hash_ctx *temp_ctx = NULL; - - device_data = dev_get_drvdata(dev); - if (!device_data) { - dev_err(dev, "%s: platform_get_drvdata() failed!\n", __func__); - return -ENOMEM; - } - - spin_lock(&device_data->ctx_lock); - if (device_data->current_ctx == ++temp_ctx) - device_data->current_ctx = NULL; - spin_unlock(&device_data->ctx_lock); - - if (!device_data->current_ctx) - up(&driver_data.device_allocation); - else - ret = hash_enable_power(device_data, true); - - if (ret) - dev_err(dev, "%s: hash_enable_power() failed!\n", __func__); - - return ret; -} -#endif - -static SIMPLE_DEV_PM_OPS(ux500_hash_pm, ux500_hash_suspend, ux500_hash_resume); - -static const struct of_device_id ux500_hash_match[] = { - { .compatible = "stericsson,ux500-hash" }, - { }, -}; -MODULE_DEVICE_TABLE(of, ux500_hash_match); - -static struct platform_driver hash_driver = { - .probe = ux500_hash_probe, - .remove = ux500_hash_remove, - .shutdown = ux500_hash_shutdown, - .driver = { - .name = "hash1", - .of_match_table = ux500_hash_match, - .pm = &ux500_hash_pm, - } -}; - -/** - * ux500_hash_mod_init - The kernel module init function. - */ -static int __init ux500_hash_mod_init(void) -{ - klist_init(&driver_data.device_list, NULL, NULL); - /* Initialize the semaphore to 0 devices (locked state) */ - sema_init(&driver_data.device_allocation, 0); - - return platform_driver_register(&hash_driver); -} - -/** - * ux500_hash_mod_fini - The kernel module exit function. - */ -static void __exit ux500_hash_mod_fini(void) -{ - platform_driver_unregister(&hash_driver); -} - -module_init(ux500_hash_mod_init); -module_exit(ux500_hash_mod_fini); - -MODULE_DESCRIPTION("Driver for ST-Ericsson UX500 HASH engine."); -MODULE_LICENSE("GPL"); - -MODULE_ALIAS_CRYPTO("sha1-all"); -MODULE_ALIAS_CRYPTO("sha256-all"); -MODULE_ALIAS_CRYPTO("hmac-sha1-all"); -MODULE_ALIAS_CRYPTO("hmac-sha256-all"); diff --git a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c index b2979be613b8..6963344f6a3a 100644 --- a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c +++ b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c @@ -116,7 +116,7 @@ static int virtio_crypto_alg_akcipher_init_session(struct virtio_crypto_akcipher struct virtio_crypto_session_input *input; struct virtio_crypto_ctrl_request *vc_ctrl_req; - pkey = kmemdup(key, keylen, GFP_ATOMIC); + pkey = kmemdup(key, keylen, GFP_KERNEL); if (!pkey) return -ENOMEM; diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 2653516bcdef..3aeeb8f2802f 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1458,8 +1458,7 @@ static int crypt_convert_block_skcipher(struct crypt_config *cc, return r; } -static void kcryptd_async_done(struct crypto_async_request *async_req, - int error); +static void kcryptd_async_done(void *async_req, int error); static int crypt_alloc_req_skcipher(struct crypt_config *cc, struct convert_context *ctx) @@ -2147,10 +2146,9 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io) crypt_dec_pending(io); } -static void kcryptd_async_done(struct crypto_async_request *async_req, - int error) +static void kcryptd_async_done(void *data, int error) { - struct dm_crypt_request *dmreq = async_req->data; + struct dm_crypt_request *dmreq = data; struct convert_context *ctx = dmreq->ctx; struct dm_crypt_io *io = container_of(ctx, struct dm_crypt_io, ctx); struct crypt_config *cc = io->cc; 
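The dm-crypt hunk above, like the dm-integrity, macsec, af_alg and ah/esp hunks that follow, adopts the new completion-callback convention from this pull: the crypto core now hands the callback the request's data pointer directly as a void *, instead of the struct crypto_async_request * it used to pass. A minimal sketch of a converted callback is shown below; struct my_req, my_done() and my_submit() are hypothetical names for illustration, not code from this merge.

/*
 * Sketch only: the completion-callback signature used after this series.
 * 'struct my_req' and the function names are illustrative assumptions.
 */
#include <linux/completion.h>
#include <linux/crypto.h>
#include <crypto/skcipher.h>

struct my_req {
	struct completion done;
	int err;
};

/* Old prototype, removed tree-wide by this merge:
 *   static void my_done(struct crypto_async_request *base, int err)
 *   { struct my_req *ctx = base->data; ... }
 */

/* New prototype: the core passes the registered data pointer straight through. */
static void my_done(void *data, int err)
{
	struct my_req *ctx = data;

	if (err == -EINPROGRESS)	/* backlog notification, not final completion */
		return;

	ctx->err = err;
	complete(&ctx->done);
}

static void my_submit(struct skcipher_request *req, struct my_req *ctx)
{
	init_completion(&ctx->done);
	/* 'ctx' becomes the void *data argument delivered to my_done(). */
	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				      my_done, ctx);
}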
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 1388ee35571e..c58156deb2b1 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -955,9 +955,9 @@ static void xor_journal(struct dm_integrity_c *ic, bool encrypt, unsigned sectio async_tx_issue_pending_all(); } -static void complete_journal_encrypt(struct crypto_async_request *req, int err) +static void complete_journal_encrypt(void *data, int err) { - struct journal_completion *comp = req->data; + struct journal_completion *comp = data; if (unlikely(err)) { if (likely(err == -EINPROGRESS)) { complete(&comp->ic->crypto_backoff); diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index bf8ac7a3ded7..becb04123d3e 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -528,9 +528,9 @@ static void count_tx(struct net_device *dev, int ret, int len) } } -static void macsec_encrypt_done(struct crypto_async_request *base, int err) +static void macsec_encrypt_done(void *data, int err) { - struct sk_buff *skb = base->data; + struct sk_buff *skb = data; struct net_device *dev = skb->dev; struct macsec_dev *macsec = macsec_priv(dev); struct macsec_tx_sa *sa = macsec_skb_cb(skb)->tx_sa; @@ -835,9 +835,9 @@ static void count_rx(struct net_device *dev, int len) u64_stats_update_end(&stats->syncp); } -static void macsec_decrypt_done(struct crypto_async_request *base, int err) +static void macsec_decrypt_done(void *data, int err) { - struct sk_buff *skb = base->data; + struct sk_buff *skb = data; struct net_device *dev = skb->dev; struct macsec_dev *macsec = macsec_priv(dev); struct macsec_rx_sa *rx_sa = macsec_skb_cb(skb)->rx_sa; diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index bd3f3c755b24..c16f0d660cb7 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -260,22 +260,6 @@ int virt_to_scatterlist(const void *addr, int size, struct scatterlist *sg, return i; } -struct extent_crypt_result { - struct completion completion; - int rc; -}; - -static void extent_crypt_complete(struct crypto_async_request *req, int rc) -{ - struct extent_crypt_result *ecr = req->data; - - if (rc == -EINPROGRESS) - return; - - ecr->rc = rc; - complete(&ecr->completion); -} - /** * crypt_scatterlist * @crypt_stat: Pointer to the crypt_stat struct to initialize. @@ -293,7 +277,7 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, unsigned char *iv, int op) { struct skcipher_request *req = NULL; - struct extent_crypt_result ecr; + DECLARE_CRYPTO_WAIT(ecr); int rc = 0; if (unlikely(ecryptfs_verbosity > 0)) { @@ -303,8 +287,6 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, crypt_stat->key_size); } - init_completion(&ecr.completion); - mutex_lock(&crypt_stat->cs_tfm_mutex); req = skcipher_request_alloc(crypt_stat->tfm, GFP_NOFS); if (!req) { @@ -315,7 +297,7 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, - extent_crypt_complete, &ecr); + crypto_req_done, &ecr); /* Consider doing this once, when the file is opened */ if (!(crypt_stat->flags & ECRYPTFS_KEY_SET)) { rc = crypto_skcipher_setkey(crypt_stat->tfm, crypt_stat->key, @@ -334,13 +316,7 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, skcipher_request_set_crypt(req, src_sg, dst_sg, size, iv); rc = op == ENCRYPT ? 
crypto_skcipher_encrypt(req) : crypto_skcipher_decrypt(req); - if (rc == -EINPROGRESS || rc == -EBUSY) { - struct extent_crypt_result *ecr = req->base.data; - - wait_for_completion(&ecr->completion); - rc = ecr->rc; - reinit_completion(&ecr->completion); - } + rc = crypto_wait_req(rc, &ecr); out: skcipher_request_free(req); return rc; diff --git a/include/crypto/aead.h b/include/crypto/aead.h index 14db3bee0519..4a2b7e6e0c1f 100644 --- a/include/crypto/aead.h +++ b/include/crypto/aead.h @@ -27,15 +27,12 @@ * * For example: authenc(hmac(sha256), cbc(aes)) * - * The example code provided for the symmetric key cipher operation - * applies here as well. Naturally all *skcipher* symbols must be exchanged - * the *aead* pendants discussed in the following. In addition, for the AEAD - * operation, the aead_request_set_ad function must be used to set the - * pointer to the associated data memory location before performing the - * encryption or decryption operation. In case of an encryption, the associated - * data memory is filled during the encryption operation. For decryption, the - * associated data memory must contain data that is used to verify the integrity - * of the decrypted data. Another deviation from the asynchronous block cipher + * The example code provided for the symmetric key cipher operation applies + * here as well. Naturally all *skcipher* symbols must be exchanged the *aead* + * pendants discussed in the following. In addition, for the AEAD operation, + * the aead_request_set_ad function must be used to set the pointer to the + * associated data memory location before performing the encryption or + * decryption operation. Another deviation from the asynchronous block cipher * operation is that the caller should explicitly check for -EBADMSG of the * crypto_aead_decrypt. That error indicates an authentication error, i.e. * a breach in the integrity of the message. In essence, that -EBADMSG error @@ -49,7 +46,10 @@ * * The destination scatterlist has the same layout, except that the plaintext * (resp. ciphertext) will grow (resp. shrink) by the authentication tag size - * during encryption (resp. decryption). + * during encryption (resp. decryption). The authentication tag is generated + * during the encryption operation and appended to the ciphertext. During + * decryption, the authentication tag is consumed along with the ciphertext and + * used to verify the integrity of the plaintext and the associated data. * * In-place encryption/decryption is enabled by using the same scatterlist * pointer for both the source and destination. 
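The ecryptfs hunk above (and the bluetooth ecdh_helper hunk later in this merge) drops a hand-rolled completion struct in favour of the generic crypto_wait helpers, wiring crypto_req_done() as the request callback and folding the async return codes with crypto_wait_req(). Below is a minimal, self-contained sketch of that synchronous-wait pattern; encrypt_sync() and its parameters are assumptions for illustration, not part of this diff.

/*
 * Sketch of the DECLARE_CRYPTO_WAIT()/crypto_wait_req() pattern this merge
 * rolls out. The tfm, scatterlists, length and IV are assumed to be prepared
 * by the caller; encrypt_sync() is a hypothetical helper name.
 */
#include <linux/crypto.h>
#include <linux/scatterlist.h>
#include <crypto/skcipher.h>

static int encrypt_sync(struct crypto_skcipher *tfm,
			struct scatterlist *src, struct scatterlist *dst,
			unsigned int len, u8 *iv)
{
	DECLARE_CRYPTO_WAIT(wait);	/* completion plus an error slot */
	struct skcipher_request *req;
	int err;

	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req)
		return -ENOMEM;

	/* crypto_req_done() records the result in 'wait' and completes it. */
	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
					   CRYPTO_TFM_REQ_MAY_SLEEP,
				      crypto_req_done, &wait);
	skcipher_request_set_crypt(req, src, dst, len, iv);

	/* Turns -EINPROGRESS/-EBUSY into a sleep until the callback fires. */
	err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);

	skcipher_request_free(req);
	return err;
}

This mirrors what the converted ecryptfs and bluetooth code now does, and it works unchanged with the void * callback signature introduced elsewhere in this pull.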
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 61b327206b55..fede394ae2ab 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -302,4 +302,10 @@ enum { CRYPTO_MSG_ALG_LOADED, }; +static inline void crypto_request_complete(struct crypto_async_request *req, + int err) +{ + req->complete(req->data, err); +} + #endif /* _CRYPTO_ALGAPI_H */ diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h index a5db86670bdf..7e76623f9ec3 100644 --- a/include/crypto/if_alg.h +++ b/include/crypto/if_alg.h @@ -21,8 +21,6 @@ #define ALG_MAX_PAGES 16 -struct crypto_async_request; - struct alg_sock { /* struct sock must be the first member of struct alg_sock */ struct sock sk; @@ -235,7 +233,7 @@ int af_alg_sendmsg(struct socket *sock, struct msghdr *msg, size_t size, ssize_t af_alg_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags); void af_alg_free_resources(struct af_alg_async_req *areq); -void af_alg_async_cb(struct crypto_async_request *_req, int err); +void af_alg_async_cb(void *data, int err); __poll_t af_alg_poll(struct file *file, struct socket *sock, poll_table *wait); struct af_alg_async_req *af_alg_alloc_areq(struct sock *sk, diff --git a/include/crypto/internal/acompress.h b/include/crypto/internal/acompress.h index 49339003bd2c..978b57a3f4f0 100644 --- a/include/crypto/internal/acompress.h +++ b/include/crypto/internal/acompress.h @@ -28,7 +28,7 @@ static inline void *acomp_tfm_ctx(struct crypto_acomp *tfm) static inline void acomp_request_complete(struct acomp_req *req, int err) { - req->base.complete(&req->base, err); + crypto_request_complete(&req->base, err); } static inline const char *acomp_alg_name(struct crypto_acomp *tfm) diff --git a/include/crypto/internal/aead.h b/include/crypto/internal/aead.h index cd8cb1e921b7..28a95eb3182d 100644 --- a/include/crypto/internal/aead.h +++ b/include/crypto/internal/aead.h @@ -82,7 +82,7 @@ static inline void *aead_request_ctx_dma(struct aead_request *req) static inline void aead_request_complete(struct aead_request *req, int err) { - req->base.complete(&req->base, err); + crypto_request_complete(&req->base, err); } static inline u32 aead_request_flags(struct aead_request *req) diff --git a/include/crypto/internal/akcipher.h b/include/crypto/internal/akcipher.h index aaf1092b93b8..a0fba4b2eccf 100644 --- a/include/crypto/internal/akcipher.h +++ b/include/crypto/internal/akcipher.h @@ -69,7 +69,7 @@ static inline void *akcipher_tfm_ctx_dma(struct crypto_akcipher *tfm) static inline void akcipher_request_complete(struct akcipher_request *req, int err) { - req->base.complete(&req->base, err); + crypto_request_complete(&req->base, err); } static inline const char *akcipher_alg_name(struct crypto_akcipher *tfm) diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 1a2a41b79253..0b259dbb97af 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -199,7 +199,7 @@ static inline void *ahash_request_ctx_dma(struct ahash_request *req) static inline void ahash_request_complete(struct ahash_request *req, int err) { - req->base.complete(&req->base, err); + crypto_request_complete(&req->base, err); } static inline u32 ahash_request_flags(struct ahash_request *req) diff --git a/include/crypto/internal/kpp.h b/include/crypto/internal/kpp.h index 3c9726e89f53..0a6db8c4a9a0 100644 --- a/include/crypto/internal/kpp.h +++ b/include/crypto/internal/kpp.h @@ -85,7 +85,7 @@ static inline void *kpp_tfm_ctx_dma(struct crypto_kpp *tfm) static 
inline void kpp_request_complete(struct kpp_request *req, int err) { - req->base.complete(&req->base, err); + crypto_request_complete(&req->base, err); } static inline const char *kpp_alg_name(struct crypto_kpp *tfm) diff --git a/include/crypto/internal/skcipher.h b/include/crypto/internal/skcipher.h index 06d0a5491cf3..fb3d9e899f52 100644 --- a/include/crypto/internal/skcipher.h +++ b/include/crypto/internal/skcipher.h @@ -94,7 +94,7 @@ static inline void *skcipher_instance_ctx(struct skcipher_instance *inst) static inline void skcipher_request_complete(struct skcipher_request *req, int err) { - req->base.complete(&req->base, err); + crypto_request_complete(&req->base, err); } int crypto_grab_skcipher(struct crypto_skcipher_spawn *spawn, diff --git a/include/crypto/scatterwalk.h b/include/crypto/scatterwalk.h index f2c42b4111b1..32fc4473175b 100644 --- a/include/crypto/scatterwalk.h +++ b/include/crypto/scatterwalk.h @@ -53,7 +53,7 @@ static inline struct page *scatterwalk_page(struct scatter_walk *walk) static inline void scatterwalk_unmap(void *vaddr) { - kunmap_atomic(vaddr); + kunmap_local(vaddr); } static inline void scatterwalk_start(struct scatter_walk *walk, @@ -65,7 +65,7 @@ static inline void scatterwalk_start(struct scatter_walk *walk, static inline void *scatterwalk_map(struct scatter_walk *walk) { - return kmap_atomic(scatterwalk_page(walk)) + + return kmap_local_page(scatterwalk_page(walk)) + offset_in_page(walk->offset); } diff --git a/include/crypto/xts.h b/include/crypto/xts.h index 0f8dba69feb4..15b16c4853d8 100644 --- a/include/crypto/xts.h +++ b/include/crypto/xts.h @@ -8,8 +8,8 @@ #define XTS_BLOCK_SIZE 16 -static inline int xts_check_key(struct crypto_tfm *tfm, - const u8 *key, unsigned int keylen) +static inline int xts_verify_key(struct crypto_skcipher *tfm, + const u8 *key, unsigned int keylen) { /* * key consists of keys of equal size concatenated, therefore @@ -18,24 +18,17 @@ static inline int xts_check_key(struct crypto_tfm *tfm, if (keylen % 2) return -EINVAL; - /* ensure that the AES and tweak key are not identical */ - if (fips_enabled && !crypto_memneq(key, key + (keylen / 2), keylen / 2)) - return -EINVAL; - - return 0; -} - -static inline int xts_verify_key(struct crypto_skcipher *tfm, - const u8 *key, unsigned int keylen) -{ /* - * key consists of keys of equal size concatenated, therefore - * the length must be even. + * In FIPS mode only a combined key length of either 256 or + * 512 bits is allowed, c.f. FIPS 140-3 IG C.I. */ - if (keylen % 2) + if (fips_enabled && keylen != 32 && keylen != 64) return -EINVAL; - /* ensure that the AES and tweak key are not identical */ + /* + * Ensure that the AES and tweak key are not identical when + * in FIPS mode or the FORBID_WEAK_KEYS flag is set. 
+ */ if ((fips_enabled || (crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) && !crypto_memneq(key, key + (keylen / 2), keylen / 2)) diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 5d1e961f810e..bb1d9b0e1647 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -176,7 +176,7 @@ struct crypto_async_request; struct crypto_tfm; struct crypto_type; -typedef void (*crypto_completion_t)(struct crypto_async_request *req, int err); +typedef void (*crypto_completion_t)(void *req, int err); /** * DOC: Block Cipher Context Data Structures @@ -595,7 +595,7 @@ struct crypto_wait { /* * Async ops completion helper functioons */ -void crypto_req_done(struct crypto_async_request *req, int err); +void crypto_req_done(void *req, int err); static inline int crypto_wait_req(int err, struct crypto_wait *wait) { diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index be3aedaa96dc..a59fbffa238f 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -122,7 +122,6 @@ enum qp_state { }; enum qm_hw_ver { - QM_HW_UNKNOWN = -1, QM_HW_V1 = 0x20, QM_HW_V2 = 0x21, QM_HW_V3 = 0x30, @@ -309,7 +308,8 @@ struct hisi_qm { const struct hisi_qm_err_ini *err_ini; struct hisi_qm_err_info err_info; struct hisi_qm_err_status err_status; - unsigned long misc_ctl; /* driver removing and reset sched */ + /* driver removing and reset sched */ + unsigned long misc_ctl; /* Device capability bit */ unsigned long caps; @@ -332,7 +332,6 @@ struct hisi_qm { const char *algs; bool use_sva; - bool is_frozen; resource_size_t phys_base; resource_size_t db_phys_base; diff --git a/lib/crypto/blake2s-selftest.c b/lib/crypto/blake2s-selftest.c index 7d77dea15587..d0634ed6a937 100644 --- a/lib/crypto/blake2s-selftest.c +++ b/lib/crypto/blake2s-selftest.c @@ -545,7 +545,7 @@ static const u8 blake2s_testvecs[][BLAKE2S_HASH_SIZE] __initconst = { 0xd6, 0x98, 0x6b, 0x07, 0x10, 0x65, 0x52, 0x65, }, }; -bool __init blake2s_selftest(void) +static bool __init noinline_for_stack blake2s_digest_test(void) { u8 key[BLAKE2S_KEY_SIZE]; u8 buf[ARRAY_SIZE(blake2s_testvecs)]; @@ -589,11 +589,20 @@ bool __init blake2s_selftest(void) } } + return success; +} + +static bool __init noinline_for_stack blake2s_random_test(void) +{ + struct blake2s_state state; + bool success = true; + int i, l; + for (i = 0; i < 32; ++i) { enum { TEST_ALIGNMENT = 16 }; - u8 unaligned_block[BLAKE2S_BLOCK_SIZE + TEST_ALIGNMENT - 1] + u8 blocks[BLAKE2S_BLOCK_SIZE * 2 + TEST_ALIGNMENT - 1] __aligned(TEST_ALIGNMENT); - u8 blocks[BLAKE2S_BLOCK_SIZE * 2]; + u8 *unaligned_block = blocks + BLAKE2S_BLOCK_SIZE; struct blake2s_state state1, state2; get_random_bytes(blocks, sizeof(blocks)); @@ -630,3 +639,13 @@ bool __init blake2s_selftest(void) return success; } + +bool __init blake2s_selftest(void) +{ + bool success; + + success = blake2s_digest_test(); + success &= blake2s_random_test(); + + return success; +} diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c index 39c4c6731094..3cb6bd148fa9 100644 --- a/lib/mpi/mpicoder.c +++ b/lib/mpi/mpicoder.c @@ -504,7 +504,8 @@ MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes) while (sg_miter_next(&miter)) { buff = miter.addr; - len = miter.length; + len = min_t(unsigned, miter.length, nbytes); + nbytes -= len; for (x = 0; x < len; x++) { a <<= 8; diff --git a/net/bluetooth/ecdh_helper.c b/net/bluetooth/ecdh_helper.c index 989401f116e9..0efc93fdae8a 100644 --- a/net/bluetooth/ecdh_helper.c +++ b/net/bluetooth/ecdh_helper.c @@ -25,22 +25,6 @@ 
diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h
index be3aedaa96dc..a59fbffa238f 100644
--- a/include/linux/hisi_acc_qm.h
+++ b/include/linux/hisi_acc_qm.h
@@ -122,7 +122,6 @@ enum qp_state {
 };
 
 enum qm_hw_ver {
-	QM_HW_UNKNOWN = -1,
 	QM_HW_V1 = 0x20,
 	QM_HW_V2 = 0x21,
 	QM_HW_V3 = 0x30,
@@ -309,7 +308,8 @@ struct hisi_qm {
 	const struct hisi_qm_err_ini *err_ini;
 	struct hisi_qm_err_info err_info;
 	struct hisi_qm_err_status err_status;
-	unsigned long misc_ctl; /* driver removing and reset sched */
+	/* driver removing and reset sched */
+	unsigned long misc_ctl;
 
 	/* Device capability bit */
 	unsigned long caps;
@@ -332,7 +332,6 @@ struct hisi_qm {
 	const char *algs;
 
 	bool use_sva;
-	bool is_frozen;
 
 	resource_size_t phys_base;
 	resource_size_t db_phys_base;
diff --git a/lib/crypto/blake2s-selftest.c b/lib/crypto/blake2s-selftest.c
index 7d77dea15587..d0634ed6a937 100644
--- a/lib/crypto/blake2s-selftest.c
+++ b/lib/crypto/blake2s-selftest.c
@@ -545,7 +545,7 @@ static const u8 blake2s_testvecs[][BLAKE2S_HASH_SIZE] __initconst = {
 	  0xd6, 0x98, 0x6b, 0x07, 0x10, 0x65, 0x52, 0x65, },
 };
 
-bool __init blake2s_selftest(void)
+static bool __init noinline_for_stack blake2s_digest_test(void)
 {
 	u8 key[BLAKE2S_KEY_SIZE];
 	u8 buf[ARRAY_SIZE(blake2s_testvecs)];
@@ -589,11 +589,20 @@ bool __init blake2s_selftest(void)
 		}
 	}
 
+	return success;
+}
+
+static bool __init noinline_for_stack blake2s_random_test(void)
+{
+	struct blake2s_state state;
+	bool success = true;
+	int i, l;
+
 	for (i = 0; i < 32; ++i) {
 		enum { TEST_ALIGNMENT = 16 };
-		u8 unaligned_block[BLAKE2S_BLOCK_SIZE + TEST_ALIGNMENT - 1]
+		u8 blocks[BLAKE2S_BLOCK_SIZE * 2 + TEST_ALIGNMENT - 1]
 			__aligned(TEST_ALIGNMENT);
-		u8 blocks[BLAKE2S_BLOCK_SIZE * 2];
+		u8 *unaligned_block = blocks + BLAKE2S_BLOCK_SIZE;
 		struct blake2s_state state1, state2;
 
 		get_random_bytes(blocks, sizeof(blocks));
@@ -630,3 +639,13 @@ bool __init blake2s_selftest(void)
 
 	return success;
 }
+
+bool __init blake2s_selftest(void)
+{
+	bool success;
+
+	success = blake2s_digest_test();
+	success &= blake2s_random_test();
+
+	return success;
+}
diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c
index 39c4c6731094..3cb6bd148fa9 100644
--- a/lib/mpi/mpicoder.c
+++ b/lib/mpi/mpicoder.c
@@ -504,7 +504,8 @@ MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes)
 
 	while (sg_miter_next(&miter)) {
 		buff = miter.addr;
-		len = miter.length;
+		len = min_t(unsigned, miter.length, nbytes);
+		nbytes -= len;
 
 		for (x = 0; x < len; x++) {
 			a <<= 8;
diff --git a/net/bluetooth/ecdh_helper.c b/net/bluetooth/ecdh_helper.c
index 989401f116e9..0efc93fdae8a 100644
--- a/net/bluetooth/ecdh_helper.c
+++ b/net/bluetooth/ecdh_helper.c
@@ -25,22 +25,6 @@
 #include <linux/scatterlist.h>
 #include <crypto/ecdh.h>
 
-struct ecdh_completion {
-	struct completion completion;
-	int err;
-};
-
-static void ecdh_complete(struct crypto_async_request *req, int err)
-{
-	struct ecdh_completion *res = req->data;
-
-	if (err == -EINPROGRESS)
-		return;
-
-	res->err = err;
-	complete(&res->completion);
-}
-
 static inline void swap_digits(u64 *in, u64 *out, unsigned int ndigits)
 {
 	int i;
@@ -60,9 +44,9 @@ static inline void swap_digits(u64 *in, u64 *out, unsigned int ndigits)
 int compute_ecdh_secret(struct crypto_kpp *tfm, const u8 public_key[64],
 			u8 secret[32])
 {
+	DECLARE_CRYPTO_WAIT(result);
 	struct kpp_request *req;
 	u8 *tmp;
-	struct ecdh_completion result;
 	struct scatterlist src, dst;
 	int err;
 
@@ -76,8 +60,6 @@ int compute_ecdh_secret(struct crypto_kpp *tfm, const u8 public_key[64],
 		goto free_tmp;
 	}
 
-	init_completion(&result.completion);
-
 	swap_digits((u64 *)public_key, (u64 *)tmp, 4); /* x */
 	swap_digits((u64 *)&public_key[32], (u64 *)&tmp[32], 4); /* y */
 
@@ -86,12 +68,9 @@ ...
 	kpp_request_set_input(req, &src, 64);
 	kpp_request_set_output(req, &dst, 32);
 	kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
-				 ecdh_complete, &result);
+				 crypto_req_done, &result);
 
 	err = crypto_kpp_compute_shared_secret(req);
-	if (err == -EINPROGRESS) {
-		wait_for_completion(&result.completion);
-		err = result.err;
-	}
+	err = crypto_wait_req(err, &result);
 	if (err < 0) {
 		pr_err("alg: ecdh: compute shared secret failed. err %d\n", err);
@@ -165,9 +144,9 @@ free_tmp:
  */
 int generate_ecdh_public_key(struct crypto_kpp *tfm, u8 public_key[64])
 {
+	DECLARE_CRYPTO_WAIT(result);
 	struct kpp_request *req;
 	u8 *tmp;
-	struct ecdh_completion result;
 	struct scatterlist dst;
 	int err;
 
@@ -181,18 +160,14 @@ int generate_ecdh_public_key(struct crypto_kpp *tfm, u8 public_key[64])
 		goto free_tmp;
 	}
 
-	init_completion(&result.completion);
 	sg_init_one(&dst, tmp, 64);
 	kpp_request_set_input(req, NULL, 0);
 	kpp_request_set_output(req, &dst, 64);
 	kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
-				 ecdh_complete, &result);
+				 crypto_req_done, &result);
 
 	err = crypto_kpp_generate_public_key(req);
-	if (err == -EINPROGRESS) {
-		wait_for_completion(&result.completion);
-		err = result.err;
-	}
+	err = crypto_wait_req(err, &result);
 
 	if (err < 0)
 		goto free_all;
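The ecdh_helper conversion above is the typical caller-side result of that cleanup. A hedged sketch of the same synchronous-wait pattern, shown here for a skcipher request and with an invented function name:

	/* Illustrative only: run one skcipher request synchronously with the
	 * stock wait helpers instead of a hand-rolled completion. */
	#include <crypto/skcipher.h>
	#include <linux/crypto.h>

	static int do_one_request(struct skcipher_request *req)
	{
		DECLARE_CRYPTO_WAIT(wait);	/* bundles a completion and an error code */

		skcipher_request_set_callback(req,
					      CRYPTO_TFM_REQ_MAY_BACKLOG |
					      CRYPTO_TFM_REQ_MAY_SLEEP,
					      crypto_req_done, &wait);

		/* crypto_wait_req() turns -EINPROGRESS/-EBUSY into a sleep on
		 * the completion and returns the request's final status. */
		return crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
	}

DECLARE_CRYPTO_WAIT() carries the completion and error code that the removed ecdh_completion and dh_completion structs used to carry by hand.
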
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index ee4e578c7f20..015c0f4ec5ba 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -117,11 +117,11 @@ static int ip_clear_mutable_options(const struct iphdr *iph, __be32 *daddr)
 	return 0;
 }
 
-static void ah_output_done(struct crypto_async_request *base, int err)
+static void ah_output_done(void *data, int err)
 {
 	u8 *icv;
 	struct iphdr *iph;
-	struct sk_buff *skb = base->data;
+	struct sk_buff *skb = data;
 	struct xfrm_state *x = skb_dst(skb)->xfrm;
 	struct ah_data *ahp = x->data;
 	struct iphdr *top_iph = ip_hdr(skb);
@@ -262,12 +262,12 @@ out:
 	return err;
 }
 
-static void ah_input_done(struct crypto_async_request *base, int err)
+static void ah_input_done(void *data, int err)
 {
 	u8 *auth_data;
 	u8 *icv;
 	struct iphdr *work_iph;
-	struct sk_buff *skb = base->data;
+	struct sk_buff *skb = data;
 	struct xfrm_state *x = xfrm_input_state(skb);
 	struct ah_data *ahp = x->data;
 	struct ip_auth_hdr *ah = ip_auth_hdr(skb);
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 52c8047efedb..ba06ed42e428 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -244,9 +244,9 @@ static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb)
 {
 }
 #endif
 
-static void esp_output_done(struct crypto_async_request *base, int err)
+static void esp_output_done(void *data, int err)
 {
-	struct sk_buff *skb = base->data;
+	struct sk_buff *skb = data;
 	struct xfrm_offload *xo = xfrm_offload(skb);
 	void *tmp;
 	struct xfrm_state *x;
@@ -332,12 +332,12 @@ static struct ip_esp_hdr *esp_output_set_extra(struct sk_buff *skb,
 	return esph;
 }
 
-static void esp_output_done_esn(struct crypto_async_request *base, int err)
+static void esp_output_done_esn(void *data, int err)
 {
-	struct sk_buff *skb = base->data;
+	struct sk_buff *skb = data;
 
 	esp_output_restore_header(skb);
-	esp_output_done(base, err);
+	esp_output_done(data, err);
 }
 
 static struct ip_esp_hdr *esp_output_udp_encap(struct sk_buff *skb,
@@ -830,9 +830,9 @@ out:
 }
 EXPORT_SYMBOL_GPL(esp_input_done2);
 
-static void esp_input_done(struct crypto_async_request *base, int err)
+static void esp_input_done(void *data, int err)
 {
-	struct sk_buff *skb = base->data;
+	struct sk_buff *skb = data;
 
 	xfrm_input_resume(skb, esp_input_done2(skb, err));
 }
@@ -860,12 +860,12 @@ static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi)
 	}
 }
 
-static void esp_input_done_esn(struct crypto_async_request *base, int err)
+static void esp_input_done_esn(void *data, int err)
 {
-	struct sk_buff *skb = base->data;
+	struct sk_buff *skb = data;
 
 	esp_input_restore_header(skb);
-	esp_input_done(base, err);
+	esp_input_done(data, err);
 }
 
 /*
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 5228d2716289..01005035ad10 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -281,12 +281,12 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir)
 	return 0;
 }
 
-static void ah6_output_done(struct crypto_async_request *base, int err)
+static void ah6_output_done(void *data, int err)
 {
 	int extlen;
 	u8 *iph_base;
 	u8 *icv;
-	struct sk_buff *skb = base->data;
+	struct sk_buff *skb = data;
 	struct xfrm_state *x = skb_dst(skb)->xfrm;
 	struct ah_data *ahp = x->data;
 	struct ipv6hdr *top_iph = ipv6_hdr(skb);
@@ -451,12 +451,12 @@ out:
 	return err;
 }
 
-static void ah6_input_done(struct crypto_async_request *base, int err)
+static void ah6_input_done(void *data, int err)
 {
 	u8 *auth_data;
 	u8 *icv;
 	u8 *work_iph;
-	struct sk_buff *skb = base->data;
+	struct sk_buff *skb = data;
 	struct xfrm_state *x = xfrm_input_state(skb);
 	struct ah_data *ahp = x->data;
 	struct ip_auth_hdr *ah = ip_auth_hdr(skb);
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 14ed868680c6..fddd0cbdede1 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -278,9 +278,9 @@ static void esp_output_encap_csum(struct sk_buff *skb)
 	}
 }
 
-static void esp_output_done(struct crypto_async_request *base, int err)
+static void esp_output_done(void *data, int err)
 {
-	struct sk_buff *skb = base->data;
+	struct sk_buff *skb = data;
 	struct xfrm_offload *xo = xfrm_offload(skb);
 	void *tmp;
 	struct xfrm_state *x;
@@ -368,12 +368,12 @@ static struct ip_esp_hdr *esp_output_set_esn(struct sk_buff *skb,
 	return esph;
 }
 
-static void esp_output_done_esn(struct crypto_async_request *base, int err)
+static void esp_output_done_esn(void *data, int err)
 {
-	struct sk_buff *skb = base->data;
+	struct sk_buff *skb = data;
 
 	esp_output_restore_header(skb);
-	esp_output_done(base, err);
+	esp_output_done(data, err);
 }
 
 static struct ip_esp_hdr *esp6_output_udp_encap(struct sk_buff *skb,
@@ -879,9 +879,9 @@ out:
 }
 EXPORT_SYMBOL_GPL(esp6_input_done2);
 
-static void esp_input_done(struct crypto_async_request *base, int err)
+static void esp_input_done(void *data, int err)
 {
-	struct sk_buff *skb = base->data;
+	struct sk_buff *skb = data;
 
 	xfrm_input_resume(skb, esp6_input_done2(skb, err));
 }
@@ -909,12 +909,12 @@ static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi)
 	}
 }
 
-static void esp_input_done_esn(struct crypto_async_request *base, int err)
+static void esp_input_done_esn(void *data, int err)
 {
-	struct sk_buff *skb = base->data;
+	struct sk_buff *skb = data;
 
 	esp_input_restore_header(skb);
-	esp_input_done(base, err);
+	esp_input_done(data, err);
 }
 
 static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index d67440de011e..577fa5af33ec 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -267,10 +267,10 @@ static int tipc_aead_encrypt(struct tipc_aead *aead, struct sk_buff *skb,
 			     struct tipc_bearer *b,
 			     struct tipc_media_addr *dst,
 			     struct tipc_node *__dnode);
-static void tipc_aead_encrypt_done(struct crypto_async_request *base, int err);
+static void tipc_aead_encrypt_done(void *data, int err);
 static int tipc_aead_decrypt(struct net *net, struct tipc_aead *aead,
 			     struct sk_buff *skb, struct tipc_bearer *b);
-static void tipc_aead_decrypt_done(struct crypto_async_request *base, int err);
+static void tipc_aead_decrypt_done(void *data, int err);
 static inline int tipc_ehdr_size(struct tipc_ehdr *ehdr);
 static int tipc_ehdr_build(struct net *net, struct tipc_aead *aead,
 			   u8 tx_key, struct sk_buff *skb,
@@ -830,9 +830,9 @@ exit:
 	return rc;
 }
 
-static void tipc_aead_encrypt_done(struct crypto_async_request *base, int err)
+static void tipc_aead_encrypt_done(void *data, int err)
 {
-	struct sk_buff *skb = base->data;
+	struct sk_buff *skb = data;
 	struct tipc_crypto_tx_ctx *tx_ctx = TIPC_SKB_CB(skb)->crypto_ctx;
 	struct tipc_bearer *b = tx_ctx->bearer;
 	struct tipc_aead *aead = tx_ctx->aead;
@@ -954,9 +954,9 @@ exit:
 	return rc;
 }
 
-static void tipc_aead_decrypt_done(struct crypto_async_request *base, int err)
+static void tipc_aead_decrypt_done(void *data, int err)
 {
-	struct sk_buff *skb = base->data;
+	struct sk_buff *skb = data;
 	struct tipc_crypto_rx_ctx *rx_ctx = TIPC_SKB_CB(skb)->crypto_ctx;
 	struct tipc_bearer *b = rx_ctx->bearer;
 	struct tipc_aead *aead = rx_ctx->aead;
diff --git a/net/tls/tls.h b/net/tls/tls.h
index 0e840a0c3437..804c3880d028 100644
--- a/net/tls/tls.h
+++ b/net/tls/tls.h
@@ -70,6 +70,8 @@ struct tls_rec {
 	char content_type;
 	struct scatterlist sg_content_type;
 
+	struct sock *sk;
+
 	char aad_space[TLS_AAD_SPACE_SIZE];
 	u8 iv_data[MAX_IV_SIZE];
 	struct aead_request aead_req;
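Because the callback argument is now the request or record itself rather than the socket, the TLS changes below have to stash the socket where the handlers can reach it: struct tls_rec gains an sk member (used by tls_encrypt_done()) and struct tls_decrypt_ctx, which sits directly behind the aead_request in the same allocation, gains one as well. A sketch of how the decrypt path recovers that context from the request pointer (the helper name is invented; it assumes the file-local struct tls_decrypt_ctx is in scope):

	/* Illustrative helper mirroring the pointer arithmetic in the
	 * tls_sw.c hunks below: the tls_decrypt_ctx sits right after the
	 * variable-sized aead_request, rounded up to its alignment. */
	#include <crypto/aead.h>
	#include <linux/kernel.h>

	static struct tls_decrypt_ctx *tls_dctx_from_req(struct aead_request *aead_req)
	{
		struct crypto_aead *aead = crypto_aead_reqtfm(aead_req);
		int aead_size = sizeof(*aead_req) + crypto_aead_reqsize(aead);

		/* Must use the same rounding as the allocation side in
		 * tls_decrypt_sg() so both agree on the context offset. */
		aead_size = ALIGN(aead_size, __alignof__(struct tls_decrypt_ctx));
		return (struct tls_decrypt_ctx *)((u8 *)aead_req + aead_size);
	}
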
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index a83d2b4275fa..7c5de4afbe99 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -38,6 +38,7 @@
 #include <linux/bug.h>
 #include <linux/sched/signal.h>
 #include <linux/module.h>
+#include <linux/kernel.h>
 #include <linux/splice.h>
 
 #include <crypto/aead.h>
@@ -57,6 +58,7 @@ struct tls_decrypt_arg {
 };
 
 struct tls_decrypt_ctx {
+	struct sock *sk;
 	u8 iv[MAX_IV_SIZE];
 	u8 aad[TLS_MAX_AAD_SIZE];
 	u8 tail;
@@ -177,18 +179,25 @@ static int tls_padding_length(struct tls_prot_info *prot, struct sk_buff *skb,
 	return sub;
 }
 
-static void tls_decrypt_done(struct crypto_async_request *req, int err)
+static void tls_decrypt_done(void *data, int err)
 {
-	struct aead_request *aead_req = (struct aead_request *)req;
+	struct aead_request *aead_req = data;
+	struct crypto_aead *aead = crypto_aead_reqtfm(aead_req);
 	struct scatterlist *sgout = aead_req->dst;
 	struct scatterlist *sgin = aead_req->src;
 	struct tls_sw_context_rx *ctx;
+	struct tls_decrypt_ctx *dctx;
 	struct tls_context *tls_ctx;
 	struct scatterlist *sg;
 	unsigned int pages;
 	struct sock *sk;
+	int aead_size;
 
-	sk = (struct sock *)req->data;
+	aead_size = sizeof(*aead_req) + crypto_aead_reqsize(aead);
+	aead_size = ALIGN(aead_size, __alignof__(*dctx));
+	dctx = (void *)((u8 *)aead_req + aead_size);
+
+	sk = dctx->sk;
 	tls_ctx = tls_get_ctx(sk);
 	ctx = tls_sw_ctx_rx(tls_ctx);
 
@@ -240,7 +249,7 @@ static int tls_do_decryption(struct sock *sk,
 	if (darg->async) {
 		aead_request_set_callback(aead_req,
 					  CRYPTO_TFM_REQ_MAY_BACKLOG,
-					  tls_decrypt_done, sk);
+					  tls_decrypt_done, aead_req);
 		atomic_inc(&ctx->decrypt_pending);
 	} else {
 		aead_request_set_callback(aead_req,
@@ -336,6 +345,8 @@ static struct tls_rec *tls_get_rec(struct sock *sk)
 	sg_set_buf(&rec->sg_aead_out[0], rec->aad_space, prot->aad_size);
 	sg_unmark_end(&rec->sg_aead_out[1]);
 
+	rec->sk = sk;
+
 	return rec;
 }
 
@@ -417,22 +428,25 @@ tx_err:
 	return rc;
 }
 
-static void tls_encrypt_done(struct crypto_async_request *req, int err)
+static void tls_encrypt_done(void *data, int err)
 {
-	struct aead_request *aead_req = (struct aead_request *)req;
-	struct sock *sk = req->data;
-	struct tls_context *tls_ctx = tls_get_ctx(sk);
-	struct tls_prot_info *prot = &tls_ctx->prot_info;
-	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
+	struct tls_sw_context_tx *ctx;
+	struct tls_context *tls_ctx;
+	struct tls_prot_info *prot;
+	struct tls_rec *rec = data;
 	struct scatterlist *sge;
 	struct sk_msg *msg_en;
-	struct tls_rec *rec;
 	bool ready = false;
+	struct sock *sk;
 	int pending;
 
-	rec = container_of(aead_req, struct tls_rec, aead_req);
 	msg_en = &rec->msg_encrypted;
 
+	sk = rec->sk;
+	tls_ctx = tls_get_ctx(sk);
+	prot = &tls_ctx->prot_info;
+	ctx = tls_sw_ctx_tx(tls_ctx);
+
 	sge = sk_msg_elem(msg_en, msg_en->sg.curr);
 	sge->offset -= prot->prepend_size;
 	sge->length += prot->prepend_size;
@@ -520,7 +534,7 @@ static int tls_do_encryption(struct sock *sk,
 			       data_len, rec->iv_data);
 
 	aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG,
-				  tls_encrypt_done, sk);
+				  tls_encrypt_done, rec);
 
 	/* Add the record in tx_list */
 	list_add_tail((struct list_head *)&rec->list, &ctx->tx_list);
@@ -1485,6 +1499,7 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov,
 	 * Both structs are variable length.
 	 */
 	aead_size = sizeof(*aead_req) + crypto_aead_reqsize(ctx->aead_recv);
+	aead_size = ALIGN(aead_size, __alignof__(*dctx));
 	mem = kmalloc(aead_size + struct_size(dctx, sg, n_sgin + n_sgout),
 		      sk->sk_allocation);
 	if (!mem) {
@@ -1495,6 +1510,7 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov,
 	/* Segment the allocated memory */
 	aead_req = (struct aead_request *)mem;
 	dctx = (struct tls_decrypt_ctx *)(mem + aead_size);
+	dctx->sk = sk;
 	sgin = &dctx->sg[0];
 	sgout = &dctx->sg[n_sgin];
diff --git a/security/keys/dh.c b/security/keys/dh.c
index b339760a31dd..da64c358474b 100644
--- a/security/keys/dh.c
+++ b/security/keys/dh.c
@@ -64,22 +64,6 @@ static void dh_free_data(struct dh *dh)
 	kfree_sensitive(dh->g);
 }
 
-struct dh_completion {
-	struct completion completion;
-	int err;
-};
-
-static void dh_crypto_done(struct crypto_async_request *req, int err)
-{
-	struct dh_completion *compl = req->data;
-
-	if (err == -EINPROGRESS)
-		return;
-
-	compl->err = err;
-	complete(&compl->completion);
-}
-
 static int kdf_alloc(struct crypto_shash **hash, char *hashname)
 {
 	struct crypto_shash *tfm;
@@ -146,7 +130,7 @@ long __keyctl_dh_compute(struct keyctl_dh_params __user *params,
 	struct keyctl_dh_params pcopy;
 	struct dh dh_inputs;
 	struct scatterlist outsg;
-	struct dh_completion compl;
+	DECLARE_CRYPTO_WAIT(compl);
 	struct crypto_kpp *tfm;
 	struct kpp_request *req;
 	uint8_t *secret;
@@ -266,22 +250,18 @@ long __keyctl_dh_compute(struct keyctl_dh_params __user *params,
 	kpp_request_set_input(req, NULL, 0);
 	kpp_request_set_output(req, &outsg, outlen);
 
-	init_completion(&compl.completion);
 	kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
 				 CRYPTO_TFM_REQ_MAY_SLEEP,
-				 dh_crypto_done, &compl);
+				 crypto_req_done, &compl);
 
 	/*
 	 * For DH, generate_public_key and generate_shared_secret are
 	 * the same calculation
 	 */
 	ret = crypto_kpp_generate_public_key(req);
-	if (ret == -EINPROGRESS) {
-		wait_for_completion(&compl.completion);
-		ret = compl.err;
-		if (ret)
-			goto out6;
-	}
+	ret = crypto_wait_req(ret, &compl);
+	if (ret)
+		goto out6;
 
 	if (kdfcopy) {
 		/*