diff options
author | Ard Biesheuvel <ard.biesheuvel@linaro.org> | 2018-08-23 15:48:51 +0100 |
---|---|---|
committer | Herbert Xu <herbert@gondor.apana.org.au> | 2018-09-04 11:37:04 +0800 |
commit | 00227e3a1d0855e9777cf53c52b842503435e22b (patch) | |
tree | bb7737ca90746dc304e7c5cfae17e396e0912b75 /arch/arm/crypto/ghash-ce-glue.c | |
parent | ab8085c130edd65be0d95cc95c28b51c4c6faf9d (diff) |
crypto: arm/ghash-ce - implement support for 4-way aggregation
Speed up the GHASH algorithm based on 64-bit polynomial multiplication
by adding support for 4-way aggregation. This improves throughput by
~85% on Cortex-A53, from 1.7 cycles per byte to 0.9 cycles per byte.
When combined with AES into GCM, throughput improves by ~25%, from
3.8 cycles per byte to 3.0 cycles per byte.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch/arm/crypto/ghash-ce-glue.c')
-rw-r--r-- | arch/arm/crypto/ghash-ce-glue.c | 38 |
1 files changed, 27 insertions, 11 deletions
diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c index 8930fc4e7c22..b7d30b6cf49c 100644 --- a/arch/arm/crypto/ghash-ce-glue.c +++ b/arch/arm/crypto/ghash-ce-glue.c @@ -1,7 +1,7 @@ /* * Accelerated GHASH implementation with ARMv8 vmull.p64 instructions. * - * Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org> + * Copyright (C) 2015 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published @@ -28,8 +28,10 @@ MODULE_ALIAS_CRYPTO("ghash"); #define GHASH_DIGEST_SIZE 16 struct ghash_key { - u64 a; - u64 b; + u64 h[2]; + u64 h2[2]; + u64 h3[2]; + u64 h4[2]; }; struct ghash_desc_ctx { @@ -117,26 +119,40 @@ static int ghash_final(struct shash_desc *desc, u8 *dst) return 0; } +static void ghash_reflect(u64 h[], const be128 *k) +{ + u64 carry = be64_to_cpu(k->a) >> 63; + + h[0] = (be64_to_cpu(k->b) << 1) | carry; + h[1] = (be64_to_cpu(k->a) << 1) | (be64_to_cpu(k->b) >> 63); + + if (carry) + h[1] ^= 0xc200000000000000UL; +} + static int ghash_setkey(struct crypto_shash *tfm, const u8 *inkey, unsigned int keylen) { struct ghash_key *key = crypto_shash_ctx(tfm); - u64 a, b; + be128 h, k; if (keylen != GHASH_BLOCK_SIZE) { crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; } - /* perform multiplication by 'x' in GF(2^128) */ - b = get_unaligned_be64(inkey); - a = get_unaligned_be64(inkey + 8); + memcpy(&k, inkey, GHASH_BLOCK_SIZE); + ghash_reflect(key->h, &k); + + h = k; + gf128mul_lle(&h, &k); + ghash_reflect(key->h2, &h); - key->a = (a << 1) | (b >> 63); - key->b = (b << 1) | (a >> 63); + gf128mul_lle(&h, &k); + ghash_reflect(key->h3, &h); - if (b >> 63) - key->b ^= 0xc200000000000000UL; + gf128mul_lle(&h, &k); + ghash_reflect(key->h4, &h); return 0; } |