crypto: x86/chacha20 - Add an 8-block AVX-512VL variant

This variant is similar to the AVX2 version, but benefits from the AVX-512 rotate instructions and the additional registers, so it can operate without any data on the stack. It uses ymm registers only to avoid the massive core throttling on Skylake-X platforms. Nonetheless, it brings a ~30% speed improvement compared to the AVX2 variant for random encryption lengths. The AVX2 version uses "rep movsb" for partial block XORing via the stack. With AVX-512, the new "vmovdqu8" can do this much more efficiently. The associated "kmov" instructions to work with dynamic masks are not part of the AVX-512VL instruction set, hence we depend on AVX-512BW as well. Given that the major AVX-512VL architectures provide AVX-512BW and this extension does not affect core clocking, this seems to be no problem at least for now. Signed-off-by: Martin Willi <martin@strongswan.org> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
author: Martin Willi <martin@strongswan.org> 2018-11-20 17:30:48 +0100
committer: Herbert Xu <herbert@gondor.apana.org.au> 2018-11-29 16:27:04 +0800
commit: cee7a36ecb5bafef8c87fb2c10641e6125044154 (patch)
tree: 7e52243f3733ef595aeaa7df6094a8db30fb3747 /arch/x86/crypto/Makefile
parent: 059c2a4d8e164dccc3078e49e7f286023b019a98 (diff)
1 files changed, 5 insertions, 0 deletions
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index a4b0007a54e1..ce4e43642984 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -8,6 +8,7 @@ OBJECT_FILES_NON_STANDARD := y
 avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no)
 avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
 				$(comma)4)$(comma)%ymm2,yes,no)
+avx512_supported :=$(call as-instr,vpmovm2b %k1$(comma)%zmm5,yes,no)
 sha1_ni_supported :=$(call as-instr,sha1msg1 %xmm0$(comma)%xmm1,yes,no)
 sha256_ni_supported :=$(call as-instr,sha256msg1 %xmm0$(comma)%xmm1,yes,no)
 
@@ -103,6 +104,10 @@ ifeq ($(avx2_supported),yes)
 	morus1280-avx2-y := morus1280-avx2-asm.o morus1280-avx2-glue.o
 endif
 
+ifeq ($(avx512_supported),yes)
+	chacha20-x86_64-y += chacha20-avx512vl-x86_64.o
+endif
+
 aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o
 aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o aes_ctrby8_avx-x86_64.o
 ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
author	Martin Willi <martin@strongswan.org>	2018-11-20 17:30:48 +0100
committer	Herbert Xu <herbert@gondor.apana.org.au>	2018-11-29 16:27:04 +0800
commit	cee7a36ecb5bafef8c87fb2c10641e6125044154 (patch)
tree	7e52243f3733ef595aeaa7df6094a8db30fb3747 /arch/x86/crypto/Makefile
parent	059c2a4d8e164dccc3078e49e7f286023b019a98 (diff)