summaryrefslogtreecommitdiff
path: root/arch/x86/crypto/camellia-aesni-avx-asm_64.S
diff options
context:
space:
mode:
authorArd Biesheuvel <ardb@kernel.org>2021-01-05 17:47:49 +0100
committerHerbert Xu <herbert@gondor.apana.org.au>2021-01-14 17:10:27 +1100
commit55a7e88f016873ef1717295d8460416b1ccd05a5 (patch)
tree055ecb3a017497622e6e94d8868f96410b34ba5d /arch/x86/crypto/camellia-aesni-avx-asm_64.S
parent4d6a5a4b1e4a7606bf666ce694671f6897bdabaa (diff)
crypto: x86/camellia - switch to XTS template
Now that the XTS template can wrap accelerated ECB modes, it can be used to implement Camellia in XTS mode as well, which turns out to be at least as fast, and sometimes even faster. Acked-by: Eric Biggers <ebiggers@google.com> Signed-off-by: Ard Biesheuvel <ardb@kernel.org> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch/x86/crypto/camellia-aesni-avx-asm_64.S')
-rw-r--r--arch/x86/crypto/camellia-aesni-avx-asm_64.S181
1 files changed, 0 insertions, 181 deletions
diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
index ecc0a9a905c4..471c34e6cac2 100644
--- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
@@ -17,7 +17,6 @@
#include <linux/linkage.h>
#include <asm/frame.h>
-#include <asm/nospec-branch.h>
#define CAMELLIA_TABLE_BYTE_LEN 272
@@ -593,10 +592,6 @@ SYM_FUNC_END(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
.Lbswap128_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
-/* For XTS mode IV generation */
-.Lxts_gf128mul_and_shl1_mask:
- .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
-
/*
* pre-SubByte transform
*
@@ -1111,179 +1106,3 @@ SYM_FUNC_START(camellia_ctr_16way)
FRAME_END
ret;
SYM_FUNC_END(camellia_ctr_16way)
-
-#define gf128mul_x_ble(iv, mask, tmp) \
- vpsrad $31, iv, tmp; \
- vpaddq iv, iv, iv; \
- vpshufd $0x13, tmp, tmp; \
- vpand mask, tmp, tmp; \
- vpxor tmp, iv, iv;
-
-.align 8
-SYM_FUNC_START_LOCAL(camellia_xts_crypt_16way)
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst (16 blocks)
- * %rdx: src (16 blocks)
- * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
- * %r8: index for input whitening key
- * %r9: pointer to __camellia_enc_blk16 or __camellia_dec_blk16
- */
- FRAME_BEGIN
-
- subq $(16 * 16), %rsp;
- movq %rsp, %rax;
-
- vmovdqa .Lxts_gf128mul_and_shl1_mask, %xmm14;
-
- /* load IV */
- vmovdqu (%rcx), %xmm0;
- vpxor 0 * 16(%rdx), %xmm0, %xmm15;
- vmovdqu %xmm15, 15 * 16(%rax);
- vmovdqu %xmm0, 0 * 16(%rsi);
-
- /* construct IVs */
- gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
- vpxor 1 * 16(%rdx), %xmm0, %xmm15;
- vmovdqu %xmm15, 14 * 16(%rax);
- vmovdqu %xmm0, 1 * 16(%rsi);
-
- gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
- vpxor 2 * 16(%rdx), %xmm0, %xmm13;
- vmovdqu %xmm0, 2 * 16(%rsi);
-
- gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
- vpxor 3 * 16(%rdx), %xmm0, %xmm12;
- vmovdqu %xmm0, 3 * 16(%rsi);
-
- gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
- vpxor 4 * 16(%rdx), %xmm0, %xmm11;
- vmovdqu %xmm0, 4 * 16(%rsi);
-
- gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
- vpxor 5 * 16(%rdx), %xmm0, %xmm10;
- vmovdqu %xmm0, 5 * 16(%rsi);
-
- gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
- vpxor 6 * 16(%rdx), %xmm0, %xmm9;
- vmovdqu %xmm0, 6 * 16(%rsi);
-
- gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
- vpxor 7 * 16(%rdx), %xmm0, %xmm8;
- vmovdqu %xmm0, 7 * 16(%rsi);
-
- gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
- vpxor 8 * 16(%rdx), %xmm0, %xmm7;
- vmovdqu %xmm0, 8 * 16(%rsi);
-
- gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
- vpxor 9 * 16(%rdx), %xmm0, %xmm6;
- vmovdqu %xmm0, 9 * 16(%rsi);
-
- gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
- vpxor 10 * 16(%rdx), %xmm0, %xmm5;
- vmovdqu %xmm0, 10 * 16(%rsi);
-
- gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
- vpxor 11 * 16(%rdx), %xmm0, %xmm4;
- vmovdqu %xmm0, 11 * 16(%rsi);
-
- gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
- vpxor 12 * 16(%rdx), %xmm0, %xmm3;
- vmovdqu %xmm0, 12 * 16(%rsi);
-
- gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
- vpxor 13 * 16(%rdx), %xmm0, %xmm2;
- vmovdqu %xmm0, 13 * 16(%rsi);
-
- gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
- vpxor 14 * 16(%rdx), %xmm0, %xmm1;
- vmovdqu %xmm0, 14 * 16(%rsi);
-
- gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
- vpxor 15 * 16(%rdx), %xmm0, %xmm15;
- vmovdqu %xmm15, 0 * 16(%rax);
- vmovdqu %xmm0, 15 * 16(%rsi);
-
- gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
- vmovdqu %xmm0, (%rcx);
-
- /* inpack16_pre: */
- vmovq (key_table)(CTX, %r8, 8), %xmm15;
- vpshufb .Lpack_bswap, %xmm15, %xmm15;
- vpxor 0 * 16(%rax), %xmm15, %xmm0;
- vpxor %xmm1, %xmm15, %xmm1;
- vpxor %xmm2, %xmm15, %xmm2;
- vpxor %xmm3, %xmm15, %xmm3;
- vpxor %xmm4, %xmm15, %xmm4;
- vpxor %xmm5, %xmm15, %xmm5;
- vpxor %xmm6, %xmm15, %xmm6;
- vpxor %xmm7, %xmm15, %xmm7;
- vpxor %xmm8, %xmm15, %xmm8;
- vpxor %xmm9, %xmm15, %xmm9;
- vpxor %xmm10, %xmm15, %xmm10;
- vpxor %xmm11, %xmm15, %xmm11;
- vpxor %xmm12, %xmm15, %xmm12;
- vpxor %xmm13, %xmm15, %xmm13;
- vpxor 14 * 16(%rax), %xmm15, %xmm14;
- vpxor 15 * 16(%rax), %xmm15, %xmm15;
-
- CALL_NOSPEC r9;
-
- addq $(16 * 16), %rsp;
-
- vpxor 0 * 16(%rsi), %xmm7, %xmm7;
- vpxor 1 * 16(%rsi), %xmm6, %xmm6;
- vpxor 2 * 16(%rsi), %xmm5, %xmm5;
- vpxor 3 * 16(%rsi), %xmm4, %xmm4;
- vpxor 4 * 16(%rsi), %xmm3, %xmm3;
- vpxor 5 * 16(%rsi), %xmm2, %xmm2;
- vpxor 6 * 16(%rsi), %xmm1, %xmm1;
- vpxor 7 * 16(%rsi), %xmm0, %xmm0;
- vpxor 8 * 16(%rsi), %xmm15, %xmm15;
- vpxor 9 * 16(%rsi), %xmm14, %xmm14;
- vpxor 10 * 16(%rsi), %xmm13, %xmm13;
- vpxor 11 * 16(%rsi), %xmm12, %xmm12;
- vpxor 12 * 16(%rsi), %xmm11, %xmm11;
- vpxor 13 * 16(%rsi), %xmm10, %xmm10;
- vpxor 14 * 16(%rsi), %xmm9, %xmm9;
- vpxor 15 * 16(%rsi), %xmm8, %xmm8;
- write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0,
- %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
- %xmm8, %rsi);
-
- FRAME_END
- ret;
-SYM_FUNC_END(camellia_xts_crypt_16way)
-
-SYM_FUNC_START(camellia_xts_enc_16way)
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst (16 blocks)
- * %rdx: src (16 blocks)
- * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
- */
- xorl %r8d, %r8d; /* input whitening key, 0 for enc */
-
- leaq __camellia_enc_blk16, %r9;
-
- jmp camellia_xts_crypt_16way;
-SYM_FUNC_END(camellia_xts_enc_16way)
-
-SYM_FUNC_START(camellia_xts_dec_16way)
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst (16 blocks)
- * %rdx: src (16 blocks)
- * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
- */
-
- cmpl $16, key_length(CTX);
- movl $32, %r8d;
- movl $24, %eax;
- cmovel %eax, %r8d; /* input whitening key, last for dec */
-
- leaq __camellia_dec_blk16, %r9;
-
- jmp camellia_xts_crypt_16way;
-SYM_FUNC_END(camellia_xts_dec_16way)