1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
|
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Scalar AES core transform
*
* Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/cache.h>
.text
rk .req x0
out .req x1
in .req x2
rounds .req x3
tt .req x2
.macro __pair1, sz, op, reg0, reg1, in0, in1e, in1d, shift
.ifc \op\shift, b0
ubfiz \reg0, \in0, #2, #8
ubfiz \reg1, \in1e, #2, #8
.else
ubfx \reg0, \in0, #\shift, #8
ubfx \reg1, \in1e, #\shift, #8
.endif
/*
* AArch64 cannot do byte size indexed loads from a table containing
* 32-bit quantities, i.e., 'ldrb w12, [tt, w12, uxtw #2]' is not a
* valid instruction. So perform the shift explicitly first for the
* high bytes (the low byte is shifted implicitly by using ubfiz rather
* than ubfx above)
*/
.ifnc \op, b
ldr \reg0, [tt, \reg0, uxtw #2]
ldr \reg1, [tt, \reg1, uxtw #2]
.else
.if \shift > 0
lsl \reg0, \reg0, #2
lsl \reg1, \reg1, #2
.endif
ldrb \reg0, [tt, \reg0, uxtw]
ldrb \reg1, [tt, \reg1, uxtw]
.endif
.endm
.macro __pair0, sz, op, reg0, reg1, in0, in1e, in1d, shift
ubfx \reg0, \in0, #\shift, #8
ubfx \reg1, \in1d, #\shift, #8
ldr\op \reg0, [tt, \reg0, uxtw #\sz]
ldr\op \reg1, [tt, \reg1, uxtw #\sz]
.endm
.macro __hround, out0, out1, in0, in1, in2, in3, t0, t1, enc, sz, op
ldp \out0, \out1, [rk], #8
__pair\enc \sz, \op, w12, w13, \in0, \in1, \in3, 0
__pair\enc \sz, \op, w14, w15, \in1, \in2, \in0, 8
__pair\enc \sz, \op, w16, w17, \in2, \in3, \in1, 16
__pair\enc \sz, \op, \t0, \t1, \in3, \in0, \in2, 24
eor \out0, \out0, w12
eor \out1, \out1, w13
eor \out0, \out0, w14, ror #24
eor \out1, \out1, w15, ror #24
eor \out0, \out0, w16, ror #16
eor \out1, \out1, w17, ror #16
eor \out0, \out0, \t0, ror #8
eor \out1, \out1, \t1, ror #8
.endm
.macro fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
__hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
__hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op
.endm
.macro iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
__hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
__hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op
.endm
.macro do_crypt, round, ttab, ltab, bsz
ldp w4, w5, [in]
ldp w6, w7, [in, #8]
ldp w8, w9, [rk], #16
ldp w10, w11, [rk, #-8]
CPU_BE( rev w4, w4 )
CPU_BE( rev w5, w5 )
CPU_BE( rev w6, w6 )
CPU_BE( rev w7, w7 )
eor w4, w4, w8
eor w5, w5, w9
eor w6, w6, w10
eor w7, w7, w11
adr_l tt, \ttab
tbnz rounds, #1, 1f
0: \round w8, w9, w10, w11, w4, w5, w6, w7
\round w4, w5, w6, w7, w8, w9, w10, w11
1: subs rounds, rounds, #4
\round w8, w9, w10, w11, w4, w5, w6, w7
b.ls 3f
2: \round w4, w5, w6, w7, w8, w9, w10, w11
b 0b
3: adr_l tt, \ltab
\round w4, w5, w6, w7, w8, w9, w10, w11, \bsz, b
CPU_BE( rev w4, w4 )
CPU_BE( rev w5, w5 )
CPU_BE( rev w6, w6 )
CPU_BE( rev w7, w7 )
stp w4, w5, [out]
stp w6, w7, [out, #8]
ret
.endm
SYM_FUNC_START(__aes_arm64_encrypt)
do_crypt fround, crypto_ft_tab, crypto_ft_tab + 1, 2
SYM_FUNC_END(__aes_arm64_encrypt)
.align 5
SYM_FUNC_START(__aes_arm64_decrypt)
do_crypt iround, crypto_it_tab, crypto_aes_inv_sbox, 0
SYM_FUNC_END(__aes_arm64_decrypt)
|