.file "twofish-x86_64-asm.S"
.text

#include <asm/asm-offsets.h>

#define a_offset	0
#define b_offset	4
#define c_offset	8
#define d_offset	12
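/* Layout of the Twofish key context passed to both entry points:
 * four 1024-byte S-box lookup tables (s0..s3), followed by 8 whitening
 * key words at w and 32 round subkey words at k (byte offsets from the
 * start of the context). */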
#define s0	0
#define s1	1024
#define s2	2048
#define s3	3072
#define w	4096
#define k	4128
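/* Register aliases so the round macros can name the four block words
 * and their 32-bit (D), low-byte (B) and high-byte (H) parts through
 * macro arguments. */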
#define R0     %rax
#define R0D    %eax
#define R0B    %al
#define R0H    %ah

#define R1     %rbx
#define R1D    %ebx
#define R1B    %bl
#define R1H    %bh

#define R2     %rcx
#define R2D    %ecx
#define R2B    %cl
#define R2H    %ch

#define R3     %rdx
#define R3D    %edx
#define R3B    %dl
#define R3H    %dh
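/* performs input whitening: xor the data with the whitening keys at w */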
#define input_whitening(src,context,offset)\
	xor	w+offset(context),	src;
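/* performs output whitening: xor the data with the whitening keys at w+16 */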
#define output_whitening(src,context,offset)\
	xor	w+16+offset(context),	src;
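/*
 * One encryption round:
 * a input register containing a (already rotated left by 16)
 * b input register containing b
 * c input register containing c
 * d input register containing d (already rotated left by 1)
 * operations on a and b are interleaved to increase performance
 */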
#define encrypt_round(a,b,c,d,round)\
	movzx	b ## B,		%edi;\
	mov	s1(%r11,%rdi,4),%r8d;\
	movzx	a ## B,		%edi;\
	mov	s2(%r11,%rdi,4),%r9d;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	s2(%r11,%rdi,4),%r8d;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s3(%r11,%rdi,4),%r9d;\
	movzx	b ## B,		%edi;\
	xor	s3(%r11,%rdi,4),%r8d;\
	movzx	a ## B,		%edi;\
	xor	(%r11,%rdi,4),	%r9d;\
	movzx	b ## H,		%edi;\
	ror	$15,		b ## D;\
	xor	(%r11,%rdi,4),	%r8d;\
	movzx	a ## H,		%edi;\
	xor	s1(%r11,%rdi,4),%r9d;\
	add	%r8d,		%r9d;\
	add	%r9d,		%r8d;\
	add	k+round(%r11),	%r9d;\
	xor	%r9d,		c ## D;\
	rol	$15,		c ## D;\
	add	k+4+round(%r11),%r8d;\
	xor	%r8d,		d ## D;
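/*
 * Last encryption round; identical to encrypt_round except that a and b
 * are also packed into %r10 (b in the high half, a in the low half) so
 * they are ready for the output whitening:
 * a input register containing a (already rotated left by 16)
 * b input register containing b
 * c input register containing c
 * d input register containing d (already rotated left by 1)
 */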
#define encrypt_last_round(a,b,c,d,round)\
	mov	b ## D,		%r10d;\
	shl	$32,		%r10;\
	movzx	b ## B,		%edi;\
	mov	s1(%r11,%rdi,4),%r8d;\
	movzx	a ## B,		%edi;\
	mov	s2(%r11,%rdi,4),%r9d;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	s2(%r11,%rdi,4),%r8d;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s3(%r11,%rdi,4),%r9d;\
	movzx	b ## B,		%edi;\
	xor	s3(%r11,%rdi,4),%r8d;\
	movzx	a ## B,		%edi;\
	xor	(%r11,%rdi,4),	%r9d;\
	xor	a,		%r10;\
	movzx	b ## H,		%edi;\
	xor	(%r11,%rdi,4),	%r8d;\
	movzx	a ## H,		%edi;\
	xor	s1(%r11,%rdi,4),%r9d;\
	add	%r8d,		%r9d;\
	add	%r9d,		%r8d;\
	add	k+round(%r11),	%r9d;\
	xor	%r9d,		c ## D;\
	ror	$1,		c ## D;\
	add	k+4+round(%r11),%r8d;\
	xor	%r8d,		d ## D;
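/*
 * One decryption round:
 * a input register containing a
 * b input register containing b (already rotated left by 16)
 * c input register containing c (already rotated left by 1)
 * d input register containing d
 * operations on a and b are interleaved to increase performance
 */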
#define decrypt_round(a,b,c,d,round)\
	movzx	a ## B,		%edi;\
	mov	(%r11,%rdi,4),	%r9d;\
	movzx	b ## B,		%edi;\
	mov	s3(%r11,%rdi,4),%r8d;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s1(%r11,%rdi,4),%r9d;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	(%r11,%rdi,4),	%r8d;\
	movzx	a ## B,		%edi;\
	xor	s2(%r11,%rdi,4),%r9d;\
	movzx	b ## B,		%edi;\
	xor	s1(%r11,%rdi,4),%r8d;\
	movzx	a ## H,		%edi;\
	ror	$15,		a ## D;\
	xor	s3(%r11,%rdi,4),%r9d;\
	movzx	b ## H,		%edi;\
	xor	s2(%r11,%rdi,4),%r8d;\
	add	%r8d,		%r9d;\
	add	%r9d,		%r8d;\
	add	k+round(%r11),	%r9d;\
	xor	%r9d,		c ## D;\
	add	k+4+round(%r11),%r8d;\
	xor	%r8d,		d ## D;\
	rol	$15,		d ## D;
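/*
 * Last decryption round; identical to decrypt_round except that a and b
 * are also packed into %r10 (b in the high half, a in the low half) so
 * they are ready for the final whitening:
 * a input register containing a
 * b input register containing b (already rotated left by 16)
 * c input register containing c (already rotated left by 1)
 * d input register containing d
 */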
#define decrypt_last_round(a,b,c,d,round)\
	movzx	a ## B,		%edi;\
	mov	(%r11,%rdi,4),	%r9d;\
	movzx	b ## B,		%edi;\
	mov	s3(%r11,%rdi,4),%r8d;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	(%r11,%rdi,4),	%r8d;\
	movzx	a ## H,		%edi;\
	mov	b ## D,		%r10d;\
	shl	$32,		%r10;\
	xor	a,		%r10;\
	ror	$16,		a ## D;\
	xor	s1(%r11,%rdi,4),%r9d;\
	movzx	b ## B,		%edi;\
	xor	s1(%r11,%rdi,4),%r8d;\
	movzx	a ## B,		%edi;\
	xor	s2(%r11,%rdi,4),%r9d;\
	movzx	b ## H,		%edi;\
	xor	s2(%r11,%rdi,4),%r8d;\
	movzx	a ## H,		%edi;\
	xor	s3(%r11,%rdi,4),%r9d;\
	add	%r8d,		%r9d;\
	add	%r9d,		%r8d;\
	add	k+round(%r11),	%r9d;\
	xor	%r9d,		c ## D;\
	add	k+4+round(%r11),%r8d;\
	xor	%r8d,		d ## D;\
	ror	$1,		d ## D;
.align 8
.global twofish_enc_blk
.global twofish_dec_blk

twofish_enc_blk:
	pushq	R1
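	/* %rdi contains the ctx address */
	/* %rsi contains the output (dst) address */
	/* %rdx contains the input (src) address */
	/* ctx is moved to %r11 so that %rdi, a non-REX register, is free to
	   receive the zero-extended 8-bit high-register (ah/bh/ch/dh) loads */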
	mov	%rdi,	%r11

	movq	(R3),	R1
	movq	8(R3),	R3
	input_whitening(R1,%r11,a_offset)
	input_whitening(R3,%r11,c_offset)
	mov	R1D,	R0D
	rol	$16,	R0D
	shr	$32,	R1
	mov	R3D,	R2D
	shr	$32,	R3
	rol	$1,	R3D

	encrypt_round(R0,R1,R2,R3,0);
	encrypt_round(R2,R3,R0,R1,8);
	encrypt_round(R0,R1,R2,R3,2*8);
	encrypt_round(R2,R3,R0,R1,3*8);
	encrypt_round(R0,R1,R2,R3,4*8);
	encrypt_round(R2,R3,R0,R1,5*8);
	encrypt_round(R0,R1,R2,R3,6*8);
	encrypt_round(R2,R3,R0,R1,7*8);
	encrypt_round(R0,R1,R2,R3,8*8);
	encrypt_round(R2,R3,R0,R1,9*8);
	encrypt_round(R0,R1,R2,R3,10*8);
	encrypt_round(R2,R3,R0,R1,11*8);
	encrypt_round(R0,R1,R2,R3,12*8);
	encrypt_round(R2,R3,R0,R1,13*8);
	encrypt_round(R0,R1,R2,R3,14*8);
	encrypt_last_round(R2,R3,R0,R1,15*8);

	output_whitening(%r10,%r11,a_offset)
	movq	%r10,	(%rsi)

	shl	$32,	R1
	xor	R0,	R1

	output_whitening(R1,%r11,c_offset)
	movq	R1,	8(%rsi)

	popq	R1
	movq	$1,%rax
	ret

twofish_dec_blk:
	pushq	R1
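	/* %rdi contains the ctx address */
	/* %rsi contains the output (dst) address */
	/* %rdx contains the input (src) address */
	/* ctx is moved to %r11 for the same reason as in twofish_enc_blk */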
	mov	%rdi,	%r11

	movq	(R3),	R1
	movq	8(R3),	R3
	output_whitening(R1,%r11,a_offset)
	output_whitening(R3,%r11,c_offset)
	mov	R1D,	R0D
	shr	$32,	R1
	rol	$16,	R1D
	mov	R3D,	R2D
	shr	$32,	R3
	rol	$1,	R2D

	decrypt_round(R0,R1,R2,R3,15*8);
	decrypt_round(R2,R3,R0,R1,14*8);
	decrypt_round(R0,R1,R2,R3,13*8);
	decrypt_round(R2,R3,R0,R1,12*8);
	decrypt_round(R0,R1,R2,R3,11*8);
	decrypt_round(R2,R3,R0,R1,10*8);
	decrypt_round(R0,R1,R2,R3,9*8);
	decrypt_round(R2,R3,R0,R1,8*8);
	decrypt_round(R0,R1,R2,R3,7*8);
	decrypt_round(R2,R3,R0,R1,6*8);
	decrypt_round(R0,R1,R2,R3,5*8);
	decrypt_round(R2,R3,R0,R1,4*8);
	decrypt_round(R0,R1,R2,R3,3*8);
	decrypt_round(R2,R3,R0,R1,2*8);
	decrypt_round(R0,R1,R2,R3,1*8);
	decrypt_last_round(R2,R3,R0,R1,0);

	input_whitening(%r10,%r11,a_offset)
	movq	%r10,	(%rsi)

	shl	$32,	R1
	xor	R0,	R1

	input_whitening(R1,%r11,c_offset)
	movq	R1,	8(%rsi)

	popq	R1
	movq	$1,%rax
	ret