1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20.file "twofish-i586-asm.S"
21.text
22
23#include <linux/linkage.h>
24#include <asm/asm-offsets.h>
25
26
27
/*
 * Stack offsets of the three arguments, measured from %esp at function
 * entry (offset 0 is the return address).  The entry points add 16 to
 * these because they push four registers before reading the arguments.
 */
#define ctx		4
#define out_blk		8
#define in_blk		12

/* Byte offsets of the four 32-bit words inside one 16-byte block. */
#define a_offset	0
#define b_offset	4
#define c_offset	8
#define d_offset	12
36
37
38
/*
 * Byte offsets into the context structure addressed through %ebp.
 *
 * s0..s3 are lookup tables indexed by a zero-extended byte scaled by 4;
 * they sit 1024 bytes apart, i.e. 256 dword entries each.
 */
#define s0	0
#define s1	1024
#define s2	2048
#define s3	3072
#define w	4096	/* whitening words: w+0..w+12 and w+16..w+28 */
#define k	4128	/* round-key words, read at k+round and k+4+round */
45
46
47
/*
 * Register aliases for macro substitution.  The round macros receive a
 * prefix R0..R3 and paste a suffix onto it:
 *   D = full 32-bit register, B = low byte, H = second byte.
 */

/* 32-bit registers */
#define R0D	%eax
#define R1D	%ebx
#define R2D	%ecx
#define R3D	%edx

/* low bytes (bits 0-7) */
#define R0B	%al
#define R1B	%bl
#define R2B	%cl
#define R3B	%dl

/* second bytes (bits 8-15) */
#define R0H	%ah
#define R1H	%bh
#define R2H	%ch
#define R3H	%dh
63
64
65
/*
 * input_whitening(reg, base, disp)
 *
 * XOR the whitening word stored at w + disp relative to base into reg.
 *   reg  - register that is updated in place
 *   base - register holding the context address
 *   disp - byte offset of the word (a_offset .. d_offset)
 */
#define input_whitening(reg, base, disp) \
	xor	w+disp(base), reg;
68
69
/*
 * output_whitening(reg, base, disp)
 *
 * XOR the whitening word stored at w + 16 + disp relative to base into
 * reg, i.e. the second group of four whitening words.
 *   reg  - register that is updated in place
 *   base - register holding the context address
 *   disp - byte offset of the word (a_offset .. d_offset)
 */
#define output_whitening(reg, base, disp) \
	xor	w+16+disp(base), reg;
72
73
74
75
76
77
78
79
/*
 * encrypt_round(a, b, c, d, round)
 *
 * One encryption round.
 *
 *   a, b, c, d - register alias prefixes (R0..R3); the D/B/H suffix pasted
 *                on below selects the 32-bit register, its low byte or its
 *                second byte
 *   round      - byte offset of this round's pair of key words inside k
 *
 * Expected on entry (as arranged by twofish_enc_blk and by the previous
 * round): %ebp = context address, a is held rotated by 16 bits, d is held
 * already rotated left by 1.
 *
 * What it does:
 *   - saves d on the stack, then reuses d as an accumulator
 *   - d    = s1[b byte0] ^ s2[b byte1] ^ s3[b byte2] ^ s0[b byte3]   (t1)
 *   - %esi = s2[aB] ^ s3[aH] ^ s0[aB'] ^ s1[aH']                     (t0)
 *     where aB/aH are read before and aB'/aH' after "ror $16, a";
 *     the two lookup chains are interleaved instruction by instruction
 *   - c    = rol(c ^ (t0 + t1 + k[round]), 15)
 *   - d    = saved_d ^ (t0 + 2*t1 + k[round + 4])
 *
 * Rotation side effects: a is rotated right by 16 once; b is rotated
 * right by 16 and then by 15 (net: left by 1), which is the form the next
 * round expects for its d operand.
 *
 * Clobbers %esi, %edi and the flags; uses one temporary stack slot
 * (push/pop balanced).
 */
#define encrypt_round(a,b,c,d,round)\
	push	d ## D;\
	movzx	b ## B,		%edi;\
	mov	s1(%ebp,%edi,4),d ## D;\
	movzx	a ## B,		%edi;\
	mov	s2(%ebp,%edi,4),%esi;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	s2(%ebp,%edi,4),d ## D;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s3(%ebp,%edi,4),%esi;\
	movzx	b ## B,		%edi;\
	xor	s3(%ebp,%edi,4),d ## D;\
	movzx	a ## B,		%edi;\
	xor	(%ebp,%edi,4),	%esi;\
	movzx	b ## H,		%edi;\
	ror	$15,		b ## D;\
	xor	(%ebp,%edi,4),	d ## D;\
	movzx	a ## H,		%edi;\
	xor	s1(%ebp,%edi,4),%esi;\
	pop	%edi;\
	add	d ## D,		%esi;\
	add	%esi,		d ## D;\
	add	k+round(%ebp),	%esi;\
	xor	%esi,		c ## D;\
	rol	$15,		c ## D;\
	add	k+4+round(%ebp),d ## D;\
	xor	%edi,		d ## D;
109
110
111
112
113
114
115
116
117
/*
 * encrypt_last_round(a, b, c, d, round)
 *
 * Final encryption round.  Same lookups and key additions as
 * encrypt_round, but with different rotations so the results are left
 * un-rotated for the output stage:
 *   - b is rotated right by 16 twice (net: unchanged)
 *   - c = ror(c ^ (t0 + t1 + k[round]), 1)       (instead of rol 15)
 *
 *   a, b, c, d - register alias prefixes (R0..R3); D/B/H suffixes select
 *                the 32-bit register, its low byte or its second byte
 *   round      - byte offset of this round's pair of key words inside k
 *
 * Expected on entry: %ebp = context address, a held rotated by 16 bits,
 * d held already rotated left by 1 (the state left by the previous
 * encrypt_round).
 *
 * Results: d = saved_d ^ (t0 + 2*t1 + k[round + 4]); a is rotated right
 * by 16 once.
 *
 * Clobbers %esi, %edi and the flags; uses one temporary stack slot
 * (push/pop balanced).
 */
#define encrypt_last_round(a,b,c,d,round)\
	push	d ## D;\
	movzx	b ## B,		%edi;\
	mov	s1(%ebp,%edi,4),d ## D;\
	movzx	a ## B,		%edi;\
	mov	s2(%ebp,%edi,4),%esi;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	s2(%ebp,%edi,4),d ## D;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s3(%ebp,%edi,4),%esi;\
	movzx	b ## B,		%edi;\
	xor	s3(%ebp,%edi,4),d ## D;\
	movzx	a ## B,		%edi;\
	xor	(%ebp,%edi,4),	%esi;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	(%ebp,%edi,4),	d ## D;\
	movzx	a ## H,		%edi;\
	xor	s1(%ebp,%edi,4),%esi;\
	pop	%edi;\
	add	d ## D,		%esi;\
	add	%esi,		d ## D;\
	add	k+round(%ebp),	%esi;\
	xor	%esi,		c ## D;\
	ror	$1,		c ## D;\
	add	k+4+round(%ebp),d ## D;\
	xor	%edi,		d ## D;
147
148
149
150
151
152
153
154
/*
 * decrypt_round(a, b, c, d, round)
 *
 * One decryption round.
 *
 *   a, b, c, d - register alias prefixes (R0..R3); the D/B/H suffix pasted
 *                on below selects the 32-bit register, its low byte or its
 *                second byte
 *   round      - byte offset of this round's pair of key words inside k
 *
 * Expected on entry (as arranged by twofish_dec_blk and by the previous
 * round): %ebp = context address, b is held rotated by 16 bits, c is held
 * already rotated left by 1.
 *
 * What it does:
 *   - saves c on the stack, then reuses c as an accumulator
 *   - c    = s0[a byte0] ^ s1[a byte1] ^ s2[a byte2] ^ s3[a byte3]   (t0)
 *   - %esi = s3[bB] ^ s0[bH] ^ s1[bB'] ^ s2[bH']                     (t1)
 *     where bB/bH are read before and bB'/bH' after "ror $16, b";
 *     the two lookup chains are interleaved instruction by instruction
 *   - c    = saved_c ^ (t0 + t1 + k[round])
 *   - d    = rol(d ^ (t0 + 2*t1 + k[round + 4]), 15)
 *
 * Rotation side effects: b is rotated right by 16 once; a is rotated
 * right by 16 and then by 15 (net: left by 1), which is the form the next
 * round expects for its c operand.
 *
 * Clobbers %esi, %edi and the flags; uses one temporary stack slot
 * (push/pop balanced).
 */
#define decrypt_round(a,b,c,d,round)\
	push	c ## D;\
	movzx	a ## B,		%edi;\
	mov	(%ebp,%edi,4),	c ## D;\
	movzx	b ## B,		%edi;\
	mov	s3(%ebp,%edi,4),%esi;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s1(%ebp,%edi,4),c ## D;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	(%ebp,%edi,4),	%esi;\
	movzx	a ## B,		%edi;\
	xor	s2(%ebp,%edi,4),c ## D;\
	movzx	b ## B,		%edi;\
	xor	s1(%ebp,%edi,4),%esi;\
	movzx	a ## H,		%edi;\
	ror	$15,		a ## D;\
	xor	s3(%ebp,%edi,4),c ## D;\
	movzx	b ## H,		%edi;\
	xor	s2(%ebp,%edi,4),%esi;\
	pop	%edi;\
	add	%esi,		c ## D;\
	add	c ## D,		%esi;\
	add	k+round(%ebp),	c ## D;\
	xor	%edi,		c ## D;\
	add	k+4+round(%ebp),%esi;\
	xor	%esi,		d ## D;\
	rol	$15,		d ## D;
184
185
186
187
188
189
190
191
192
/*
 * decrypt_last_round(a, b, c, d, round)
 *
 * Final decryption round.  Same lookups and key additions as
 * decrypt_round, but with different rotations so the results are left
 * un-rotated for the output stage:
 *   - a is rotated right by 16 twice (net: unchanged)
 *   - d = ror(d ^ (t0 + 2*t1 + k[round + 4]), 1)   (instead of rol 15)
 *
 *   a, b, c, d - register alias prefixes (R0..R3); D/B/H suffixes select
 *                the 32-bit register, its low byte or its second byte
 *   round      - byte offset of this round's pair of key words inside k
 *
 * Expected on entry: %ebp = context address, b held rotated by 16 bits,
 * c held already rotated left by 1 (the state left by the previous
 * decrypt_round).
 *
 * Results: c = saved_c ^ (t0 + t1 + k[round]); b is rotated right by 16
 * once.
 *
 * Clobbers %esi, %edi and the flags; uses one temporary stack slot
 * (push/pop balanced).
 */
#define decrypt_last_round(a,b,c,d,round)\
	push	c ## D;\
	movzx	a ## B,		%edi;\
	mov	(%ebp,%edi,4),	c ## D;\
	movzx	b ## B,		%edi;\
	mov	s3(%ebp,%edi,4),%esi;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s1(%ebp,%edi,4),c ## D;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	(%ebp,%edi,4),	%esi;\
	movzx	a ## B,		%edi;\
	xor	s2(%ebp,%edi,4),c ## D;\
	movzx	b ## B,		%edi;\
	xor	s1(%ebp,%edi,4),%esi;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s3(%ebp,%edi,4),c ## D;\
	movzx	b ## H,		%edi;\
	xor	s2(%ebp,%edi,4),%esi;\
	pop	%edi;\
	add	%esi,		c ## D;\
	add	c ## D,		%esi;\
	add	k+round(%ebp),	c ## D;\
	xor	%edi,		c ## D;\
	add	k+4+round(%ebp),%esi;\
	xor	%esi,		d ## D;\
	ror	$1,		d ## D;
222
/*
 * twofish_enc_blk
 *
 * Runs the 16 encryption rounds over one 16-byte block.
 *
 * Stack arguments at entry: ctx(%esp) = context address,
 * out_blk(%esp) = destination block, in_blk(%esp) = source block.
 * Returns 1 in %eax.
 *
 * %ebp, %ebx, %esi and %edi are saved and restored; %ecx and %edx are
 * overwritten.  All four input words are loaded before anything is
 * stored to the destination.
 */
ENTRY(twofish_enc_blk)
	/* save the registers that are restored before returning */
	push	%ebp
	push	%ebx
	push	%esi
	push	%edi

	/* four pushes above => arguments are 16 bytes further up the stack */
	mov	ctx+16(%esp),	%ebp		/* %ebp = context base for all table/key accesses */
	mov	in_blk+16(%esp), %edi		/* %edi = source block */

	/* load each word and apply its input whitening word */
	mov	a_offset(%edi),	%eax
	input_whitening(%eax,%ebp,a_offset)
	ror	$16,		%eax		/* rounds expect a rotated by 16 */

	mov	b_offset(%edi),	%ebx
	input_whitening(%ebx,%ebp,b_offset)

	mov	c_offset(%edi),	%ecx
	input_whitening(%ecx,%ebp,c_offset)

	mov	d_offset(%edi),	%edx
	input_whitening(%edx,%ebp,d_offset)
	rol	$1,		%edx		/* rounds expect d pre-rotated left by 1 */

	/* register pairs swap roles every round; key offset advances by 8 */
	encrypt_round(R0,R1,R2,R3,0)
	encrypt_round(R2,R3,R0,R1,8)
	encrypt_round(R0,R1,R2,R3,2*8)
	encrypt_round(R2,R3,R0,R1,3*8)
	encrypt_round(R0,R1,R2,R3,4*8)
	encrypt_round(R2,R3,R0,R1,5*8)
	encrypt_round(R0,R1,R2,R3,6*8)
	encrypt_round(R2,R3,R0,R1,7*8)
	encrypt_round(R0,R1,R2,R3,8*8)
	encrypt_round(R2,R3,R0,R1,9*8)
	encrypt_round(R0,R1,R2,R3,10*8)
	encrypt_round(R2,R3,R0,R1,11*8)
	encrypt_round(R0,R1,R2,R3,12*8)
	encrypt_round(R2,R3,R0,R1,13*8)
	encrypt_round(R0,R1,R2,R3,14*8)
	encrypt_last_round(R2,R3,R0,R1,15*8)

	/*
	 * Output: %eax/%ebx go to the c/d slots and %ecx/%edx to the a/b
	 * slots, each XORed with the matching output whitening word.
	 */
	mov	out_blk+16(%esp), %edi		/* %edi = destination block */

	output_whitening(%eax,%ebp,c_offset)
	mov	%eax,		c_offset(%edi)

	output_whitening(%ebx,%ebp,d_offset)
	mov	%ebx,		d_offset(%edi)

	output_whitening(%ecx,%ebp,a_offset)
	mov	%ecx,		a_offset(%edi)

	output_whitening(%edx,%ebp,b_offset)
	mov	%edx,		b_offset(%edi)

	/* restore in reverse push order */
	pop	%edi
	pop	%esi
	pop	%ebx
	pop	%ebp
	mov	$1,		%eax		/* return value */
	ret
ENDPROC(twofish_enc_blk)
278
/*
 * twofish_dec_blk
 *
 * Runs the 16 decryption rounds over one 16-byte block, using the round
 * keys in descending order (15*8 down to 0).
 *
 * Stack arguments at entry: ctx(%esp) = context address,
 * out_blk(%esp) = destination block, in_blk(%esp) = source block.
 * Returns 1 in %eax.
 *
 * %ebp, %ebx, %esi and %edi are saved and restored; %ecx and %edx are
 * overwritten.  All four input words are loaded before anything is
 * stored to the destination.
 */
ENTRY(twofish_dec_blk)
	/* save the registers that are restored before returning */
	push	%ebp
	push	%ebx
	push	%esi
	push	%edi

	/* four pushes above => arguments are 16 bytes further up the stack */
	mov	ctx+16(%esp),	%ebp		/* %ebp = context base for all table/key accesses */
	mov	in_blk+16(%esp), %edi		/* %edi = source block */

	/* load each word and undo the output whitening (words at w+16..) */
	mov	a_offset(%edi),	%eax
	output_whitening(%eax,%ebp,a_offset)

	mov	b_offset(%edi),	%ebx
	output_whitening(%ebx,%ebp,b_offset)
	ror	$16,		%ebx		/* rounds expect b rotated by 16 */

	mov	c_offset(%edi),	%ecx
	output_whitening(%ecx,%ebp,c_offset)
	rol	$1,		%ecx		/* rounds expect c pre-rotated left by 1 */

	mov	d_offset(%edi),	%edx
	output_whitening(%edx,%ebp,d_offset)

	/* register pairs swap roles every round; key offset drops by 8 */
	decrypt_round(R0,R1,R2,R3,15*8)
	decrypt_round(R2,R3,R0,R1,14*8)
	decrypt_round(R0,R1,R2,R3,13*8)
	decrypt_round(R2,R3,R0,R1,12*8)
	decrypt_round(R0,R1,R2,R3,11*8)
	decrypt_round(R2,R3,R0,R1,10*8)
	decrypt_round(R0,R1,R2,R3,9*8)
	decrypt_round(R2,R3,R0,R1,8*8)
	decrypt_round(R0,R1,R2,R3,7*8)
	decrypt_round(R2,R3,R0,R1,6*8)
	decrypt_round(R0,R1,R2,R3,5*8)
	decrypt_round(R2,R3,R0,R1,4*8)
	decrypt_round(R0,R1,R2,R3,3*8)
	decrypt_round(R2,R3,R0,R1,2*8)
	decrypt_round(R0,R1,R2,R3,1*8)
	decrypt_last_round(R2,R3,R0,R1,0)

	/*
	 * Output: %eax/%ebx go to the c/d slots and %ecx/%edx to the a/b
	 * slots, each XORed with the matching input whitening word.
	 */
	mov	out_blk+16(%esp), %edi		/* %edi = destination block */

	input_whitening(%eax,%ebp,c_offset)
	mov	%eax,		c_offset(%edi)

	input_whitening(%ebx,%ebp,d_offset)
	mov	%ebx,		d_offset(%edi)

	input_whitening(%ecx,%ebp,a_offset)
	mov	%ecx,		a_offset(%edi)

	input_whitening(%edx,%ebp,b_offset)
	mov	%edx,		b_offset(%edi)

	/* restore in reverse push order */
	pop	%edi
	pop	%esi
	pop	%ebx
	pop	%ebp
	mov	$1,		%eax		/* return value */
	ret
ENDPROC(twofish_dec_blk)
335