1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28#include <linux/linkage.h>
29#include <asm/frame.h>
30#include <asm/nospec-branch.h>
31
32
33
34
35
36
37
38
39
/*
 * MOVADQ/MOVUDQ: aligned/unaligned 128-bit SSE moves, kept behind macros
 * so the move flavour can be changed in one place without touching the
 * code below.
 */
#define MOVADQ movaps
#define MOVUDQ movups
42
#ifdef __x86_64__

/*
 * 16-byte GHASH/GCM constants.  Each lives in its own mergeable
 * .rodata.cst16.* section so the linker can deduplicate identical
 * constants across objects.
 */
.section .rodata.cst16.POLY, "aM", @progbits, 16
.align 16
/* Bit-reflected GHASH field polynomial x^128 + x^7 + x^2 + x + 1 */
POLY: .octa 0xC2000000000000000000000000000001
.section .rodata.cst16.TWOONE, "aM", @progbits, 16
.align 16
/* Used by PRECOMPUTE's pcmpeqd to detect the carried-out top bit */
TWOONE: .octa 0x00000001000000000000000000000001

.section .rodata.cst16.SHUF_MASK, "aM", @progbits, 16
.align 16
/* pshufb mask that byte-reverses a 128-bit lane (endianness swap) */
SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F
.section .rodata.cst16.MASK1, "aM", @progbits, 16
.align 16
MASK1: .octa 0x0000000000000000ffffffffffffffff	/* low 64 bits set */
.section .rodata.cst16.MASK2, "aM", @progbits, 16
.align 16
MASK2: .octa 0xffffffffffffffff0000000000000000	/* high 64 bits set */
.section .rodata.cst16.ONE, "aM", @progbits, 16
.align 16
/* Counter increment for the low dword of the CTR block */
ONE: .octa 0x00000000000000000000000000000001
.section .rodata.cst16.F_MIN_MASK, "aM", @progbits, 16
.align 16
/*
 * NOTE(review): this literal has 31 hex digits (one nibble short of
 * 128 bits) and is unused in the visible code — confirm against the
 * upstream source before relying on it.
 */
F_MIN_MASK: .octa 0xf1f2f3f4f5f6f7f8f9fafbfcfdfeff0
.section .rodata.cst16.dec, "aM", @progbits, 16
.align 16
dec: .octa 0x1
.section .rodata.cst16.enc, "aM", @progbits, 16
.align 16
enc: .octa 0x2

/*
 * SHIFT_MASK, ALL_F and the trailing zero block must stay adjacent and
 * in this order: the partial-block code addresses them relative to each
 * other (ALL_F-SHIFT_MASK(%r12), lea ALL_F+16 - len) to build byte
 * shift/keep masks for 1..15 byte tails.  Hence a plain .rodata section,
 * not mergeable .cst16 ones.
 */
.section .rodata, "a", @progbits
.align 16
SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100
ALL_F:      .octa 0xffffffffffffffffffffffffffffffff
            .octa 0x00000000000000000000000000000000

.text
85
86
/*
 * FUNC_SAVE pushes three GPRs (%r12-%r14), so stack arguments sit
 * STACK_OFFSET bytes further from %rsp than at function entry.
 */
#define STACK_OFFSET 8*3

/*
 * Byte offsets of fields in the gcm context structure passed in %arg2.
 * NOTE(review): these must match the C-side struct layout
 * (presumably struct gcm_context_data) — confirm against the header.
 */
#define AadHash 16*0		/* running GHASH state */
#define AadLen 16*1		/* AAD length in bytes */
#define InLen (16*1)+8		/* total plaintext/ciphertext bytes so far */
#define PBlockEncKey 16*2	/* encrypted counter of the pending partial block */
#define OrigIV 16*3		/* J0, kept for tag generation */
#define CurCount 16*4		/* current big-endian counter block */
#define PBlockLen 16*5		/* bytes pending in the partial block (0..15) */
#define HashKey 16*6		/* H << 1 (mod poly) */
#define HashKey_2 16*7		/* H^2 << 1 (mod poly) */
#define HashKey_3 16*8		/* H^3 << 1 (mod poly) */
#define HashKey_4 16*9		/* H^4 << 1 (mod poly) */
#define HashKey_k 16*10		/* Karatsuba helper: hi(H) ^ lo(H) */
#define HashKey_2_k 16*11
#define HashKey_3_k 16*12
#define HashKey_4_k 16*13

/* SysV AMD64 argument registers; arg7+ live on the caller's stack */
#define arg1 rdi
#define arg2 rsi
#define arg3 rdx
#define arg4 rcx
#define arg5 r8
#define arg6 r9
#define arg7 STACK_OFFSET+8(%rsp)
#define arg8 STACK_OFFSET+16(%rsp)
#define arg9 STACK_OFFSET+24(%rsp)
#define arg10 STACK_OFFSET+32(%rsp)
#define arg11 STACK_OFFSET+40(%rsp)
/* key_length field, 2*15*16 bytes into the AES key context (%arg1) */
#define keysize 2*15*16(%arg1)
#endif
126
127
/*
 * Symbolic register names for the (non-GCM) AES helper routines.
 * STATE*/IN* come in groups of four for the 4-way parallel paths.
 */
#define STATE1 %xmm0
#define STATE2 %xmm4
#define STATE3 %xmm5
#define STATE4 %xmm6
#define STATE STATE1
#define IN1 %xmm1
#define IN2 %xmm7
#define IN3 %xmm8
#define IN4 %xmm9
#define IN IN1
#define KEY %xmm2
#define IV %xmm3

#define BSWAP_MASK %xmm10
#define CTR %xmm11
#define INC %xmm12

#define GF128MUL_MASK %xmm7

#ifdef __x86_64__
/* 64-bit builds: full-width pointer/length registers */
#define AREG %rax
#define KEYP %rdi
#define OUTP %rsi
#define UKEYP OUTP
#define INP %rdx
#define LEN %rcx
#define IVP %r8
#define KLEN %r9d
#define T1 %r10
#define TKEYP T1
#define T2 %r11
#define TCTR_LOW T2
#else
/* 32-bit builds: fewer registers, so several aliases share */
#define AREG %eax
#define KEYP %edi
#define OUTP AREG
#define UKEYP OUTP
#define INP %edx
#define LEN %esi
#define IVP %ebp
#define KLEN %ebx
#define T1 %ecx
#define TKEYP T1
#endif
172
/*
 * FUNC_SAVE: save the callee-saved GPRs that the GCM macros clobber.
 * The push count must stay in sync with STACK_OFFSET (8*3) so the
 * arg7..arg11 stack-argument offsets remain valid, and the push order
 * must mirror the pop order in FUNC_RESTORE.
 */
.macro FUNC_SAVE
	push	%r12
	push	%r13
	push	%r14
.endm
182
183
/* FUNC_RESTORE: pop %r14..%r12 in reverse of FUNC_SAVE's push order. */
.macro FUNC_RESTORE
	pop	%r14
	pop	%r13
	pop	%r12
.endm
189
190
191
192
193
194
/*
 * PRECOMPUTE: derive the GHASH hash key and its powers into the context.
 *
 * Loads the raw subkey from \SUBKEY (a pointer; normally E_K(0^128),
 * computed by the caller — not visible here), byte-reflects it, and
 * multiplies by x in GF(2^128): shift left by one with a conditional
 * reduction by POLY when the top bit carried out.  The result is stored
 * as HashKey in the context (%arg2).  GHASH_MUL then produces H^2..H^4,
 * and for each power the Karatsuba helper value hi^lo is stored in the
 * matching HashKey_N_k slot.  Clobbers %r12 and all TMP registers.
 */
.macro PRECOMPUTE SUBKEY TMP1 TMP2 TMP3 TMP4 TMP5 TMP6 TMP7
	mov	\SUBKEY, %r12
	movdqu	(%r12), \TMP3
	movdqa	SHUF_MASK(%rip), \TMP2
	pshufb	\TMP2, \TMP3			/* byte-reflect the subkey */

	/* H <<= 1, propagating the bit-63 carries between the halves */
	movdqa	\TMP3, \TMP2
	psllq	$1, \TMP3
	psrlq	$63, \TMP2
	movdqa	\TMP2, \TMP1
	pslldq	$8, \TMP2
	psrldq	$8, \TMP1
	por	\TMP2, \TMP3

	/* if bit 127 carried out, reduce by the field polynomial */
	pshufd	$0x24, \TMP1, \TMP2
	pcmpeqd	TWOONE(%rip), \TMP2
	pand	POLY(%rip), \TMP2
	pxor	\TMP2, \TMP3
	movdqu	\TMP3, HashKey(%arg2)

	/* HashKey_k = hi(H) ^ lo(H), used by the Karatsuba multiplies */
	movdqa	\TMP3, \TMP5
	pshufd	$78, \TMP3, \TMP1		/* swap the 64-bit halves */
	pxor	\TMP3, \TMP1
	movdqu	\TMP1, HashKey_k(%arg2)

	GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
	/* TMP5 = H^2 */
	movdqu	\TMP5, HashKey_2(%arg2)
	pshufd	$78, \TMP5, \TMP1
	pxor	\TMP5, \TMP1
	movdqu	\TMP1, HashKey_2_k(%arg2)

	GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
	/* TMP5 = H^3 */
	movdqu	\TMP5, HashKey_3(%arg2)
	pshufd	$78, \TMP5, \TMP1
	pxor	\TMP5, \TMP1
	movdqu	\TMP1, HashKey_3_k(%arg2)

	GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
	/* TMP5 = H^4 */
	movdqu	\TMP5, HashKey_4(%arg2)
	pshufd	$78, \TMP5, \TMP1
	pxor	\TMP5, \TMP1
	movdqu	\TMP1, HashKey_4_k(%arg2)
.endm
246
247
248
/*
 * GCM_INIT: initialise the gcm context (%arg2) for a new request.
 *   Iv      - pointer to the 16-byte pre-counter block J0
 *   SUBKEY  - pointer to the raw hash subkey (see PRECOMPUTE)
 *   AAD     - pointer to the additional authenticated data
 *   AADLEN  - AAD length in bytes
 * Zeroes the running lengths/partial-block state, saves J0, stores the
 * byte-swapped counter, precomputes the hash-key powers and hashes the
 * AAD into AadHash.  Clobbers %rax, %r10-%r12 and %xmm0-%xmm7/13/14.
 */
.macro GCM_INIT Iv SUBKEY AAD AADLEN
	mov	\AADLEN, %r11
	mov	%r11, AadLen(%arg2)		/* context->aad length */
	xor	%r11d, %r11d
	mov	%r11, InLen(%arg2)		/* no payload processed yet */
	mov	%r11, PBlockLen(%arg2)		/* no partial block pending */
	mov	%r11, PBlockEncKey(%arg2)
	mov	\Iv, %rax
	movdqu	(%rax), %xmm0
	movdqu	%xmm0, OrigIV(%arg2)		/* keep J0 for GCM_COMPLETE */

	movdqa	SHUF_MASK(%rip), %xmm2
	pshufb	%xmm2, %xmm0			/* counter in reflected form */
	movdqu	%xmm0, CurCount(%arg2)

	PRECOMPUTE \SUBKEY, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7
	movdqu	HashKey(%arg2), %xmm13

	CALC_AAD_HASH %xmm13, \AAD, \AADLEN, %xmm0, %xmm1, %xmm2, %xmm3, \
	%xmm4, %xmm5, %xmm6
.endm
270
271
272
273
274
/*
 * GCM_ENC_DEC: encrypt or decrypt %arg5 bytes from %arg4 into %arg3,
 * updating the running GHASH (AadHash) and counter (CurCount) in the
 * context (%arg2).  \operation is 'enc' or 'dec'.
 *
 * Order of work:
 *   1. complete any partial block left over from a previous call
 *   2. run 1-3 single blocks so the remaining count is a multiple of 4
 *   3. bulk loop: 4 blocks at a time, GHASH interleaved with AES
 *   4. a trailing sub-16-byte chunk is encrypted against a fresh counter
 *      block and its state saved (PBlockLen/PBlockEncKey) for later.
 * Clobbers %rax, %r10-%r13 and %xmm0-%xmm15.
 */
.macro GCM_ENC_DEC operation
	movdqu	AadHash(%arg2), %xmm8		/* xmm8 = running hash */
	movdqu	HashKey(%arg2), %xmm13
	add	%arg5, InLen(%arg2)		/* account payload length */

	xor	%r11d, %r11d			/* r11 = data offset */
	PARTIAL_BLOCK %arg3 %arg4 %arg5 %r11 %xmm8 \operation

	sub	%r11, %arg5			/* bytes left after partial */
	mov	%arg5, %r13
	and	$-16, %r13			/* r13 = full-block bytes */
	mov	%r13, %r12

	/* dispatch on (block count) mod 4 to align the 4-wide main loop */
	and	$(3<<4), %r12
	jz	_initial_num_blocks_is_0_\@
	cmp	$(2<<4), %r12
	jb	_initial_num_blocks_is_1_\@
	je	_initial_num_blocks_is_2_\@
_initial_num_blocks_is_3_\@:
	INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, \operation
	sub	$48, %r13
	jmp	_initial_blocks_\@
_initial_num_blocks_is_2_\@:
	INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, \operation
	sub	$32, %r13
	jmp	_initial_blocks_\@
_initial_num_blocks_is_1_\@:
	INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, \operation
	sub	$16, %r13
	jmp	_initial_blocks_\@
_initial_num_blocks_is_0_\@:
	INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, \operation
_initial_blocks_\@:

	/* main 4-block loop; the final literal 'enc' argument is unused
	 * by the GHASH_4_ENCRYPT_4_PARALLEL_* macros */
	test	%r13, %r13
	je	_zero_cipher_left_\@
	sub	$64, %r13
	je	_four_cipher_left_\@
_crypt_by_4_\@:
	GHASH_4_ENCRYPT_4_PARALLEL_\operation %xmm9, %xmm10, %xmm11, %xmm12, \
	%xmm13, %xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, \
	%xmm7, %xmm8, enc
	add	$64, %r11
	sub	$64, %r13
	jne	_crypt_by_4_\@
_four_cipher_left_\@:
	/* fold the last 4 ciphertext blocks into the hash */
	GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \
%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8
_zero_cipher_left_\@:
	movdqu	%xmm8, AadHash(%arg2)
	movdqu	%xmm0, CurCount(%arg2)

	mov	%arg5, %r13
	and	$15, %r13			/* r13 = tail length 0..15 */
	je	_multiple_of_16_bytes_\@

	mov	%r13, PBlockLen(%arg2)		/* remember partial length */

	/* encrypt one more counter block for the tail */
	paddd	ONE(%rip), %xmm0
	movdqu	%xmm0, CurCount(%arg2)
	movdqa	SHUF_MASK(%rip), %xmm10
	pshufb	%xmm10, %xmm0

	ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1
	movdqu	%xmm0, PBlockEncKey(%arg2)	/* saved for PARTIAL_BLOCK */

	cmp	$16, %arg5
	jge	_large_enough_update_\@

	/* buffer shorter than a block: read it byte-safely */
	lea	(%arg4,%r11,1), %r10
	mov	%r13, %r12
	READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1
	jmp	_data_read_\@

_large_enough_update_\@:
	/* at least 16 bytes exist: read the last 16 and shift the tail
	 * into position instead of a byte loop */
	sub	$16, %r11
	add	%r13, %r11
	movdqu	(%arg4, %r11, 1), %xmm1
	sub	%r13, %r11
	add	$16, %r11

	lea	SHIFT_MASK+16(%rip), %r12
	sub	%r13, %r12			/* shuffle mask for r13 bytes */
	movdqu	(%r12), %xmm2
	pshufb	%xmm2, %xmm1

_data_read_\@:
	/* mask keeping only the low r13 bytes */
	lea	ALL_F+16(%rip), %r12
	sub	%r13, %r12

.ifc \operation, dec
	movdqa	%xmm1, %xmm2			/* keep ciphertext for GHASH */
.endif
	pxor	%xmm1, %xmm0			/* XOR with the keystream */
	movdqu	(%r12), %xmm1
	pand	%xmm1, %xmm0			/* drop bytes past the tail */
.ifc \operation, dec
	pand	%xmm1, %xmm2
	movdqa	SHUF_MASK(%rip), %xmm10
	pshufb	%xmm10 ,%xmm2
	pxor	%xmm2, %xmm8			/* hash the CIPHERtext */
.else
	movdqa	SHUF_MASK(%rip), %xmm10
	pshufb	%xmm10,%xmm0
	pxor	%xmm0, %xmm8			/* hash the ciphertext */
.endif

	movdqu	%xmm8, AadHash(%arg2)
.ifc \operation, enc
	/* undo the byte swap so the output bytes are written correctly */
	movdqa	SHUF_MASK(%rip), %xmm10
	pshufb	%xmm10, %xmm0
.endif

	/* write the r13 output bytes: 8 at a time, then byte by byte */
	movq	%xmm0, %rax
	cmp	$8, %r13
	jle	_less_than_8_bytes_left_\@
	mov	%rax, (%arg3 , %r11, 1)
	add	$8, %r11
	psrldq	$8, %xmm0
	movq	%xmm0, %rax
	sub	$8, %r13
_less_than_8_bytes_left_\@:
	mov	%al, (%arg3, %r11, 1)
	add	$1, %r11
	shr	$8, %rax
	sub	$1, %r13
	jne	_less_than_8_bytes_left_\@
_multiple_of_16_bytes_\@:
.endm
426
427
428
429
/*
 * GCM_COMPLETE: finish the GHASH, compute the authentication tag and
 * write \AUTHTAGLEN bytes of it to \AUTHTAG.
 *
 * Folds a pending partial block into the hash, hashes the 128-bit
 * (AadLen||InLen) length block (lengths in BITS), byte-swaps the digest
 * and XORs it with E_K(J0) to form the tag.  The tag is then stored in
 * 16/8/4/2/1-byte pieces according to \AUTHTAGLEN.
 * Clobbers %rax, %r10-%r12 and several xmm registers.
 */
.macro GCM_COMPLETE AUTHTAG AUTHTAGLEN
	movdqu	AadHash(%arg2), %xmm8
	movdqu	HashKey(%arg2), %xmm13

	mov	PBlockLen(%arg2), %r12
	test	%r12, %r12
	je	_partial_done\@

	/* a partial block was accumulated but never multiplied in */
	GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6

_partial_done\@:
	mov	AadLen(%arg2), %r12
	shl	$3, %r12			/* AAD length in bits */
	movd	%r12d, %xmm15
	mov	InLen(%arg2), %r12
	shl	$3, %r12			/* payload length in bits */
	movq	%r12, %xmm1

	pslldq	$8, %xmm15			/* xmm15 = len(A)||len(C) */
	pxor	%xmm1, %xmm15
	pxor	%xmm15, %xmm8
	GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
	movdqa	SHUF_MASK(%rip), %xmm10
	pshufb	%xmm10, %xmm8			/* back to byte order */

	movdqu	OrigIV(%arg2), %xmm0
	ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1	/* E_K(J0) */
	pxor	%xmm8, %xmm0			/* xmm0 = tag */
_return_T_\@:
	mov	\AUTHTAG, %r10			/* r10 = authTag pointer */
	mov	\AUTHTAGLEN, %r11		/* r11 = tag length */
	cmp	$16, %r11
	je	_T_16_\@
	cmp	$8, %r11
	jl	_T_4_\@
_T_8_\@:
	movq	%xmm0, %rax
	mov	%rax, (%r10)
	add	$8, %r10
	sub	$8, %r11
	psrldq	$8, %xmm0
	test	%r11, %r11
	je	_return_T_done_\@
_T_4_\@:
	movd	%xmm0, %eax
	mov	%eax, (%r10)
	add	$4, %r10
	sub	$4, %r11
	psrldq	$4, %xmm0
	test	%r11, %r11
	je	_return_T_done_\@
_T_123_\@:
	/* 1-3 bytes remain */
	movd	%xmm0, %eax
	cmp	$2, %r11
	jl	_T_1_\@
	mov	%ax, (%r10)
	cmp	$2, %r11
	je	_return_T_done_\@
	add	$2, %r10
	sar	$16, %eax
_T_1_\@:
	mov	%al, (%r10)
	jmp	_return_T_done_\@
_T_16_\@:
	movdqu	%xmm0, (%r10)
_return_T_done_\@:
.endm
499
500#ifdef __x86_64__
501
502
503
504
505
506
507
508
509
/*
 * GHASH_MUL: GH = GH * HK in GF(2^128), bit-reflected representation.
 *
 * Karatsuba carry-less multiply (3 pclmulqdq instead of 4) followed by
 * the two-phase shift-based reduction by x^128 + x^7 + x^2 + x + 1,
 * per the Gueron/Krasnov AES-GCM white paper.  \HK is preserved;
 * TMP1-TMP5 are clobbered.
 */
.macro GHASH_MUL GH HK TMP1 TMP2 TMP3 TMP4 TMP5
	movdqa	\GH, \TMP1
	pshufd	$78, \GH, \TMP2
	pshufd	$78, \HK, \TMP3
	pxor	\GH, \TMP2		/* TMP2 = a1+a0 */
	pxor	\HK, \TMP3		/* TMP3 = b1+b0 */
	pclmulqdq $0x11, \HK, \TMP1	/* TMP1 = a1*b1 */
	pclmulqdq $0x00, \HK, \GH	/* GH   = a0*b0 */
	pclmulqdq $0x00, \TMP3, \TMP2	/* TMP2 = (a0+a1)*(b1+b0) */
	pxor	\GH, \TMP2
	pxor	\TMP1, \TMP2		/* TMP2 = middle term */
	movdqa	\TMP2, \TMP3
	pslldq	$8, \TMP3		/* left half of middle term */
	psrldq	$8, \TMP2		/* right half of middle term */
	pxor	\TMP3, \GH
	pxor	\TMP2, \TMP1		/* <TMP1:GH> = 256-bit product */

	/* first phase of the reduction */
	movdqa	\GH, \TMP2
	movdqa	\GH, \TMP3
	movdqa	\GH, \TMP4		/* shift independently in 3 copies */
	pslld	$31, \TMP2		/* packed right shifting << 31 */
	pslld	$30, \TMP3		/* packed right shifting << 30 */
	pslld	$25, \TMP4		/* packed right shifting << 25 */
	pxor	\TMP3, \TMP2		/* xor the shifted versions */
	pxor	\TMP4, \TMP2
	movdqa	\TMP2, \TMP5
	psrldq	$4, \TMP5		/* right shift TMP5 1 DW */
	pslldq	$12, \TMP2		/* left shift TMP2 3 DWs */
	pxor	\TMP2, \GH

	/* second phase of the reduction */
	movdqa	\GH,\TMP2		/* make 3 copies for shifting */
	movdqa	\GH,\TMP3
	movdqa	\GH,\TMP4
	psrld	$1,\TMP2		/* packed left shifting >> 1 */
	psrld	$2,\TMP3		/* packed left shifting >> 2 */
	psrld	$7,\TMP4		/* packed left shifting >> 7 */
	pxor	\TMP3,\TMP2		/* xor the shifted versions */
	pxor	\TMP4,\TMP2
	pxor	\TMP5, \TMP2
	pxor	\TMP2, \GH
	pxor	\TMP1, \GH		/* result is in GH */
.endm
560
561
562
563
/*
 * READ_PARTIAL_BLOCK: read \DLEN (1..15) bytes from \DPTR into the low
 * bytes of \XMMDst without ever touching memory past DPTR+DLEN.
 * Reads 8 bytes at once when possible, then assembles the remainder a
 * byte at a time.  Clobbers %rax and \DLEN; \XMM1 is scratch.
 */
.macro READ_PARTIAL_BLOCK DPTR DLEN XMM1 XMMDst
	cmp	$8, \DLEN
	jl	_read_lt8_\@
	mov	(\DPTR), %rax		/* safe: at least 8 bytes exist */
	movq	%rax, \XMMDst
	sub	$8, \DLEN
	jz	_done_read_partial_block_\@
	xor	%eax, %eax
_read_next_byte_\@:
	/* accumulate bytes 8..DLEN-1 high-to-low into rax */
	shl	$8, %rax
	mov	7(\DPTR, \DLEN, 1), %al
	dec	\DLEN
	jnz	_read_next_byte_\@
	movq	%rax, \XMM1
	pslldq	$8, \XMM1		/* place in the upper qword */
	por	\XMM1, \XMMDst
	jmp	_done_read_partial_block_\@
_read_lt8_\@:
	xor	%eax, %eax
_read_next_byte_lt8_\@:
	shl	$8, %rax
	mov	-1(\DPTR, \DLEN, 1), %al
	dec	\DLEN
	jnz	_read_next_byte_lt8_\@
	movq	%rax, \XMMDst
_done_read_partial_block_\@:
.endm
591
592
593
/*
 * CALC_AAD_HASH: GHASH the additional authenticated data.
 * Processes \AAD (\AADLEN bytes) 16 bytes at a time, reading any
 * trailing partial block safely via READ_PARTIAL_BLOCK (implicitly
 * zero-padded), and stores the result in AadHash(%arg2).
 * Clobbers %rax, %r10, %r11, %xmm14 and the TMP registers.
 */
.macro CALC_AAD_HASH HASHKEY AAD AADLEN TMP1 TMP2 TMP3 TMP4 TMP5 \
	TMP6 TMP7
	MOVADQ	SHUF_MASK(%rip), %xmm14
	mov	\AAD, %r10		/* r10 = AAD cursor */
	mov	\AADLEN, %r11		/* r11 = bytes remaining */
	pxor	\TMP7, \TMP7
	pxor	\TMP6, \TMP6		/* TMP6 = running hash */

	cmp	$16, %r11
	jl	_get_AAD_rest\@
_get_AAD_blocks\@:
	movdqu	(%r10), \TMP7
	pshufb	%xmm14, \TMP7		/* byte-reflect the AAD block */
	pxor	\TMP7, \TMP6
	GHASH_MUL \TMP6, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5
	add	$16, %r10
	sub	$16, %r11
	cmp	$16, %r11
	jge	_get_AAD_blocks\@

	movdqu	\TMP6, \TMP7

_get_AAD_rest\@:
	test	%r11, %r11
	je	_get_AAD_done\@

	/* partial last block: zero-extended by READ_PARTIAL_BLOCK */
	READ_PARTIAL_BLOCK %r10, %r11, \TMP1, \TMP7
	pshufb	%xmm14, \TMP7
	pxor	\TMP6, \TMP7
	GHASH_MUL \TMP7, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5
	movdqu	\TMP7, \TMP6

_get_AAD_done\@:
	movdqu	\TMP6, AadHash(%arg2)
.endm
630
631
632
633
634
635
/*
 * PARTIAL_BLOCK: if a previous update call left PBlockLen (1..15) bytes
 * of an unfinished block, combine up to (16 - PBlockLen) new input bytes
 * with the saved encrypted counter (PBlockEncKey), emit the resulting
 * output bytes, and fold the completed block into \AAD_HASH.  If the
 * block is still incomplete afterwards, PBlockLen is just increased.
 * On exit \DATA_OFFSET holds the number of input bytes consumed here.
 * Uses the SHIFT_MASK/ALL_F tables to shift and mask partial data.
 * Clobbers %rax, %r10, %r12, %r13 and several xmm registers.
 */
.macro PARTIAL_BLOCK CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \
	AAD_HASH operation
	mov	PBlockLen(%arg2), %r13
	test	%r13, %r13
	je	_partial_block_done_\@	/* nothing pending */

	/* read the next 16 input bytes, or fewer if that is all there is */
	cmp	$16, \PLAIN_CYPH_LEN
	jl	_fewer_than_16_bytes_\@
	movups	(\PLAIN_CYPH_IN), %xmm1
	jmp	_data_read_\@

_fewer_than_16_bytes_\@:
	lea	(\PLAIN_CYPH_IN, \DATA_OFFSET, 1), %r10
	mov	\PLAIN_CYPH_LEN, %r12
	READ_PARTIAL_BLOCK %r10 %r12 %xmm0 %xmm1

	mov	PBlockLen(%arg2), %r13

_data_read_\@:
	movdqu	PBlockEncKey(%arg2), %xmm9	/* saved E_K(counter) */
	movdqu	HashKey(%arg2), %xmm13

	/* rotate the keystream so its unused bytes line up with the
	 * new input bytes */
	lea	SHIFT_MASK(%rip), %r12
	add	%r13, %r12
	movdqu	(%r12), %xmm2
	pshufb	%xmm2, %xmm9

.ifc \operation, dec
	movdqa	%xmm1, %xmm3		/* keep ciphertext for the hash */
	pxor	%xmm1, %xmm9		/* cyphertext XOR E(K, Yn) */

	mov	\PLAIN_CYPH_LEN, %r10
	add	%r13, %r10
	/* r10 = (bytes in this call + pending) - 16; negative while the
	 * block is still incomplete */
	sub	$16, %r10
	jge	_no_extra_mask_1_\@
	sub	%r10, %r12
_no_extra_mask_1_\@:
	/* mask out the bottom r13 bytes of xmm9 */
	movdqu	ALL_F-SHIFT_MASK(%r12), %xmm1
	pand	%xmm1, %xmm9

	pand	%xmm1, %xmm3
	movdqa	SHUF_MASK(%rip), %xmm10
	pshufb	%xmm10, %xmm3
	pshufb	%xmm2, %xmm3
	pxor	%xmm3, \AAD_HASH

	test	%r10, %r10
	jl	_partial_incomplete_1_\@

	/* block completed: multiply it in and clear PBlockLen */
	GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
	xor	%eax, %eax
	mov	%rax, PBlockLen(%arg2)
	jmp	_dec_done_\@
_partial_incomplete_1_\@:
	add	\PLAIN_CYPH_LEN, PBlockLen(%arg2)
_dec_done_\@:
	movdqu	\AAD_HASH, AadHash(%arg2)
.else
	pxor	%xmm1, %xmm9		/* plaintext XOR E(K, Yn) */

	mov	\PLAIN_CYPH_LEN, %r10
	add	%r13, %r10
	/* as above: r10 >= 0 once the block is complete */
	sub	$16, %r10
	jge	_no_extra_mask_2_\@
	sub	%r10, %r12
_no_extra_mask_2_\@:
	/* mask out the bottom r13 bytes of xmm9 */
	movdqu	ALL_F-SHIFT_MASK(%r12), %xmm1
	pand	%xmm1, %xmm9

	movdqa	SHUF_MASK(%rip), %xmm1
	pshufb	%xmm1, %xmm9
	pshufb	%xmm2, %xmm9
	pxor	%xmm9, \AAD_HASH

	test	%r10, %r10
	jl	_partial_incomplete_2_\@

	/* block completed: multiply it in and clear PBlockLen */
	GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
	xor	%eax, %eax
	mov	%rax, PBlockLen(%arg2)
	jmp	_encode_done_\@
_partial_incomplete_2_\@:
	add	\PLAIN_CYPH_LEN, PBlockLen(%arg2)
_encode_done_\@:
	movdqu	\AAD_HASH, AadHash(%arg2)

	/* shuffle xmm9 back into output byte order */
	movdqa	SHUF_MASK(%rip), %xmm10
	pshufb	%xmm10, %xmm9
	pshufb	%xmm2, %xmm9
.endif
	/* output the encoded bytes */
	test	%r10, %r10
	jl	_partial_fill_\@
	mov	%r13, %r12
	mov	$16, %r13
	/* block completed: write the 16 - (old PBlockLen) new bytes */
	sub	%r12, %r13
	jmp	_count_set_\@
_partial_fill_\@:
	mov	\PLAIN_CYPH_LEN, %r13
_count_set_\@:
	movdqa	%xmm9, %xmm0
	movq	%xmm0, %rax
	cmp	$8, %r13
	jle	_less_than_8_bytes_left_\@

	mov	%rax, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)
	add	$8, \DATA_OFFSET
	psrldq	$8, %xmm0
	movq	%xmm0, %rax
	sub	$8, %r13
_less_than_8_bytes_left_\@:
	movb	%al, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)
	add	$1, \DATA_OFFSET
	shr	$8, %rax
	sub	$1, %r13
	jne	_less_than_8_bytes_left_\@
_partial_block_done_\@:
.endm
774
775
776
777
778
779
780
781
782
783
784
785
786
/*
 * INITIAL_BLOCKS_ENC_DEC: process the first 0-3 blocks so the remaining
 * count is a multiple of 4, then (if >= 64 bytes remain) prepare four
 * encrypted counter blocks for the main loop.
 *
 * \i encodes how many initial blocks to do: 5 -> 3 blocks, 6 -> 2,
 * 7 -> 1, 8 -> 0; \i_seq lists the xmm register numbers used for them
 * (678, 78, 8 or 0).  %xmm\i receives AadHash so that after the single
 * blocks the hash lives in %xmm8 in every case.  The just-processed
 * blocks are folded into the hash with the \i-specific chains below.
 * For dec the saved ciphertext (not the output) is what gets hashed.
 * Clobbers %rax, %r10, %r11 (data offset, advanced), %xmm14 and the
 * listed registers.
 */
.macro INITIAL_BLOCKS_ENC_DEC TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \
	XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
	MOVADQ	SHUF_MASK(%rip), %xmm14

	movdqu	AadHash(%arg2), %xmm\i		/* hash -> xmm8 via \i */

	movdqu	CurCount(%arg2), \XMM0		/* XMM0 = counter Y0 */

.if (\i == 5) || (\i == 6) || (\i == 7)
	/* 1-3 single blocks: build counters, AES them, XOR with input */
	MOVADQ	ONE(%RIP),\TMP1
	MOVADQ	0(%arg1),\TMP2			/* round-0 key */
.irpc index, \i_seq
	paddd	\TMP1, \XMM0			/* INCR Y0 */
.ifc \operation, dec
	movdqa	\XMM0, %xmm\index
.else
	MOVADQ	\XMM0, %xmm\index
.endif
	pshufb	%xmm14, %xmm\index		/* perform a 16 byte swap */
	pxor	\TMP2, %xmm\index		/* round 0 */
.endr
	lea	0x10(%arg1),%r10
	mov	keysize,%eax
	shr	$2,%eax				/* 128->4, 192->6, 256->8 */
	add	$5,%eax				/* middle round count */

aes_loop_initial_\@:
	MOVADQ	(%r10),\TMP1
.irpc index, \i_seq
	aesenc	\TMP1, %xmm\index
.endr
	add	$16,%r10
	sub	$1,%eax
	jnz	aes_loop_initial_\@

	MOVADQ	(%r10), \TMP1
.irpc index, \i_seq
	aesenclast \TMP1, %xmm\index		/* last round */
.endr
.irpc index, \i_seq
	movdqu	(%arg4 , %r11, 1), \TMP1
	pxor	\TMP1, %xmm\index
	movdqu	%xmm\index, (%arg3 , %r11, 1)	/* write output block */
	add	$16, %r11
.ifc \operation, dec
	movdqa	\TMP1, %xmm\index		/* hash ciphertext input */
.endif
	pshufb	%xmm14, %xmm\index		/* prepare block for GHASH */
.endr
.endif

	/* fold the initial blocks into the hash (chain depends on count) */
.if \i == 5
	pxor	%xmm5, %xmm6
	GHASH_MUL %xmm6, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
	pxor	%xmm6, %xmm7
	GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
	pxor	%xmm7, %xmm8
	GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
.elseif \i == 6
	pxor	%xmm6, %xmm7
	GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
	pxor	%xmm7, %xmm8
	GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
.elseif \i == 7
	pxor	%xmm7, %xmm8
	GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
.endif
	cmp	$64, %r13
	jl	_initial_blocks_done\@
	/* no need for precomputed values when there are fewer than
	 * 64 remaining bytes */

	/* prepare 4 counter blocks for the first main-loop iteration */
	MOVADQ	ONE(%RIP),\TMP1
	paddd	\TMP1, \XMM0			/* INCR Y0 */
	MOVADQ	\XMM0, \XMM1
	pshufb	%xmm14, \XMM1			/* perform a 16 byte swap */

	paddd	\TMP1, \XMM0			/* INCR Y0 */
	MOVADQ	\XMM0, \XMM2
	pshufb	%xmm14, \XMM2			/* perform a 16 byte swap */

	paddd	\TMP1, \XMM0			/* INCR Y0 */
	MOVADQ	\XMM0, \XMM3
	pshufb	%xmm14, \XMM3			/* perform a 16 byte swap */

	paddd	\TMP1, \XMM0			/* INCR Y0 */
	MOVADQ	\XMM0, \XMM4
	pshufb	%xmm14, \XMM4			/* perform a 16 byte swap */

	MOVADQ	0(%arg1),\TMP1
	pxor	\TMP1, \XMM1
	pxor	\TMP1, \XMM2
	pxor	\TMP1, \XMM3
	pxor	\TMP1, \XMM4
.irpc index, 1234
	movaps	0x10*\index(%arg1), \TMP1
	aesenc	\TMP1, \XMM1
	aesenc	\TMP1, \XMM2
	aesenc	\TMP1, \XMM3
	aesenc	\TMP1, \XMM4
.endr
.irpc index, 56789
	movaps	0x10*\index(%arg1), \TMP1
	aesenc	\TMP1, \XMM1
	aesenc	\TMP1, \XMM2
	aesenc	\TMP1, \XMM3
	aesenc	\TMP1, \XMM4
.endr
	/* extra rounds for AES-192/256 */
	lea	0xa0(%arg1),%r10
	mov	keysize,%eax
	shr	$2,%eax				/* 128->4, 192->6, 256->8 */
	sub	$4,%eax				/* 128->0, 192->2, 256->4 */
	jz	aes_loop_pre_done\@

aes_loop_pre_\@:
	MOVADQ	(%r10),\TMP2
.irpc index, 1234
	aesenc	\TMP2, %xmm\index
.endr
	add	$16,%r10
	sub	$1,%eax
	jnz	aes_loop_pre_\@

aes_loop_pre_done\@:
	MOVADQ	(%r10), \TMP2
	aesenclast \TMP2, \XMM1
	aesenclast \TMP2, \XMM2
	aesenclast \TMP2, \XMM3
	aesenclast \TMP2, \XMM4
	movdqu	16*0(%arg4 , %r11 , 1), \TMP1
	pxor	\TMP1, \XMM1
.ifc \operation, dec
	movdqu	\XMM1, 16*0(%arg3 , %r11 , 1)
	movdqa	\TMP1, \XMM1		/* keep ciphertext for GHASH */
.endif
	movdqu	16*1(%arg4 , %r11 , 1), \TMP1
	pxor	\TMP1, \XMM2
.ifc \operation, dec
	movdqu	\XMM2, 16*1(%arg3 , %r11 , 1)
	movdqa	\TMP1, \XMM2
.endif
	movdqu	16*2(%arg4 , %r11 , 1), \TMP1
	pxor	\TMP1, \XMM3
.ifc \operation, dec
	movdqu	\XMM3, 16*2(%arg3 , %r11 , 1)
	movdqa	\TMP1, \XMM3
.endif
	movdqu	16*3(%arg4 , %r11 , 1), \TMP1
	pxor	\TMP1, \XMM4
.ifc \operation, dec
	movdqu	\XMM4, 16*3(%arg3 , %r11 , 1)
	movdqa	\TMP1, \XMM4
.else
	movdqu	\XMM1, 16*0(%arg3 , %r11 , 1)
	movdqu	\XMM2, 16*1(%arg3 , %r11 , 1)
	movdqu	\XMM3, 16*2(%arg3 , %r11 , 1)
	movdqu	\XMM4, 16*3(%arg3 , %r11 , 1)
.endif

	add	$64, %r11
	pshufb	%xmm14, \XMM1			/* perform a 16 byte swap */
	pxor	\XMMDst, \XMM1			/* combine with existing hash */

	pshufb	%xmm14, \XMM2			/* perform a 16 byte swap */
	pshufb	%xmm14, \XMM3			/* perform a 16 byte swap */
	pshufb	%xmm14, \XMM4			/* perform a 16 byte swap */

_initial_blocks_done\@:

.endm
969
970
971
972
973
974
975
/*
 * GHASH_4_ENCRYPT_4_PARALLEL_enc: encrypt 4 counter blocks while
 * GHASHing the 4 ciphertext blocks produced by the PREVIOUS iteration
 * (held in XMM1-XMM4 on entry; new ciphertext replaces them on exit).
 * The Karatsuba partial products use HashKey_4..HashKey and the
 * precomputed *_k values, interleaved with the AES rounds to hide
 * latency.  The trailing \operation argument is unused (kept for
 * call-site symmetry with the dec variant).
 * Clobbers %rax, %r10, %xmm15 and all TMP/XMM arguments.
 */
.macro GHASH_4_ENCRYPT_4_PARALLEL_enc TMP1 TMP2 TMP3 TMP4 TMP5 \
TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation

	/* keep the previous ciphertext blocks for GHASH */
	movdqa	\XMM1, \XMM5
	movdqa	\XMM2, \XMM6
	movdqa	\XMM3, \XMM7
	movdqa	\XMM4, \XMM8

	movdqa	SHUF_MASK(%rip), %xmm15

	/* GHASH block 4 (XMM5) * H^4, start building 4 new counters */
	movdqa	\XMM5, \TMP4
	pshufd	$78, \XMM5, \TMP6
	pxor	\XMM5, \TMP6
	paddd	ONE(%rip), \XMM0		/* INCR CNT */
	movdqu	HashKey_4(%arg2), \TMP5
	pclmulqdq $0x11, \TMP5, \TMP4		/* TMP4 = a1*b1 */
	movdqa	\XMM0, \XMM1
	paddd	ONE(%rip), \XMM0		/* INCR CNT */
	movdqa	\XMM0, \XMM2
	paddd	ONE(%rip), \XMM0		/* INCR CNT */
	movdqa	\XMM0, \XMM3
	paddd	ONE(%rip), \XMM0		/* INCR CNT */
	movdqa	\XMM0, \XMM4
	pshufb	%xmm15, \XMM1			/* perform a 16 byte swap */
	pclmulqdq $0x00, \TMP5, \XMM5		/* XMM5 = a0*b0 */
	pshufb	%xmm15, \XMM2
	pshufb	%xmm15, \XMM3
	pshufb	%xmm15, \XMM4

	pxor	(%arg1), \XMM1			/* AES round 0 */
	pxor	(%arg1), \XMM2
	pxor	(%arg1), \XMM3
	pxor	(%arg1), \XMM4
	movdqu	HashKey_4_k(%arg2), \TMP5
	pclmulqdq $0x00, \TMP5, \TMP6		/* TMP6 = (a1+a0)*(b1+b0) */
	movaps	0x10(%arg1), \TMP1
	aesenc	\TMP1, \XMM1			/* AES round 1 */
	aesenc	\TMP1, \XMM2
	aesenc	\TMP1, \XMM3
	aesenc	\TMP1, \XMM4
	movaps	0x20(%arg1), \TMP1
	aesenc	\TMP1, \XMM1			/* AES round 2 */
	aesenc	\TMP1, \XMM2
	aesenc	\TMP1, \XMM3
	aesenc	\TMP1, \XMM4
	/* GHASH block 3 (XMM6) * H^3 */
	movdqa	\XMM6, \TMP1
	pshufd	$78, \XMM6, \TMP2
	pxor	\XMM6, \TMP2
	movdqu	HashKey_3(%arg2), \TMP5
	pclmulqdq $0x11, \TMP5, \TMP1		/* TMP1 = a1*b1 */
	movaps	0x30(%arg1), \TMP3
	aesenc	\TMP3, \XMM1			/* AES round 3 */
	aesenc	\TMP3, \XMM2
	aesenc	\TMP3, \XMM3
	aesenc	\TMP3, \XMM4
	pclmulqdq $0x00, \TMP5, \XMM6		/* XMM6 = a0*b0 */
	movaps	0x40(%arg1), \TMP3
	aesenc	\TMP3, \XMM1			/* AES round 4 */
	aesenc	\TMP3, \XMM2
	aesenc	\TMP3, \XMM3
	aesenc	\TMP3, \XMM4
	movdqu	HashKey_3_k(%arg2), \TMP5
	pclmulqdq $0x00, \TMP5, \TMP2		/* TMP2 = (a1+a0)*(b1+b0) */
	movaps	0x50(%arg1), \TMP3
	aesenc	\TMP3, \XMM1			/* AES round 5 */
	aesenc	\TMP3, \XMM2
	aesenc	\TMP3, \XMM3
	aesenc	\TMP3, \XMM4
	pxor	\TMP1, \TMP4			/* accumulate high products */
	pxor	\XMM6, \XMM5			/* accumulate low products */
	pxor	\TMP2, \TMP6			/* accumulate middle */
	/* GHASH block 2 (XMM7) * H^2 */
	movdqa	\XMM7, \TMP1
	pshufd	$78, \XMM7, \TMP2
	pxor	\XMM7, \TMP2
	movdqu	HashKey_2(%arg2), \TMP5

	pclmulqdq $0x11, \TMP5, \TMP1		/* TMP1 = a1*b1 */
	movaps	0x60(%arg1), \TMP3
	aesenc	\TMP3, \XMM1			/* AES round 6 */
	aesenc	\TMP3, \XMM2
	aesenc	\TMP3, \XMM3
	aesenc	\TMP3, \XMM4
	pclmulqdq $0x00, \TMP5, \XMM7		/* XMM7 = a0*b0 */
	movaps	0x70(%arg1), \TMP3
	aesenc	\TMP3, \XMM1			/* AES round 7 */
	aesenc	\TMP3, \XMM2
	aesenc	\TMP3, \XMM3
	aesenc	\TMP3, \XMM4
	movdqu	HashKey_2_k(%arg2), \TMP5
	pclmulqdq $0x00, \TMP5, \TMP2		/* TMP2 = (a1+a0)*(b1+b0) */
	movaps	0x80(%arg1), \TMP3
	aesenc	\TMP3, \XMM1			/* AES round 8 */
	aesenc	\TMP3, \XMM2
	aesenc	\TMP3, \XMM3
	aesenc	\TMP3, \XMM4
	pxor	\TMP1, \TMP4
	pxor	\XMM7, \XMM5
	pxor	\TMP2, \TMP6

	/* GHASH block 1 (XMM8) * H, final AES rounds */
	movdqa	\XMM8, \TMP1
	pshufd	$78, \XMM8, \TMP2
	pxor	\XMM8, \TMP2
	movdqu	HashKey(%arg2), \TMP5
	pclmulqdq $0x11, \TMP5, \TMP1		/* TMP1 = a1*b1 */
	movaps	0x90(%arg1), \TMP3
	aesenc	\TMP3, \XMM1			/* AES round 9 */
	aesenc	\TMP3, \XMM2
	aesenc	\TMP3, \XMM3
	aesenc	\TMP3, \XMM4
	pclmulqdq $0x00, \TMP5, \XMM8		/* XMM8 = a0*b0 */
	/* extra rounds for AES-192/256 */
	lea	0xa0(%arg1),%r10
	mov	keysize,%eax
	shr	$2,%eax				/* 128->4, 192->6, 256->8 */
	sub	$4,%eax				/* 128->0, 192->2, 256->4 */
	jz	aes_loop_par_enc_done\@

aes_loop_par_enc\@:
	MOVADQ	(%r10),\TMP3
.irpc index, 1234
	aesenc	\TMP3, %xmm\index
.endr
	add	$16,%r10
	sub	$1,%eax
	jnz	aes_loop_par_enc\@

aes_loop_par_enc_done\@:
	MOVADQ	(%r10), \TMP3
	aesenclast \TMP3, \XMM1			/* last round */
	aesenclast \TMP3, \XMM2
	aesenclast \TMP3, \XMM3
	aesenclast \TMP3, \XMM4
	movdqu	HashKey_k(%arg2), \TMP5
	pclmulqdq $0x00, \TMP5, \TMP2		/* TMP2 = (a1+a0)*(b1+b0) */
	movdqu	(%arg4,%r11,1), \TMP3
	pxor	\TMP3, \XMM1			/* ciphertext block 1 */
	movdqu	16(%arg4,%r11,1), \TMP3
	pxor	\TMP3, \XMM2
	movdqu	32(%arg4,%r11,1), \TMP3
	pxor	\TMP3, \XMM3
	movdqu	48(%arg4,%r11,1), \TMP3
	pxor	\TMP3, \XMM4
	movdqu	\XMM1, (%arg3,%r11,1)		/* write ciphertext */
	movdqu	\XMM2, 16(%arg3,%r11,1)
	movdqu	\XMM3, 32(%arg3,%r11,1)
	movdqu	\XMM4, 48(%arg3,%r11,1)
	pshufb	%xmm15, \XMM1			/* prepare for next GHASH */
	pshufb	%xmm15, \XMM2
	pshufb	%xmm15, \XMM3
	pshufb	%xmm15, \XMM4

	/* combine the Karatsuba terms: <TMP1:XMM5> = 256-bit product */
	pxor	\TMP4, \TMP1
	pxor	\XMM8, \XMM5
	pxor	\TMP6, \TMP2
	pxor	\TMP1, \TMP2
	pxor	\XMM5, \TMP2
	movdqa	\TMP2, \TMP3
	pslldq	$8, \TMP3			/* left half of middle term */
	psrldq	$8, \TMP2			/* right half of middle term */
	pxor	\TMP3, \XMM5
	pxor	\TMP2, \TMP1

	/* first phase of reduction */
	movdqa	\XMM5, \TMP2
	movdqa	\XMM5, \TMP3
	movdqa	\XMM5, \TMP4
	pslld	$31, \TMP2			/* packed right shift << 31 */
	pslld	$30, \TMP3			/* packed right shift << 30 */
	pslld	$25, \TMP4			/* packed right shift << 25 */
	pxor	\TMP3, \TMP2			/* xor the shifted versions */
	pxor	\TMP4, \TMP2
	movdqa	\TMP2, \TMP5
	psrldq	$4, \TMP5			/* right shift T5 1 DW */
	pslldq	$12, \TMP2			/* left shift T2 3 DWs */
	pxor	\TMP2, \XMM5

	/* second phase of reduction */
	movdqa	\XMM5,\TMP2
	movdqa	\XMM5,\TMP3
	movdqa	\XMM5,\TMP4
	psrld	$1, \TMP2			/* packed left shift >>1 */
	psrld	$2, \TMP3			/* packed left shift >>2 */
	psrld	$7, \TMP4			/* packed left shift >>7 */
	pxor	\TMP3,\TMP2			/* xor the shifted versions */
	pxor	\TMP4,\TMP2
	pxor	\TMP5, \TMP2
	pxor	\TMP2, \XMM5
	pxor	\TMP1, \XMM5			/* result is in XMM5 */

	pxor	\XMM5, \XMM1			/* fold into next block 1 */
.endm
1177
1178
1179
1180
1181
1182
1183
/*
 * GHASH_4_ENCRYPT_4_PARALLEL_dec: decrypt 4 counter blocks while
 * GHASHing the 4 ciphertext blocks of the PREVIOUS iteration.
 * Identical structure to the _enc variant except that after the
 * keystream XOR the INPUT ciphertext (TMP3) is copied back into
 * XMM1-XMM4, so the next GHASH round hashes ciphertext, not plaintext.
 * The trailing \operation argument is unused.
 * Clobbers %rax, %r10, %xmm15 and all TMP/XMM arguments.
 */
.macro GHASH_4_ENCRYPT_4_PARALLEL_dec TMP1 TMP2 TMP3 TMP4 TMP5 \
TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation

	/* keep the previous ciphertext blocks for GHASH */
	movdqa	\XMM1, \XMM5
	movdqa	\XMM2, \XMM6
	movdqa	\XMM3, \XMM7
	movdqa	\XMM4, \XMM8

	movdqa	SHUF_MASK(%rip), %xmm15

	/* GHASH block 4 (XMM5) * H^4, start building 4 new counters */
	movdqa	\XMM5, \TMP4
	pshufd	$78, \XMM5, \TMP6
	pxor	\XMM5, \TMP6
	paddd	ONE(%rip), \XMM0		/* INCR CNT */
	movdqu	HashKey_4(%arg2), \TMP5
	pclmulqdq $0x11, \TMP5, \TMP4		/* TMP4 = a1*b1 */
	movdqa	\XMM0, \XMM1
	paddd	ONE(%rip), \XMM0		/* INCR CNT */
	movdqa	\XMM0, \XMM2
	paddd	ONE(%rip), \XMM0		/* INCR CNT */
	movdqa	\XMM0, \XMM3
	paddd	ONE(%rip), \XMM0		/* INCR CNT */
	movdqa	\XMM0, \XMM4
	pshufb	%xmm15, \XMM1			/* perform a 16 byte swap */
	pclmulqdq $0x00, \TMP5, \XMM5		/* XMM5 = a0*b0 */
	pshufb	%xmm15, \XMM2
	pshufb	%xmm15, \XMM3
	pshufb	%xmm15, \XMM4

	pxor	(%arg1), \XMM1			/* AES round 0 */
	pxor	(%arg1), \XMM2
	pxor	(%arg1), \XMM3
	pxor	(%arg1), \XMM4
	movdqu	HashKey_4_k(%arg2), \TMP5
	pclmulqdq $0x00, \TMP5, \TMP6		/* TMP6 = (a1+a0)*(b1+b0) */
	movaps	0x10(%arg1), \TMP1
	aesenc	\TMP1, \XMM1			/* AES round 1 */
	aesenc	\TMP1, \XMM2
	aesenc	\TMP1, \XMM3
	aesenc	\TMP1, \XMM4
	movaps	0x20(%arg1), \TMP1
	aesenc	\TMP1, \XMM1			/* AES round 2 */
	aesenc	\TMP1, \XMM2
	aesenc	\TMP1, \XMM3
	aesenc	\TMP1, \XMM4
	/* GHASH block 3 (XMM6) * H^3 */
	movdqa	\XMM6, \TMP1
	pshufd	$78, \XMM6, \TMP2
	pxor	\XMM6, \TMP2
	movdqu	HashKey_3(%arg2), \TMP5
	pclmulqdq $0x11, \TMP5, \TMP1		/* TMP1 = a1*b1 */
	movaps	0x30(%arg1), \TMP3
	aesenc	\TMP3, \XMM1			/* AES round 3 */
	aesenc	\TMP3, \XMM2
	aesenc	\TMP3, \XMM3
	aesenc	\TMP3, \XMM4
	pclmulqdq $0x00, \TMP5, \XMM6		/* XMM6 = a0*b0 */
	movaps	0x40(%arg1), \TMP3
	aesenc	\TMP3, \XMM1			/* AES round 4 */
	aesenc	\TMP3, \XMM2
	aesenc	\TMP3, \XMM3
	aesenc	\TMP3, \XMM4
	movdqu	HashKey_3_k(%arg2), \TMP5
	pclmulqdq $0x00, \TMP5, \TMP2		/* TMP2 = (a1+a0)*(b1+b0) */
	movaps	0x50(%arg1), \TMP3
	aesenc	\TMP3, \XMM1			/* AES round 5 */
	aesenc	\TMP3, \XMM2
	aesenc	\TMP3, \XMM3
	aesenc	\TMP3, \XMM4
	pxor	\TMP1, \TMP4			/* accumulate high products */
	pxor	\XMM6, \XMM5			/* accumulate low products */
	pxor	\TMP2, \TMP6			/* accumulate middle */
	/* GHASH block 2 (XMM7) * H^2 */
	movdqa	\XMM7, \TMP1
	pshufd	$78, \XMM7, \TMP2
	pxor	\XMM7, \TMP2
	movdqu	HashKey_2(%arg2), \TMP5

	pclmulqdq $0x11, \TMP5, \TMP1		/* TMP1 = a1*b1 */
	movaps	0x60(%arg1), \TMP3
	aesenc	\TMP3, \XMM1			/* AES round 6 */
	aesenc	\TMP3, \XMM2
	aesenc	\TMP3, \XMM3
	aesenc	\TMP3, \XMM4
	pclmulqdq $0x00, \TMP5, \XMM7		/* XMM7 = a0*b0 */
	movaps	0x70(%arg1), \TMP3
	aesenc	\TMP3, \XMM1			/* AES round 7 */
	aesenc	\TMP3, \XMM2
	aesenc	\TMP3, \XMM3
	aesenc	\TMP3, \XMM4
	movdqu	HashKey_2_k(%arg2), \TMP5
	pclmulqdq $0x00, \TMP5, \TMP2		/* TMP2 = (a1+a0)*(b1+b0) */
	movaps	0x80(%arg1), \TMP3
	aesenc	\TMP3, \XMM1			/* AES round 8 */
	aesenc	\TMP3, \XMM2
	aesenc	\TMP3, \XMM3
	aesenc	\TMP3, \XMM4
	pxor	\TMP1, \TMP4
	pxor	\XMM7, \XMM5
	pxor	\TMP2, \TMP6

	/* GHASH block 1 (XMM8) * H, final AES rounds */
	movdqa	\XMM8, \TMP1
	pshufd	$78, \XMM8, \TMP2
	pxor	\XMM8, \TMP2
	movdqu	HashKey(%arg2), \TMP5
	pclmulqdq $0x11, \TMP5, \TMP1		/* TMP1 = a1*b1 */
	movaps	0x90(%arg1), \TMP3
	aesenc	\TMP3, \XMM1			/* AES round 9 */
	aesenc	\TMP3, \XMM2
	aesenc	\TMP3, \XMM3
	aesenc	\TMP3, \XMM4
	pclmulqdq $0x00, \TMP5, \XMM8		/* XMM8 = a0*b0 */
	/* extra rounds for AES-192/256 */
	lea	0xa0(%arg1),%r10
	mov	keysize,%eax
	shr	$2,%eax				/* 128->4, 192->6, 256->8 */
	sub	$4,%eax				/* 128->0, 192->2, 256->4 */
	jz	aes_loop_par_dec_done\@

aes_loop_par_dec\@:
	MOVADQ	(%r10),\TMP3
.irpc index, 1234
	aesenc	\TMP3, %xmm\index
.endr
	add	$16,%r10
	sub	$1,%eax
	jnz	aes_loop_par_dec\@

aes_loop_par_dec_done\@:
	MOVADQ	(%r10), \TMP3
	aesenclast \TMP3, \XMM1			/* last round */
	aesenclast \TMP3, \XMM2
	aesenclast \TMP3, \XMM3
	aesenclast \TMP3, \XMM4
	movdqu	HashKey_k(%arg2), \TMP5
	pclmulqdq $0x00, \TMP5, \TMP2		/* TMP2 = (a1+a0)*(b1+b0) */
	movdqu	(%arg4,%r11,1), \TMP3
	pxor	\TMP3, \XMM1			/* decrypt block 1 */
	movdqu	\XMM1, (%arg3,%r11,1)		/* write plaintext */
	movdqa	\TMP3, \XMM1			/* next GHASH uses ciphertext */
	movdqu	16(%arg4,%r11,1), \TMP3
	pxor	\TMP3, \XMM2
	movdqu	\XMM2, 16(%arg3,%r11,1)
	movdqa	\TMP3, \XMM2
	movdqu	32(%arg4,%r11,1), \TMP3
	pxor	\TMP3, \XMM3
	movdqu	\XMM3, 32(%arg3,%r11,1)
	movdqa	\TMP3, \XMM3
	movdqu	48(%arg4,%r11,1), \TMP3
	pxor	\TMP3, \XMM4
	movdqu	\XMM4, 48(%arg3,%r11,1)
	movdqa	\TMP3, \XMM4
	pshufb	%xmm15, \XMM1			/* prepare for next GHASH */
	pshufb	%xmm15, \XMM2
	pshufb	%xmm15, \XMM3
	pshufb	%xmm15, \XMM4

	/* combine the Karatsuba terms: <TMP1:XMM5> = 256-bit product */
	pxor	\TMP4, \TMP1
	pxor	\XMM8, \XMM5
	pxor	\TMP6, \TMP2
	pxor	\TMP1, \TMP2
	pxor	\XMM5, \TMP2
	movdqa	\TMP2, \TMP3
	pslldq	$8, \TMP3			/* left half of middle term */
	psrldq	$8, \TMP2			/* right half of middle term */
	pxor	\TMP3, \XMM5
	pxor	\TMP2, \TMP1

	/* first phase of reduction */
	movdqa	\XMM5, \TMP2
	movdqa	\XMM5, \TMP3
	movdqa	\XMM5, \TMP4
	pslld	$31, \TMP2			/* packed right shift << 31 */
	pslld	$30, \TMP3			/* packed right shift << 30 */
	pslld	$25, \TMP4			/* packed right shift << 25 */
	pxor	\TMP3, \TMP2			/* xor the shifted versions */
	pxor	\TMP4, \TMP2
	movdqa	\TMP2, \TMP5
	psrldq	$4, \TMP5			/* right shift T5 1 DW */
	pslldq	$12, \TMP2			/* left shift T2 3 DWs */
	pxor	\TMP2, \XMM5

	/* second phase of reduction */
	movdqa	\XMM5,\TMP2
	movdqa	\XMM5,\TMP3
	movdqa	\XMM5,\TMP4
	psrld	$1, \TMP2			/* packed left shift >>1 */
	psrld	$2, \TMP3			/* packed left shift >>2 */
	psrld	$7, \TMP4			/* packed left shift >>7 */
	pxor	\TMP3,\TMP2			/* xor the shifted versions */
	pxor	\TMP4,\TMP2
	pxor	\TMP5, \TMP2
	pxor	\TMP2, \XMM5
	pxor	\TMP1, \XMM5			/* result is in XMM5 */

	pxor	\XMM5, \XMM1			/* fold into next block 1 */
.endm
1389
1390
/*
 * GHASH_LAST_4: fold the final four ciphertext blocks (XMM1-XMM4) into
 * the hash.  Multiplies XMM1*H^4 + XMM2*H^3 + XMM3*H^2 + XMM4*H with
 * Karatsuba, sums the partial products, performs the shift-based
 * reduction and leaves the digest in \XMMDst.
 */
.macro GHASH_LAST_4 TMP1 TMP2 TMP3 TMP4 TMP5 TMP6 \
TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst

	/* block 1: multiply XMM1 by H^4 */
	movdqa	\XMM1, \TMP6
	pshufd	$78, \XMM1, \TMP2
	pxor	\XMM1, \TMP2
	movdqu	HashKey_4(%arg2), \TMP5
	pclmulqdq $0x11, \TMP5, \TMP6		/* TMP6 = a1*b1 */
	pclmulqdq $0x00, \TMP5, \XMM1		/* XMM1 = a0*b0 */
	movdqu	HashKey_4_k(%arg2), \TMP4
	pclmulqdq $0x00, \TMP4, \TMP2		/* TMP2 = (a1+a0)*(b1+b0) */
	movdqa	\XMM1, \XMMDst
	movdqa	\TMP2, \XMM1			/* XMM1 holds a0*b0, a1*b1 */

	/* block 2: multiply XMM2 by H^3 and accumulate */
	movdqa	\XMM2, \TMP1
	pshufd	$78, \XMM2, \TMP2
	pxor	\XMM2, \TMP2
	movdqu	HashKey_3(%arg2), \TMP5
	pclmulqdq $0x11, \TMP5, \TMP1		/* TMP1 = a1*b1 */
	pclmulqdq $0x00, \TMP5, \XMM2		/* XMM2 = a0*b0 */
	movdqu	HashKey_3_k(%arg2), \TMP4
	pclmulqdq $0x00, \TMP4, \TMP2		/* TMP2 = (a1+a0)*(b1+b0) */
	pxor	\TMP1, \TMP6
	pxor	\XMM2, \XMMDst
	pxor	\TMP2, \XMM1			/* accumulate middle terms */

	/* block 3: multiply XMM3 by H^2 and accumulate */
	movdqa	\XMM3, \TMP1
	pshufd	$78, \XMM3, \TMP2
	pxor	\XMM3, \TMP2
	movdqu	HashKey_2(%arg2), \TMP5
	pclmulqdq $0x11, \TMP5, \TMP1		/* TMP1 = a1*b1 */
	pclmulqdq $0x00, \TMP5, \XMM3		/* XMM3 = a0*b0 */
	movdqu	HashKey_2_k(%arg2), \TMP4
	pclmulqdq $0x00, \TMP4, \TMP2		/* TMP2 = (a1+a0)*(b1+b0) */
	pxor	\TMP1, \TMP6
	pxor	\XMM3, \XMMDst
	pxor	\TMP2, \XMM1			/* accumulate middle terms */

	/* block 4: multiply XMM4 by H and combine everything */
	movdqa	\XMM4, \TMP1
	pshufd	$78, \XMM4, \TMP2
	pxor	\XMM4, \TMP2
	movdqu	HashKey(%arg2), \TMP5
	pclmulqdq $0x11, \TMP5, \TMP1		/* TMP1 = a1*b1 */
	pclmulqdq $0x00, \TMP5, \XMM4		/* XMM4 = a0*b0 */
	movdqu	HashKey_k(%arg2), \TMP4
	pclmulqdq $0x00, \TMP4, \TMP2		/* TMP2 = (a1+a0)*(b1+b0) */
	pxor	\TMP1, \TMP6
	pxor	\XMM4, \XMMDst
	pxor	\XMM1, \TMP2
	pxor	\TMP6, \TMP2
	pxor	\XMMDst, \TMP2
	/* middle section of the temp results combined as in Karatsuba */
	movdqa	\TMP2, \TMP4
	pslldq	$8, \TMP4			/* left shift TMP4 2 DWs */
	psrldq	$8, \TMP2			/* right shift TMP2 2 DWs */
	pxor	\TMP4, \XMMDst
	pxor	\TMP2, \TMP6
	/* <TMP6:XMMDst> holds the 256-bit result */

	/* first phase of the reduction */
	movdqa	\XMMDst, \TMP2
	movdqa	\XMMDst, \TMP3
	movdqa	\XMMDst, \TMP4
	pslld	$31, \TMP2			/* packed right shifting << 31 */
	pslld	$30, \TMP3			/* packed right shifting << 30 */
	pslld	$25, \TMP4			/* packed right shifting << 25 */
	pxor	\TMP3, \TMP2			/* xor the shifted versions */
	pxor	\TMP4, \TMP2
	movdqa	\TMP2, \TMP7
	psrldq	$4, \TMP7			/* right shift TMP7 1 DW */
	pslldq	$12, \TMP2			/* left shift TMP2 3 DWs */
	pxor	\TMP2, \XMMDst

	/* second phase of the reduction */
	movdqa	\XMMDst, \TMP2
	movdqa	\XMMDst, \TMP3
	movdqa	\XMMDst, \TMP4
	psrld	$1, \TMP2			/* packed left shift >> 1 */
	psrld	$2, \TMP3			/* packed left shift >> 2 */
	psrld	$7, \TMP4			/* packed left shift >> 7 */
	pxor	\TMP3, \TMP2			/* xor the shifted versions */
	pxor	\TMP4, \TMP2
	pxor	\TMP7, \TMP2
	pxor	\TMP2, \XMMDst
	pxor	\TMP6, \XMMDst			/* digest is in XMMDst */
.endm
1486
1487
1488
1489
1490
1491
/*
 * ENCRYPT_SINGLE_BLOCK: \XMM0 = AES_encrypt(K, \XMM0) with the expanded
 * key at %arg1.  The round count is derived from keysize
 * (128->10, 192->12, 256->14 via eax = keysize/4 + 5 middle rounds).
 * \TMP1 is scratch; clobbers %rax and %r10.
 */
.macro ENCRYPT_SINGLE_BLOCK XMM0 TMP1

	pxor	(%arg1), \XMM0			/* round 0 */
	mov	keysize,%eax
	shr	$2,%eax				/* 128->4, 192->6, 256->8 */
	add	$5,%eax				/* 128->9, 192->11, 256->13 */
	lea	16(%arg1), %r10			/* first middle round key */

_esb_loop_\@:
	MOVADQ	(%r10),\TMP1
	aesenc	\TMP1,\XMM0
	add	$16,%r10
	sub	$1,%eax
	jnz	_esb_loop_\@

	MOVADQ	(%r10),\TMP1
	aesenclast \TMP1,\XMM0			/* final round */
.endm
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
/*
 * aesni_gcm_dec: one-shot AES-GCM decryption.  Chains the GCM_INIT,
 * GCM_ENC_DEC (dec) and GCM_COMPLETE macros defined earlier in this
 * file; arguments past the register args are reached through the
 * arg6..arg11 aliases defined earlier (presumably stack slots for
 * arg7 and up — confirm against their definitions).
 */
SYM_FUNC_START(aesni_gcm_dec)
	FUNC_SAVE

	GCM_INIT %arg6, arg7, arg8, arg9
	GCM_ENC_DEC dec
	GCM_COMPLETE arg10, arg11
	FUNC_RESTORE
	ret
SYM_FUNC_END(aesni_gcm_dec)
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
/*
 * aesni_gcm_enc: one-shot AES-GCM encryption.  Mirrors aesni_gcm_dec
 * but runs GCM_ENC_DEC in its enc variant.  Arguments past the
 * register args are reached through the arg6..arg11 aliases defined
 * earlier in this file.
 */
SYM_FUNC_START(aesni_gcm_enc)
	FUNC_SAVE

	GCM_INIT %arg6, arg7, arg8, arg9
	GCM_ENC_DEC enc

	GCM_COMPLETE arg10, arg11
	FUNC_RESTORE
	ret
SYM_FUNC_END(aesni_gcm_enc)
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
/*
 * aesni_gcm_init: initialize the GCM context for the incremental
 * (init/update/finalize) interface via the GCM_INIT macro defined
 * earlier — presumably filling the AadHash/CurCount/HashKey* slots
 * laid out at the top of this file; confirm against GCM_INIT.
 */
SYM_FUNC_START(aesni_gcm_init)
	FUNC_SAVE
	GCM_INIT %arg3, %arg4,%arg5, %arg6
	FUNC_RESTORE
	ret
SYM_FUNC_END(aesni_gcm_init)
1706
1707
1708
1709
1710
1711
1712
1713
1714
/*
 * aesni_gcm_enc_update: encrypt (and GHASH) one chunk of data for the
 * incremental GCM interface; state lives in the context set up by
 * aesni_gcm_init.
 */
SYM_FUNC_START(aesni_gcm_enc_update)
	FUNC_SAVE
	GCM_ENC_DEC enc
	FUNC_RESTORE
	ret
SYM_FUNC_END(aesni_gcm_enc_update)
1721
1722
1723
1724
1725
1726
1727
1728
1729
/*
 * aesni_gcm_dec_update: decrypt (and GHASH) one chunk of data for the
 * incremental GCM interface; counterpart of aesni_gcm_enc_update.
 */
SYM_FUNC_START(aesni_gcm_dec_update)
	FUNC_SAVE
	GCM_ENC_DEC dec
	FUNC_RESTORE
	ret
SYM_FUNC_END(aesni_gcm_dec_update)
1736
1737
1738
1739
1740
1741
1742
1743
1744
/*
 * aesni_gcm_finalize: finish an incremental GCM operation and emit the
 * authentication tag via GCM_COMPLETE.
 * NOTE(review): macro args here are space-separated (valid in gas)
 * while the callers above use commas — harmless inconsistency.
 */
SYM_FUNC_START(aesni_gcm_finalize)
	FUNC_SAVE
	GCM_COMPLETE %arg3 %arg4
	FUNC_RESTORE
	ret
SYM_FUNC_END(aesni_gcm_finalize)
1751
1752#endif
1753
1754
/*
 * Key-schedule helpers for aesni_set_key.  Shared register contract set
 * up by the caller: %xmm0/%xmm2 hold previous round-key material, %xmm1
 * holds the aeskeygenassist result, %xmm4 must be zero on entry, and
 * TKEYP points at the slot for the next round key.
 *
 * _key_expansion_256a (also aliased as _key_expansion_128): derive the
 * next round key from %xmm0 and %xmm1, store it at (TKEYP) and advance
 * TKEYP by 16.  Leaves %xmm4 zero again (both shufps results are xored
 * back out of it... it is rebuilt from %xmm0 each pass).
 */
SYM_FUNC_START_LOCAL_ALIAS(_key_expansion_128)
SYM_FUNC_START_LOCAL(_key_expansion_256a)
	pshufd $0b11111111, %xmm1, %xmm1	# broadcast keygenassist dword 3
	shufps $0b00010000, %xmm0, %xmm4	# the shufps/pxor pairs xor each
	pxor %xmm4, %xmm0			# dword of %xmm0 with all the
	shufps $0b10001100, %xmm0, %xmm4	# dwords below it (prefix xor)
	pxor %xmm4, %xmm0
	pxor %xmm1, %xmm0			# new round key in %xmm0
	movaps %xmm0, (TKEYP)
	add $0x10, TKEYP
	ret
SYM_FUNC_END(_key_expansion_256a)
SYM_FUNC_END_ALIAS(_key_expansion_128)
1768
/*
 * _key_expansion_192a: AES-192 schedule step that emits 32 bytes.
 * Derives the next six dwords of key material from %xmm0 (previous four
 * dwords), %xmm2 (previous two dwords) and the keygenassist result in
 * %xmm1, repacks them with the saved previous half in %xmm6, and stores
 * two 16-byte round-key slots at (TKEYP).  Advances TKEYP by 0x20.
 * Clobbers %xmm3, %xmm5, %xmm6; expects %xmm4 == 0 on entry.
 */
SYM_FUNC_START_LOCAL(_key_expansion_192a)
	pshufd $0b01010101, %xmm1, %xmm1	# broadcast keygenassist dword 1
	shufps $0b00010000, %xmm0, %xmm4	# prefix-xor the dwords of %xmm0
	pxor %xmm4, %xmm0
	shufps $0b10001100, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	pxor %xmm1, %xmm0			# next four dwords in %xmm0

	movaps %xmm2, %xmm5			# keep old %xmm2 halves around
	movaps %xmm2, %xmm6
	pslldq $4, %xmm5
	pshufd $0b11111111, %xmm0, %xmm3	# fold high dword of %xmm0
	pxor %xmm3, %xmm2			# into the two extra dwords
	pxor %xmm5, %xmm2

	movaps %xmm0, %xmm1
	shufps $0b01000100, %xmm0, %xmm6	# slot 1: old pair | new low pair
	movaps %xmm6, (TKEYP)
	shufps $0b01001110, %xmm2, %xmm1	# slot 2: new high pair | %xmm2
	movaps %xmm1, 0x10(TKEYP)
	add $0x20, TKEYP
	ret
SYM_FUNC_END(_key_expansion_192a)
1792
/*
 * _key_expansion_192b: AES-192 schedule step that emits 16 bytes.
 * Same derivation as _key_expansion_192a but stores only one round-key
 * slot at (TKEYP); the extra pair stays in %xmm2 for the next step.
 * Advances TKEYP by 0x10.  Clobbers %xmm3, %xmm5; expects %xmm4 == 0.
 */
SYM_FUNC_START_LOCAL(_key_expansion_192b)
	pshufd $0b01010101, %xmm1, %xmm1	# broadcast keygenassist dword 1
	shufps $0b00010000, %xmm0, %xmm4	# prefix-xor the dwords of %xmm0
	pxor %xmm4, %xmm0
	shufps $0b10001100, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	pxor %xmm1, %xmm0			# next four dwords in %xmm0

	movaps %xmm2, %xmm5
	pslldq $4, %xmm5
	pshufd $0b11111111, %xmm0, %xmm3	# fold high dword of %xmm0 into
	pxor %xmm3, %xmm2			# the carried pair in %xmm2
	pxor %xmm5, %xmm2

	movaps %xmm0, (TKEYP)
	add $0x10, TKEYP
	ret
SYM_FUNC_END(_key_expansion_192b)
1811
/*
 * _key_expansion_256b: odd-numbered AES-256 schedule step.  Derives the
 * next round key from %xmm2 and the keygenassist result in %xmm1
 * (dword 2 this time — no RotWord for odd rounds), stores it at (TKEYP)
 * and advances TKEYP by 0x10.  Expects %xmm4 == 0 on entry.
 */
SYM_FUNC_START_LOCAL(_key_expansion_256b)
	pshufd $0b10101010, %xmm1, %xmm1	# broadcast keygenassist dword 2
	shufps $0b00010000, %xmm2, %xmm4	# prefix-xor the dwords of %xmm2
	pxor %xmm4, %xmm2
	shufps $0b10001100, %xmm2, %xmm4
	pxor %xmm4, %xmm2
	pxor %xmm1, %xmm2			# new round key in %xmm2
	movaps %xmm2, (TKEYP)
	add $0x10, TKEYP
	ret
SYM_FUNC_END(_key_expansion_256b)
1823
1824
1825
1826
1827
/*
 * aesni_set_key: expand the user key at (UKEYP) into the encryption
 * schedule starting at (KEYP), then build the decryption schedule
 * (aesimc-transformed, reversed) in the 240 bytes that follow it.
 * The key length in bytes (%edx) is stored at ctx offset 480, where
 * the cipher entry points later reload it as KLEN.  Register aliases
 * (KEYP, UKEYP, TKEYP, AREG, ...) are defined earlier in this file.
 * Returns 0 in AREG.
 */
SYM_FUNC_START(aesni_set_key)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl KEYP
	movl (FRAME_OFFSET+8)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+12)(%esp), UKEYP	# in_key
	movl (FRAME_OFFSET+16)(%esp), %edx	# key_len
#endif
	movups (UKEYP), %xmm0		# first 16 bytes of user key
	movaps %xmm0, (KEYP)		# = round 0 key
	lea 0x10(KEYP), TKEYP		# TKEYP -> next round-key slot
	movl %edx, 480(KEYP)		# stash key length at ctx+480
	pxor %xmm4, %xmm4		# _key_expansion_* expect %xmm4 == 0
	cmp $24, %dl			# dispatch on key length:
	jb .Lenc_key128			#   <24 -> AES-128
	je .Lenc_key192			#   =24 -> AES-192, else AES-256
	movups 0x10(UKEYP), %xmm2	# second half of 256-bit user key
	movaps %xmm2, (TKEYP)
	add $0x10, TKEYP
	aeskeygenassist $0x1, %xmm2, %xmm1	# Rcon = 1, 2, 4, ... per pair
	call _key_expansion_256a
	aeskeygenassist $0x1, %xmm0, %xmm1
	call _key_expansion_256b
	aeskeygenassist $0x2, %xmm2, %xmm1
	call _key_expansion_256a
	aeskeygenassist $0x2, %xmm0, %xmm1
	call _key_expansion_256b
	aeskeygenassist $0x4, %xmm2, %xmm1
	call _key_expansion_256a
	aeskeygenassist $0x4, %xmm0, %xmm1
	call _key_expansion_256b
	aeskeygenassist $0x8, %xmm2, %xmm1
	call _key_expansion_256a
	aeskeygenassist $0x8, %xmm0, %xmm1
	call _key_expansion_256b
	aeskeygenassist $0x10, %xmm2, %xmm1
	call _key_expansion_256a
	aeskeygenassist $0x10, %xmm0, %xmm1
	call _key_expansion_256b
	aeskeygenassist $0x20, %xmm2, %xmm1
	call _key_expansion_256a
	aeskeygenassist $0x20, %xmm0, %xmm1
	call _key_expansion_256b
	aeskeygenassist $0x40, %xmm2, %xmm1
	call _key_expansion_256a
	jmp .Ldec_key
.Lenc_key192:
	movq 0x10(UKEYP), %xmm2		# remaining 8 bytes of 192-bit key
	aeskeygenassist $0x1, %xmm2, %xmm1	# Rcon doubles each step
	call _key_expansion_192a
	aeskeygenassist $0x2, %xmm2, %xmm1
	call _key_expansion_192b
	aeskeygenassist $0x4, %xmm2, %xmm1
	call _key_expansion_192a
	aeskeygenassist $0x8, %xmm2, %xmm1
	call _key_expansion_192b
	aeskeygenassist $0x10, %xmm2, %xmm1
	call _key_expansion_192a
	aeskeygenassist $0x20, %xmm2, %xmm1
	call _key_expansion_192b
	aeskeygenassist $0x40, %xmm2, %xmm1
	call _key_expansion_192a
	aeskeygenassist $0x80, %xmm2, %xmm1
	call _key_expansion_192b
	jmp .Ldec_key
.Lenc_key128:
	aeskeygenassist $0x1, %xmm0, %xmm1	# Rcon 1,2,...,0x80,0x1b,0x36
	call _key_expansion_128
	aeskeygenassist $0x2, %xmm0, %xmm1
	call _key_expansion_128
	aeskeygenassist $0x4, %xmm0, %xmm1
	call _key_expansion_128
	aeskeygenassist $0x8, %xmm0, %xmm1
	call _key_expansion_128
	aeskeygenassist $0x10, %xmm0, %xmm1
	call _key_expansion_128
	aeskeygenassist $0x20, %xmm0, %xmm1
	call _key_expansion_128
	aeskeygenassist $0x40, %xmm0, %xmm1
	call _key_expansion_128
	aeskeygenassist $0x80, %xmm0, %xmm1
	call _key_expansion_128
	aeskeygenassist $0x1b, %xmm0, %xmm1
	call _key_expansion_128
	aeskeygenassist $0x36, %xmm0, %xmm1
	call _key_expansion_128
.Ldec_key:
	sub $0x10, TKEYP		# TKEYP -> last encryption round key
	movaps (KEYP), %xmm0
	movaps (TKEYP), %xmm1
	movaps %xmm0, 240(TKEYP)	# dec schedule (ctx+240): first and
	movaps %xmm1, 240(KEYP)		# last round keys swap, unchanged
	add $0x10, KEYP
	lea 240-16(TKEYP), UKEYP	# fill interior dec keys backwards
.align 4
.Ldec_key_loop:
	movaps (KEYP), %xmm0
	aesimc %xmm0, %xmm1		# InvMixColumns interior round keys
	movaps %xmm1, (UKEYP)
	add $0x10, KEYP
	sub $0x10, UKEYP
	cmp TKEYP, KEYP
	jb .Ldec_key_loop
	xor AREG, AREG			# return 0
#ifndef __x86_64__
	popl KEYP
#endif
	FRAME_END
	ret
SYM_FUNC_END(aesni_set_key)
1938
1939
1940
1941
/*
 * aesni_enc: encrypt a single 16-byte block.  Loads the block at (INP),
 * encrypts it with the schedule at (KEYP) via _aesni_enc1 and stores
 * the result at (OUTP).  KLEN is taken from the key-length field at
 * ctx offset 480 (written by aesni_set_key).
 */
SYM_FUNC_START(aesni_enc)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+12)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+16)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+20)(%esp), INP	# src
#endif
	movl 480(KEYP), KLEN		# key length in bytes
	movups (INP), STATE		# unaligned load of the block
	call _aesni_enc1
	movups STATE, (OUTP)
#ifndef __x86_64__
	popl KLEN
	popl KEYP
#endif
	FRAME_END
	ret
SYM_FUNC_END(aesni_enc)
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
/*
 * _aesni_enc1: encrypt STATE in place with the schedule at (KEYP).
 * KLEN (key length in bytes) selects the 10/12/14-round path: the two
 * extra rounds per larger key size are peeled off before falling
 * through to the common 10-round tail.  TKEYP is biased so the tail
 * can address every round key with a fixed offset.  Clobbers KEY,
 * TKEYP and flags.
 */
SYM_FUNC_START_LOCAL(_aesni_enc1)
	movaps (KEYP), KEY		# round 0 key
	mov KEYP, TKEYP
	pxor KEY, STATE			# round 0
	add $0x30, TKEYP		# bias: tail offsets are relative
	cmp $24, KLEN
	jb .Lenc128
	lea 0x20(TKEYP), TKEYP		# 192/256: shift the bias up
	je .Lenc192
	add $0x20, TKEYP		# 256-bit key only: rounds 1-2
	movaps -0x60(TKEYP), KEY
	aesenc KEY, STATE
	movaps -0x50(TKEYP), KEY
	aesenc KEY, STATE
.align 4
.Lenc192:
	movaps -0x40(TKEYP), KEY	# 192/256-bit keys: two more rounds
	aesenc KEY, STATE
	movaps -0x30(TKEYP), KEY
	aesenc KEY, STATE
.align 4
.Lenc128:
	movaps -0x20(TKEYP), KEY	# common tail: final 10 rounds
	aesenc KEY, STATE
	movaps -0x10(TKEYP), KEY
	aesenc KEY, STATE
	movaps (TKEYP), KEY
	aesenc KEY, STATE
	movaps 0x10(TKEYP), KEY
	aesenc KEY, STATE
	movaps 0x20(TKEYP), KEY
	aesenc KEY, STATE
	movaps 0x30(TKEYP), KEY
	aesenc KEY, STATE
	movaps 0x40(TKEYP), KEY
	aesenc KEY, STATE
	movaps 0x50(TKEYP), KEY
	aesenc KEY, STATE
	movaps 0x60(TKEYP), KEY
	aesenc KEY, STATE
	movaps 0x70(TKEYP), KEY
	aesenclast KEY, STATE		# last round
	ret
SYM_FUNC_END(_aesni_enc1)
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
/*
 * _aesni_enc4: encrypt STATE1..STATE4 in place with the schedule at
 * (KEYP) — four independent blocks interleaved to hide aesenc latency.
 * KLEN selects the 10/12/14-round path exactly as in _aesni_enc1;
 * TKEYP is biased the same way so the tail uses fixed offsets.
 * Clobbers KEY, TKEYP and flags.
 *
 * The interior branch targets carry .align 4 for consistency with
 * _aesni_enc1 and _aesni_dec4.
 */
SYM_FUNC_START_LOCAL(_aesni_enc4)
	movaps (KEYP), KEY		# round 0 key
	mov KEYP, TKEYP
	pxor KEY, STATE1		# round 0 on all four blocks
	pxor KEY, STATE2
	pxor KEY, STATE3
	pxor KEY, STATE4
	add $0x30, TKEYP		# bias for fixed-offset tail
	cmp $24, KLEN
	jb .L4enc128
	lea 0x20(TKEYP), TKEYP
	je .L4enc192
	add $0x20, TKEYP		# 256-bit key only: rounds 1-2
	movaps -0x60(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps -0x50(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
.align 4
.L4enc192:
	movaps -0x40(TKEYP), KEY	# 192/256-bit keys: two more rounds
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps -0x30(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
.align 4
.L4enc128:
	movaps -0x20(TKEYP), KEY	# common tail: final 10 rounds
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps -0x10(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps (TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps 0x10(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps 0x20(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps 0x30(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps 0x40(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps 0x50(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps 0x60(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps 0x70(TKEYP), KEY
	aesenclast KEY, STATE1		# last round
	aesenclast KEY, STATE2
	aesenclast KEY, STATE3
	aesenclast KEY, STATE4
	ret
SYM_FUNC_END(_aesni_enc4)
2127
2128
2129
2130
/*
 * aesni_dec: decrypt a single 16-byte block.  The decryption schedule
 * lives 240 bytes after the encryption schedule (see aesni_set_key's
 * .Ldec_key), hence the add $240 before calling _aesni_dec1.
 */
SYM_FUNC_START(aesni_dec)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+12)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+16)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+20)(%esp), INP	# src
#endif
	mov 480(KEYP), KLEN		# key length in bytes
	add $240, KEYP			# KEYP -> decryption schedule
	movups (INP), STATE
	call _aesni_dec1
	movups STATE, (OUTP)
#ifndef __x86_64__
	popl KLEN
	popl KEYP
#endif
	FRAME_END
	ret
SYM_FUNC_END(aesni_dec)
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
/*
 * _aesni_dec1: decrypt STATE in place with the (inverse) schedule at
 * (KEYP).  Structure mirrors _aesni_enc1: KLEN selects the
 * 10/12/14-round path, extra rounds for larger keys are peeled off,
 * and TKEYP is biased so the common tail uses fixed offsets.
 * Clobbers KEY, TKEYP and flags.
 */
SYM_FUNC_START_LOCAL(_aesni_dec1)
	movaps (KEYP), KEY		# round 0 key
	mov KEYP, TKEYP
	pxor KEY, STATE			# round 0
	add $0x30, TKEYP		# bias for fixed-offset tail
	cmp $24, KLEN
	jb .Ldec128
	lea 0x20(TKEYP), TKEYP
	je .Ldec192
	add $0x20, TKEYP		# 256-bit key only: rounds 1-2
	movaps -0x60(TKEYP), KEY
	aesdec KEY, STATE
	movaps -0x50(TKEYP), KEY
	aesdec KEY, STATE
.align 4
.Ldec192:
	movaps -0x40(TKEYP), KEY	# 192/256-bit keys: two more rounds
	aesdec KEY, STATE
	movaps -0x30(TKEYP), KEY
	aesdec KEY, STATE
.align 4
.Ldec128:
	movaps -0x20(TKEYP), KEY	# common tail: final 10 rounds
	aesdec KEY, STATE
	movaps -0x10(TKEYP), KEY
	aesdec KEY, STATE
	movaps (TKEYP), KEY
	aesdec KEY, STATE
	movaps 0x10(TKEYP), KEY
	aesdec KEY, STATE
	movaps 0x20(TKEYP), KEY
	aesdec KEY, STATE
	movaps 0x30(TKEYP), KEY
	aesdec KEY, STATE
	movaps 0x40(TKEYP), KEY
	aesdec KEY, STATE
	movaps 0x50(TKEYP), KEY
	aesdec KEY, STATE
	movaps 0x60(TKEYP), KEY
	aesdec KEY, STATE
	movaps 0x70(TKEYP), KEY
	aesdeclast KEY, STATE		# last round
	ret
SYM_FUNC_END(_aesni_dec1)
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
/*
 * _aesni_dec4: decrypt STATE1..STATE4 in place with the (inverse)
 * schedule at (KEYP) — four independent blocks interleaved to hide
 * aesdec latency.  Round dispatch mirrors _aesni_dec1.  Clobbers KEY,
 * TKEYP and flags.
 */
SYM_FUNC_START_LOCAL(_aesni_dec4)
	movaps (KEYP), KEY		# round 0 key
	mov KEYP, TKEYP
	pxor KEY, STATE1		# round 0 on all four blocks
	pxor KEY, STATE2
	pxor KEY, STATE3
	pxor KEY, STATE4
	add $0x30, TKEYP		# bias for fixed-offset tail
	cmp $24, KLEN
	jb .L4dec128
	lea 0x20(TKEYP), TKEYP
	je .L4dec192
	add $0x20, TKEYP		# 256-bit key only: rounds 1-2
	movaps -0x60(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps -0x50(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
.align 4
.L4dec192:
	movaps -0x40(TKEYP), KEY	# 192/256-bit keys: two more rounds
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps -0x30(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
.align 4
.L4dec128:
	movaps -0x20(TKEYP), KEY	# common tail: final 10 rounds
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps -0x10(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps (TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps 0x10(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps 0x20(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps 0x30(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps 0x40(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps 0x50(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps 0x60(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps 0x70(TKEYP), KEY
	aesdeclast KEY, STATE1		# last round
	aesdeclast KEY, STATE2
	aesdeclast KEY, STATE3
	aesdeclast KEY, STATE4
	ret
SYM_FUNC_END(_aesni_dec4)
2317
2318
2319
2320
2321
/*
 * aesni_ecb_enc: ECB-encrypt LEN bytes from (INP) to (OUTP).
 * Processes 64-byte chunks four blocks at a time via _aesni_enc4, then
 * single blocks via _aesni_enc1.  A trailing partial block (<16 bytes)
 * is ignored.
 */
SYM_FUNC_START(aesni_ecb_enc)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl LEN
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+16)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+20)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+24)(%esp), INP	# src
	movl (FRAME_OFFSET+28)(%esp), LEN	# len
#endif
	test LEN, LEN			# nothing to do for len == 0
	jz .Lecb_enc_ret
	mov 480(KEYP), KLEN
	cmp $16, LEN
	jb .Lecb_enc_ret
	cmp $64, LEN
	jb .Lecb_enc_loop1
.align 4
.Lecb_enc_loop4:
	movups (INP), STATE1		# four blocks per iteration
	movups 0x10(INP), STATE2
	movups 0x20(INP), STATE3
	movups 0x30(INP), STATE4
	call _aesni_enc4
	movups STATE1, (OUTP)
	movups STATE2, 0x10(OUTP)
	movups STATE3, 0x20(OUTP)
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jge .Lecb_enc_loop4
	cmp $16, LEN
	jb .Lecb_enc_ret
.align 4
.Lecb_enc_loop1:
	movups (INP), STATE1		# remaining whole blocks, one by one
	call _aesni_enc1
	movups STATE1, (OUTP)
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jge .Lecb_enc_loop1
.Lecb_enc_ret:
#ifndef __x86_64__
	popl KLEN
	popl KEYP
	popl LEN
#endif
	FRAME_END
	ret
SYM_FUNC_END(aesni_ecb_enc)
2377
2378
2379
2380
2381
/*
 * aesni_ecb_dec: ECB-decrypt LEN bytes from (INP) to (OUTP).
 * Same structure as aesni_ecb_enc, but switches KEYP to the decryption
 * schedule at ctx+240 and uses the _aesni_dec* helpers.
 */
SYM_FUNC_START(aesni_ecb_dec)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl LEN
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+16)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+20)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+24)(%esp), INP	# src
	movl (FRAME_OFFSET+28)(%esp), LEN	# len
#endif
	test LEN, LEN
	jz .Lecb_dec_ret
	mov 480(KEYP), KLEN
	add $240, KEYP			# KEYP -> decryption schedule
	cmp $16, LEN
	jb .Lecb_dec_ret
	cmp $64, LEN
	jb .Lecb_dec_loop1
.align 4
.Lecb_dec_loop4:
	movups (INP), STATE1		# four blocks per iteration
	movups 0x10(INP), STATE2
	movups 0x20(INP), STATE3
	movups 0x30(INP), STATE4
	call _aesni_dec4
	movups STATE1, (OUTP)
	movups STATE2, 0x10(OUTP)
	movups STATE3, 0x20(OUTP)
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jge .Lecb_dec_loop4
	cmp $16, LEN
	jb .Lecb_dec_ret
.align 4
.Lecb_dec_loop1:
	movups (INP), STATE1		# remaining whole blocks, one by one
	call _aesni_dec1
	movups STATE1, (OUTP)
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jge .Lecb_dec_loop1
.Lecb_dec_ret:
#ifndef __x86_64__
	popl KLEN
	popl KEYP
	popl LEN
#endif
	FRAME_END
	ret
SYM_FUNC_END(aesni_ecb_dec)
2438
2439
2440
2441
2442
/*
 * aesni_cbc_enc: CBC-encrypt LEN bytes from (INP) to (OUTP) with the
 * IV at (IVP).  CBC encryption is inherently serial (each block chains
 * off the previous ciphertext), so only the one-block helper is used.
 * The final ciphertext block is written back to (IVP) for chaining.
 */
SYM_FUNC_START(aesni_cbc_enc)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl IVP
	pushl LEN
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+20)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+24)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+28)(%esp), INP	# src
	movl (FRAME_OFFSET+32)(%esp), LEN	# len
	movl (FRAME_OFFSET+36)(%esp), IVP	# iv
#endif
	cmp $16, LEN			# need at least one whole block
	jb .Lcbc_enc_ret
	mov 480(KEYP), KLEN
	movups (IVP), STATE		# STATE carries the running chain
.align 4
.Lcbc_enc_loop:
	movups (INP), IN
	pxor IN, STATE			# plaintext xor previous ciphertext
	call _aesni_enc1
	movups STATE, (OUTP)
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jge .Lcbc_enc_loop
	movups STATE, (IVP)		# save final block as next IV
.Lcbc_enc_ret:
#ifndef __x86_64__
	popl KLEN
	popl KEYP
	popl LEN
	popl IVP
#endif
	FRAME_END
	ret
SYM_FUNC_END(aesni_cbc_enc)
2482
2483
2484
2485
2486
/*
 * aesni_cbc_dec: CBC-decrypt LEN bytes from (INP) to (OUTP) with the
 * IV at (IVP).  Decryption parallelizes, so four blocks are decrypted
 * per iteration; the original ciphertext blocks are kept (IN1..IN4) to
 * xor against after _aesni_dec4.  On 32-bit builds fewer xmm registers
 * are available, so two of the ciphertext blocks are re-read from
 * (INP) after the call instead of being held in registers.
 * The last ciphertext block is written back to (IVP) for chaining.
 */
SYM_FUNC_START(aesni_cbc_dec)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl IVP
	pushl LEN
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+20)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+24)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+28)(%esp), INP	# src
	movl (FRAME_OFFSET+32)(%esp), LEN	# len
	movl (FRAME_OFFSET+36)(%esp), IVP	# iv
#endif
	cmp $16, LEN
	jb .Lcbc_dec_just_ret
	mov 480(KEYP), KLEN
	add $240, KEYP			# KEYP -> decryption schedule
	movups (IVP), IV
	cmp $64, LEN
	jb .Lcbc_dec_loop1
.align 4
.Lcbc_dec_loop4:
	movups (INP), IN1
	movaps IN1, STATE1
	movups 0x10(INP), IN2
	movaps IN2, STATE2
#ifdef __x86_64__
	movups 0x20(INP), IN3
	movaps IN3, STATE3
	movups 0x30(INP), IN4
	movaps IN4, STATE4
#else
	movups 0x20(INP), IN1		# 32-bit: reuse IN1/IN2; blocks 0-1
	movaps IN1, STATE3		# are re-read below after the call
	movups 0x30(INP), IN2
	movaps IN2, STATE4
#endif
	call _aesni_dec4
	pxor IV, STATE1			# undo the chaining xor per block
#ifdef __x86_64__
	pxor IN1, STATE2
	pxor IN2, STATE3
	pxor IN3, STATE4
	movaps IN4, IV			# last ciphertext becomes next IV
#else
	pxor IN1, STATE4
	movaps IN2, IV			# last ciphertext becomes next IV
	movups (INP), IN1		# re-read blocks 0-1 for the xors
	pxor IN1, STATE2
	movups 0x10(INP), IN2
	pxor IN2, STATE3
#endif
	movups STATE1, (OUTP)
	movups STATE2, 0x10(OUTP)
	movups STATE3, 0x20(OUTP)
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jge .Lcbc_dec_loop4
	cmp $16, LEN
	jb .Lcbc_dec_ret
.align 4
.Lcbc_dec_loop1:
	movups (INP), IN		# remaining whole blocks, one by one
	movaps IN, STATE
	call _aesni_dec1
	pxor IV, STATE
	movups STATE, (OUTP)
	movaps IN, IV			# this ciphertext chains the next
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jge .Lcbc_dec_loop1
.Lcbc_dec_ret:
	movups IV, (IVP)		# save chaining value
.Lcbc_dec_just_ret:
#ifndef __x86_64__
	popl KLEN
	popl KEYP
	popl LEN
	popl IVP
#endif
	FRAME_END
	ret
SYM_FUNC_END(aesni_cbc_dec)
2575
2576
2577
2578
2579
/*
 * aesni_cts_cbc_enc: encrypt the final 16 < LEN <= 32 bytes of a CBC
 * message with ciphertext stealing (CS3/"CBC-CS" style: last two
 * output blocks swapped).  Assumes LEN covers exactly one full block
 * plus a trailing partial block — TODO confirm against the C caller.
 * The pshufb masks fetched from .Lcts_permute_table truncate and
 * left-align the partial data.
 */
SYM_FUNC_START(aesni_cts_cbc_enc)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl IVP
	pushl LEN
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+20)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+24)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+28)(%esp), INP	# src
	movl (FRAME_OFFSET+32)(%esp), LEN	# len
	movl (FRAME_OFFSET+36)(%esp), IVP	# iv
	lea .Lcts_permute_table, T1
#else
	lea .Lcts_permute_table(%rip), T1
#endif
	mov 480(KEYP), KLEN
	movups (IVP), STATE
	sub $16, LEN			# LEN = size of the partial block
	mov T1, IVP
	add $32, IVP
	add LEN, T1			# T1  -> mask keeping LEN bytes
	sub LEN, IVP			# IVP -> mask aligning the tail
	movups (T1), %xmm4
	movups (IVP), %xmm5

	movups (INP), IN1		# last full plaintext block
	add LEN, INP
	movups (IN P), IN2		# partial block (overlapping load)

	pxor IN1, STATE			# CBC-encrypt the full block
	call _aesni_enc1

	pshufb %xmm5, IN2
	pxor STATE, IN2			# steal ciphertext bytes as padding
	pshufb %xmm4, STATE
	add OUTP, LEN
	movups STATE, (LEN)		# truncated ciphertext stored last

	movaps IN2, STATE		# encrypt the stolen block...
	call _aesni_enc1
	movups STATE, (OUTP)		# ...and store it first

#ifndef __x86_64__
	popl KLEN
	popl KEYP
	popl LEN
	popl IVP
#endif
	FRAME_END
	ret
SYM_FUNC_END(aesni_cts_cbc_enc)
2632
2633
2634
2635
2636
/*
 * aesni_cts_cbc_dec: decrypt the final 16 < LEN <= 32 bytes of a CBC
 * message written with ciphertext stealing; inverse of
 * aesni_cts_cbc_enc.  Note pblendvb's selector is implicitly %xmm0,
 * loaded just before from the permute table.
 */
SYM_FUNC_START(aesni_cts_cbc_dec)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl IVP
	pushl LEN
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+20)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+24)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+28)(%esp), INP	# src
	movl (FRAME_OFFSET+32)(%esp), LEN	# len
	movl (FRAME_OFFSET+36)(%esp), IVP	# iv
	lea .Lcts_permute_table, T1
#else
	lea .Lcts_permute_table(%rip), T1
#endif
	mov 480(KEYP), KLEN
	add $240, KEYP			# KEYP -> decryption schedule
	movups (IVP), IV
	sub $16, LEN			# LEN = size of the partial block
	mov T1, IVP
	add $32, IVP
	add LEN, T1			# T1  -> truncating mask
	sub LEN, IVP			# IVP -> aligning mask
	movups (T1), %xmm4

	movups (INP), STATE		# first ciphertext block
	add LEN, INP
	movups (INP), IN1		# partial block (overlapping load)

	call _aesni_dec1
	movaps STATE, IN2		# keep full decryption for merging
	pshufb %xmm4, STATE
	pxor IN1, STATE			# recover the partial plaintext

	add OUTP, LEN
	movups STATE, (LEN)		# partial plaintext stored last

	movups (IVP), %xmm0		# selector for pblendvb (implicit)
	pshufb %xmm0, IN1
	pblendvb IN2, IN1		# rebuild the stolen block
	movaps IN1, STATE
	call _aesni_dec1

	pxor IV, STATE			# undo CBC chaining
	movups STATE, (OUTP)

#ifndef __x86_64__
	popl KLEN
	popl KEYP
	popl LEN
	popl IVP
#endif
	FRAME_END
	ret
SYM_FUNC_END(aesni_cts_cbc_dec)
2693
.pushsection .rodata
.align 16
/*
 * pshufb masks for ciphertext stealing: indexing LEN bytes into (or
 * before) the middle identity row yields a mask that shifts/extracts
 * 0..15 bytes; 0x80 entries make pshufb zero the output byte.
 */
.Lcts_permute_table:
	.byte		0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
	.byte		0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
	.byte		0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
	.byte		0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
	.byte		0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
	.byte		0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
#ifdef __x86_64__
/* Byte-reversal mask for pshufb, used by _aesni_inc_init/_aesni_inc. */
.Lbswap_mask:
	.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
#endif
.popsection
2708
2709#ifdef __x86_64__
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
/*
 * _aesni_inc_init: set up counter state for aesni_ctr_enc.
 *   CTR      = byte-swapped (big- to little-endian) copy of IV
 *   INC      = 1 in the low qword (the per-block increment)
 *   TCTR_LOW = low qword of CTR mirrored into a GPR so _aesni_inc can
 *              detect 64-bit wrap-around with a cheap add/jnc
 * .Lbswap_mask is loaded RIP-relative, matching the addressing used
 * for the other x86_64-only constants in this file (relocation-safe
 * and a shorter encoding than 32-bit absolute).
 */
SYM_FUNC_START_LOCAL(_aesni_inc_init)
	movaps .Lbswap_mask(%rip), BSWAP_MASK
	movaps IV, CTR
	pshufb BSWAP_MASK, CTR		# counter in little-endian qwords
	mov $1, TCTR_LOW
	movq TCTR_LOW, INC		# INC = {0, 1}
	movq CTR, TCTR_LOW		# mirror low qword for carry checks
	ret
SYM_FUNC_END(_aesni_inc_init)
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
/*
 * _aesni_inc: advance the counter by one and regenerate IV.
 * paddq adds 1 to the low qword of CTR; TCTR_LOW tracks the same value
 * in a GPR so carry out of the low 64 bits shows up in the flags of
 * the add — when it does, INC is shifted into the high qword and the
 * carry is propagated, then INC is restored.  IV receives the
 * byte-swapped (big-endian) counter for encryption.
 */
SYM_FUNC_START_LOCAL(_aesni_inc)
	paddq INC, CTR			# low qword += 1 (no carry across)
	add $1, TCTR_LOW		# mirror increment; CF on wrap
	jnc .Linc_low
	pslldq $8, INC			# carry: add 1 to the high qword
	paddq INC, CTR
	psrldq $8, INC			# restore INC = {0, 1}
.Linc_low:
	movaps CTR, IV
	pshufb BSWAP_MASK, IV		# back to big-endian for the cipher
	ret
SYM_FUNC_END(_aesni_inc)
2758
2759
2760
2761
2762
/*
 * aesni_ctr_enc (x86_64 only): CTR-mode encrypt LEN bytes from (INP)
 * to (OUTP), counter block at (IVP).  Generates up to four keystream
 * blocks per iteration via _aesni_inc + _aesni_enc4 and xors them with
 * the input.  The advanced counter is stored back to (IVP).
 */
SYM_FUNC_START(aesni_ctr_enc)
	FRAME_BEGIN
	cmp $16, LEN
	jb .Lctr_enc_just_ret
	mov 480(KEYP), KLEN
	movups (IVP), IV
	call _aesni_inc_init		# CTR/INC/TCTR_LOW setup
	cmp $64, LEN
	jb .Lctr_enc_loop1
.align 4
.Lctr_enc_loop4:
	movaps IV, STATE1		# four counter blocks per pass
	call _aesni_inc
	movups (INP), IN1
	movaps IV, STATE2
	call _aesni_inc
	movups 0x10(INP), IN2
	movaps IV, STATE3
	call _aesni_inc
	movups 0x20(INP), IN3
	movaps IV, STATE4
	call _aesni_inc
	movups 0x30(INP), IN4
	call _aesni_enc4
	pxor IN1, STATE1		# xor keystream with input
	movups STATE1, (OUTP)
	pxor IN2, STATE2
	movups STATE2, 0x10(OUTP)
	pxor IN3, STATE3
	movups STATE3, 0x20(OUTP)
	pxor IN4, STATE4
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jge .Lctr_enc_loop4
	cmp $16, LEN
	jb .Lctr_enc_ret
.align 4
.Lctr_enc_loop1:
	movaps IV, STATE		# remaining blocks, one by one
	call _aesni_inc
	movups (INP), IN
	call _aesni_enc1
	pxor IN, STATE
	movups STATE, (OUTP)
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jge .Lctr_enc_loop1
.Lctr_enc_ret:
	movups IV, (IVP)		# save advanced counter
.Lctr_enc_just_ret:
	FRAME_END
	ret
SYM_FUNC_END(aesni_ctr_enc)
2821
2822#endif
2823
.section .rodata.cst16.gf128mul_x_ble_mask, "aM", @progbits, 16
.align 16
/*
 * Mask for _aesni_gf128mul_x_ble(): 0x87 reduces a carry out of the
 * high qword (the GF(2^128) polynomial's low byte); the 1 in the high
 * qword propagates a carry from the low qword into the high one.
 */
.Lgf128mul_x_ble_mask:
	.octa 0x00000000000000010000000000000087
.previous
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
/*
 * _aesni_gf128mul_x_ble(): multiply the XTS tweak in IV by x in
 * GF(2^128), "ble" convention (two little-endian qwords).
 * pshufd $0x13 + psrad $31 broadcasts the top (carry) bit of each
 * qword across a dword; masking with GF128MUL_MASK turns that into
 * 0x87 for carry out of the high qword and bit 64 for carry from the
 * low qword, which the final pxor applies after the paddq doubling.
 * Clobbers KEY.
 */
#define _aesni_gf128mul_x_ble() \
	pshufd $0x13, IV, KEY; \
	paddq IV, IV; \
	psrad $31, KEY; \
	pand GF128MUL_MASK, KEY; \
	pxor KEY, IV;
2847
2848
2849
2850
2851
/*
 * aesni_xts_encrypt: XTS-encrypt LEN bytes from (INP) to (OUTP), tweak
 * at (IVP).  Main loop handles four blocks at a time, writing the
 * per-block tweaks into the output buffer first so they can be re-read
 * for the post-encryption xor (saves xmm registers).  A trailing
 * partial block is handled with ciphertext stealing via
 * .Lcts_permute_table.  The advanced tweak is saved back to (IVP).
 */
SYM_FUNC_START(aesni_xts_encrypt)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl IVP
	pushl LEN
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+20)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+24)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+28)(%esp), INP	# src
	movl (FRAME_OFFSET+32)(%esp), LEN	# len
	movl (FRAME_OFFSET+36)(%esp), IVP	# iv
	movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK
#else
	movdqa .Lgf128mul_x_ble_mask(%rip), GF128MUL_MASK
#endif
	movups (IVP), IV

	mov 480(KEYP), KLEN

.Lxts_enc_loop4:
	sub $64, LEN
	jl .Lxts_enc_1x			# fewer than 4 blocks left

	movdqa IV, STATE1		# xor tweak in, stash tweak in dst
	movdqu 0x00(INP), IN
	pxor IN, STATE1
	movdqu IV, 0x00(OUTP)

	_aesni_gf128mul_x_ble()		# next tweak
	movdqa IV, STATE2
	movdqu 0x10(INP), IN
	pxor IN, STATE2
	movdqu IV, 0x10(OUTP)

	_aesni_gf128mul_x_ble()
	movdqa IV, STATE3
	movdqu 0x20(INP), IN
	pxor IN, STATE3
	movdqu IV, 0x20(OUTP)

	_aesni_gf128mul_x_ble()
	movdqa IV, STATE4
	movdqu 0x30(INP), IN
	pxor IN, STATE4
	movdqu IV, 0x30(OUTP)

	call _aesni_enc4

	movdqu 0x00(OUTP), IN		# re-read tweaks, xor them out
	pxor IN, STATE1
	movdqu STATE1, 0x00(OUTP)

	movdqu 0x10(OUTP), IN
	pxor IN, STATE2
	movdqu STATE2, 0x10(OUTP)

	movdqu 0x20(OUTP), IN
	pxor IN, STATE3
	movdqu STATE3, 0x20(OUTP)

	movdqu 0x30(OUTP), IN
	pxor IN, STATE4
	movdqu STATE4, 0x30(OUTP)

	_aesni_gf128mul_x_ble()		# tweak for the next group

	add $64, INP
	add $64, OUTP
	test LEN, LEN
	jnz .Lxts_enc_loop4

.Lxts_enc_ret_iv:
	movups IV, (IVP)		# save advanced tweak

.Lxts_enc_ret:
#ifndef __x86_64__
	popl KLEN
	popl KEYP
	popl LEN
	popl IVP
#endif
	FRAME_END
	ret

.Lxts_enc_1x:
	add $64, LEN			# undo the speculative sub
	jz .Lxts_enc_ret_iv
	sub $16, LEN
	jl .Lxts_enc_cts4		# partial with <1 block: steal from
					# the already-encrypted STATE4
.Lxts_enc_loop1:
	movdqu (INP), STATE
	pxor IV, STATE
	call _aesni_enc1
	pxor IV, STATE
	_aesni_gf128mul_x_ble()

	test LEN, LEN
	jz .Lxts_enc_out

	add $16, INP
	sub $16, LEN
	jl .Lxts_enc_cts1		# trailing partial block follows

	movdqu STATE, (OUTP)
	add $16, OUTP
	jmp .Lxts_enc_loop1

.Lxts_enc_out:
	movdqu STATE, (OUTP)
	jmp .Lxts_enc_ret_iv

.Lxts_enc_cts4:
	movdqa STATE4, STATE		# steal from the 4th block's output
	sub $16, OUTP

.Lxts_enc_cts1:
#ifndef __x86_64__
	lea .Lcts_permute_table, T1
#else
	lea .Lcts_permute_table(%rip), T1
#endif
	add LEN, INP			# INP -> start of partial block
	add $16, LEN			# LEN = partial size again
	movups (INP), IN1

	mov T1, IVP
	add $32, IVP
	add LEN, T1			# T1  -> truncating mask
	sub LEN, IVP			# IVP -> aligning mask
	add OUTP, LEN

	movups (T1), %xmm4
	movaps STATE, IN2
	pshufb %xmm4, STATE
	movups STATE, (LEN)		# truncated ciphertext stored last

	movups (IVP), %xmm0		# pblendvb selector (implicit xmm0)
	pshufb %xmm0, IN1
	pblendvb IN2, IN1		# build the stolen block
	movaps IN1, STATE

	pxor IV, STATE			# encrypt it with the current tweak
	call _aesni_enc1
	pxor IV, STATE

	movups STATE, (OUTP)
	jmp .Lxts_enc_ret
SYM_FUNC_END(aesni_xts_encrypt)
3002
3003
3004
3005
3006
/*
 * aesni_xts_decrypt: XTS-decrypt LEN bytes from (INP) to (OUTP), tweak
 * at (IVP).  Mirrors aesni_xts_encrypt, but if LEN is not a multiple
 * of 16 the last full block is held back (sub $16 up front) because
 * ciphertext stealing on decrypt needs the *next* tweak for that block
 * while the partial block uses the current one.
 */
SYM_FUNC_START(aesni_xts_decrypt)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl IVP
	pushl LEN
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+20)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+24)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+28)(%esp), INP	# src
	movl (FRAME_OFFSET+32)(%esp), LEN	# len
	movl (FRAME_OFFSET+36)(%esp), IVP	# iv
	movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK
#else
	movdqa .Lgf128mul_x_ble_mask(%rip), GF128MUL_MASK
#endif
	movups (IVP), IV

	mov 480(KEYP), KLEN
	add $240, KEYP			# KEYP -> decryption schedule

	test $15, LEN			# unaligned length?  reserve the
	jz .Lxts_dec_loop4		# last full block for stealing
	sub $16, LEN

.Lxts_dec_loop4:
	sub $64, LEN
	jl .Lxts_dec_1x			# fewer than 4 blocks left

	movdqa IV, STATE1		# xor tweak in, stash tweak in dst
	movdqu 0x00(INP), IN
	pxor IN, STATE1
	movdqu IV, 0x00(OUTP)

	_aesni_gf128mul_x_ble()		# next tweak
	movdqa IV, STATE2
	movdqu 0x10(INP), IN
	pxor IN, STATE2
	movdqu IV, 0x10(OUTP)

	_aesni_gf128mul_x_ble()
	movdqa IV, STATE3
	movdqu 0x20(INP), IN
	pxor IN, STATE3
	movdqu IV, 0x20(OUTP)

	_aesni_gf128mul_x_ble()
	movdqa IV, STATE4
	movdqu 0x30(INP), IN
	pxor IN, STATE4
	movdqu IV, 0x30(OUTP)

	call _aesni_dec4

	movdqu 0x00(OUTP), IN		# re-read tweaks, xor them out
	pxor IN, STATE1
	movdqu STATE1, 0x00(OUTP)

	movdqu 0x10(OUTP), IN
	pxor IN, STATE2
	movdqu STATE2, 0x10(OUTP)

	movdqu 0x20(OUTP), IN
	pxor IN, STATE3
	movdqu STATE3, 0x20(OUTP)

	movdqu 0x30(OUTP), IN
	pxor IN, STATE4
	movdqu STATE4, 0x30(OUTP)

	_aesni_gf128mul_x_ble()		# tweak for the next group

	add $64, INP
	add $64, OUTP
	test LEN, LEN
	jnz .Lxts_dec_loop4

.Lxts_dec_ret_iv:
	movups IV, (IVP)		# save advanced tweak

.Lxts_dec_ret:
#ifndef __x86_64__
	popl KLEN
	popl KEYP
	popl LEN
	popl IVP
#endif
	FRAME_END
	ret

.Lxts_dec_1x:
	add $64, LEN			# undo the speculative sub
	jz .Lxts_dec_ret_iv

.Lxts_dec_loop1:
	movdqu (INP), STATE

	add $16, INP
	sub $16, LEN
	jl .Lxts_dec_cts1		# this is the held-back block

	pxor IV, STATE
	call _aesni_dec1
	pxor IV, STATE
	_aesni_gf128mul_x_ble()

	test LEN, LEN
	jz .Lxts_dec_out

	movdqu STATE, (OUTP)
	add $16, OUTP
	jmp .Lxts_dec_loop1

.Lxts_dec_out:
	movdqu STATE, (OUTP)
	jmp .Lxts_dec_ret_iv

.Lxts_dec_cts1:
	movdqa IV, STATE4		# STATE4 = current ("partial") tweak
	_aesni_gf128mul_x_ble()		# IV = next tweak, for this block

	pxor IV, STATE			# decrypt held-back block with the
	call _aesni_dec1		# NEXT tweak (XTS stealing order)
	pxor IV, STATE

#ifndef __x86_64__
	lea .Lcts_permute_table, T1
#else
	lea .Lcts_permute_table(%rip), T1
#endif
	add LEN, INP			# INP -> start of partial block
	add $16, LEN			# LEN = partial size again
	movups (INP), IN1

	mov T1, IVP
	add $32, IVP
	add LEN, T1			# T1  -> truncating mask
	sub LEN, IVP			# IVP -> aligning mask
	add OUTP, LEN

	movups (T1), %xmm4
	movaps STATE, IN2
	pshufb %xmm4, STATE
	movups STATE, (LEN)		# truncated plaintext stored last

	movups (IVP), %xmm0		# pblendvb selector (implicit xmm0)
	pshufb %xmm0, IN1
	pblendvb IN2, IN1		# build the stolen block
	movaps IN1, STATE

	pxor STATE4, STATE		# decrypt it with the saved tweak
	call _aesni_dec1
	pxor STATE4, STATE

	movups STATE, (OUTP)
	jmp .Lxts_dec_ret
SYM_FUNC_END(aesni_xts_decrypt)
3164