1
2
3
4
5
6
7
8
9#include <linux/linkage.h>
10#include <asm/frame.h>
11
/* The five 128-bit state words, one XMM register each. */
#define STATE0	%xmm0
#define STATE1	%xmm1
#define STATE2	%xmm2
#define STATE3	%xmm3
#define STATE4	%xmm4

/*
 * KEY and MSG deliberately name the same register (%xmm5): the key is only
 * live inside the init routine, the message block everywhere else.
 */
#define KEY	%xmm5
#define MSG	%xmm5

/* Scratch vector registers. */
#define T0	%xmm6
#define T1	%xmm7

/* Integer registers holding the arguments of the bulk/tail routines. */
#define STATEP	%rdi
#define LEN	%rsi
#define SRC	%rdx
#define DST	%rcx
26
/*
 * Two 16-byte initialisation constants. Read as one 32-byte string they are
 * consecutive Fibonacci numbers reduced modulo 256 (0, 1, 1, 2, 3, 5, ...).
 */
.section .rodata.cst16.aegis128_const, "aM", @progbits, 32
.align 16
.Laegis128_const_0:
	.byte 0x00, 0x01, 0x01, 0x02
	.byte 0x03, 0x05, 0x08, 0x0d
	.byte 0x15, 0x22, 0x37, 0x59
	.byte 0x90, 0xe9, 0x79, 0x62
.Laegis128_const_1:
	.byte 0xdb, 0x3d, 0x18, 0x55
	.byte 0x6d, 0xc2, 0x2f, 0xf1
	.byte 0x20, 0x11, 0x31, 0x42
	.byte 0x73, 0xb5, 0x28, 0xdd

/*
 * Byte indices 0..15; compared against a broadcast length in
 * crypto_aegis128_aesni_dec_tail to build a "byte i < LEN" mask.
 */
.section .rodata.cst16.aegis128_counter, "aM", @progbits, 16
.align 16
.Laegis128_counter:
	.byte 0x00, 0x01, 0x02, 0x03
	.byte 0x04, 0x05, 0x06, 0x07
	.byte 0x08, 0x09, 0x0a, 0x0b
	.byte 0x0c, 0x0d, 0x0e, 0x0f

.text
43
44
45
46
47
48
49
50
51
52
/*
 * aegis128_update - one state-update round without message injection.
 *
 * Every state register is replaced by one AES round of itself XORed with
 * its successor register (STATE4 wraps around to the old STATE0 ... i.e.
 * STATE3 is combined with the *old* STATE4, kept in T0).
 *
 * The instruction order is significant: each aesenc must read its source
 * register before a later instruction overwrites it.
 *
 * Changed: STATE0-STATE4, T0 (left holding the old STATE4).
 * The caller XORs the message block into the appropriate register afterwards.
 */
.macro aegis128_update
	movdqa	STATE4, T0		/* keep old STATE4 for the last step */
	aesenc	STATE0, STATE4		/* STATE4 = AESRound(STATE4) ^ STATE0 */
	aesenc	STATE1, STATE0		/* STATE0 = AESRound(STATE0) ^ STATE1 */
	aesenc	STATE2, STATE1		/* STATE1 = AESRound(STATE1) ^ STATE2 */
	aesenc	STATE3, STATE2		/* STATE2 = AESRound(STATE2) ^ STATE3 */
	aesenc	T0,     STATE3		/* STATE3 = AESRound(STATE3) ^ old STATE4 */
.endm
61
62
63
64
65
66
67
68
69
70
71
72
73
/*
 * __load_partial - load a partial (shorter than 16 bytes) block into MSG.
 *
 * In:
 *   LEN - number of bytes to load; each of the low four bits selects one
 *         chunk (1, 2, 4 and 8 bytes)
 *   SRC - source pointer
 * Out:
 *   MSG - the loaded bytes in their memory order, remaining bytes zero
 * Changed:
 *   %r8, %r9, T0, flags
 *
 * The chunks are gathered from the end of the buffer towards its start:
 * the smallest chunk sits at the highest offset, and every larger chunk
 * shifts the previously gathered bytes up before being merged in below
 * them.
 */
__load_partial:
	xor %r9d, %r9d
	pxor MSG, MSG

	/* bit 0: single byte at offset (LEN & 0x1E) */
	test $0x1, LEN
	jz .Lld_partial_1

	mov LEN, %r8
	and $0x1E, %r8
	movzbl (SRC, %r8), %r9d		/* %r9 was zero, so this equals a byte merge */

.Lld_partial_1:
	/* bit 1: 16-bit word at offset (LEN & 0x1C) */
	test $0x2, LEN
	jz .Lld_partial_2

	mov LEN, %r8
	and $0x1C, %r8
	shl $0x10, %r9			/* make room below the byte gathered so far */
	mov (SRC, %r8), %r9w

.Lld_partial_2:
	/* bit 2: 32-bit word at offset (LEN & 0x18) */
	test $0x4, LEN
	jz .Lld_partial_4

	mov LEN, %r8
	and $0x18, %r8
	shl $32, %r9
	mov (SRC, %r8), %r8d		/* zero-extends into %r8 */
	xor %r8, %r9

.Lld_partial_4:
	movq %r9, MSG			/* low 64 bits = gathered bytes, high 64 bits = 0 */

	/* bit 3: 64-bit word at offset (LEN & 0x10) */
	test $0x8, LEN
	jz .Lld_partial_8

	mov LEN, %r8
	and $0x10, %r8
	pslldq $8, MSG			/* move gathered bytes to the high half */
	movq (SRC, %r8), T0
	pxor T0, MSG

.Lld_partial_8:
	ret
ENDPROC(__load_partial)
127
128
129
130
131
132
133
134
135
136
137
138
139
/*
 * __store_partial - store the first LEN bytes of T0 to DST.
 *
 * In:
 *   LEN - number of bytes to store (callers pass a partial block, i.e. a
 *         value below 16; at most 8 + 4 + 2 + 1 bytes are ever written)
 *   DST - destination pointer
 *   T0  - data to store
 * Changed:
 *   %r8, %r9, %r10, T0, flags
 *
 * The byte count is an unsigned quantity, so the comparisons use the
 * unsigned condition (jb) rather than the signed one.
 */
__store_partial:
	mov LEN, %r8			/* %r8 = bytes still to store */
	mov DST, %r9			/* %r9 = running output pointer */

	movq T0, %r10			/* %r10 = next (up to) 8 bytes to emit */

	cmp $8, %r8
	jb .Lst_partial_8

	mov %r10, (%r9)
	psrldq $8, T0			/* bring the high half of T0 down */
	movq T0, %r10

	sub $8, %r8
	add $8, %r9

.Lst_partial_8:
	cmp $4, %r8
	jb .Lst_partial_4

	mov %r10d, (%r9)
	shr $32, %r10

	sub $4, %r8
	add $4, %r9

.Lst_partial_4:
	cmp $2, %r8
	jb .Lst_partial_2

	mov %r10w, (%r9)
	shr $0x10, %r10

	sub $2, %r8
	add $2, %r9

.Lst_partial_2:
	cmp $1, %r8
	jb .Lst_partial_1

	mov %r10b, (%r9)

.Lst_partial_1:
	ret
ENDPROC(__store_partial)
185
186
187
188
/*
 * crypto_aegis128_aesni_init - derive the initial state from key and IV.
 *
 * In:
 *   %rdi (STATEP) - output: five 16-byte state words (80 bytes)
 *   %rsi          - 16-byte key; loaded with movdqa, so it must be
 *                   16-byte aligned
 *   %rdx          - 16-byte IV; loaded with movdqu, any alignment
 * Changed:
 *   STATE0-STATE4, KEY/MSG, T0, T1
 *
 * The constants are addressed RIP-relative so the code does not need
 * absolute relocations and stays position independent.
 */
ENTRY(crypto_aegis128_aesni_init)
	FRAME_BEGIN

	/* load IV: */
	movdqu (%rdx), T1

	/* load key and set up the key-dependent words: */
	movdqa (%rsi), KEY
	pxor KEY, T1			/* T1 = key ^ iv */
	movdqa T1, STATE0
	movdqa KEY, STATE3
	movdqa KEY, STATE4

	/* load the constants and mix them into STATE3/STATE4: */
	movdqa .Laegis128_const_0(%rip), STATE2
	movdqa .Laegis128_const_1(%rip), STATE1
	pxor STATE2, STATE3		/* STATE3 = key ^ const_0 */
	pxor STATE1, STATE4		/* STATE4 = key ^ const_1 */

	/*
	 * Ten update rounds, alternately injecting the key and key ^ iv.
	 * The target register walks backwards because every update rotates
	 * the roles of the state registers by one position.
	 */
	aegis128_update; pxor KEY, STATE4
	aegis128_update; pxor T1,  STATE3
	aegis128_update; pxor KEY, STATE2
	aegis128_update; pxor T1,  STATE1
	aegis128_update; pxor KEY, STATE0
	aegis128_update; pxor T1,  STATE4
	aegis128_update; pxor KEY, STATE3
	aegis128_update; pxor T1,  STATE2
	aegis128_update; pxor KEY, STATE1
	aegis128_update; pxor T1,  STATE0

	/* store the state (ten rounds = two full rotations, so no shuffle): */
	movdqu STATE0, 0x00(STATEP)
	movdqu STATE1, 0x10(STATEP)
	movdqu STATE2, 0x20(STATEP)
	movdqu STATE3, 0x30(STATEP)
	movdqu STATE4, 0x40(STATEP)

	FRAME_END
	ret
ENDPROC(crypto_aegis128_aesni_init)
230
231
232
233
234
/*
 * aegis128_ad_block - absorb one 16-byte block of associated data.
 *
 *   a   - 'a' for aligned (movdqa) or 'u' for unaligned (movdqu) loads
 *   off - byte offset of the block relative to SRC
 *   s4  - state register that receives the message after the update
 *   lbl - label to leave through once fewer than 16 bytes remain
 *
 * LEN is an unsigned byte count, hence the unsigned branch (jb).
 */
.macro aegis128_ad_block a off s4 lbl
	movdq\a \off(SRC), MSG
	aegis128_update
	pxor MSG, \s4
	sub $0x10, LEN
	cmp $0x10, LEN
	jb \lbl
.endm

/*
 * crypto_aegis128_aesni_ad - absorb whole 16-byte blocks of associated data.
 *
 * In:
 *   STATEP - five 16-byte state words, updated in place
 *   LEN    - number of bytes available at SRC
 *   SRC    - input data
 *
 * Returns without touching the state when LEN < 16. Otherwise blocks are
 * absorbed until fewer than 16 bytes remain; a trailing partial block is
 * not consumed here.
 *
 * Changed: %r8, LEN, SRC, STATE0-STATE4, MSG, T0, flags
 */
ENTRY(crypto_aegis128_aesni_ad)
	FRAME_BEGIN

	cmp $0x10, LEN
	jb .Lad_out

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/* pick the aligned or the unaligned loop depending on SRC: */
	mov SRC, %r8
	and $0xF, %r8
	jnz .Lad_u_loop

	/*
	 * Five blocks per iteration: after five updates the register roles
	 * are back where they started, so the loop body needs no shuffling.
	 */
.align 8
.Lad_a_loop:
	aegis128_ad_block a 0x00 STATE4 .Lad_out_1
	aegis128_ad_block a 0x10 STATE3 .Lad_out_2
	aegis128_ad_block a 0x20 STATE2 .Lad_out_3
	aegis128_ad_block a 0x30 STATE1 .Lad_out_4
	aegis128_ad_block a 0x40 STATE0 .Lad_out_0

	add $0x50, SRC
	jmp .Lad_a_loop

.align 8
.Lad_u_loop:
	aegis128_ad_block u 0x00 STATE4 .Lad_out_1
	aegis128_ad_block u 0x10 STATE3 .Lad_out_2
	aegis128_ad_block u 0x20 STATE2 .Lad_out_3
	aegis128_ad_block u 0x30 STATE1 .Lad_out_4
	aegis128_ad_block u 0x40 STATE0 .Lad_out_0

	add $0x50, SRC
	jmp .Lad_u_loop

	/*
	 * Store the state. Each exit undoes the register rotation that
	 * corresponds to the number of blocks absorbed in the last,
	 * incomplete pass through the loop.
	 */
.Lad_out_0:
	movdqu STATE0, 0x00(STATEP)
	movdqu STATE1, 0x10(STATEP)
	movdqu STATE2, 0x20(STATEP)
	movdqu STATE3, 0x30(STATEP)
	movdqu STATE4, 0x40(STATEP)
	FRAME_END
	ret

.Lad_out_1:
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)
	FRAME_END
	ret

.Lad_out_2:
	movdqu STATE3, 0x00(STATEP)
	movdqu STATE4, 0x10(STATEP)
	movdqu STATE0, 0x20(STATEP)
	movdqu STATE1, 0x30(STATEP)
	movdqu STATE2, 0x40(STATEP)
	FRAME_END
	ret

.Lad_out_3:
	movdqu STATE2, 0x00(STATEP)
	movdqu STATE3, 0x10(STATEP)
	movdqu STATE4, 0x20(STATEP)
	movdqu STATE0, 0x30(STATEP)
	movdqu STATE1, 0x40(STATEP)
	FRAME_END
	ret

.Lad_out_4:
	movdqu STATE1, 0x00(STATEP)
	movdqu STATE2, 0x10(STATEP)
	movdqu STATE3, 0x20(STATEP)
	movdqu STATE4, 0x30(STATEP)
	movdqu STATE0, 0x40(STATEP)
	FRAME_END
	ret

.Lad_out:
	FRAME_END
	ret
ENDPROC(crypto_aegis128_aesni_ad)
382
/*
 * encrypt_block - encrypt the 16-byte block with index i and absorb it.
 *
 *   a        - 'a' for aligned (movdqa) or 'u' for unaligned (movdqu) access
 *   s0       - accepted to keep the argument list uniform; not referenced
 *   s1 .. s4 - state registers in their current rotation
 *   i        - block index within the unrolled loop; selects the offset
 *              (i * 16) and the exit label .Lenc_out_<i>
 *
 * Output block = input ^ s1 ^ s4 ^ (s2 & s3). The unmodified input block is
 * then XORed into s4 after the state update.
 *
 * Leaves through .Lenc_out_<i> once fewer than 16 bytes remain. LEN is an
 * unsigned byte count, hence the unsigned branch (jb).
 *
 * Changed: MSG, T0, T1, STATE0-STATE4, LEN, flags
 */
.macro encrypt_block a s0 s1 s2 s3 s4 i
	movdq\a (\i * 0x10)(SRC), MSG
	movdqa MSG, T0
	pxor \s1, T0
	pxor \s4, T0
	movdqa \s2, T1
	pand \s3, T1
	pxor T1, T0
	movdq\a T0, (\i * 0x10)(DST)

	aegis128_update
	pxor MSG, \s4

	sub $0x10, LEN
	cmp $0x10, LEN
	jb .Lenc_out_\i
.endm
400
401
402
403
404
/*
 * crypto_aegis128_aesni_enc - encrypt whole 16-byte blocks.
 *
 * In:
 *   STATEP - five 16-byte state words, updated in place
 *   LEN    - number of input bytes
 *   SRC    - input
 *   DST    - output
 *
 * Nothing happens when LEN < 16. Otherwise blocks are processed until fewer
 * than 16 bytes are left; a trailing partial block is not handled here.
 *
 * Changed: %r8, LEN, SRC, DST, STATE0-STATE4, MSG, T0, T1, flags
 */
ENTRY(crypto_aegis128_aesni_enc)
	FRAME_BEGIN

	cmp $0x10, LEN
	jb .Lenc_out

	/* bring the state into registers: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/* the aligned loop needs both pointers to be 16-byte aligned: */
	mov DST, %r8
	or SRC, %r8
	and $0xF, %r8
	jnz .Lenc_u_loop

	/* five blocks per pass = one full rotation of the register roles */
.align 8
.Lenc_a_loop:
	encrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
	encrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
	encrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
	encrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
	encrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4

	add $0x50, SRC
	add $0x50, DST
	jmp .Lenc_a_loop

.align 8
.Lenc_u_loop:
	encrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
	encrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
	encrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
	encrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
	encrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4

	add $0x50, SRC
	add $0x50, DST
	jmp .Lenc_u_loop

	/*
	 * Exits: .Lenc_out_<i> is taken after block i of a pass. Each one
	 * writes the registers back to the slot matching their current role.
	 */
.Lenc_out_0:
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)
	movdqu STATE4, 0x00(STATEP)
	FRAME_END
	ret

.Lenc_out_1:
	movdqu STATE0, 0x20(STATEP)
	movdqu STATE1, 0x30(STATEP)
	movdqu STATE2, 0x40(STATEP)
	movdqu STATE3, 0x00(STATEP)
	movdqu STATE4, 0x10(STATEP)
	FRAME_END
	ret

.Lenc_out_2:
	movdqu STATE0, 0x30(STATEP)
	movdqu STATE1, 0x40(STATEP)
	movdqu STATE2, 0x00(STATEP)
	movdqu STATE3, 0x10(STATEP)
	movdqu STATE4, 0x20(STATEP)
	FRAME_END
	ret

.Lenc_out_3:
	movdqu STATE0, 0x40(STATEP)
	movdqu STATE1, 0x00(STATEP)
	movdqu STATE2, 0x10(STATEP)
	movdqu STATE3, 0x20(STATEP)
	movdqu STATE4, 0x30(STATEP)
	FRAME_END
	ret

.Lenc_out_4:
	movdqu STATE4, 0x40(STATEP)
	movdqu STATE3, 0x30(STATEP)
	movdqu STATE2, 0x20(STATEP)
	movdqu STATE1, 0x10(STATEP)
	movdqu STATE0, 0x00(STATEP)
	FRAME_END
	ret

.Lenc_out:
	FRAME_END
	ret
ENDPROC(crypto_aegis128_aesni_enc)
497
498
499
500
501
/*
 * crypto_aegis128_aesni_enc_tail - encrypt a final partial block.
 *
 * In:
 *   STATEP - five 16-byte state words, updated in place
 *   LEN    - number of bytes in the partial block (handled by
 *            __load_partial / __store_partial)
 *   SRC    - input
 *   DST    - output
 *
 * The input is zero-padded to 16 bytes by __load_partial, the first LEN
 * bytes of the result are written by __store_partial, and the padded
 * input block is absorbed into the state.
 */
ENTRY(crypto_aegis128_aesni_enc_tail)
	FRAME_BEGIN

	/* bring the state into registers: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/* MSG = zero-padded input block (clobbers T0, %r8, %r9): */
	call __load_partial

	/* T0 = MSG ^ STATE1 ^ STATE4 ^ (STATE2 & STATE3) */
	movdqa STATE2, T1
	pand STATE3, T1
	movdqa MSG, T0
	pxor T1, T0
	pxor STATE4, T0
	pxor STATE1, T0

	/* write the first LEN bytes of T0 to DST: */
	call __store_partial

	/* absorb the padded input block: */
	aegis128_update
	pxor MSG, STATE4

	/* one update was performed, so the roles are rotated by one: */
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)
	movdqu STATE4, 0x00(STATEP)

	FRAME_END
	ret
ENDPROC(crypto_aegis128_aesni_enc_tail)
537
/*
 * decrypt_block - decrypt the 16-byte block with index i and absorb it.
 *
 *   a        - 'a' for aligned (movdqa) or 'u' for unaligned (movdqu) access
 *   s0       - accepted to keep the argument list uniform; not referenced
 *   s1 .. s4 - state registers in their current rotation
 *   i        - block index within the unrolled loop; selects the offset
 *              (i * 16) and the exit label .Ldec_out_<i>
 *
 * Output block = input ^ s1 ^ s4 ^ (s2 & s3). The output block is then
 * XORed into s4 after the state update.
 *
 * Leaves through .Ldec_out_<i> once fewer than 16 bytes remain. LEN is an
 * unsigned byte count, hence the unsigned branch (jb).
 *
 * Changed: MSG, T0, T1, STATE0-STATE4, LEN, flags
 */
.macro decrypt_block a s0 s1 s2 s3 s4 i
	movdq\a (\i * 0x10)(SRC), MSG
	pxor \s1, MSG
	pxor \s4, MSG
	movdqa \s2, T1
	pand \s3, T1
	pxor T1, MSG
	movdq\a MSG, (\i * 0x10)(DST)

	aegis128_update
	pxor MSG, \s4

	sub $0x10, LEN
	cmp $0x10, LEN
	jb .Ldec_out_\i
.endm
554
555
556
557
558
/*
 * crypto_aegis128_aesni_dec - decrypt whole 16-byte blocks.
 *
 * In:
 *   STATEP - five 16-byte state words, updated in place
 *   LEN    - number of input bytes
 *   SRC    - input
 *   DST    - output
 *
 * Nothing happens when LEN < 16. Otherwise blocks are processed until fewer
 * than 16 bytes are left; a trailing partial block is not handled here.
 *
 * Changed: %r8, LEN, SRC, DST, STATE0-STATE4, MSG, T0, T1, flags
 */
ENTRY(crypto_aegis128_aesni_dec)
	FRAME_BEGIN

	cmp $0x10, LEN
	jb .Ldec_out

	/* bring the state into registers: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/* the aligned loop needs both pointers to be 16-byte aligned: */
	mov DST, %r8
	or SRC, %r8
	and $0xF, %r8
	jnz .Ldec_u_loop

	/* five blocks per pass = one full rotation of the register roles */
.align 8
.Ldec_a_loop:
	decrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
	decrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
	decrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
	decrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
	decrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4

	add $0x50, SRC
	add $0x50, DST
	jmp .Ldec_a_loop

.align 8
.Ldec_u_loop:
	decrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
	decrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
	decrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
	decrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
	decrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4

	add $0x50, SRC
	add $0x50, DST
	jmp .Ldec_u_loop

	/*
	 * Exits: .Ldec_out_<i> is taken after block i of a pass. Each one
	 * writes the registers back to the slot matching their current role.
	 */
.Ldec_out_0:
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)
	movdqu STATE4, 0x00(STATEP)
	FRAME_END
	ret

.Ldec_out_1:
	movdqu STATE0, 0x20(STATEP)
	movdqu STATE1, 0x30(STATEP)
	movdqu STATE2, 0x40(STATEP)
	movdqu STATE3, 0x00(STATEP)
	movdqu STATE4, 0x10(STATEP)
	FRAME_END
	ret

.Ldec_out_2:
	movdqu STATE0, 0x30(STATEP)
	movdqu STATE1, 0x40(STATEP)
	movdqu STATE2, 0x00(STATEP)
	movdqu STATE3, 0x10(STATEP)
	movdqu STATE4, 0x20(STATEP)
	FRAME_END
	ret

.Ldec_out_3:
	movdqu STATE0, 0x40(STATEP)
	movdqu STATE1, 0x00(STATEP)
	movdqu STATE2, 0x10(STATEP)
	movdqu STATE3, 0x20(STATEP)
	movdqu STATE4, 0x30(STATEP)
	FRAME_END
	ret

.Ldec_out_4:
	movdqu STATE4, 0x40(STATEP)
	movdqu STATE3, 0x30(STATEP)
	movdqu STATE2, 0x20(STATEP)
	movdqu STATE1, 0x10(STATEP)
	movdqu STATE0, 0x00(STATEP)
	FRAME_END
	ret

.Ldec_out:
	FRAME_END
	ret
ENDPROC(crypto_aegis128_aesni_dec)
651
652
653
654
655
/*
 * crypto_aegis128_aesni_dec_tail - decrypt a final partial block.
 *
 * In:
 *   STATEP - five 16-byte state words, updated in place
 *   LEN    - number of bytes in the partial block (handled by
 *            __load_partial / __store_partial)
 *   SRC    - input
 *   DST    - output
 *
 * The input is zero-padded to 16 bytes, decrypted, and the first LEN bytes
 * are written out. Before the block is absorbed into the state, every byte
 * at index >= LEN is cleared again, because XORing the keystream into the
 * zero padding left non-zero bytes there.
 *
 * The counter table is addressed RIP-relative so the code does not need an
 * absolute relocation and stays position independent.
 */
ENTRY(crypto_aegis128_aesni_dec_tail)
	FRAME_BEGIN

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/* MSG = zero-padded input block (clobbers T0, %r8, %r9): */
	call __load_partial

	/* MSG ^= STATE1 ^ STATE4 ^ (STATE2 & STATE3) */
	pxor STATE1, MSG
	pxor STATE4, MSG
	movdqa STATE2, T1
	pand STATE3, T1
	pxor T1, MSG

	/* write the first LEN bytes of the result to DST: */
	movdqa MSG, T0
	call __store_partial

	/*
	 * Build the byte mask: four doubling steps copy the low byte of LEN
	 * into all 16 bytes of T0, then byte i becomes 0xff exactly where
	 * LEN > i (signed byte compare against the 0..15 table).
	 */
	movq LEN, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	movdqa .Laegis128_counter(%rip), T1
	pcmpgtb T1, T0
	pand T0, MSG			/* clear the bytes past LEN */

	/* absorb the masked block: */
	aegis128_update
	pxor MSG, STATE4

	/* store the state (rotated by the single update performed): */
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)

	FRAME_END
	ret
ENDPROC(crypto_aegis128_aesni_dec_tail)
701
702
703
704
705
/*
 * crypto_aegis128_aesni_final - finish the computation and fold the state
 * into the 16-byte buffer at %rsi.
 *
 * In:
 *   %rdi (STATEP) - five 16-byte state words (read only here)
 *   %rsi          - 16-byte buffer; XORed in place with all five final
 *                   state words (accessed with movdqu, any alignment)
 *   %rdx, %rcx    - two 64-bit quantities placed in the low and high half
 *                   of the final block, each shifted left by 3 (times 8)
 *                   NOTE(review): presumably the associated-data and text
 *                   lengths in bytes, converted to bits - confirm with caller
 *
 * The state in memory is not written back.
 */
ENTRY(crypto_aegis128_aesni_final)
	FRAME_BEGIN

	/* bring the state into registers: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/* MSG = ((%rcx << 64) | %rdx), then each 64-bit half << 3: */
	movq %rcx, T0
	pslldq $8, T0
	movq %rdx, MSG
	pxor T0, MSG
	psllq $3, MSG

	pxor STATE3, MSG

	/* seven update rounds, injecting the same block every time: */
	aegis128_update; pxor MSG, STATE4
	aegis128_update; pxor MSG, STATE3
	aegis128_update; pxor MSG, STATE2
	aegis128_update; pxor MSG, STATE1
	aegis128_update; pxor MSG, STATE0
	aegis128_update; pxor MSG, STATE4
	aegis128_update; pxor MSG, STATE3

	/* XOR all five state words into the caller's buffer: */
	movdqu (%rsi), MSG

	pxor STATE4, MSG
	pxor STATE3, MSG
	pxor STATE2, MSG
	pxor STATE1, MSG
	pxor STATE0, MSG

	movdqu MSG, (%rsi)

	FRAME_END
	ret
ENDPROC(crypto_aegis128_aesni_final)
748