1
2
3
4
5
6
7
8
9#include <linux/linkage.h>
10#include <asm/frame.h>
11
/* The eight 128-bit state words, one XMM register each. */
#define STATE0	%xmm0
#define STATE1	%xmm1
#define STATE2	%xmm2
#define STATE3	%xmm3
#define STATE4	%xmm4
#define STATE5	%xmm5
#define STATE6	%xmm6
#define STATE7	%xmm7

/* The two 16-byte halves of the 32-byte block currently being processed. */
#define MSG0	%xmm8
#define MSG1	%xmm9

/* Scratch vector registers. */
#define T0	%xmm10
#define T1	%xmm11
#define T2	%xmm12
#define T3	%xmm13

/*
 * Integer argument registers as used by the ad/enc/dec routines:
 * state pointer, byte count, source pointer, destination pointer.
 */
#define STATEP	%rdi
#define LEN	%rsi
#define SRC	%rdx
#define DST	%rcx
31
/*
 * Two 16-byte constants mixed into the initial state by
 * crypto_aegis128l_aesni_init.
 */
.section .rodata.cst16.aegis128l_const, "aM", @progbits, 32
.p2align 4
.Laegis128l_const_0:
	.byte 0x00, 0x01, 0x01, 0x02
	.byte 0x03, 0x05, 0x08, 0x0d
	.byte 0x15, 0x22, 0x37, 0x59
	.byte 0x90, 0xe9, 0x79, 0x62
.Laegis128l_const_1:
	.byte 0xdb, 0x3d, 0x18, 0x55
	.byte 0x6d, 0xc2, 0x2f, 0xf1
	.byte 0x20, 0x11, 0x31, 0x42
	.byte 0x73, 0xb5, 0x28, 0xdd

/*
 * Byte positions 0..31.  crypto_aegis128l_aesni_dec_tail compares these
 * against the tail length to build a per-byte mask.
 */
.section .rodata.cst16.aegis128l_counter, "aM", @progbits, 16
.p2align 4
.Laegis128l_counter0:
	.byte 0x00, 0x01, 0x02, 0x03
	.byte 0x04, 0x05, 0x06, 0x07
	.byte 0x08, 0x09, 0x0a, 0x0b
	.byte 0x0c, 0x0d, 0x0e, 0x0f
.Laegis128l_counter1:
	.byte 0x10, 0x11, 0x12, 0x13
	.byte 0x14, 0x15, 0x16, 0x17
	.byte 0x18, 0x19, 0x1a, 0x1b
	.byte 0x1c, 0x1d, 0x1e, 0x1f
49
50.text
51
52
53
54
55
56
57
58
59
60
61
62
63
64
/*
 * __load_partial: internal helper, reads the low 5 bits of LEN worth of
 * bytes from SRC without touching memory past them.
 *
 * In:      LEN = byte count (only bits 0..4 are examined)
 *          SRC = source pointer
 * Out:     MSG0 = bytes 0..15, MSG1 = bytes 16..31, zero padded
 * Changes: T0, %r8, %r9, flags
 *
 * The pieces are gathered smallest first (1, 2, 4, 8, 16 bytes).  Each
 * piece sits at the offset formed by the LEN bits above its own bit, and
 * the already gathered data is shifted up before a larger piece is merged
 * in below it.
 */
__load_partial:
	pxor MSG0, MSG0
	pxor MSG1, MSG1
	xor %r9d, %r9d

	/* 1 byte at offset (LEN & 0x1E) */
	test $0x1, LEN
	jz .Lld_partial_1

	mov LEN, %r8
	and $0x1E, %r8
	mov (SRC, %r8), %r9b

.Lld_partial_1:
	/* 2 bytes at offset (LEN & 0x1C) */
	test $0x2, LEN
	jz .Lld_partial_2

	mov LEN, %r8
	and $0x1C, %r8
	shl $0x10, %r9
	mov (SRC, %r8), %r9w

.Lld_partial_2:
	/* 4 bytes at offset (LEN & 0x18) */
	test $0x4, LEN
	jz .Lld_partial_4

	mov LEN, %r8
	and $0x18, %r8
	shl $32, %r9
	mov (SRC, %r8), %r8d	/* 32-bit load zero-extends into %r8 */
	xor %r8, %r9

.Lld_partial_4:
	/* up to 7 gathered bytes move into the low quadword of MSG0 */
	movq %r9, MSG0

	/* 8 bytes at offset (LEN & 0x10) */
	test $0x8, LEN
	jz .Lld_partial_8

	mov LEN, %r8
	and $0x10, %r8
	pslldq $8, MSG0
	movq (SRC, %r8), T0
	pxor T0, MSG0

.Lld_partial_8:
	/* a full first block: what was gathered so far is the second block */
	test $0x10, LEN
	jz .Lld_partial_16

	movdqa MSG0, MSG1
	movdqu (SRC), MSG0

.Lld_partial_16:
	ret
ENDPROC(__load_partial)
127
128
129
130
131
132
133
134
135
136
137
138
139
140
/*
 * __store_partial: internal helper, writes the first LEN bytes of the
 * 32-byte value T0:T1 to DST.
 *
 * In:      LEN = byte count
 *          DST = destination pointer
 *          T0  = bytes 0..15, T1 = bytes 16..31
 * Changes: T0, %r8, %r9, %r10, flags
 *
 * Pieces are written largest first (16, 8, 4, 2, 1 bytes); after each one
 * the remaining count shrinks and the write pointer advances.
 */
__store_partial:
	mov DST, %r9		/* running write pointer */
	mov LEN, %r8		/* bytes still to write */

	cmp $16, %r8
	jl .Lst_partial_16

	movdqu T0, (%r9)
	movdqa T1, T0		/* continue with the second block */

	lea 16(%r9), %r9
	sub $16, %r8

.Lst_partial_16:
	cmp $8, %r8
	jl .Lst_partial_8

	movq T0, (%r9)		/* low quadword straight to memory */
	psrldq $8, T0

	lea 8(%r9), %r9
	sub $8, %r8

.Lst_partial_8:
	/* fewer than 8 bytes remain: finish from a general-purpose register */
	movq T0, %r10

	cmp $4, %r8
	jl .Lst_partial_4

	mov %r10d, (%r9)
	shr $32, %r10

	lea 4(%r9), %r9
	sub $4, %r8

.Lst_partial_4:
	cmp $2, %r8
	jl .Lst_partial_2

	mov %r10w, (%r9)
	shr $0x10, %r10

	lea 2(%r9), %r9
	sub $2, %r8

.Lst_partial_2:
	cmp $1, %r8
	jl .Lst_partial_1

	mov %r10b, (%r9)

.Lst_partial_1:
	ret
ENDPROC(__store_partial)
196
/*
 * update: one state round without message input.
 *
 * Each register gets one AES round of its own contents, keyed with the old
 * value of the following word (STATE7 is keyed with STATE0, STATE6 with the
 * saved old STATE7).  So the new word k+1 lands in the register that held
 * word k, and new word 0 lands in STATE7: the words rotate by one register
 * per call.  The order of the aesenc instructions matters, because every
 * round key must be read before its register is overwritten.
 *
 * Changes: STATE0..STATE7, T0
 */
.macro update
	movdqa STATE7, T0	/* keep old STATE7; it keys STATE6 last */
	aesenc STATE0, STATE7
	aesenc STATE1, STATE0
	aesenc STATE2, STATE1
	aesenc STATE3, STATE2
	aesenc STATE4, STATE3
	aesenc STATE5, STATE4
	aesenc STATE6, STATE5
	aesenc T0,     STATE6
.endm

/*
 * update_absorb d0 d1: run update, then XOR MSG0 into \d0 and MSG1 into \d1.
 */
.macro update_absorb d0 d1
	update
	pxor MSG0, \d0
	pxor MSG1, \d1
.endm

/*
 * updateN is the round to use when N rounds (mod 8) have already run on
 * the loaded registers; the two XOR targets move down one register each
 * time to follow the rotation.
 */
.macro update0
	update_absorb STATE7, STATE3
.endm

.macro update1
	update_absorb STATE6, STATE2
.endm

.macro update2
	update_absorb STATE5, STATE1
.endm

.macro update3
	update_absorb STATE4, STATE0
.endm

.macro update4
	update_absorb STATE3, STATE7
.endm

.macro update5
	update_absorb STATE2, STATE6
.endm

.macro update6
	update_absorb STATE1, STATE5
.endm

.macro update7
	update_absorb STATE0, STATE4
.endm
256
/*
 * state_load: read the eight 16-byte state words from the buffer at STATEP
 * into STATE0..STATE7 (unaligned loads).
 */
.macro state_load
	movdqu (0 * 0x10)(STATEP), STATE0
	movdqu (1 * 0x10)(STATEP), STATE1
	movdqu (2 * 0x10)(STATEP), STATE2
	movdqu (3 * 0x10)(STATEP), STATE3
	movdqu (4 * 0x10)(STATEP), STATE4
	movdqu (5 * 0x10)(STATEP), STATE5
	movdqu (6 * 0x10)(STATEP), STATE6
	movdqu (7 * 0x10)(STATEP), STATE7
.endm

/*
 * state_store s0..s7: write eight registers back to the buffer at STATEP.
 * The last argument goes to offset 0 and the first seven follow it, which
 * accounts for the one-register rotation of the round that ran just before.
 */
.macro state_store s0 s1 s2 s3 s4 s5 s6 s7
	movdqu \s7, (0 * 0x10)(STATEP)
	movdqu \s0, (1 * 0x10)(STATEP)
	movdqu \s1, (2 * 0x10)(STATEP)
	movdqu \s2, (3 * 0x10)(STATEP)
	movdqu \s3, (4 * 0x10)(STATEP)
	movdqu \s4, (5 * 0x10)(STATEP)
	movdqu \s5, (6 * 0x10)(STATEP)
	movdqu \s6, (7 * 0x10)(STATEP)
.endm

/*
 * state_storeN: store the state when updateN was the last round executed.
 * Each variant passes the register list rotated by one more position.
 */
.macro state_store0
	state_store STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7
.endm

.macro state_store1
	state_store STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6
.endm

.macro state_store2
	state_store STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5
.endm

.macro state_store3
	state_store STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4
.endm

.macro state_store4
	state_store STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3
.endm

.macro state_store5
	state_store STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2
.endm

.macro state_store6
	state_store STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1
.endm

.macro state_store7
	state_store STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0
.endm
310
311
312
313
/*
 * crypto_aegis128l_aesni_init: build the initial state and store it.
 *
 * In:  %rdi (STATEP) = state buffer; 128 bytes are written
 *      %rsi          = 16-byte input read with movdqa, so it must be
 *                      16-byte aligned (presumably the key - confirm
 *                      against the C caller)
 *      %rdx          = 16-byte input read with movdqu (presumably the IV)
 * Changes: STATE0..STATE7, MSG0, MSG1, T0, flags
 *
 * The constants are addressed RIP-relative so the code has no absolute
 * relocations and can be linked position-independently.
 */
ENTRY(crypto_aegis128l_aesni_init)
	FRAME_BEGIN

	/* the %rsi block seeds words 0, 4, 5, 6, 7 and stays in MSG1 */
	movdqa (%rsi), MSG1
	movdqa MSG1, STATE0
	movdqa MSG1, STATE4
	movdqa MSG1, STATE5
	movdqa MSG1, STATE6
	movdqa MSG1, STATE7

	/* the %rdx block is folded into words 0 and 4 and stays in MSG0 */
	movdqu (%rdx), MSG0
	pxor MSG0, STATE0
	pxor MSG0, STATE4

	/* constants: words 1 and 3 = const_1, word 2 = const_0, rest XORed */
	movdqa .Laegis128l_const_0(%rip), STATE2
	movdqa .Laegis128l_const_1(%rip), STATE1
	movdqa STATE1, STATE3
	pxor STATE2, STATE5
	pxor STATE1, STATE6
	pxor STATE2, STATE7

	/* ten rounds, each absorbing MSG0/MSG1 as loaded above */
	update0
	update1
	update2
	update3
	update4
	update5
	update6
	update7
	update0
	update1

	/* the last round was update1, so the matching store is state_store1 */
	state_store1

	FRAME_END
	ret
ENDPROC(crypto_aegis128l_aesni_init)
355
/*
 * ad_block a i: absorb the 32-byte block at SRC + i * 0x20 into the state.
 *
 *   a - "a" for aligned (movdqa) or "u" for unaligned (movdqu) loads
 *   i - position 0..7 in the unrolled loop; selects update\i and the exit
 *       label .Lad_out_\i, which stores the state with the right rotation
 *
 * LEN is a byte count, so the "less than one block left" test uses the
 * unsigned jb, matching the entry check in crypto_aegis128l_aesni_ad.
 */
.macro ad_block a i
	movdq\a (\i * 0x20 + 0x00)(SRC), MSG0
	movdq\a (\i * 0x20 + 0x10)(SRC), MSG1
	update\i
	sub $0x20, LEN
	cmp $0x20, LEN
	jb .Lad_out_\i
.endm
364
365
366
367
368
/*
 * ad_exit i: exit stub reached from ad_block when fewer than 0x20 bytes
 * remain after block i; stores the state rotated for update\i and returns.
 */
.macro ad_exit i
.Lad_out_\i\():
	state_store\i
	FRAME_END
	ret
.endm

/*
 * crypto_aegis128l_aesni_ad: absorb all whole 32-byte blocks of a buffer.
 *
 * In:  STATEP = state buffer (read, then written back)
 *      LEN    = byte count; a remainder of fewer than 0x20 bytes is left
 *               unprocessed
 *      SRC    = input buffer
 * Changes: LEN, SRC, all STATE/MSG registers, T0, flags
 *
 * When LEN is below one block the state is not touched at all.
 */
ENTRY(crypto_aegis128l_aesni_ad)
	FRAME_BEGIN

	cmp $0x20, LEN
	jb .Lad_out

	state_load

	/* choose aligned or unaligned loads once, up front */
	test $0xf, SRC
	jnz .Lad_u_loop

.align 8
.Lad_a_loop:
	.irp blk, 0, 1, 2, 3, 4, 5, 6, 7
	ad_block a \blk
	.endr

	add $0x100, SRC
	jmp .Lad_a_loop

.align 8
.Lad_u_loop:
	.irp blk, 0, 1, 2, 3, 4, 5, 6, 7
	ad_block u \blk
	.endr

	add $0x100, SRC
	jmp .Lad_u_loop

	/* .Lad_out_0 .. .Lad_out_7 */
	.irp blk, 0, 1, 2, 3, 4, 5, 6, 7
	ad_exit \blk
	.endr

.Lad_out:
	FRAME_END
	ret
ENDPROC(crypto_aegis128l_aesni_ad)
453
/*
 * crypt m0 m1 s0..s7: XOR the keystream derived from the state words
 * s0..s7 into the block pair m0/m1:
 *
 *   m0 ^= s1 ^ s6 ^ (s2 & s3)
 *   m1 ^= s2 ^ s5 ^ (s6 & s7)
 *
 * The state registers are only read.  Changes: \m0, \m1, T3.
 */
.macro crypt m0 m1 s0 s1 s2 s3 s4 s5 s6 s7
	/* build the first keystream word in T3, then apply it */
	movdqa \s2, T3
	pand \s3, T3
	pxor \s1, T3
	pxor \s6, T3
	pxor T3, \m0

	/* same for the second word */
	movdqa \s6, T3
	pand \s7, T3
	pxor \s2, T3
	pxor \s5, T3
	pxor T3, \m1
.endm

/*
 * cryptN: crypt for a state that has been rotated by N rounds since it
 * was loaded; the register list is rotated the same way as in state_storeN.
 */
.macro crypt0 m0 m1
	crypt \m0 \m1 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7
.endm

.macro crypt1 m0 m1
	crypt \m0 \m1 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6
.endm

.macro crypt2 m0 m1
	crypt \m0 \m1 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5
.endm

.macro crypt3 m0 m1
	crypt \m0 \m1 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4
.endm

.macro crypt4 m0 m1
	crypt \m0 \m1 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3
.endm

.macro crypt5 m0 m1
	crypt \m0 \m1 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2
.endm

.macro crypt6 m0 m1
	crypt \m0 \m1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1
.endm

.macro crypt7 m0 m1
	crypt \m0 \m1 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0
.endm
499
/*
 * encrypt_block a i: encrypt the 32-byte block at SRC + i * 0x20 into
 * DST + i * 0x20, then absorb the plaintext into the state.
 *
 *   a - "a" for aligned (movdqa) or "u" for unaligned (movdqu) accesses
 *   i - position 0..7 in the unrolled loop; selects crypt\i, update\i and
 *       the exit label .Lenc_out_\i
 *
 * The keystream is applied to copies in T0/T1 so that MSG0/MSG1 still hold
 * the plaintext when update\i absorbs them.
 *
 * LEN is a byte count, so the "less than one block left" test uses the
 * unsigned jb, matching the entry check in crypto_aegis128l_aesni_enc.
 */
.macro encrypt_block a i
	movdq\a (\i * 0x20 + 0x00)(SRC), MSG0
	movdq\a (\i * 0x20 + 0x10)(SRC), MSG1
	movdqa MSG0, T0
	movdqa MSG1, T1
	crypt\i T0, T1
	movdq\a T0, (\i * 0x20 + 0x00)(DST)
	movdq\a T1, (\i * 0x20 + 0x10)(DST)

	update\i

	sub $0x20, LEN
	cmp $0x20, LEN
	jb .Lenc_out_\i
.endm
515
/*
 * decrypt_block a i: decrypt the 32-byte block at SRC + i * 0x20 into
 * DST + i * 0x20, then absorb the result into the state.
 *
 *   a - "a" for aligned (movdqa) or "u" for unaligned (movdqu) accesses
 *   i - position 0..7 in the unrolled loop; selects crypt\i, update\i and
 *       the exit label .Ldec_out_\i
 *
 * The keystream is applied to MSG0/MSG1 in place, so update\i absorbs the
 * decrypted data rather than the input.
 *
 * LEN is a byte count, so the "less than one block left" test uses the
 * unsigned jb, matching the entry check in crypto_aegis128l_aesni_dec.
 */
.macro decrypt_block a i
	movdq\a (\i * 0x20 + 0x00)(SRC), MSG0
	movdq\a (\i * 0x20 + 0x10)(SRC), MSG1
	crypt\i MSG0, MSG1
	movdq\a MSG0, (\i * 0x20 + 0x00)(DST)
	movdq\a MSG1, (\i * 0x20 + 0x10)(DST)

	update\i

	sub $0x20, LEN
	cmp $0x20, LEN
	jb .Ldec_out_\i
.endm
529
530
531
532
533
/*
 * enc_exit i: exit stub reached from encrypt_block when fewer than 0x20
 * bytes remain after block i; stores the state rotated for update\i.
 */
.macro enc_exit i
.Lenc_out_\i\():
	state_store\i
	FRAME_END
	ret
.endm

/*
 * crypto_aegis128l_aesni_enc: encrypt all whole 32-byte blocks of a buffer.
 *
 * In:  STATEP = state buffer (read, then written back)
 *      LEN    = byte count; a remainder of fewer than 0x20 bytes is left
 *               unprocessed
 *      SRC    = input buffer
 *      DST    = output buffer
 * Changes: LEN, SRC, DST, %r8, all STATE/MSG registers, T0, T1, T3, flags
 *
 * When LEN is below one block nothing is read or written.
 */
ENTRY(crypto_aegis128l_aesni_enc)
	FRAME_BEGIN

	cmp $0x20, LEN
	jb .Lenc_out

	state_load

	/* the aligned path needs both pointers 16-byte aligned */
	mov DST, %r8
	or SRC, %r8
	test $0xf, %r8
	jnz .Lenc_u_loop

.align 8
.Lenc_a_loop:
	.irp blk, 0, 1, 2, 3, 4, 5, 6, 7
	encrypt_block a \blk
	.endr

	add $0x100, SRC
	add $0x100, DST
	jmp .Lenc_a_loop

.align 8
.Lenc_u_loop:
	.irp blk, 0, 1, 2, 3, 4, 5, 6, 7
	encrypt_block u \blk
	.endr

	add $0x100, SRC
	add $0x100, DST
	jmp .Lenc_u_loop

	/* .Lenc_out_0 .. .Lenc_out_7 */
	.irp blk, 0, 1, 2, 3, 4, 5, 6, 7
	enc_exit \blk
	.endr

.Lenc_out:
	FRAME_END
	ret
ENDPROC(crypto_aegis128l_aesni_enc)
621
622
623
624
625
/*
 * crypto_aegis128l_aesni_enc_tail: encrypt a final partial block.
 *
 * In:  STATEP = state buffer (read, then written back)
 *      LEN    = byte count; the helpers handle up to 31 bytes
 *      SRC    = input buffer
 *      DST    = output buffer
 * Changes: %r8, %r9, %r10, all STATE/MSG registers, T0, T1, T3, flags
 */
ENTRY(crypto_aegis128l_aesni_enc_tail)
	FRAME_BEGIN

	/* zero-padded plaintext into MSG0/MSG1; does not touch STATE0..7 */
	call __load_partial

	state_load

	/* encrypt copies so that MSG0/MSG1 keep the padded plaintext */
	movdqa MSG1, T1
	movdqa MSG0, T0
	crypt0 T0, T1

	/* only LEN bytes of the result are written out */
	call __store_partial

	/* absorb the padded plaintext and save the state */
	update0
	state_store0

	FRAME_END
	ret
ENDPROC(crypto_aegis128l_aesni_enc_tail)
647
648
649
650
651
/*
 * dec_exit i: exit stub reached from decrypt_block when fewer than 0x20
 * bytes remain after block i; stores the state rotated for update\i.
 */
.macro dec_exit i
.Ldec_out_\i\():
	state_store\i
	FRAME_END
	ret
.endm

/*
 * crypto_aegis128l_aesni_dec: decrypt all whole 32-byte blocks of a buffer.
 *
 * In:  STATEP = state buffer (read, then written back)
 *      LEN    = byte count; a remainder of fewer than 0x20 bytes is left
 *               unprocessed
 *      SRC    = input buffer
 *      DST    = output buffer
 * Changes: LEN, SRC, DST, %r8, all STATE/MSG registers, T0, T3, flags
 *
 * When LEN is below one block nothing is read or written.
 */
ENTRY(crypto_aegis128l_aesni_dec)
	FRAME_BEGIN

	cmp $0x20, LEN
	jb .Ldec_out

	state_load

	/* the aligned path needs both pointers 16-byte aligned */
	mov DST, %r8
	or SRC, %r8
	test $0xf, %r8
	jnz .Ldec_u_loop

.align 8
.Ldec_a_loop:
	.irp blk, 0, 1, 2, 3, 4, 5, 6, 7
	decrypt_block a \blk
	.endr

	add $0x100, SRC
	add $0x100, DST
	jmp .Ldec_a_loop

.align 8
.Ldec_u_loop:
	.irp blk, 0, 1, 2, 3, 4, 5, 6, 7
	decrypt_block u \blk
	.endr

	add $0x100, SRC
	add $0x100, DST
	jmp .Ldec_u_loop

	/* .Ldec_out_0 .. .Ldec_out_7 */
	.irp blk, 0, 1, 2, 3, 4, 5, 6, 7
	dec_exit \blk
	.endr

.Ldec_out:
	FRAME_END
	ret
ENDPROC(crypto_aegis128l_aesni_dec)
739
740
741
742
743
/*
 * crypto_aegis128l_aesni_dec_tail: decrypt a final partial block.
 *
 * In:  STATEP = state buffer (read, then written back)
 *      LEN    = byte count; the helpers handle up to 31 bytes
 *      SRC    = input buffer
 *      DST    = output buffer
 * Changes: %r8, %r9, %r10, all STATE/MSG registers, T0..T3, flags
 *
 * The counter tables are addressed RIP-relative so the code has no
 * absolute relocations and can be linked position-independently.
 */
ENTRY(crypto_aegis128l_aesni_dec_tail)
	FRAME_BEGIN

	state_load

	/* zero-padded input into MSG0/MSG1, decrypted in place */
	call __load_partial

	crypt0 MSG0, MSG1

	/* write out the first LEN bytes of the result */
	movdqa MSG0, T0
	movdqa MSG1, T1
	call __store_partial

	/*
	 * The padding bytes of MSG0/MSG1 now hold keystream rather than zero.
	 * Broadcast the low byte of LEN to all 16 lanes (four rounds of
	 * punpcklbw), compare it against the byte positions 0..31, and keep
	 * only the bytes whose position is below LEN.
	 */
	movq LEN, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	movdqa T0, T1
	movdqa .Laegis128l_counter0(%rip), T2
	movdqa .Laegis128l_counter1(%rip), T3
	pcmpgtb T2, T0		/* 0xff where LEN > position (0..15) */
	pcmpgtb T3, T1		/* 0xff where LEN > position (16..31) */
	pand T0, MSG0
	pand T1, MSG1

	/* absorb the masked block and save the state */
	update0

	state_store0

	FRAME_END
	ret
ENDPROC(crypto_aegis128l_aesni_dec_tail)
779
780
781
782
783
/*
 * crypto_aegis128l_aesni_final: run the closing rounds and fold the state
 * into a 16-byte tag buffer.
 *
 * In:  %rdi (STATEP) = state buffer (read only; not written back)
 *      %rsi          = 16-byte buffer; state words are XORed into it
 *      %rdx, %rcx    = two 64-bit counts, each multiplied by 8 below
 *                      (presumably associated-data and message lengths in
 *                      bytes - confirm against the C caller)
 * Changes: all STATE/MSG registers, T0, flags
 */
ENTRY(crypto_aegis128l_aesni_final)
	FRAME_BEGIN

	state_load

	/* length block: %rdx in the low quadword, %rcx in the high one */
	movq %rdx, MSG0
	movq %rcx, T0
	punpcklqdq T0, MSG0
	psllq $3, MSG0		/* both counts times 8 */

	/* mix in state word 2 and use the result for both message inputs */
	pxor STATE2, MSG0
	movdqa MSG0, MSG1

	/* seven rounds absorbing that block */
	update0
	update1
	update2
	update3
	update4
	update5
	update6

	/*
	 * After seven rounds the register rotation puts words 0..6 in
	 * STATE1..STATE7; XOR all seven into the buffer at %rsi.
	 */
	movdqu (%rsi), T0

	pxor STATE7, T0
	pxor STATE6, T0
	pxor STATE5, T0
	pxor STATE4, T0
	pxor STATE3, T0
	pxor STATE2, T0
	pxor STATE1, T0

	movdqu T0, (%rsi)

	FRAME_END
	ret
ENDPROC(crypto_aegis128l_aesni_final)
824