1
2
3
4
5
6
7
8
9
10
11
12#include <linux/linkage.h>
13#include <asm/frame.h>
14
/*
 * Vector register aliases.
 *
 * STATE0..STATE4 hold the five 16-byte state words while a routine runs.
 * KEY and MSG both name %xmm5.  T0 and T1 are temporaries.
 */
#define STATE0	%xmm0
#define STATE1	%xmm1
#define STATE2	%xmm2
#define STATE3	%xmm3
#define STATE4	%xmm4
#define KEY	%xmm5
#define MSG	%xmm5
#define T0	%xmm6
#define T1	%xmm7

/* General-purpose register aliases used for the routines' arguments. */
#define STATEP	%rdi
#define LEN	%rsi
#define SRC	%rdx
#define DST	%rcx
29
/*
 * Initialisation constants: the first 32 Fibonacci numbers reduced
 * modulo 256, split into two 16-byte halves.
 */
.section	.rodata.cst16.aegis128_const, "aM", @progbits, 32
.balign 16
.Laegis128_const_0:
	.byte	0x00, 0x01, 0x01, 0x02
	.byte	0x03, 0x05, 0x08, 0x0d
	.byte	0x15, 0x22, 0x37, 0x59
	.byte	0x90, 0xe9, 0x79, 0x62
.Laegis128_const_1:
	.byte	0xdb, 0x3d, 0x18, 0x55
	.byte	0x6d, 0xc2, 0x2f, 0xf1
	.byte	0x20, 0x11, 0x31, 0x42
	.byte	0x73, 0xb5, 0x28, 0xdd

/*
 * Byte indices 0..15.  Compared against a broadcast length to build a
 * byte mask for a partial block.
 */
.section	.rodata.cst16.aegis128_counter, "aM", @progbits, 16
.balign 16
.Laegis128_counter:
	.byte	0x00, 0x01, 0x02, 0x03
	.byte	0x04, 0x05, 0x06, 0x07
	.byte	0x08, 0x09, 0x0a, 0x0b
	.byte	0x0c, 0x0d, 0x0e, 0x0f
44
45.text
46
47
48
49
50
51
52
53
54
55
/*
 * aegis128_update: one state-update round, without the message XOR.
 *
 * Every state register goes through one AES round (aesenc) keyed with
 * the old value of its neighbour:
 *
 *   STATE4 = AESRound(STATE4, STATE0)
 *   STATE0 = AESRound(STATE0, STATE1)
 *   STATE1 = AESRound(STATE1, STATE2)
 *   STATE2 = AESRound(STATE2, STATE3)
 *   STATE3 = AESRound(STATE3, old STATE4)
 *
 * The instruction order matters: each register is read as a round key
 * before it is overwritten; only STATE4 needs a saved copy (T0).
 *
 * After the macro the logical state is rotated by one register: logical
 * word 0 lives in STATE4, word 1 in STATE0, and so on.  Callers XOR the
 * message into the register holding logical word 0 and track the
 * rotation themselves.
 *
 * changed:
 *   STATE[0-4]
 *   T0
 */
.macro aegis128_update
	movdqa	STATE4, T0
	aesenc	STATE0, STATE4
	aesenc	STATE1, STATE0
	aesenc	STATE2, STATE1
	aesenc	STATE3, STATE2
	aesenc	T0,     STATE3
.endm
64
65
66
67
68
69
70
71
72
73
74
75
76
/*
 * __load_partial: load a partial block of LEN bytes from SRC into MSG,
 * leaving the bytes that were not read as zero.
 *
 * Bits 0-3 of LEN select which pieces (1, 2, 4 and 8 bytes) are read, so
 * no byte past SRC + LEN is touched.  MSG holds 16 bytes, so this is
 * meant for LEN < 16.
 *
 * input:
 *   LEN - number of bytes to load
 *   SRC - source address
 * output:
 *   MSG - loaded bytes, zero-padded
 * changed:
 *   T0
 *   %r8
 *   %r9
 */
__load_partial:
	xor	%r9d, %r9d		/* 32-bit xor clears all of %r9 */
	pxor	MSG, MSG

	/* bit 0 set: one byte at offset LEN & 0x1E */
	test	$0x1, LEN
	jz	.Lld_partial_1

	mov	LEN, %r8
	and	$0x1E, %r8
	add	SRC, %r8
	mov	(%r8), %r9b

.Lld_partial_1:
	/* bit 1 set: two bytes at offset LEN & 0x1C, below the byte above */
	test	$0x2, LEN
	jz	.Lld_partial_2

	mov	LEN, %r8
	and	$0x1C, %r8
	add	SRC, %r8
	shl	$0x10, %r9
	mov	(%r8), %r9w

.Lld_partial_2:
	/* bit 2 set: four bytes at offset LEN & 0x18 */
	test	$0x4, LEN
	jz	.Lld_partial_4

	mov	LEN, %r8
	and	$0x18, %r8
	add	SRC, %r8
	shl	$32, %r9
	mov	(%r8), %r8d		/* 32-bit load zero-extends into %r8 */
	xor	%r8, %r9

.Lld_partial_4:
	/* up to 7 bytes collected so far; movq also clears MSG's high half */
	movq	%r9, MSG

	/* bit 3 set: eight bytes at offset LEN & 0x10 become the low half */
	test	$0x8, LEN
	jz	.Lld_partial_8

	mov	LEN, %r8
	and	$0x10, %r8
	add	SRC, %r8
	pslldq	$8, MSG			/* move collected bytes to the high half */
	movq	(%r8), T0
	pxor	T0, MSG

.Lld_partial_8:
	ret
ENDPROC(__load_partial)
130
131
132
133
134
135
136
137
138
139
140
141
142
/*
 * __store_partial: store the low LEN bytes of T0 to DST.
 *
 * The data is written in pieces of 8, 4, 2 and 1 bytes, so nothing is
 * written past DST + LEN as long as LEN < 16.
 *
 * The remaining count is a byte count, so it is compared as an unsigned
 * value (jb).
 *
 * NOTE(review): LEN is read as a full 64-bit register; confirm that the
 * caller passes it zero-extended.
 *
 * input:
 *   LEN - number of bytes to store
 *   DST - destination address
 *   T0  - data to store
 * changed:
 *   T0
 *   %r8
 *   %r9
 *   %r10
 */
__store_partial:
	mov	LEN, %r8		/* %r8 = bytes still to store */
	mov	DST, %r9		/* %r9 = write cursor */

	movq	T0, %r10		/* %r10 = next (up to) 8 bytes */

	cmp	$8, %r8
	jb	.Lst_partial_8

	mov	%r10, (%r9)
	psrldq	$8, T0			/* continue with the high half of T0 */
	movq	T0, %r10

	sub	$8, %r8
	add	$8, %r9

.Lst_partial_8:
	cmp	$4, %r8
	jb	.Lst_partial_4

	mov	%r10d, (%r9)
	shr	$32, %r10

	sub	$4, %r8
	add	$4, %r9

.Lst_partial_4:
	cmp	$2, %r8
	jb	.Lst_partial_2

	mov	%r10w, (%r9)
	shr	$0x10, %r10

	sub	$2, %r8
	add	$2, %r9

.Lst_partial_2:
	cmp	$1, %r8
	jb	.Lst_partial_1

	mov	%r10b, (%r9)

.Lst_partial_1:
	ret
ENDPROC(__store_partial)
188
189
190
191
/*
 * crypto_aegis128_aesni_init: build the initial state from a key and a
 * second 16-byte input block.
 *
 * input:
 *   %rdi (STATEP) - output: 80-byte state (five 16-byte words)
 *   %rsi          - key, 16 bytes; loaded with movdqa, so it must be
 *                   16-byte aligned
 *   %rdx          - second 16-byte block, loaded unaligned (presumably
 *                   the IV/nonce - confirm against the caller)
 *
 * The constants are addressed RIP-relative so the code does not depend
 * on absolute relocations.
 *
 * changed:
 *   STATE[0-4], KEY, T0, T1
 */
ENTRY(crypto_aegis128_aesni_init)
	FRAME_BEGIN

	/* load the second input block: */
	movdqu	(%rdx), T1

	/* load the key; from here on T1 = key ^ block: */
	movdqa	(%rsi), KEY
	pxor	KEY, T1
	movdqa	T1, STATE0
	movdqa	KEY, STATE3
	movdqa	KEY, STATE4

	/* load the constants: */
	movdqa	.Laegis128_const_0(%rip), STATE2
	movdqa	.Laegis128_const_1(%rip), STATE1
	pxor	STATE2, STATE3
	pxor	STATE1, STATE4

	/*
	 * Ten update rounds, alternately mixing in KEY and T1.  The pxor
	 * target follows the register rotation done by aegis128_update;
	 * after 10 rounds (a multiple of 5) the state is back in
	 * STATE0..STATE4 order.
	 */
	aegis128_update; pxor KEY, STATE4
	aegis128_update; pxor T1,  STATE3
	aegis128_update; pxor KEY, STATE2
	aegis128_update; pxor T1,  STATE1
	aegis128_update; pxor KEY, STATE0
	aegis128_update; pxor T1,  STATE4
	aegis128_update; pxor KEY, STATE3
	aegis128_update; pxor T1,  STATE2
	aegis128_update; pxor KEY, STATE1
	aegis128_update; pxor T1,  STATE0

	/* store the state: */
	movdqu	STATE0, 0x00(STATEP)
	movdqu	STATE1, 0x10(STATEP)
	movdqu	STATE2, 0x20(STATEP)
	movdqu	STATE3, 0x30(STATEP)
	movdqu	STATE4, 0x40(STATEP)

	FRAME_END
	ret
ENDPROC(crypto_aegis128_aesni_init)
233
234
235
236
237
/*
 * crypto_aegis128_aesni_ad: absorb whole 16-byte blocks from SRC into
 * the state.  No output data is produced.
 *
 * input:
 *   STATEP - 80-byte state (five 16-byte words), updated in place
 *   LEN    - number of bytes available at SRC
 *   SRC    - data to absorb
 *
 * Only complete 16-byte blocks are consumed; a trailing remainder of
 * fewer than 16 bytes is not processed here.  If LEN < 16 the state is
 * left untouched.
 *
 * LEN is a byte count, so every comparison on it is unsigned (jb).
 *
 * NOTE(review): LEN is read as a full 64-bit register; confirm that the
 * caller passes it zero-extended.
 *
 * The loop is unrolled five times because aegis128_update rotates the
 * state by one register per block.  The exit label .Lad_out_N is taken
 * after N (mod 5) blocks and stores the registers back in the matching
 * rotated order.
 *
 * changed:
 *   STATE[0-4], MSG, T0, LEN, SRC
 */
ENTRY(crypto_aegis128_aesni_ad)
	FRAME_BEGIN

	cmp	$0x10, LEN
	jb	.Lad_out

	/* load the state: */
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4

	/* use the movdqa loop only when SRC is 16-byte aligned */
	test	$0xF, SRC
	jnz	.Lad_u_loop

.align 8
.Lad_a_loop:
	movdqa	0x00(SRC), MSG
	aegis128_update
	pxor	MSG, STATE4
	sub	$0x10, LEN
	cmp	$0x10, LEN
	jb	.Lad_out_1

	movdqa	0x10(SRC), MSG
	aegis128_update
	pxor	MSG, STATE3
	sub	$0x10, LEN
	cmp	$0x10, LEN
	jb	.Lad_out_2

	movdqa	0x20(SRC), MSG
	aegis128_update
	pxor	MSG, STATE2
	sub	$0x10, LEN
	cmp	$0x10, LEN
	jb	.Lad_out_3

	movdqa	0x30(SRC), MSG
	aegis128_update
	pxor	MSG, STATE1
	sub	$0x10, LEN
	cmp	$0x10, LEN
	jb	.Lad_out_4

	movdqa	0x40(SRC), MSG
	aegis128_update
	pxor	MSG, STATE0
	sub	$0x10, LEN
	cmp	$0x10, LEN
	jb	.Lad_out_0

	add	$0x50, SRC
	jmp	.Lad_a_loop

.align 8
.Lad_u_loop:
	movdqu	0x00(SRC), MSG
	aegis128_update
	pxor	MSG, STATE4
	sub	$0x10, LEN
	cmp	$0x10, LEN
	jb	.Lad_out_1

	movdqu	0x10(SRC), MSG
	aegis128_update
	pxor	MSG, STATE3
	sub	$0x10, LEN
	cmp	$0x10, LEN
	jb	.Lad_out_2

	movdqu	0x20(SRC), MSG
	aegis128_update
	pxor	MSG, STATE2
	sub	$0x10, LEN
	cmp	$0x10, LEN
	jb	.Lad_out_3

	movdqu	0x30(SRC), MSG
	aegis128_update
	pxor	MSG, STATE1
	sub	$0x10, LEN
	cmp	$0x10, LEN
	jb	.Lad_out_4

	movdqu	0x40(SRC), MSG
	aegis128_update
	pxor	MSG, STATE0
	sub	$0x10, LEN
	cmp	$0x10, LEN
	jb	.Lad_out_0

	add	$0x50, SRC
	jmp	.Lad_u_loop

	/* store the state, undoing the register rotation: */
.Lad_out_0:
	movdqu	STATE0, 0x00(STATEP)
	movdqu	STATE1, 0x10(STATEP)
	movdqu	STATE2, 0x20(STATEP)
	movdqu	STATE3, 0x30(STATEP)
	movdqu	STATE4, 0x40(STATEP)
	FRAME_END
	ret

.Lad_out_1:
	movdqu	STATE4, 0x00(STATEP)
	movdqu	STATE0, 0x10(STATEP)
	movdqu	STATE1, 0x20(STATEP)
	movdqu	STATE2, 0x30(STATEP)
	movdqu	STATE3, 0x40(STATEP)
	FRAME_END
	ret

.Lad_out_2:
	movdqu	STATE3, 0x00(STATEP)
	movdqu	STATE4, 0x10(STATEP)
	movdqu	STATE0, 0x20(STATEP)
	movdqu	STATE1, 0x30(STATEP)
	movdqu	STATE2, 0x40(STATEP)
	FRAME_END
	ret

.Lad_out_3:
	movdqu	STATE2, 0x00(STATEP)
	movdqu	STATE3, 0x10(STATEP)
	movdqu	STATE4, 0x20(STATEP)
	movdqu	STATE0, 0x30(STATEP)
	movdqu	STATE1, 0x40(STATEP)
	FRAME_END
	ret

.Lad_out_4:
	movdqu	STATE1, 0x00(STATEP)
	movdqu	STATE2, 0x10(STATEP)
	movdqu	STATE3, 0x20(STATEP)
	movdqu	STATE4, 0x30(STATEP)
	movdqu	STATE0, 0x40(STATEP)
	FRAME_END
	ret

.Lad_out:
	FRAME_END
	ret
ENDPROC(crypto_aegis128_aesni_ad)
385
/*
 * encrypt_block: encrypt one 16-byte block of the 5-way unrolled loop.
 *
 *   \a       - 'a' or 'u': selects movdqa / movdqu for SRC and DST
 *   \s0-\s4  - registers currently holding logical state words 0..4
 *              (\s0 is accepted for symmetry but not referenced)
 *   \i       - block index within the unrolled loop (0..4); selects the
 *              SRC/DST offset and the exit label .Lenc_out_\i
 *
 * Computes DST[i] = SRC[i] ^ \s1 ^ \s4 ^ (\s2 & \s3), then runs one
 * state update and XORs the input block into the new logical word 0
 * (which aegis128_update leaves in register \s4).
 *
 * Leaves through .Lenc_out_\i when fewer than 16 bytes remain.  LEN is
 * a byte count, so that comparison is unsigned (jb).
 *
 * changed:
 *   STATE[0-4], MSG, T0, T1, LEN
 */
.macro encrypt_block a s0 s1 s2 s3 s4 i
	movdq\a	(\i * 0x10)(SRC), MSG
	movdqa	MSG, T0
	pxor	\s1, T0
	pxor	\s4, T0
	movdqa	\s2, T1
	pand	\s3, T1
	pxor	T1, T0
	movdq\a	T0, (\i * 0x10)(DST)

	aegis128_update
	pxor	MSG, \s4

	sub	$0x10, LEN
	cmp	$0x10, LEN
	jb	.Lenc_out_\i
.endm
403
404
405
406
407
/*
 * crypto_aegis128_aesni_enc: encrypt whole 16-byte blocks from SRC to
 * DST and update the state.
 *
 * input:
 *   STATEP - 80-byte state (five 16-byte words), updated in place
 *   LEN    - number of bytes available at SRC
 *   SRC    - input data
 *   DST    - output buffer
 *
 * Only complete 16-byte blocks are handled; with LEN < 16 nothing is
 * read or written.  The aligned loop is used only when both SRC and DST
 * are 16-byte aligned.
 *
 * Each encrypt_block invocation leaves through .Lenc_out_<i> once fewer
 * than 16 bytes remain; the exit labels write the state registers back
 * in the rotated order that matches the number of blocks processed.
 */
ENTRY(crypto_aegis128_aesni_enc)
	FRAME_BEGIN

	cmp	$0x10, LEN
	jb	.Lenc_out

	/* load the state: */
	movdqu	0x40(STATEP), STATE4
	movdqu	0x30(STATEP), STATE3
	movdqu	0x20(STATEP), STATE2
	movdqu	0x10(STATEP), STATE1
	movdqu	0x00(STATEP), STATE0

	/* any low address bit set in either pointer -> unaligned loop */
	mov	DST, %r8
	or	SRC, %r8
	and	$0xF, %r8
	jnz	.Lenc_u_loop

.align 8
.Lenc_a_loop:
	encrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
	encrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
	encrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
	encrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
	encrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4

	add	$0x50, DST
	add	$0x50, SRC
	jmp	.Lenc_a_loop

.align 8
.Lenc_u_loop:
	encrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
	encrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
	encrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
	encrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
	encrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4

	add	$0x50, DST
	add	$0x50, SRC
	jmp	.Lenc_u_loop

	/* store the state; each exit undoes a different rotation: */
.Lenc_out_0:
	movdqu	STATE0, 0x10(STATEP)
	movdqu	STATE1, 0x20(STATEP)
	movdqu	STATE2, 0x30(STATEP)
	movdqu	STATE3, 0x40(STATEP)
	movdqu	STATE4, 0x00(STATEP)
	FRAME_END
	ret

.Lenc_out_1:
	movdqu	STATE0, 0x20(STATEP)
	movdqu	STATE1, 0x30(STATEP)
	movdqu	STATE2, 0x40(STATEP)
	movdqu	STATE3, 0x00(STATEP)
	movdqu	STATE4, 0x10(STATEP)
	FRAME_END
	ret

.Lenc_out_2:
	movdqu	STATE0, 0x30(STATEP)
	movdqu	STATE1, 0x40(STATEP)
	movdqu	STATE2, 0x00(STATEP)
	movdqu	STATE3, 0x10(STATEP)
	movdqu	STATE4, 0x20(STATEP)
	FRAME_END
	ret

.Lenc_out_3:
	movdqu	STATE0, 0x40(STATEP)
	movdqu	STATE1, 0x00(STATEP)
	movdqu	STATE2, 0x10(STATEP)
	movdqu	STATE3, 0x20(STATEP)
	movdqu	STATE4, 0x30(STATEP)
	FRAME_END
	ret

.Lenc_out_4:
	movdqu	STATE4, 0x40(STATEP)
	movdqu	STATE3, 0x30(STATEP)
	movdqu	STATE2, 0x20(STATEP)
	movdqu	STATE1, 0x10(STATEP)
	movdqu	STATE0, 0x00(STATEP)
	FRAME_END
	ret

.Lenc_out:
	FRAME_END
	ret
ENDPROC(crypto_aegis128_aesni_enc)
500
501
502
503
504
/*
 * crypto_aegis128_aesni_enc_tail: encrypt a final partial block.
 *
 * input:
 *   STATEP - 80-byte state (five 16-byte words), updated in place
 *   LEN    - number of bytes in the partial block
 *   SRC    - input data
 *   DST    - output buffer
 *
 * The input is loaded zero-padded to 16 bytes, only LEN output bytes
 * are written, and the zero-padded input block is what gets absorbed
 * into the state.
 */
ENTRY(crypto_aegis128_aesni_enc_tail)
	FRAME_BEGIN

	/* load the state: */
	movdqu	0x40(STATEP), STATE4
	movdqu	0x30(STATEP), STATE3
	movdqu	0x20(STATEP), STATE2
	movdqu	0x10(STATEP), STATE1
	movdqu	0x00(STATEP), STATE0

	/* MSG = zero-padded input (the helper clobbers T0, %r8, %r9) */
	call	__load_partial

	/* T0 = MSG ^ STATE1 ^ STATE4 ^ (STATE2 & STATE3) */
	movdqa	STATE2, T1
	pand	STATE3, T1
	movdqa	MSG, T0
	pxor	T1, T0
	pxor	STATE4, T0
	pxor	STATE1, T0

	/* write the low LEN bytes of T0 to DST */
	call	__store_partial

	/* one update; logical word 0 is now in STATE4 */
	aegis128_update
	pxor	MSG, STATE4

	/* store the state, rotated by one register: */
	movdqu	STATE0, 0x10(STATEP)
	movdqu	STATE1, 0x20(STATEP)
	movdqu	STATE2, 0x30(STATEP)
	movdqu	STATE3, 0x40(STATEP)
	movdqu	STATE4, 0x00(STATEP)

	FRAME_END
	ret
ENDPROC(crypto_aegis128_aesni_enc_tail)
540
/*
 * decrypt_block: decrypt one 16-byte block of the 5-way unrolled loop.
 *
 *   \a       - 'a' or 'u': selects movdqa / movdqu for SRC and DST
 *   \s0-\s4  - registers currently holding logical state words 0..4
 *              (\s0 is accepted for symmetry but not referenced)
 *   \i       - block index within the unrolled loop (0..4); selects the
 *              SRC/DST offset and the exit label .Ldec_out_\i
 *
 * Computes DST[i] = SRC[i] ^ \s1 ^ \s4 ^ (\s2 & \s3), then runs one
 * state update and XORs the *decrypted* block into the new logical word
 * 0 (which aegis128_update leaves in register \s4).
 *
 * Leaves through .Ldec_out_\i when fewer than 16 bytes remain.  LEN is
 * a byte count, so that comparison is unsigned (jb).
 *
 * changed:
 *   STATE[0-4], MSG, T0, T1, LEN
 */
.macro decrypt_block a s0 s1 s2 s3 s4 i
	movdq\a	(\i * 0x10)(SRC), MSG
	pxor	\s1, MSG
	pxor	\s4, MSG
	movdqa	\s2, T1
	pand	\s3, T1
	pxor	T1, MSG
	movdq\a	MSG, (\i * 0x10)(DST)

	aegis128_update
	pxor	MSG, \s4

	sub	$0x10, LEN
	cmp	$0x10, LEN
	jb	.Ldec_out_\i
.endm
557
558
559
560
561
/*
 * crypto_aegis128_aesni_dec: decrypt whole 16-byte blocks from SRC to
 * DST and update the state.
 *
 * input:
 *   STATEP - 80-byte state (five 16-byte words), updated in place
 *   LEN    - number of bytes available at SRC
 *   SRC    - input data
 *   DST    - output buffer
 *
 * Only complete 16-byte blocks are handled; with LEN < 16 nothing is
 * read or written.  The aligned loop is used only when both SRC and DST
 * are 16-byte aligned.
 *
 * Each decrypt_block invocation leaves through .Ldec_out_<i> once fewer
 * than 16 bytes remain; the exit labels write the state registers back
 * in the rotated order that matches the number of blocks processed.
 */
ENTRY(crypto_aegis128_aesni_dec)
	FRAME_BEGIN

	cmp	$0x10, LEN
	jb	.Ldec_out

	/* load the state: */
	movdqu	0x40(STATEP), STATE4
	movdqu	0x30(STATEP), STATE3
	movdqu	0x20(STATEP), STATE2
	movdqu	0x10(STATEP), STATE1
	movdqu	0x00(STATEP), STATE0

	/* any low address bit set in either pointer -> unaligned loop */
	mov	DST, %r8
	or	SRC, %r8
	and	$0xF, %r8
	jnz	.Ldec_u_loop

.align 8
.Ldec_a_loop:
	decrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
	decrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
	decrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
	decrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
	decrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4

	add	$0x50, DST
	add	$0x50, SRC
	jmp	.Ldec_a_loop

.align 8
.Ldec_u_loop:
	decrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
	decrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
	decrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
	decrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
	decrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4

	add	$0x50, DST
	add	$0x50, SRC
	jmp	.Ldec_u_loop

	/* store the state; each exit undoes a different rotation: */
.Ldec_out_0:
	movdqu	STATE0, 0x10(STATEP)
	movdqu	STATE1, 0x20(STATEP)
	movdqu	STATE2, 0x30(STATEP)
	movdqu	STATE3, 0x40(STATEP)
	movdqu	STATE4, 0x00(STATEP)
	FRAME_END
	ret

.Ldec_out_1:
	movdqu	STATE0, 0x20(STATEP)
	movdqu	STATE1, 0x30(STATEP)
	movdqu	STATE2, 0x40(STATEP)
	movdqu	STATE3, 0x00(STATEP)
	movdqu	STATE4, 0x10(STATEP)
	FRAME_END
	ret

.Ldec_out_2:
	movdqu	STATE0, 0x30(STATEP)
	movdqu	STATE1, 0x40(STATEP)
	movdqu	STATE2, 0x00(STATEP)
	movdqu	STATE3, 0x10(STATEP)
	movdqu	STATE4, 0x20(STATEP)
	FRAME_END
	ret

.Ldec_out_3:
	movdqu	STATE0, 0x40(STATEP)
	movdqu	STATE1, 0x00(STATEP)
	movdqu	STATE2, 0x10(STATEP)
	movdqu	STATE3, 0x20(STATEP)
	movdqu	STATE4, 0x30(STATEP)
	FRAME_END
	ret

.Ldec_out_4:
	movdqu	STATE4, 0x40(STATEP)
	movdqu	STATE3, 0x30(STATEP)
	movdqu	STATE2, 0x20(STATEP)
	movdqu	STATE1, 0x10(STATEP)
	movdqu	STATE0, 0x00(STATEP)
	FRAME_END
	ret

.Ldec_out:
	FRAME_END
	ret
ENDPROC(crypto_aegis128_aesni_dec)
654
655
656
657
658
/*
 * crypto_aegis128_aesni_dec_tail: decrypt a final partial block.
 *
 * input:
 *   STATEP - 80-byte state (five 16-byte words), updated in place
 *   LEN    - number of bytes in the partial block
 *   SRC    - input data
 *   DST    - output buffer
 *
 * The input is loaded zero-padded to 16 bytes and XORed with the
 * keystream; only LEN output bytes are written.  Bytes at index >= LEN
 * of the decrypted block then hold raw keystream, so they are masked to
 * zero before the block is absorbed into the state.
 *
 * The counter table is addressed RIP-relative so the code does not
 * depend on absolute relocations.
 *
 * NOTE(review): the mask is built from the low byte of LEN with a
 * signed byte compare, which is only meaningful for small LEN
 * (presumably LEN < 16 - confirm against the caller).
 */
ENTRY(crypto_aegis128_aesni_dec_tail)
	FRAME_BEGIN

	/* load the state: */
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4

	/* MSG = zero-padded input (the helper clobbers T0, %r8, %r9) */
	call	__load_partial

	/* MSG ^= STATE1 ^ STATE4 ^ (STATE2 & STATE3) */
	pxor	STATE1, MSG
	pxor	STATE4, MSG
	movdqa	STATE2, T1
	pand	STATE3, T1
	pxor	T1, MSG

	/* write the low LEN bytes; __store_partial consumes T0 */
	movdqa	MSG, T0
	call	__store_partial

	/*
	 * Mask away the padding: four punpcklbw steps broadcast the low
	 * byte of LEN to all 16 bytes of T0, then pcmpgtb sets byte i to
	 * 0xff exactly when LEN > i.
	 */
	movq	LEN, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	movdqa	.Laegis128_counter(%rip), T1
	pcmpgtb	T1, T0
	pand	T0, MSG

	/* one update; logical word 0 is now in STATE4 */
	aegis128_update
	pxor	MSG, STATE4

	/* store the state, rotated by one register: */
	movdqu	STATE4, 0x00(STATEP)
	movdqu	STATE0, 0x10(STATEP)
	movdqu	STATE1, 0x20(STATEP)
	movdqu	STATE2, 0x30(STATEP)
	movdqu	STATE3, 0x40(STATEP)

	FRAME_END
	ret
ENDPROC(crypto_aegis128_aesni_dec_tail)
704
705
706
707
708
/*
 * crypto_aegis128_aesni_final: run the finalisation rounds and fold the
 * result into the 16-byte buffer at %rsi.
 *
 * input:
 *   %rdi (STATEP) - 80-byte state; read only, not written back
 *   %rsi          - 16-byte buffer, replaced by
 *                   buffer ^ STATE0 ^ STATE1 ^ STATE2 ^ STATE3 ^ STATE4
 *   %rdx          - length placed in the low 64 bits of the length block
 *   %rcx          - length placed in the high 64 bits of the length block
 *
 * NOTE(review): %rdx and %rcx are read as full 64-bit values; confirm
 * that the caller's prototype passes 64-bit (or zero-extended) lengths.
 */
ENTRY(crypto_aegis128_aesni_final)
	FRAME_BEGIN

	/* load the state: */
	movdqu	0x40(STATEP), STATE4
	movdqu	0x30(STATEP), STATE3
	movdqu	0x20(STATEP), STATE2
	movdqu	0x10(STATEP), STATE1
	movdqu	0x00(STATEP), STATE0

	/* length block: MSG = { %rdx * 8, %rcx * 8 } (byte -> bit counts) */
	movq	%rcx, T0
	movq	%rdx, MSG
	punpcklqdq T0, MSG		/* MSG.high = %rcx, MSG.low = %rdx */
	psllq	$3, MSG

	pxor	STATE3, MSG

	/* seven update rounds, each absorbing the same block: */
	aegis128_update; pxor MSG, STATE4
	aegis128_update; pxor MSG, STATE3
	aegis128_update; pxor MSG, STATE2
	aegis128_update; pxor MSG, STATE1
	aegis128_update; pxor MSG, STATE0
	aegis128_update; pxor MSG, STATE4
	aegis128_update; pxor MSG, STATE3

	/* XOR all five state words into the caller's buffer: */
	movdqu	(%rsi), MSG

	pxor	STATE4, MSG
	pxor	STATE3, MSG
	pxor	STATE2, MSG
	pxor	STATE1, MSG
	pxor	STATE0, MSG

	movdqu	MSG, (%rsi)

	FRAME_END
	ret
ENDPROC(crypto_aegis128_aesni_final)
751