1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16#include <linux/linkage.h>
17#include <asm/inst.h>
18#include <asm/frame.h>
19
/*
 * 16-byte shuffle-control constant, placed in its own mergeable 16-byte
 * read-only section and aligned to 16 so it can be loaded with movaps.
 *
 * .octa stores the value little-endian, so byte 0 in memory is 0x0f and
 * byte 15 is 0x00.  Used as a PSHUFB control mask this selects source
 * byte (15 - i) for destination byte i, i.e. it reverses the byte order
 * of a 128-bit register.
 */
.section .rodata.cst16.bswap_mask, "aM", @progbits, 16
.align 16
.Lbswap_mask:
	.octa 0x000102030405060708090a0b0c0d0e0f

/*
 * Register roles used throughout this file:
 *   DATA   - first multiply operand on entry to __clmul_gf128mul_ble and
 *            its result on exit; holds the running value in the callers
 *   SHASH  - second multiply operand (the 16 bytes the callers load from
 *            their key pointer); never written by the multiply
 *   T1-T3  - scratch registers overwritten by __clmul_gf128mul_ble
 *   BSWAP  - holds the .Lbswap_mask byte-reversal mask
 *   IN1    - one 16-byte input block in clmul_ghash_update
 */
#define DATA %xmm0
#define SHASH %xmm1
#define T1 %xmm2
#define T2 %xmm3
#define T3 %xmm4
#define BSWAP %xmm5
#define IN1 %xmm6
32
33.text
34
35
36
37
38
39
40
41
42
43
44
45
46
/*
 * __clmul_gf128mul_ble - file-local 128x128-bit carry-less multiply followed
 * by a shift/xor reduction back to 128 bits.
 *
 * Register-based internal interface (not a C-callable ABI):
 *   In:       DATA  = operand 1 (a1:a0, high:low 64-bit halves)
 *             SHASH = operand 2 (b1:b0)
 *   Out:      DATA  = reduced 128-bit product
 *   Clobbers: T1, T2, T3
 *   Preserved: SHASH, BSWAP, IN1 and all general-purpose registers
 *              (none of them is written here)
 *
 * NOTE(review): the shift counts below (63/62/57 left, 1/2/7 right) look
 * like the usual two-phase reduction for the GHASH field polynomial on
 * bit-reflected data; any required pre-conditioning of SHASH is not visible
 * in this file - confirm against the caller / algorithm reference.
 */
SYM_FUNC_START_LOCAL(__clmul_gf128mul_ble)
	movaps DATA, T1				/* T1 = a1:a0 (copy for the high product) */
	pshufd $0b01001110, DATA, T2		/* T2 = a0:a1 (64-bit halves swapped) */
	pshufd $0b01001110, SHASH, T3		/* T3 = b0:b1 */
	pxor DATA, T2				/* both halves of T2 = a0 ^ a1 */
	pxor SHASH, T3				/* both halves of T3 = b0 ^ b1 */

	/*
	 * Karatsuba: three 64x64 carry-less multiplies instead of four.
	 * PCLMULQDQ is a macro from <asm/inst.h>; imm 0x00 selects the low
	 * quadwords of both operands, 0x11 the high quadwords.
	 */
	PCLMULQDQ 0x00 SHASH DATA		/* DATA = a0 * b0 */
	PCLMULQDQ 0x11 SHASH T1			/* T1 = a1 * b1 */
	PCLMULQDQ 0x00 T3 T2			/* T2 = (a1 ^ a0) * (b1 ^ b0) */
	pxor DATA, T2
	pxor T1, T2				/* T2 = a0 * b1 ^ a1 * b0 (middle term) */

	/* Fold the middle term into the 256-bit product at bit offset 64. */
	movaps T2, T3
	pslldq $8, T3				/* T3 = low half of middle term, moved up */
	psrldq $8, T2				/* T2 = high half of middle term, moved down */
	pxor T3, DATA
	pxor T2, T1				/* <T1:DATA> = full 256-bit carry-less product */

	/*
	 * First reduction phase.  Per 64-bit lane:
	 *   T3 = (DATA << 63) ^ (DATA << 62) ^ (DATA << 57)
	 * built from nested shifts (1, then 5, then 57).
	 */
	movaps DATA, T3
	psllq $1, T3
	pxor DATA, T3
	psllq $5, T3
	pxor DATA, T3
	psllq $57, T3
	movaps T3, T2
	pslldq $8, T2				/* low lane of T3 -> high lane of T2 */
	psrldq $8, T3				/* high lane of T3 -> low lane of T3 */
	pxor T2, DATA
	pxor T3, T1

	/*
	 * Second reduction phase.  Per 64-bit lane:
	 *   T2 = (DATA >> 7) ^ (DATA >> 2) ^ (DATA >> 1)
	 * built from nested shifts (5, then 1, then 1).
	 */
	movaps DATA, T2
	psrlq $5, T2
	pxor DATA, T2
	psrlq $1, T2
	pxor DATA, T2
	psrlq $1, T2
	pxor T2, T1
	pxor T1, DATA				/* DATA = DATA ^ T1 ^ T2: final 128-bit result */
	ret
SYM_FUNC_END(__clmul_gf128mul_ble)
91
92
/*
 * clmul_ghash_mul - multiply one 16-byte value in place by a 16-byte key.
 *
 * ABI: SysV AMD64, AT&T syntax.
 *   In:       %rdi = pointer to 16 bytes, read and then overwritten with
 *                    the result (unaligned access is fine: movups)
 *             %rsi = pointer to the 16-byte key operand (read only)
 *   Out:      result stored at (%rdi); no value is placed in %rax
 *   Clobbers: %xmm0-%xmm5 (DATA, SHASH, T1-T3, BSWAP)
 *
 * The value is byte-reversed before and after the multiply so that
 * __clmul_gf128mul_ble sees it in the byte order it works in.
 */
SYM_FUNC_START(clmul_ghash_mul)
	FRAME_BEGIN
	movups (%rdi), DATA
	movups (%rsi), SHASH
	/* RIP-relative load: shorter encoding and position-independent. */
	movaps .Lbswap_mask(%rip), BSWAP
	PSHUFB_XMM BSWAP DATA			/* reverse byte order of the input */
	call __clmul_gf128mul_ble		/* DATA = DATA * SHASH, reduced */
	PSHUFB_XMM BSWAP DATA			/* back to the in-memory byte order */
	movups DATA, (%rdi)
	FRAME_END
	ret
SYM_FUNC_END(clmul_ghash_mul)
105
106
107
108
109
/*
 * clmul_ghash_update - fold every whole 16-byte block of a buffer into a
 * 16-byte running value: for each block, value = (value ^ block) * key.
 *
 * ABI: SysV AMD64, AT&T syntax.
 *   In:       %rdi = pointer to the 16-byte running value (read, then
 *                    overwritten; unaligned access is fine)
 *             %rsi = pointer to the source bytes
 *             %rdx = source length in bytes
 *             %rcx = pointer to the 16-byte key operand (read only)
 *   Out:      updated value stored at (%rdi); no value is placed in %rax
 *   Clobbers: %rsi, %rdx, flags, %xmm0-%xmm6
 *
 * If fewer than 16 bytes are supplied nothing is read or written.  A
 * trailing partial block (length % 16 bytes) is NOT consumed; handling it
 * is left to the caller.
 *
 * NOTE(review): the length is tested as a full 64-bit unsigned quantity in
 * %rdx - confirm the C prototype passes it with clean upper bits.
 */
SYM_FUNC_START(clmul_ghash_update)
	FRAME_BEGIN
	cmp $16, %rdx
	jb .Lupdate_just_ret			/* less than one block: nothing to do */
	/* RIP-relative load: shorter encoding and position-independent. */
	movaps .Lbswap_mask(%rip), BSWAP
	movups (%rdi), DATA
	movups (%rcx), SHASH
	PSHUFB_XMM BSWAP DATA			/* running value -> multiply byte order */
.align 4
.Lupdate_loop:
	/* Invariant: %rdx >= 16 bytes remain at (%rsi). */
	movups (%rsi), IN1
	PSHUFB_XMM BSWAP IN1
	pxor IN1, DATA				/* value ^= block */
	call __clmul_gf128mul_ble		/* value *= key (clobbers T1-T3 only) */
	sub $16, %rdx
	add $16, %rsi
	cmp $16, %rdx
	/*
	 * Unsigned test, matching the unsigned 'jb' guard on entry: a byte
	 * count must never be interpreted as a signed value.
	 */
	jae .Lupdate_loop
	PSHUFB_XMM BSWAP DATA			/* back to the in-memory byte order */
	movups DATA, (%rdi)
.Lupdate_just_ret:
	FRAME_END
	ret
SYM_FUNC_END(clmul_ghash_update)
134