1
2
3
4
5
6
7
8#include <crypto/curve25519.h>
9#include <crypto/internal/kpp.h>
10
11#include <linux/types.h>
12#include <linux/jump_label.h>
13#include <linux/kernel.h>
14#include <linux/module.h>
15
16#include <asm/cpufeature.h>
17#include <asm/processor.h>
18
19static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_bmi2);
20static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_adx);
21
/* Number of 64-bit words in one eltfp25519_1w element (4 * 64 = 256 bits). */
enum { NUM_WORDS_ELTFP25519 = 4 };
23typedef __aligned(32) u64 eltfp25519_1w[NUM_WORDS_ELTFP25519];
24typedef __aligned(32) u64 eltfp25519_1w_buffer[2 * NUM_WORDS_ELTFP25519];
25
/*
 * One-element ("1w") multiply and square wrappers, in an _adx and a _bmi2
 * flavour.
 *
 * Each macro makes two calls: an integer multiply/square routine that writes
 * into m.buffer, then a red_eltfp25519_1w_* routine that reads m.buffer and
 * writes the destination.
 *
 *   mul_*(c, a, b): destination is c, operands are a and b.
 *   sqr_*(a):       a is both the operand and the destination.
 *
 * `m` is not a macro parameter: an object named `m` with a `buffer` member
 * must be in scope wherever these macros are expanded.
 */
#define mul_eltfp25519_1w_adx(c, a, b) do {		\
	mul_256x256_integer_adx(m.buffer, a, b);	\
	red_eltfp25519_1w_adx(c, m.buffer);		\
} while (0)

#define mul_eltfp25519_1w_bmi2(c, a, b) do {		\
	mul_256x256_integer_bmi2(m.buffer, a, b);	\
	red_eltfp25519_1w_bmi2(c, m.buffer);		\
} while (0)

#define sqr_eltfp25519_1w_adx(a) do {			\
	sqr_256x256_integer_adx(m.buffer, a);		\
	red_eltfp25519_1w_adx(a, m.buffer);		\
} while (0)

#define sqr_eltfp25519_1w_bmi2(a) do {			\
	sqr_256x256_integer_bmi2(m.buffer, a);		\
	red_eltfp25519_1w_bmi2(a, m.buffer);		\
} while (0)
45
/*
 * Two-element ("2w") multiply and square wrappers, in an _adx and a _bmi2
 * flavour.
 *
 * Each macro makes two calls: a mul2_/sqr2_256x256_integer_* routine that
 * writes into m.buffer, then a red_eltfp25519_2w_* routine that reads
 * m.buffer and writes the destination.
 *
 *   mul_*(c, a, b): destination is c, operands are a and b.
 *   sqr_*(a):       a is both the operand and the destination.
 *
 * `m` is not a macro parameter: an object named `m` with a `buffer` member
 * must be in scope wherever these macros are expanded.
 */
#define mul_eltfp25519_2w_adx(c, a, b) do {		\
	mul2_256x256_integer_adx(m.buffer, a, b);	\
	red_eltfp25519_2w_adx(c, m.buffer);		\
} while (0)

#define mul_eltfp25519_2w_bmi2(c, a, b) do {		\
	mul2_256x256_integer_bmi2(m.buffer, a, b);	\
	red_eltfp25519_2w_bmi2(c, m.buffer);		\
} while (0)

#define sqr_eltfp25519_2w_adx(a) do {			\
	sqr2_256x256_integer_adx(m.buffer, a);		\
	red_eltfp25519_2w_adx(a, m.buffer);		\
} while (0)

#define sqr_eltfp25519_2w_bmi2(a) do {			\
	sqr2_256x256_integer_bmi2(m.buffer, a);		\
	red_eltfp25519_2w_bmi2(a, m.buffer);		\
} while (0)
65
/*
 * Apply the in-place 1w square `times` times to a.
 *
 * `times` is evaluated exactly once, into a local int; a value <= 0 performs
 * no squaring.  `a` is handed to sqr_eltfp25519_1w_*() on every iteration, so
 * it should be a side-effect-free expression.  Like the sqr macros this
 * expands to, an object `m` with a `buffer` member must be in scope at the
 * expansion site.
 */
#define sqrn_eltfp25519_1w_adx(a, times) do {	\
	int ____counter = (times);		\
	while (____counter-- > 0)		\
		sqr_eltfp25519_1w_adx(a);	\
} while (0)

#define sqrn_eltfp25519_1w_bmi2(a, times) do {	\
	int ____counter = (times);		\
	while (____counter-- > 0)		\
		sqr_eltfp25519_1w_bmi2(a);	\
} while (0)
77
/*
 * Copy words 0..3 of A into words 0..3 of C, one assignment per word.
 * Both arguments are evaluated four times, so they must be free of side
 * effects.
 */
#define copy_eltfp25519_1w(C, A) do {	\
	(C)[0] = (A)[0];		\
	(C)[1] = (A)[1];		\
	(C)[2] = (A)[2];		\
	(C)[3] = (A)[3];		\
} while (0)
84
/*
 * Store 0 into words 0..3 of C, one assignment per word.  C is evaluated
 * four times, so it must be free of side effects.
 */
#define setzero_eltfp25519_1w(C) do {	\
	(C)[0] = 0;			\
	(C)[1] = 0;			\
	(C)[2] = 0;			\
	(C)[3] = 0;			\
} while (0)
91
92__aligned(32) static const u64 table_ladder_8k[252 * NUM_WORDS_ELTFP25519] = {
93 0xfffffffffffffff3UL, 0xffffffffffffffffUL,
94 0xffffffffffffffffUL, 0x5fffffffffffffffUL,
95 0x6b8220f416aafe96UL, 0x82ebeb2b4f566a34UL,
96 0xd5a9a5b075a5950fUL, 0x5142b2cf4b2488f4UL,
97 0x6aaebc750069680cUL, 0x89cf7820a0f99c41UL,
98 0x2a58d9183b56d0f4UL, 0x4b5aca80e36011a4UL,
99 0x329132348c29745dUL, 0xf4a2e616e1642fd7UL,
100 0x1e45bb03ff67bc34UL, 0x306912d0f42a9b4aUL,
101 0xff886507e6af7154UL, 0x04f50e13dfeec82fUL,
102 0xaa512fe82abab5ceUL, 0x174e251a68d5f222UL,
103 0xcf96700d82028898UL, 0x1743e3370a2c02c5UL,
104 0x379eec98b4e86eaaUL, 0x0c59888a51e0482eUL,
105 0xfbcbf1d699b5d189UL, 0xacaef0d58e9fdc84UL,
106 0xc1c20d06231f7614UL, 0x2938218da274f972UL,
107 0xf6af49beff1d7f18UL, 0xcc541c22387ac9c2UL,
108 0x96fcc9ef4015c56bUL, 0x69c1627c690913a9UL,
109 0x7a86fd2f4733db0eUL, 0xfdb8c4f29e087de9UL,
110 0x095e4b1a8ea2a229UL, 0x1ad7a7c829b37a79UL,
111 0x342d89cad17ea0c0UL, 0x67bedda6cced2051UL,
112 0x19ca31bf2bb42f74UL, 0x3df7b4c84980acbbUL,
113 0xa8c6444dc80ad883UL, 0xb91e440366e3ab85UL,
114 0xc215cda00164f6d8UL, 0x3d867c6ef247e668UL,
115 0xc7dd582bcc3e658cUL, 0xfd2c4748ee0e5528UL,
116 0xa0fd9b95cc9f4f71UL, 0x7529d871b0675ddfUL,
117 0xb8f568b42d3cbd78UL, 0x1233011b91f3da82UL,
118 0x2dce6ccd4a7c3b62UL, 0x75e7fc8e9e498603UL,
119 0x2f4f13f1fcd0b6ecUL, 0xf1a8ca1f29ff7a45UL,
120 0xc249c1a72981e29bUL, 0x6ebe0dbb8c83b56aUL,
121 0x7114fa8d170bb222UL, 0x65a2dcd5bf93935fUL,
122 0xbdc41f68b59c979aUL, 0x2f0eef79a2ce9289UL,
123 0x42ecbf0c083c37ceUL, 0x2930bc09ec496322UL,
124 0xf294b0c19cfeac0dUL, 0x3780aa4bedfabb80UL,
125 0x56c17d3e7cead929UL, 0xe7cb4beb2e5722c5UL,
126 0x0ce931732dbfe15aUL, 0x41b883c7621052f8UL,
127 0xdbf75ca0c3d25350UL, 0x2936be086eb1e351UL,
128 0xc936e03cb4a9b212UL, 0x1d45bf82322225aaUL,
129 0xe81ab1036a024cc5UL, 0xe212201c304c9a72UL,
130 0xc5d73fba6832b1fcUL, 0x20ffdb5a4d839581UL,
131 0xa283d367be5d0fadUL, 0x6c2b25ca8b164475UL,
132 0x9d4935467caaf22eUL, 0x5166408eee85ff49UL,
133 0x3c67baa2fab4e361UL, 0xb3e433c67ef35cefUL,
134 0x5259729241159b1cUL, 0x6a621892d5b0ab33UL,
135 0x20b74a387555cdcbUL, 0x532aa10e1208923fUL,
136 0xeaa17b7762281dd1UL, 0x61ab3443f05c44bfUL,
137 0x257a6c422324def8UL, 0x131c6c1017e3cf7fUL,
138 0x23758739f630a257UL, 0x295a407a01a78580UL,
139 0xf8c443246d5da8d9UL, 0x19d775450c52fa5dUL,
140 0x2afcfc92731bf83dUL, 0x7d10c8e81b2b4700UL,
141 0xc8e0271f70baa20bUL, 0x993748867ca63957UL,
142 0x5412efb3cb7ed4bbUL, 0x3196d36173e62975UL,
143 0xde5bcad141c7dffcUL, 0x47cc8cd2b395c848UL,
144 0xa34cd942e11af3cbUL, 0x0256dbf2d04ecec2UL,
145 0x875ab7e94b0e667fUL, 0xcad4dd83c0850d10UL,
146 0x47f12e8f4e72c79fUL, 0x5f1a87bb8c85b19bUL,
147 0x7ae9d0b6437f51b8UL, 0x12c7ce5518879065UL,
148 0x2ade09fe5cf77aeeUL, 0x23a05a2f7d2c5627UL,
149 0x5908e128f17c169aUL, 0xf77498dd8ad0852dUL,
150 0x74b4c4ceab102f64UL, 0x183abadd10139845UL,
151 0xb165ba8daa92aaacUL, 0xd5c5ef9599386705UL,
152 0xbe2f8f0cf8fc40d1UL, 0x2701e635ee204514UL,
153 0x629fa80020156514UL, 0xf223868764a8c1ceUL,
154 0x5b894fff0b3f060eUL, 0x60d9944cf708a3faUL,
155 0xaeea001a1c7a201fUL, 0xebf16a633ee2ce63UL,
156 0x6f7709594c7a07e1UL, 0x79b958150d0208cbUL,
157 0x24b55e5301d410e7UL, 0xe3a34edff3fdc84dUL,
158 0xd88768e4904032d8UL, 0x131384427b3aaeecUL,
159 0x8405e51286234f14UL, 0x14dc4739adb4c529UL,
160 0xb8a2b5b250634ffdUL, 0x2fe2a94ad8a7ff93UL,
161 0xec5c57efe843faddUL, 0x2843ce40f0bb9918UL,
162 0xa4b561d6cf3d6305UL, 0x743629bde8fb777eUL,
163 0x343edd46bbaf738fUL, 0xed981828b101a651UL,
164 0xa401760b882c797aUL, 0x1fc223e28dc88730UL,
165 0x48604e91fc0fba0eUL, 0xb637f78f052c6fa4UL,
166 0x91ccac3d09e9239cUL, 0x23f7eed4437a687cUL,
167 0x5173b1118d9bd800UL, 0x29d641b63189d4a7UL,
168 0xfdbf177988bbc586UL, 0x2959894fcad81df5UL,
169 0xaebc8ef3b4bbc899UL, 0x4148995ab26992b9UL,
170 0x24e20b0134f92cfbUL, 0x40d158894a05dee8UL,
171 0x46b00b1185af76f6UL, 0x26bac77873187a79UL,
172 0x3dc0bf95ab8fff5fUL, 0x2a608bd8945524d7UL,
173 0x26449588bd446302UL, 0x7c4bc21c0388439cUL,
174 0x8e98a4f383bd11b2UL, 0x26218d7bc9d876b9UL,
175 0xe3081542997c178aUL, 0x3c2d29a86fb6606fUL,
176 0x5c217736fa279374UL, 0x7dde05734afeb1faUL,
177 0x3bf10e3906d42babUL, 0xe4f7803e1980649cUL,
178 0xe6053bf89595bf7aUL, 0x394faf38da245530UL,
179 0x7a8efb58896928f4UL, 0xfbc778e9cc6a113cUL,
180 0x72670ce330af596fUL, 0x48f222a81d3d6cf7UL,
181 0xf01fce410d72caa7UL, 0x5a20ecc7213b5595UL,
182 0x7bc21165c1fa1483UL, 0x07f89ae31da8a741UL,
183 0x05d2c2b4c6830ff9UL, 0xd43e330fc6316293UL,
184 0xa5a5590a96d3a904UL, 0x705edb91a65333b6UL,
185 0x048ee15e0bb9a5f7UL, 0x3240cfca9e0aaf5dUL,
186 0x8f4b71ceedc4a40bUL, 0x621c0da3de544a6dUL,
187 0x92872836a08c4091UL, 0xce8375b010c91445UL,
188 0x8a72eb524f276394UL, 0x2667fcfa7ec83635UL,
189 0x7f4c173345e8752aUL, 0x061b47feee7079a5UL,
190 0x25dd9afa9f86ff34UL, 0x3780cef5425dc89cUL,
191 0x1a46035a513bb4e9UL, 0x3e1ef379ac575adaUL,
192 0xc78c5f1c5fa24b50UL, 0x321a967634fd9f22UL,
193 0x946707b8826e27faUL, 0x3dca84d64c506fd0UL,
194 0xc189218075e91436UL, 0x6d9284169b3b8484UL,
195 0x3a67e840383f2ddfUL, 0x33eec9a30c4f9b75UL,
196 0x3ec7c86fa783ef47UL, 0x26ec449fbac9fbc4UL,
197 0x5c0f38cba09b9e7dUL, 0x81168cc762a3478cUL,
198 0x3e23b0d306fc121cUL, 0x5a238aa0a5efdcddUL,
199 0x1ba26121c4ea43ffUL, 0x36f8c77f7c8832b5UL,
200 0x88fbea0b0adcf99aUL, 0x5ca9938ec25bebf9UL,
201 0xd5436a5e51fccda0UL, 0x1dbc4797c2cd893bUL,
202 0x19346a65d3224a08UL, 0x0f5034e49b9af466UL,
203 0xf23c3967a1e0b96eUL, 0xe58b08fa867a4d88UL,
204 0xfb2fabc6a7341679UL, 0x2a75381eb6026946UL,
205 0xc80a3be4c19420acUL, 0x66b1f6c681f2b6dcUL,
206 0x7cf7036761e93388UL, 0x25abbbd8a660a4c4UL,
207 0x91ea12ba14fd5198UL, 0x684950fc4a3cffa9UL,
208 0xf826842130f5ad28UL, 0x3ea988f75301a441UL,
209 0xc978109a695f8c6fUL, 0x1746eb4a0530c3f3UL,
210 0x444d6d77b4459995UL, 0x75952b8c054e5cc7UL,
211 0xa3703f7915f4d6aaUL, 0x66c346202f2647d8UL,
212 0xd01469df811d644bUL, 0x77fea47d81a5d71fUL,
213 0xc5e9529ef57ca381UL, 0x6eeeb4b9ce2f881aUL,
214 0xb6e91a28e8009bd6UL, 0x4b80be3e9afc3fecUL,
215 0x7e3773c526aed2c5UL, 0x1b4afcb453c9a49dUL,
216 0xa920bdd7baffb24dUL, 0x7c54699f122d400eUL,
217 0xef46c8e14fa94bc8UL, 0xe0b074ce2952ed5eUL,
218 0xbea450e1dbd885d5UL, 0x61b68649320f712cUL,
219 0x8a485f7309ccbdd1UL, 0xbd06320d7d4d1a2dUL,
220 0x25232973322dbef4UL, 0x445dc4758c17f770UL,
221 0xdb0434177cc8933cUL, 0xed6fe82175ea059fUL,
222 0x1efebefdc053db34UL, 0x4adbe867c65daf99UL,
223 0x3acd71a2a90609dfUL, 0xe5e991856dd04050UL,
224 0x1ec69b688157c23cUL, 0x697427f6885cfe4dUL,
225 0xd7be7b9b65e1a851UL, 0xa03d28d522c536ddUL,
226 0x28399d658fd2b645UL, 0x49e5b7e17c2641e1UL,
227 0x6f8c3a98700457a4UL, 0x5078f0a25ebb6778UL,
228 0xd13c3ccbc382960fUL, 0x2e003258a7df84b1UL,
229 0x8ad1f39be6296a1cUL, 0xc1eeaa652a5fbfb2UL,
230 0x33ee0673fd26f3cbUL, 0x59256173a69d2cccUL,
231 0x41ea07aa4e18fc41UL, 0xd9fc19527c87a51eUL,
232 0xbdaacb805831ca6fUL, 0x445b652dc916694fUL,
233 0xce92a3a7f2172315UL, 0x1edc282de11b9964UL,
234 0xa1823aafe04c314aUL, 0x790a2d94437cf586UL,
235 0x71c447fb93f6e009UL, 0x8922a56722845276UL,
236 0xbf70903b204f5169UL, 0x2f7a89891ba319feUL,
237 0x02a08eb577e2140cUL, 0xed9a4ed4427bdcf4UL,
238 0x5253ec44e4323cd1UL, 0x3e88363c14e9355bUL,
239 0xaa66c14277110b8cUL, 0x1ae0391610a23390UL,
240 0x2030bd12c93fc2a2UL, 0x3ee141579555c7abUL,
241 0x9214de3a6d6e7d41UL, 0x3ccdd88607f17efeUL,
242 0x674f1288f8e11217UL, 0x5682250f329f93d0UL,
243 0x6cf00b136d2e396eUL, 0x6e4cf86f1014debfUL,
244 0x5930b1b5bfcc4e83UL, 0x047069b48aba16b6UL,
245 0x0d4ce4ab69b20793UL, 0xb24db91a97d0fb9eUL,
246 0xcdfa50f54e00d01dUL, 0x221b1085368bddb5UL,
247 0xe7e59468b1e3d8d2UL, 0x53c56563bd122f93UL,
248 0xeee8a903e0663f09UL, 0x61efa662cbbe3d42UL,
249 0x2cf8ddddde6eab2aUL, 0x9bf80ad51435f231UL,
250 0x5deadacec9f04973UL, 0x29275b5d41d29b27UL,
251 0xcfde0f0895ebf14fUL, 0xb9aab96b054905a7UL,
252 0xcae80dd9a1c420fdUL, 0x0a63bf2f1673bbc7UL,
253 0x092f6e11958fbc8cUL, 0x672a81e804822fadUL,
254 0xcac8351560d52517UL, 0x6f3f7722c8f192f8UL,
255 0xf8ba90ccc2e894b7UL, 0x2c7557a438ff9f0dUL,
256 0x894d1d855ae52359UL, 0x68e122157b743d69UL,
257 0xd87e5570cfb919f3UL, 0x3f2cdecd95798db9UL,
258 0x2121154710c0a2ceUL, 0x3c66a115246dc5b2UL,
259 0xcbedc562294ecb72UL, 0xba7143c36a280b16UL,
260 0x9610c2efd4078b67UL, 0x6144735d946a4b1eUL,
261 0x536f111ed75b3350UL, 0x0211db8c2041d81bUL,
262 0xf93cb1000e10413cUL, 0x149dfd3c039e8876UL,
263 0xd479dde46b63155bUL, 0xb66e15e93c837976UL,
264 0xdafde43b1f13e038UL, 0x5fafda1a2e4b0b35UL,
265 0x3600bbdf17197581UL, 0x3972050bbe3cd2c2UL,
266 0x5938906dbdd5be86UL, 0x34fce5e43f9b860fUL,
267 0x75a8a4cd42d14d02UL, 0x828dabc53441df65UL,
268 0x33dcabedd2e131d3UL, 0x3ebad76fb814d25fUL,
269 0xd4906f566f70e10fUL, 0x5d12f7aa51690f5aUL,
270 0x45adb16e76cefcf2UL, 0x01f768aead232999UL,
271 0x2b6cc77b6248febdUL, 0x3cd30628ec3aaffdUL,
272 0xce1c0b80d4ef486aUL, 0x4c3bff2ea6f66c23UL,
273 0x3f2ec4094aeaeb5fUL, 0x61b19b286e372ca7UL,
274 0x5eefa966de2a701dUL, 0x23b20565de55e3efUL,
275 0xe301ca5279d58557UL, 0x07b2d4ce27c2874fUL,
276 0xa532cd8a9dcf1d67UL, 0x2a52fee23f2bff56UL,
277 0x8624efb37cd8663dUL, 0xbbc7ac20ffbd7594UL,
278 0x57b85e9c82d37445UL, 0x7b3052cb86a6ec66UL,
279 0x3482f0ad2525e91eUL, 0x2cb68043d28edca0UL,
280 0xaf4f6d052e1b003aUL, 0x185f8c2529781b0aUL,
281 0xaa41de5bd80ce0d6UL, 0x9407b2416853e9d6UL,
282 0x563ec36e357f4c3aUL, 0x4cc4b8dd0e297bceUL,
283 0xa2fc1a52ffb8730eUL, 0x1811f16e67058e37UL,
284 0x10f9a366cddf4ee1UL, 0x72f4a0c4a0b9f099UL,
285 0x8c16c06f663f4ea7UL, 0x693b3af74e970fbaUL,
286 0x2102e7f1d69ec345UL, 0x0ba53cbc968a8089UL,
287 0xca3d9dc7fea15537UL, 0x4c6824bb51536493UL,
288 0xb9886314844006b1UL, 0x40d2a72ab454cc60UL,
289 0x5936a1b712570975UL, 0x91b9d648debda657UL,
290 0x3344094bb64330eaUL, 0x006ba10d12ee51d0UL,
291 0x19228468f5de5d58UL, 0x0eb12f4c38cc05b0UL,
292 0xa1039f9dd5601990UL, 0x4502d4ce4fff0e0bUL,
293 0xeb2054106837c189UL, 0xd0f6544c6dd3b93cUL,
294 0x40727064c416d74fUL, 0x6e15c6114b502ef0UL,
295 0x4df2a398cfb1a76bUL, 0x11256c7419f2f6b1UL,
296 0x4a497962066e6043UL, 0x705b3aab41355b44UL,
297 0x365ef536d797b1d8UL, 0x00076bd622ddf0dbUL,
298 0x3bbf33b0e0575a88UL, 0x3777aa05c8e4ca4dUL,
299 0x392745c85578db5fUL, 0x6fda4149dbae5ae2UL,
300 0xb1f0b00b8adc9867UL, 0x09963437d36f1da3UL,
301 0x7e824e90a5dc3853UL, 0xccb5f6641f135cbdUL,
302 0x6736d86c87ce8fccUL, 0x625f3ce26604249fUL,
303 0xaf8ac8059502f63fUL, 0x0c05e70a2e351469UL,
304 0x35292e9c764b6305UL, 0x1a394360c7e23ac3UL,
305 0xd5c6d53251183264UL, 0x62065abd43c2b74fUL,
306 0xb5fbf5d03b973f9bUL, 0x13a3da3661206e5eUL,
307 0xc6bd5837725d94e5UL, 0x18e30912205016c5UL,
308 0x2088ce1570033c68UL, 0x7fba1f495c837987UL,
309 0x5a8c7423f2f9079dUL, 0x1735157b34023fc5UL,
310 0xe4f9b49ad2fab351UL, 0x6691ff72c878e33cUL,
311 0x122c2adedc5eff3eUL, 0xf8dd4bf1d8956cf4UL,
312 0xeb86205d9e9e5bdaUL, 0x049b92b9d975c743UL,
313 0xa5379730b0f6c05aUL, 0x72a0ffacc6f3a553UL,
314 0xb0032c34b20dcd6dUL, 0x470e9dbc88d5164aUL,
315 0xb19cf10ca237c047UL, 0xb65466711f6c81a2UL,
316 0xb3321bd16dd80b43UL, 0x48c14f600c5fbe8eUL,
317 0x66451c264aa6c803UL, 0xb66e3904a4fa7da6UL,
318 0xd45f19b0b3128395UL, 0x31602627c3c9bc10UL,
319 0x3120dc4832e4e10dUL, 0xeb20c46756c717f7UL,
320 0x00f52e3f67280294UL, 0x566d4fc14730c509UL,
321 0x7e3a5d40fd837206UL, 0xc1e926dc7159547aUL,
322 0x216730fba68d6095UL, 0x22e8c3843f69cea7UL,
323 0x33d074e8930e4b2bUL, 0xb6e4350e84d15816UL,
324 0x5534c26ad6ba2365UL, 0x7773c12f89f1f3f3UL,
325 0x8cba404da57962aaUL, 0x5b9897a81999ce56UL,
326 0x508e862f121692fcUL, 0x3a81907fa093c291UL,
327 0x0dded0ff4725a510UL, 0x10d8cc10673fc503UL,
328 0x5b9d151c9f1f4e89UL, 0x32a5c1d5cb09a44cUL,
329 0x1e0aa442b90541fbUL, 0x5f85eb7cc1b485dbUL,
330 0xbee595ce8a9df2e5UL, 0x25e496c722422236UL,
331 0x5edf3c46cd0fe5b9UL, 0x34e75a7ed2a43388UL,
332 0xe488de11d761e352UL, 0x0e878a01a085545cUL,
333 0xba493c77e021bb04UL, 0x2b4d1843c7df899aUL,
334 0x9ea37a487ae80d67UL, 0x67a9958011e41794UL,
335 0x4b58051a6697b065UL, 0x47e33f7d8d6ba6d4UL,
336 0xbb4da8d483ca46c1UL, 0x68becaa181c2db0dUL,
337 0x8d8980e90b989aa5UL, 0xf95eb14a2c93c99bUL,
338 0x51c6c7c4796e73a2UL, 0x6e228363b5efb569UL,
339 0xc6bbc0b02dd624c8UL, 0x777eb47dec8170eeUL,
340 0x3cde15a004cfafa9UL, 0x1dc6bc087160bf9bUL,
341 0x2e07e043eec34002UL, 0x18e9fc677a68dc7fUL,
342 0xd8da03188bd15b9aUL, 0x48fbc3bb00568253UL,
343 0x57547d4cfb654ce1UL, 0xd3565b82a058e2adUL,
344 0xf63eaf0bbf154478UL, 0x47531ef114dfbb18UL,
345 0xe1ec630a4278c587UL, 0x5507d546ca8e83f3UL,
346 0x85e135c63adc0c2bUL, 0x0aa7efa85682844eUL,
347 0x72691ba8b3e1f615UL, 0x32b4e9701fbe3ffaUL,
348 0x97b6d92e39bb7868UL, 0x2cfe53dea02e39e8UL,
349 0x687392cd85cd52b0UL, 0x27ff66c910e29831UL,
350 0x97134556a9832d06UL, 0x269bb0360a84f8a0UL,
351 0x706e55457643f85cUL, 0x3734a48c9b597d1bUL,
352 0x7aee91e8c6efa472UL, 0x5cd6abc198a9d9e0UL,
353 0x0e04de06cb3ce41aUL, 0xd8c6eb893402e138UL,
354 0x904659bb686e3772UL, 0x7215c371746ba8c8UL,
355 0xfd12a97eeae4a2d9UL, 0x9514b7516394f2c5UL,
356 0x266fd5809208f294UL, 0x5c847085619a26b9UL,
357 0x52985410fed694eaUL, 0x3c905b934a2ed254UL,
358 0x10bb47692d3be467UL, 0x063b3d2d69e5e9e1UL,
359 0x472726eedda57debUL, 0xefb6c4ae10f41891UL,
360 0x2b1641917b307614UL, 0x117c554fc4f45b7cUL,
361 0xc07cf3118f9d8812UL, 0x01dbd82050017939UL,
362 0xd7e803f4171b2827UL, 0x1015e87487d225eaUL,
363 0xc58de3fed23acc4dUL, 0x50db91c294a7be2dUL,
364 0x0b94d43d1c9cf457UL, 0x6b1640fa6e37524aUL,
365 0x692f346c5fda0d09UL, 0x200b1c59fa4d3151UL,
366 0xb8c46f760777a296UL, 0x4b38395f3ffdfbcfUL,
367 0x18d25e00be54d671UL, 0x60d50582bec8aba6UL,
368 0x87ad8f263b78b982UL, 0x50fdf64e9cda0432UL,
369 0x90f567aac578dcf0UL, 0xef1e9b0ef2a3133bUL,
370 0x0eebba9242d9de71UL, 0x15473c9bf03101c7UL,
371 0x7c77e8ae56b78095UL, 0xb678e7666e6f078eUL,
372 0x2da0b9615348ba1fUL, 0x7cf931c1ff733f0bUL,
373 0x26b357f50a0a366cUL, 0xe9708cf42b87d732UL,
374 0xc13aeea5f91cb2c0UL, 0x35d90c991143bb4cUL,
375 0x47c1c404a9a0d9dcUL, 0x659e58451972d251UL,
376 0x3875a8c473b38c31UL, 0x1fbd9ed379561f24UL,
377 0x11fabc6fd41ec28dUL, 0x7ef8dfe3cd2a2dcaUL,
378 0x72e73b5d8c404595UL, 0x6135fa4954b72f27UL,
379 0xccfc32a2de24b69cUL, 0x3f55698c1f095d88UL,
380 0xbe3350ed5ac3f929UL, 0x5e9bf806ca477eebUL,
381 0xe9ce8fb63c309f68UL, 0x5376f63565e1f9f4UL,
382 0xd1afcfb35a6393f1UL, 0x6632a1ede5623506UL,
383 0x0b7d6c390c2ded4cUL, 0x56cb3281df04cb1fUL,
384 0x66305a1249ecc3c7UL, 0x5d588b60a38ca72aUL,
385 0xa6ecbf78e8e5f42dUL, 0x86eeb44b3c8a3eecUL,
386 0xec219c48fbd21604UL, 0x1aaf1af517c36731UL,
387 0xc306a2836769bde7UL, 0x208280622b1e2adbUL,
388 0x8027f51ffbff94a6UL, 0x76cfa1ce1124f26bUL,
389 0x18eb00562422abb6UL, 0xf377c4d58f8c29c3UL,
390 0x4dbbc207f531561aUL, 0x0253b7f082128a27UL,
391 0x3d1f091cb62c17e0UL, 0x4860e1abd64628a9UL,
392 0x52d17436309d4253UL, 0x356f97e13efae576UL,
393 0xd351e11aa150535bUL, 0x3e6b45bb1dd878ccUL,
394 0x0c776128bed92c98UL, 0x1d34ae93032885b8UL,
395 0x4ba0488ca85ba4c3UL, 0x985348c33c9ce6ceUL,
396 0x66124c6f97bda770UL, 0x0f81a0290654124aUL,
397 0x9ed09ca6569b86fdUL, 0x811009fd18af9a2dUL,
398 0xff08d03f93d8c20aUL, 0x52a148199faef26bUL,
399 0x3e03f9dc2d8d1b73UL, 0x4205801873961a70UL,
400 0xc0d987f041a35970UL, 0x07aa1f15a1c0d549UL,
401 0xdfd46ce08cd27224UL, 0x6d0a024f934e4239UL,
402 0x808a7a6399897b59UL, 0x0a4556e9e13d95a2UL,
403 0xd21a991fe9c13045UL, 0x9b0e8548fe7751b8UL,
404 0x5da643cb4bf30035UL, 0x77db28d63940f721UL,
405 0xfc5eeb614adc9011UL, 0x5229419ae8c411ebUL,
406 0x9ec3e7787d1dcf74UL, 0x340d053e216e4cb5UL,
407 0xcac7af39b48df2b4UL, 0xc0faec2871a10a94UL,
408 0x140a69245ca575edUL, 0x0cf1c37134273a4cUL,
409 0xc8ee306ac224b8a5UL, 0x57eaee7ccb4930b0UL,
410 0xa1e806bdaacbe74fUL, 0x7d9a62742eeb657dUL,
411 0x9eb6b6ef546c4830UL, 0x885cca1fddb36e2eUL,
412 0xe6b9f383ef0d7105UL, 0x58654fef9d2e0412UL,
413 0xa905c4ffbe0e8e26UL, 0x942de5df9b31816eUL,
414 0x497d723f802e88e1UL, 0x30684dea602f408dUL,
415 0x21e5a278a3e6cb34UL, 0xaefb6e6f5b151dc4UL,
416 0xb30b8e049d77ca15UL, 0x28c3c9cf53b98981UL,
417 0x287fb721556cdd2aUL, 0x0d317ca897022274UL,
418 0x7468c7423a543258UL, 0x4a7f11464eb5642fUL,
419 0xa237a4774d193aa6UL, 0xd865986ea92129a1UL,
420 0x24c515ecf87c1a88UL, 0x604003575f39f5ebUL,
421 0x47b9f189570a9b27UL, 0x2b98cede465e4b78UL,
422 0x026df551dbb85c20UL, 0x74fcd91047e21901UL,
423 0x13e2a90a23c1bfa3UL, 0x0cb0074e478519f6UL,
424 0x5ff1cbbe3af6cf44UL, 0x67fe5438be812dbeUL,
425 0xd13cf64fa40f05b0UL, 0x054dfb2f32283787UL,
426 0x4173915b7f0d2aeaUL, 0x482f144f1f610d4eUL,
427 0xf6210201b47f8234UL, 0x5d0ae1929e70b990UL,
428 0xdcd7f455b049567cUL, 0x7e93d0f1f0916f01UL,
429 0xdd79cbf18a7db4faUL, 0xbe8391bf6f74c62fUL,
430 0x027145d14b8291bdUL, 0x585a73ea2cbf1705UL,
431 0x485ca03e928a0db2UL, 0x10fc01a5742857e7UL,
432 0x2f482edbd6d551a7UL, 0x0f0433b5048fdb8aUL,
433 0x60da2e8dd7dc6247UL, 0x88b4c9d38cd4819aUL,
434 0x13033ac001f66697UL, 0x273b24fe3b367d75UL,
435 0xc6e8f66a31b3b9d4UL, 0x281514a494df49d5UL,
436 0xd1726fdfc8b23da7UL, 0x4b3ae7d103dee548UL,
437 0xc6256e19ce4b9d7eUL, 0xff5c5cf186e3c61cUL,
438 0xacc63ca34b8ec145UL, 0x74621888fee66574UL,
439 0x956f409645290a1eUL, 0xef0bf8e3263a962eUL,
440 0xed6a50eb5ec2647bUL, 0x0694283a9dca7502UL,
441 0x769b963643a2dcd1UL, 0x42b7c8ea09fc5353UL,
442 0x4f002aee13397eabUL, 0x63005e2c19b7d63aUL,
443 0xca6736da63023beaUL, 0x966c7f6db12a99b7UL,
444 0xace09390c537c5e1UL, 0x0b696063a1aa89eeUL,
445 0xebb03e97288c56e5UL, 0x432a9f9f938c8be8UL,
446 0xa6a5a93d5b717f71UL, 0x1a5fb4c3e18f9d97UL,
447 0x1c94e7ad1c60cdceUL, 0xee202a43fc02c4a0UL,
448 0x8dafe4d867c46a20UL, 0x0a10263c8ac27b58UL,
449 0xd0dea9dfe4432a4aUL, 0x856af87bbe9277c5UL,
450 0xce8472acc212c71aUL, 0x6f151b6d9bbb1e91UL,
451 0x26776c527ceed56aUL, 0x7d211cb7fbf8faecUL,
452 0x37ae66a6fd4609ccUL, 0x1f81b702d2770c42UL,
453 0x2fb0b057eac58392UL, 0xe1dd89fe29744e9dUL,
454 0xc964f8eb17beb4f8UL, 0x29571073c9a2d41eUL,
455 0xa948a18981c0e254UL, 0x2df6369b65b22830UL,
456 0xa33eb2d75fcfd3c6UL, 0x078cd6ec4199a01fUL,
457 0x4a584a41ad900d2fUL, 0x32142b78e2c74c52UL,
458 0x68c4e8338431c978UL, 0x7f69ea9008689fc2UL,
459 0x52f2c81e46a38265UL, 0xfd78072d04a832fdUL,
460 0x8cd7d5fa25359e94UL, 0x4de71b7454cc29d2UL,
461 0x42eb60ad1eda6ac9UL, 0x0aad37dfdbc09c3aUL,
462 0x81004b71e33cc191UL, 0x44e6be345122803cUL,
463 0x03fe8388ba1920dbUL, 0xf5d57c32150db008UL,
464 0x49c8c4281af60c29UL, 0x21edb518de701aeeUL,
465 0x7fb63e418f06dc99UL, 0xa4460d99c166d7b8UL,
466 0x24dd5248ce520a83UL, 0x5ec3ad712b928358UL,
467 0x15022a5fbd17930fUL, 0xa4f64a77d82570e3UL,
468 0x12bc8d6915783712UL, 0x498194c0fc620abbUL,
469 0x38a2d9d255686c82UL, 0x785c6bd9193e21f0UL,
470 0xe4d5c81ab24a5484UL, 0x56307860b2e20989UL,
471 0x429d55f78b4d74c4UL, 0x22f1834643350131UL,
472 0x1e60c24598c71fffUL, 0x59f2f014979983efUL,
473 0x46a47d56eb494a44UL, 0x3e22a854d636a18eUL,
474 0xb346e15274491c3bUL, 0x2ceafd4e5390cde7UL,
475 0xba8a8538be0d6675UL, 0x4b9074bb50818e23UL,
476 0xcbdab89085d304c3UL, 0x61a24fe0e56192c4UL,
477 0xcb7615e6db525bcbUL, 0xdd7d8c35a567e4caUL,
478 0xe6b4153acafcdd69UL, 0x2d668e097f3c9766UL,
479 0xa57e7e265ce55ef0UL, 0x5d9f4e527cd4b967UL,
480 0xfbc83606492fd1e5UL, 0x090d52beb7c3f7aeUL,
481 0x09b9515a1e7b4d7cUL, 0x1f266a2599da44c0UL,
482 0xa1c49548e2c55504UL, 0x7ef04287126f15ccUL,
483 0xfed1659dbd30ef15UL, 0x8b4ab9eec4e0277bUL,
484 0x884d6236a5df3291UL, 0x1fd96ea6bf5cf788UL,
485 0x42a161981f190d9aUL, 0x61d849507e6052c1UL,
486 0x9fe113bf285a2cd5UL, 0x7c22d676dbad85d8UL,
487 0x82e770ed2bfbd27dUL, 0x4c05b2ece996f5a5UL,
488 0xcd40a9c2b0900150UL, 0x5895319213d9bf64UL,
489 0xe7cc5d703fea2e08UL, 0xb50c491258e2188cUL,
490 0xcce30baa48205bf0UL, 0x537c659ccfa32d62UL,
491 0x37b6623a98cfc088UL, 0xfe9bed1fa4d6aca4UL,
492 0x04d29b8e56a8d1b0UL, 0x725f71c40b519575UL,
493 0x28c7f89cd0339ce6UL, 0x8367b14469ddc18bUL,
494 0x883ada83a6a1652cUL, 0x585f1974034d6c17UL,
495 0x89cfb266f1b19188UL, 0xe63b4863e7c35217UL,
496 0xd88c9da6b4c0526aUL, 0x3e035c9df0954635UL,
497 0xdd9d5412fb45de9dUL, 0xdd684532e4cff40dUL,
498 0x4b5c999b151d671cUL, 0x2d8c2cc811e7f690UL,
499 0x7f54be1d90055d40UL, 0xa464c5df464aaf40UL,
500 0x33979624f0e917beUL, 0x2c018dc527356b30UL,
501 0xa5415024e330b3d4UL, 0x73ff3d96691652d3UL,
502 0x94ec42c4ef9b59f1UL, 0x0747201618d08e5aUL,
503 0x4d6ca48aca411c53UL, 0x66415f2fcfa66119UL,
504 0x9c4dd40051e227ffUL, 0x59810bc09a02f7ebUL,
505 0x2a7eb171b3dc101dUL, 0x441c5ab99ffef68eUL,
506 0x32025c9b93b359eaUL, 0x5e8ce0a71e9d112fUL,
507 0xbfcccb92429503fdUL, 0xd271ba752f095d55UL,
508 0x345ead5e972d091eUL, 0x18c8df11a83103baUL,
509 0x90cd949a9aed0f4cUL, 0xc5d1f4cb6660e37eUL,
510 0xb8cac52d56c52e0bUL, 0x6e42e400c5808e0dUL,
511 0xa3b46966eeaefd23UL, 0x0c4f1f0be39ecdcaUL,
512 0x189dc8c9d683a51dUL, 0x51f27f054c09351bUL,
513 0x4c487ccd2a320682UL, 0x587ea95bb3df1c96UL,
514 0xc8ccf79e555cb8e8UL, 0x547dc829a206d73dUL,
515 0xb822a6cd80c39b06UL, 0xe96d54732000d4c6UL,
516 0x28535b6f91463b4dUL, 0x228f4660e2486e1dUL,
517 0x98799538de8d3abfUL, 0x8cd8330045ebca6eUL,
518 0x79952a008221e738UL, 0x4322e1a7535cd2bbUL,
519 0xb114c11819d1801cUL, 0x2016e4d84f3f5ec7UL,
520 0xdd0e2df409260f4cUL, 0x5ec362c0ae5f7266UL,
521 0xc0462b18b8b2b4eeUL, 0x7cc8d950274d1afbUL,
522 0xf25f7105436b02d2UL, 0x43bbf8dcbff9ccd3UL,
523 0xb6ad1767a039e9dfUL, 0xb0714da8f69d3583UL,
524 0x5e55fa18b42931f5UL, 0x4ed5558f33c60961UL,
525 0x1fe37901c647a5ddUL, 0x593ddf1f8081d357UL,
526 0x0249a4fd813fd7a6UL, 0x69acca274e9caf61UL,
527 0x047ba3ea330721c9UL, 0x83423fc20e7e1ea0UL,
528 0x1df4c0af01314a60UL, 0x09a62dab89289527UL,
529 0xa5b325a49cc6cb00UL, 0xe94b5dc654b56cb6UL,
530 0x3be28779adc994a0UL, 0x4296e8f8ba3a4aadUL,
531 0x328689761e451eabUL, 0x2e4d598bff59594aUL,
532 0x49b96853d7a7084aUL, 0x4980a319601420a8UL,
533 0x9565b9e12f552c42UL, 0x8a5318db7100fe96UL,
534 0x05c90b4d43add0d7UL, 0x538b4cd66a5d4edaUL,
535 0xf4e94fc3e89f039fUL, 0x592c9af26f618045UL,
536 0x08a36eb5fd4b9550UL, 0x25fffaf6c2ed1419UL,
537 0x34434459cc79d354UL, 0xeeecbfb4b1d5476bUL,
538 0xddeb34a061615d99UL, 0x5129cecceb64b773UL,
539 0xee43215894993520UL, 0x772f9c7cf14c0b3bUL,
540 0xd2e2fce306bedad5UL, 0x715f42b546f06a97UL,
541 0x434ecdceda5b5f1aUL, 0x0da17115a49741a9UL,
542 0x680bd77c73edad2eUL, 0x487c02354edd9041UL,
543 0xb8efeff3a70ed9c4UL, 0x56a32aa3e857e302UL,
544 0xdf3a68bd48a2a5a0UL, 0x07f650b73176c444UL,
545 0xe38b9b1626e0ccb1UL, 0x79e053c18b09fb36UL,
546 0x56d90319c9f94964UL, 0x1ca941e7ac9ff5c4UL,
547 0x49c4df29162fa0bbUL, 0x8488cf3282b33305UL,
548 0x95dfda14cabb437dUL, 0x3391f78264d5ad86UL,
549 0x729ae06ae2b5095dUL, 0xd58a58d73259a946UL,
550 0xe9834262d13921edUL, 0x27fedafaa54bb592UL,
551 0xa99dc5b829ad48bbUL, 0x5f025742499ee260UL,
552 0x802c8ecd5d7513fdUL, 0x78ceb3ef3f6dd938UL,
553 0xc342f44f8a135d94UL, 0x7b9edb44828cdda3UL,
554 0x9436d11a0537cfe7UL, 0x5064b164ec1ab4c8UL,
555 0x7020eccfd37eb2fcUL, 0x1f31ea3ed90d25fcUL,
556 0x1b930d7bdfa1bb34UL, 0x5344467a48113044UL,
557 0x70073170f25e6dfbUL, 0xe385dc1a50114cc8UL,
558 0x2348698ac8fc4f00UL, 0x2a77a55284dd40d8UL,
559 0xfe06afe0c98c6ce4UL, 0xc235df96dddfd6e4UL,
560 0x1428d01e33bf1ed3UL, 0x785768ec9300bdafUL,
561 0x9702e57a91deb63bUL, 0x61bdb8bfe5ce8b80UL,
562 0x645b426f3d1d58acUL, 0x4804a82227a557bcUL,
563 0x8e57048ab44d2601UL, 0x68d6501a4b3a6935UL,
564 0xc39c9ec3f9e1c293UL, 0x4172f257d4de63e2UL,
565 0xd368b450330c6401UL, 0x040d3017418f2391UL,
566 0x2c34bb6090b7d90dUL, 0x16f649228fdfd51fUL,
567 0xbea6818e2b928ef5UL, 0xe28ccf91cdc11e72UL,
568 0x594aaa68e77a36cdUL, 0x313034806c7ffd0fUL,
569 0x8a9d27ac2249bd65UL, 0x19a3b464018e9512UL,
570 0xc26ccff352b37ec7UL, 0x056f68341d797b21UL,
571 0x5e79d6757efd2327UL, 0xfabdbcb6553afe15UL,
572 0xd3e7222c6eaf5a60UL, 0x7046c76d4dae743bUL,
573 0x660be872b18d4a55UL, 0x19992518574e1496UL,
574 0xc103053a302bdcbbUL, 0x3ed8e9800b218e8eUL,
575 0x7b0b9239fa75e03eUL, 0xefe9fb684633c083UL,
576 0x98a35fbe391a7793UL, 0x6065510fe2d0fe34UL,
577 0x55cb668548abad0cUL, 0xb4584548da87e527UL,
578 0x2c43ecea0107c1ddUL, 0x526028809372de35UL,
579 0x3415c56af9213b1fUL, 0x5bee1a4d017e98dbUL,
580 0x13f6b105b5cf709bUL, 0x5ff20e3482b29ab6UL,
581 0x0aa29c75cc2e6c90UL, 0xfc7d73ca3a70e206UL,
582 0x899fc38fc4b5c515UL, 0x250386b124ffc207UL,
583 0x54ea28d5ae3d2b56UL, 0x9913149dd6de60ceUL,
584 0x16694fc58f06d6c1UL, 0x46b23975eb018fc7UL,
585 0x470a6a0fb4b7b4e2UL, 0x5d92475a8f7253deUL,
586 0xabeee5b52fbd3adbUL, 0x7fa20801a0806968UL,
587 0x76f3faf19f7714d2UL, 0xb3e840c12f4660c3UL,
588 0x0fb4cd8df212744eUL, 0x4b065a251d3a2dd2UL,
589 0x5cebde383d77cd4aUL, 0x6adf39df882c9cb1UL,
590 0xa2dd242eb09af759UL, 0x3147c0e50e5f6422UL,
591 0x164ca5101d1350dbUL, 0xf8d13479c33fc962UL,
592 0xe640ce4d13e5da08UL, 0x4bdee0c45061f8baUL,
593 0xd7c46dc1a4edb1c9UL, 0x5514d7b6437fd98aUL,
594 0x58942f6bb2a1c00bUL, 0x2dffb2ab1d70710eUL,
595 0xccdfcf2fc18b6d68UL, 0xa8ebcba8b7806167UL,
596 0x980697f95e2937e3UL, 0x02fbba1cd0126e8cUL
597};
598
599
600
601
602
603static void mul2_256x256_integer_adx(u64 *const c, const u64 *const a,
604 const u64 *const b)
605{
606 asm volatile(
607 "xorl %%r14d, %%r14d ;"
608 "movq (%1), %%rdx; "
609 "mulx (%2), %%r8, %%r15; "
610 "xorl %%r10d, %%r10d ;"
611 "movq %%r8, (%0) ;"
612 "mulx 8(%2), %%r10, %%rax; "
613 "adox %%r10, %%r15 ;"
614 "mulx 16(%2), %%r8, %%rbx; "
615 "adox %%r8, %%rax ;"
616 "mulx 24(%2), %%r10, %%rcx; "
617 "adox %%r10, %%rbx ;"
618
619 "adox %%r14, %%rcx ;"
620
621 "movq 8(%1), %%rdx; "
622 "mulx (%2), %%r8, %%r9; "
623 "adox %%r15, %%r8 ;"
624 "movq %%r8, 8(%0) ;"
625 "mulx 8(%2), %%r10, %%r11; "
626 "adox %%r10, %%r9 ;"
627 "adcx %%r9, %%rax ;"
628 "mulx 16(%2), %%r8, %%r13; "
629 "adox %%r8, %%r11 ;"
630 "adcx %%r11, %%rbx ;"
631 "mulx 24(%2), %%r10, %%r15; "
632 "adox %%r10, %%r13 ;"
633 "adcx %%r13, %%rcx ;"
634
635 "adox %%r14, %%r15 ;"
636 "adcx %%r14, %%r15 ;"
637
638 "movq 16(%1), %%rdx; "
639 "xorl %%r10d, %%r10d ;"
640 "mulx (%2), %%r8, %%r9; "
641 "adox %%rax, %%r8 ;"
642 "movq %%r8, 16(%0) ;"
643 "mulx 8(%2), %%r10, %%r11; "
644 "adox %%r10, %%r9 ;"
645 "adcx %%r9, %%rbx ;"
646 "mulx 16(%2), %%r8, %%r13; "
647 "adox %%r8, %%r11 ;"
648 "adcx %%r11, %%rcx ;"
649 "mulx 24(%2), %%r10, %%rax; "
650 "adox %%r10, %%r13 ;"
651 "adcx %%r13, %%r15 ;"
652
653 "adox %%r14, %%rax ;"
654 "adcx %%r14, %%rax ;"
655
656 "movq 24(%1), %%rdx; "
657 "xorl %%r10d, %%r10d ;"
658 "mulx (%2), %%r8, %%r9; "
659 "adox %%rbx, %%r8 ;"
660 "movq %%r8, 24(%0) ;"
661 "mulx 8(%2), %%r10, %%r11; "
662 "adox %%r10, %%r9 ;"
663 "adcx %%r9, %%rcx ;"
664 "movq %%rcx, 32(%0) ;"
665 "mulx 16(%2), %%r8, %%r13; "
666 "adox %%r8, %%r11 ;"
667 "adcx %%r11, %%r15 ;"
668 "movq %%r15, 40(%0) ;"
669 "mulx 24(%2), %%r10, %%rbx; "
670 "adox %%r10, %%r13 ;"
671 "adcx %%r13, %%rax ;"
672 "movq %%rax, 48(%0) ;"
673
674 "adox %%r14, %%rbx ;"
675 "adcx %%r14, %%rbx ;"
676 "movq %%rbx, 56(%0) ;"
677
678 "movq 32(%1), %%rdx; "
679 "mulx 32(%2), %%r8, %%r15; "
680 "xorl %%r10d, %%r10d ;"
681 "movq %%r8, 64(%0);"
682 "mulx 40(%2), %%r10, %%rax; "
683 "adox %%r10, %%r15 ;"
684 "mulx 48(%2), %%r8, %%rbx; "
685 "adox %%r8, %%rax ;"
686 "mulx 56(%2), %%r10, %%rcx; "
687 "adox %%r10, %%rbx ;"
688
689 "adox %%r14, %%rcx ;"
690
691 "movq 40(%1), %%rdx; "
692 "xorl %%r10d, %%r10d ;"
693 "mulx 32(%2), %%r8, %%r9; "
694 "adox %%r15, %%r8 ;"
695 "movq %%r8, 72(%0);"
696 "mulx 40(%2), %%r10, %%r11; "
697 "adox %%r10, %%r9 ;"
698 "adcx %%r9, %%rax ;"
699 "mulx 48(%2), %%r8, %%r13; "
700 "adox %%r8, %%r11 ;"
701 "adcx %%r11, %%rbx ;"
702 "mulx 56(%2), %%r10, %%r15; "
703 "adox %%r10, %%r13 ;"
704 "adcx %%r13, %%rcx ;"
705
706 "adox %%r14, %%r15 ;"
707 "adcx %%r14, %%r15 ;"
708
709 "movq 48(%1), %%rdx; "
710 "xorl %%r10d, %%r10d ;"
711 "mulx 32(%2), %%r8, %%r9; "
712 "adox %%rax, %%r8 ;"
713 "movq %%r8, 80(%0);"
714 "mulx 40(%2), %%r10, %%r11; "
715 "adox %%r10, %%r9 ;"
716 "adcx %%r9, %%rbx ;"
717 "mulx 48(%2), %%r8, %%r13; "
718 "adox %%r8, %%r11 ;"
719 "adcx %%r11, %%rcx ;"
720 "mulx 56(%2), %%r10, %%rax; "
721 "adox %%r10, %%r13 ;"
722 "adcx %%r13, %%r15 ;"
723
724 "adox %%r14, %%rax ;"
725 "adcx %%r14, %%rax ;"
726
727 "movq 56(%1), %%rdx; "
728 "xorl %%r10d, %%r10d ;"
729 "mulx 32(%2), %%r8, %%r9; "
730 "adox %%rbx, %%r8 ;"
731 "movq %%r8, 88(%0);"
732 "mulx 40(%2), %%r10, %%r11; "
733 "adox %%r10, %%r9 ;"
734 "adcx %%r9, %%rcx ;"
735 "movq %%rcx, 96(%0) ;"
736 "mulx 48(%2), %%r8, %%r13; "
737 "adox %%r8, %%r11 ;"
738 "adcx %%r11, %%r15 ;"
739 "movq %%r15, 104(%0) ;"
740 "mulx 56(%2), %%r10, %%rbx; "
741 "adox %%r10, %%r13 ;"
742 "adcx %%r13, %%rax ;"
743 "movq %%rax, 112(%0) ;"
744
745 "adox %%r14, %%rbx ;"
746 "adcx %%r14, %%rbx ;"
747 "movq %%rbx, 120(%0) ;"
748 :
749 : "r"(c), "r"(a), "r"(b)
750 : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
751 "%r10", "%r11", "%r13", "%r14", "%r15");
752}
753
/*
 * Two independent 256x256 -> 512-bit integer multiplications, using mulx and
 * a plain add/adc carry chain (no adcx/adox):
 *   c[0..7]  = a[0..3] * b[0..3]
 *   c[8..15] = a[4..7] * b[4..7]
 * Operands are arrays of 64-bit words, least-significant word first.
 */
static void mul2_256x256_integer_bmi2(u64 *const c, const u64 *const a,
				      const u64 *const b)
{
	asm volatile(
		/* ---- first product: row a[0] ---- */
		"movq (%1), %%rdx; " /* a[0] */
		"mulx (%2), %%r8, %%r15; " /* a[0]*b[0] */
		"movq %%r8, (%0) ;"
		"mulx 8(%2), %%r10, %%rax; " /* a[0]*b[1] */
		"addq %%r10, %%r15 ;"
		"mulx 16(%2), %%r8, %%rbx; " /* a[0]*b[2] */
		"adcq %%r8, %%rax ;"
		"mulx 24(%2), %%r10, %%rcx; " /* a[0]*b[3] */
		"adcq %%r10, %%rbx ;"
		/* fold the last carry into the top word of the row */
		"adcq $0, %%rcx ;"

		/* ---- row a[1] ---- */
		"movq 8(%1), %%rdx; " /* a[1] */
		"mulx (%2), %%r8, %%r9; " /* a[1]*b[0] */
		"addq %%r15, %%r8 ;"
		"movq %%r8, 8(%0) ;"
		"mulx 8(%2), %%r10, %%r11; " /* a[1]*b[1] */
		"adcq %%r10, %%r9 ;"
		"mulx 16(%2), %%r8, %%r13; " /* a[1]*b[2] */
		"adcq %%r8, %%r11 ;"
		"mulx 24(%2), %%r10, %%r15; " /* a[1]*b[3] */
		"adcq %%r10, %%r13 ;"

		"adcq $0, %%r15 ;"

		/* accumulate this row into the running sum */
		"addq %%r9, %%rax ;"
		"adcq %%r11, %%rbx ;"
		"adcq %%r13, %%rcx ;"
		"adcq $0, %%r15 ;"

		/* ---- row a[2] ---- */
		"movq 16(%1), %%rdx; " /* a[2] */
		"mulx (%2), %%r8, %%r9; " /* a[2]*b[0] */
		"addq %%rax, %%r8 ;"
		"movq %%r8, 16(%0) ;"
		"mulx 8(%2), %%r10, %%r11; " /* a[2]*b[1] */
		"adcq %%r10, %%r9 ;"
		"mulx 16(%2), %%r8, %%r13; " /* a[2]*b[2] */
		"adcq %%r8, %%r11 ;"
		"mulx 24(%2), %%r10, %%rax; " /* a[2]*b[3] */
		"adcq %%r10, %%r13 ;"

		"adcq $0, %%rax ;"

		"addq %%r9, %%rbx ;"
		"adcq %%r11, %%rcx ;"
		"adcq %%r13, %%r15 ;"
		"adcq $0, %%rax ;"

		/* ---- row a[3] ---- */
		"movq 24(%1), %%rdx; " /* a[3] */
		"mulx (%2), %%r8, %%r9; " /* a[3]*b[0] */
		"addq %%rbx, %%r8 ;"
		"movq %%r8, 24(%0) ;"
		"mulx 8(%2), %%r10, %%r11; " /* a[3]*b[1] */
		"adcq %%r10, %%r9 ;"
		"mulx 16(%2), %%r8, %%r13; " /* a[3]*b[2] */
		"adcq %%r8, %%r11 ;"
		"mulx 24(%2), %%r10, %%rbx; " /* a[3]*b[3] */
		"adcq %%r10, %%r13 ;"

		"adcq $0, %%rbx ;"

		/* store the upper four words of the first product */
		"addq %%r9, %%rcx ;"
		"movq %%rcx, 32(%0) ;"
		"adcq %%r11, %%r15 ;"
		"movq %%r15, 40(%0) ;"
		"adcq %%r13, %%rax ;"
		"movq %%rax, 48(%0) ;"
		"adcq $0, %%rbx ;"
		"movq %%rbx, 56(%0) ;"

		/* ---- second product: row a[4] ---- */
		"movq 32(%1), %%rdx; " /* a[4] */
		"mulx 32(%2), %%r8, %%r15; " /* a[4]*b[4] */
		"movq %%r8, 64(%0) ;"
		"mulx 40(%2), %%r10, %%rax; " /* a[4]*b[5] */
		"addq %%r10, %%r15 ;"
		"mulx 48(%2), %%r8, %%rbx; " /* a[4]*b[6] */
		"adcq %%r8, %%rax ;"
		"mulx 56(%2), %%r10, %%rcx; " /* a[4]*b[7] */
		"adcq %%r10, %%rbx ;"

		"adcq $0, %%rcx ;"

		/* ---- row a[5] ---- */
		"movq 40(%1), %%rdx; " /* a[5] */
		"mulx 32(%2), %%r8, %%r9; " /* a[5]*b[4] */
		"addq %%r15, %%r8 ;"
		"movq %%r8, 72(%0) ;"
		"mulx 40(%2), %%r10, %%r11; " /* a[5]*b[5] */
		"adcq %%r10, %%r9 ;"
		"mulx 48(%2), %%r8, %%r13; " /* a[5]*b[6] */
		"adcq %%r8, %%r11 ;"
		"mulx 56(%2), %%r10, %%r15; " /* a[5]*b[7] */
		"adcq %%r10, %%r13 ;"

		"adcq $0, %%r15 ;"

		"addq %%r9, %%rax ;"
		"adcq %%r11, %%rbx ;"
		"adcq %%r13, %%rcx ;"
		"adcq $0, %%r15 ;"

		/* ---- row a[6] ---- */
		"movq 48(%1), %%rdx; " /* a[6] */
		"mulx 32(%2), %%r8, %%r9; " /* a[6]*b[4] */
		"addq %%rax, %%r8 ;"
		"movq %%r8, 80(%0) ;"
		"mulx 40(%2), %%r10, %%r11; " /* a[6]*b[5] */
		"adcq %%r10, %%r9 ;"
		"mulx 48(%2), %%r8, %%r13; " /* a[6]*b[6] */
		"adcq %%r8, %%r11 ;"
		"mulx 56(%2), %%r10, %%rax; " /* a[6]*b[7] */
		"adcq %%r10, %%r13 ;"

		"adcq $0, %%rax ;"

		"addq %%r9, %%rbx ;"
		"adcq %%r11, %%rcx ;"
		"adcq %%r13, %%r15 ;"
		"adcq $0, %%rax ;"

		/* ---- row a[7] ---- */
		"movq 56(%1), %%rdx; " /* a[7] */
		"mulx 32(%2), %%r8, %%r9; " /* a[7]*b[4] */
		"addq %%rbx, %%r8 ;"
		"movq %%r8, 88(%0) ;"
		"mulx 40(%2), %%r10, %%r11; " /* a[7]*b[5] */
		"adcq %%r10, %%r9 ;"
		"mulx 48(%2), %%r8, %%r13; " /* a[7]*b[6] */
		"adcq %%r8, %%r11 ;"
		"mulx 56(%2), %%r10, %%rbx; " /* a[7]*b[7] */
		"adcq %%r10, %%r13 ;"

		"adcq $0, %%rbx ;"

		/* store the upper four words of the second product */
		"addq %%r9, %%rcx ;"
		"movq %%rcx, 96(%0) ;"
		"adcq %%r11, %%r15 ;"
		"movq %%r15, 104(%0) ;"
		"adcq %%r13, %%rax ;"
		"movq %%rax, 112(%0) ;"
		"adcq $0, %%rbx ;"
		"movq %%rbx, 120(%0) ;"
		:
		: "r"(c), "r"(a), "r"(b)
		: "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
		  "%r10", "%r11", "%r13", "%r15");
}
902
/*
 * Two independent 256-bit squarings using mulx/adcx/adox:
 *   c[0..7]  = a[0..3]^2
 *   c[8..15] = a[4..7]^2
 * Each half first sums the cross products a[i]*a[j] (i < j), doubles that
 * sum, and then adds the squares a[i]^2 while storing the result.
 */
static void sqr2_256x256_integer_adx(u64 *const c, const u64 *const a)
{
	asm volatile(
		/* ---- first half: cross products ---- */
		"movq (%1), %%rdx ;" /* a[0] */
		"mulx 8(%1), %%r8, %%r14 ;" /* a[1]*a[0] */
		"xorl %%r15d, %%r15d;" /* r15 = 0, clears CF and OF */
		"mulx 16(%1), %%r9, %%r10 ;" /* a[2]*a[0] */
		"adcx %%r14, %%r9 ;"
		"mulx 24(%1), %%rax, %%rcx ;" /* a[3]*a[0] */
		"adcx %%rax, %%r10 ;"
		"movq 24(%1), %%rdx ;" /* a[3] */
		"mulx 8(%1), %%r11, %%rbx ;" /* a[1]*a[3] */
		"adcx %%rcx, %%r11 ;"
		"mulx 16(%1), %%rax, %%r13 ;" /* a[2]*a[3] */
		"adcx %%rax, %%rbx ;"
		"movq 8(%1), %%rdx ;" /* a[1] */
		"adcx %%r15, %%r13 ;"
		"mulx 16(%1), %%rax, %%rcx ;" /* a[2]*a[1] */
		"movq $0, %%r14 ;"

		"adcx %%r15, %%r14 ;"

		/* add a[2]*a[1] (adox chain) while doubling (adcx chain) */
		"xorl %%r15d, %%r15d;"
		"adox %%rax, %%r10 ;"
		"adcx %%r8, %%r8 ;"
		"adox %%rcx, %%r11 ;"
		"adcx %%r9, %%r9 ;"
		"adox %%r15, %%rbx ;"
		"adcx %%r10, %%r10 ;"
		"adox %%r15, %%r13 ;"
		"adcx %%r11, %%r11 ;"
		"adox %%r15, %%r14 ;"
		"adcx %%rbx, %%rbx ;"
		"adcx %%r13, %%r13 ;"
		"adcx %%r14, %%r14 ;"

		/* add the squares and store c[0..7] */
		"movq (%1), %%rdx ;"
		"mulx %%rdx, %%rax, %%rcx ;" /* a[0]^2 */

		"movq %%rax, 0(%0) ;"
		"addq %%rcx, %%r8 ;"
		"movq %%r8, 8(%0) ;"
		"movq 8(%1), %%rdx ;"
		"mulx %%rdx, %%rax, %%rcx ;" /* a[1]^2 */
		"adcq %%rax, %%r9 ;"
		"movq %%r9, 16(%0) ;"
		"adcq %%rcx, %%r10 ;"
		"movq %%r10, 24(%0) ;"
		"movq 16(%1), %%rdx ;"
		"mulx %%rdx, %%rax, %%rcx ;" /* a[2]^2 */
		"adcq %%rax, %%r11 ;"
		"movq %%r11, 32(%0) ;"
		"adcq %%rcx, %%rbx ;"
		"movq %%rbx, 40(%0) ;"
		"movq 24(%1), %%rdx ;"
		"mulx %%rdx, %%rax, %%rcx ;" /* a[3]^2 */
		"adcq %%rax, %%r13 ;"
		"movq %%r13, 48(%0) ;"
		"adcq %%rcx, %%r14 ;"
		"movq %%r14, 56(%0) ;"


		/* ---- second half: cross products ---- */
		"movq 32(%1), %%rdx ;" /* a[4] */
		"mulx 40(%1), %%r8, %%r14 ;" /* a[5]*a[4] */
		"xorl %%r15d, %%r15d;" /* r15 = 0, clears CF and OF */
		"mulx 48(%1), %%r9, %%r10 ;" /* a[6]*a[4] */
		"adcx %%r14, %%r9 ;"
		"mulx 56(%1), %%rax, %%rcx ;" /* a[7]*a[4] */
		"adcx %%rax, %%r10 ;"
		"movq 56(%1), %%rdx ;" /* a[7] */
		"mulx 40(%1), %%r11, %%rbx ;" /* a[5]*a[7] */
		"adcx %%rcx, %%r11 ;"
		"mulx 48(%1), %%rax, %%r13 ;" /* a[6]*a[7] */
		"adcx %%rax, %%rbx ;"
		"movq 40(%1), %%rdx ;" /* a[5] */
		"adcx %%r15, %%r13 ;"
		"mulx 48(%1), %%rax, %%rcx ;" /* a[6]*a[5] */
		"movq $0, %%r14 ;"

		"adcx %%r15, %%r14 ;"

		/* add a[6]*a[5] (adox chain) while doubling (adcx chain) */
		"xorl %%r15d, %%r15d;"
		"adox %%rax, %%r10 ;"
		"adcx %%r8, %%r8 ;"
		"adox %%rcx, %%r11 ;"
		"adcx %%r9, %%r9 ;"
		"adox %%r15, %%rbx ;"
		"adcx %%r10, %%r10 ;"
		"adox %%r15, %%r13 ;"
		"adcx %%r11, %%r11 ;"
		"adox %%r15, %%r14 ;"
		"adcx %%rbx, %%rbx ;"
		"adcx %%r13, %%r13 ;"
		"adcx %%r14, %%r14 ;"

		/* add the squares and store c[8..15] */
		"movq 32(%1), %%rdx ;"
		"mulx %%rdx, %%rax, %%rcx ;" /* a[4]^2 */

		"movq %%rax, 64(%0) ;"
		"addq %%rcx, %%r8 ;"
		"movq %%r8, 72(%0) ;"
		"movq 40(%1), %%rdx ;"
		"mulx %%rdx, %%rax, %%rcx ;" /* a[5]^2 */
		"adcq %%rax, %%r9 ;"
		"movq %%r9, 80(%0) ;"
		"adcq %%rcx, %%r10 ;"
		"movq %%r10, 88(%0) ;"
		"movq 48(%1), %%rdx ;"
		"mulx %%rdx, %%rax, %%rcx ;" /* a[6]^2 */
		"adcq %%rax, %%r11 ;"
		"movq %%r11, 96(%0) ;"
		"adcq %%rcx, %%rbx ;"
		"movq %%rbx, 104(%0) ;"
		"movq 56(%1), %%rdx ;"
		"mulx %%rdx, %%rax, %%rcx ;" /* a[7]^2 */
		"adcq %%rax, %%r13 ;"
		"movq %%r13, 112(%0) ;"
		"adcq %%rcx, %%r14 ;"
		"movq %%r14, 120(%0) ;"
		:
		: "r"(c), "r"(a)
		: "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
		  "%r10", "%r11", "%r13", "%r14", "%r15");
}
1027
/*
 * Two independent 256-bit squarings using mulx with add/adc and shld:
 *   c[0..7]  = a[0..3]^2
 *   c[8..15] = a[4..7]^2
 * Each half sums the cross products a[i]*a[j] (i < j), doubles the sum with
 * a one-bit shld/shl chain, then adds the squares a[i]^2 while storing.
 */
static void sqr2_256x256_integer_bmi2(u64 *const c, const u64 *const a)
{
	asm volatile(
		/* ---- first half: cross products ---- */
		"movq 8(%1), %%rdx ;" /* a[1] */
		"mulx (%1), %%r8, %%r9 ;" /* a[0]*a[1] */
		"mulx 16(%1), %%r10, %%r11 ;" /* a[2]*a[1] */
		"mulx 24(%1), %%rcx, %%r14 ;" /* a[3]*a[1] */

		"movq 16(%1), %%rdx ;" /* a[2] */
		"mulx 24(%1), %%r15, %%r13 ;" /* a[3]*a[2] */
		"mulx (%1), %%rax, %%rdx ;" /* a[0]*a[2] */

		"addq %%rax, %%r9 ;"
		"adcq %%rdx, %%r10 ;"
		"adcq %%rcx, %%r11 ;"
		"adcq %%r14, %%r15 ;"
		"adcq $0, %%r13 ;"
		"movq $0, %%r14 ;"
		"adcq $0, %%r14 ;"

		"movq (%1), %%rdx ;" /* a[0] */
		"mulx 24(%1), %%rax, %%rcx ;" /* a[0]*a[3] */

		"addq %%rax, %%r10 ;"
		"adcq %%rcx, %%r11 ;"
		"adcq $0, %%r15 ;"
		"adcq $0, %%r13 ;"
		"adcq $0, %%r14 ;"

		/* double the cross-product sum (shift left by one bit) */
		"shldq $1, %%r13, %%r14 ;"
		"shldq $1, %%r15, %%r13 ;"
		"shldq $1, %%r11, %%r15 ;"
		"shldq $1, %%r10, %%r11 ;"
		"shldq $1, %%r9, %%r10 ;"
		"shldq $1, %%r8, %%r9 ;"
		"shlq $1, %%r8 ;"


		/* rdx still holds a[0] here */
		"mulx %%rdx, %%rax, %%rcx ; " /* a[0]^2 */

		"movq %%rax, 0(%0) ;"
		"addq %%rcx, %%r8 ;"
		"movq %%r8, 8(%0) ;"
		"movq 8(%1), %%rdx ;"
		"mulx %%rdx, %%rax, %%rcx ; " /* a[1]^2 */
		"adcq %%rax, %%r9 ;"
		"movq %%r9, 16(%0) ;"
		"adcq %%rcx, %%r10 ;"
		"movq %%r10, 24(%0) ;"
		"movq 16(%1), %%rdx ;"
		"mulx %%rdx, %%rax, %%rcx ; " /* a[2]^2 */
		"adcq %%rax, %%r11 ;"
		"movq %%r11, 32(%0) ;"
		"adcq %%rcx, %%r15 ;"
		"movq %%r15, 40(%0) ;"
		"movq 24(%1), %%rdx ;"
		"mulx %%rdx, %%rax, %%rcx ; " /* a[3]^2 */
		"adcq %%rax, %%r13 ;"
		"movq %%r13, 48(%0) ;"
		"adcq %%rcx, %%r14 ;"
		"movq %%r14, 56(%0) ;"

		/* ---- second half: cross products ---- */
		"movq 40(%1), %%rdx ;" /* a[5] */
		"mulx 32(%1), %%r8, %%r9 ;" /* a[4]*a[5] */
		"mulx 48(%1), %%r10, %%r11 ;" /* a[6]*a[5] */
		"mulx 56(%1), %%rcx, %%r14 ;" /* a[7]*a[5] */

		"movq 48(%1), %%rdx ;" /* a[6] */
		"mulx 56(%1), %%r15, %%r13 ;" /* a[7]*a[6] */
		"mulx 32(%1), %%rax, %%rdx ;" /* a[4]*a[6] */

		"addq %%rax, %%r9 ;"
		"adcq %%rdx, %%r10 ;"
		"adcq %%rcx, %%r11 ;"
		"adcq %%r14, %%r15 ;"
		"adcq $0, %%r13 ;"
		"movq $0, %%r14 ;"
		"adcq $0, %%r14 ;"

		"movq 32(%1), %%rdx ;" /* a[4] */
		"mulx 56(%1), %%rax, %%rcx ;" /* a[4]*a[7] */

		"addq %%rax, %%r10 ;"
		"adcq %%rcx, %%r11 ;"
		"adcq $0, %%r15 ;"
		"adcq $0, %%r13 ;"
		"adcq $0, %%r14 ;"

		/* double the cross-product sum (shift left by one bit) */
		"shldq $1, %%r13, %%r14 ;"
		"shldq $1, %%r15, %%r13 ;"
		"shldq $1, %%r11, %%r15 ;"
		"shldq $1, %%r10, %%r11 ;"
		"shldq $1, %%r9, %%r10 ;"
		"shldq $1, %%r8, %%r9 ;"
		"shlq $1, %%r8 ;"


		/* rdx still holds a[4] here */
		"mulx %%rdx, %%rax, %%rcx ; " /* a[4]^2 */

		"movq %%rax, 64(%0) ;"
		"addq %%rcx, %%r8 ;"
		"movq %%r8, 72(%0) ;"
		"movq 40(%1), %%rdx ;"
		"mulx %%rdx, %%rax, %%rcx ; " /* a[5]^2 */
		"adcq %%rax, %%r9 ;"
		"movq %%r9, 80(%0) ;"
		"adcq %%rcx, %%r10 ;"
		"movq %%r10, 88(%0) ;"
		"movq 48(%1), %%rdx ;"
		"mulx %%rdx, %%rax, %%rcx ; " /* a[6]^2 */
		"adcq %%rax, %%r11 ;"
		"movq %%r11, 96(%0) ;"
		"adcq %%rcx, %%r15 ;"
		"movq %%r15, 104(%0) ;"
		"movq 56(%1), %%rdx ;"
		"mulx %%rdx, %%rax, %%rcx ; " /* a[7]^2 */
		"adcq %%rax, %%r13 ;"
		"movq %%r13, 112(%0) ;"
		"adcq %%rcx, %%r14 ;"
		"movq %%r14, 120(%0) ;"
		:
		: "r"(c), "r"(a)
		: "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
		  "%r11", "%r13", "%r14", "%r15");
}
1153
/*
 * Reduce two 512-bit values to 256 bits each, using mulx/adcx/adox:
 *   c[0..3] <- a[0..3]  + 38 * a[4..7]
 *   c[4..7] <- a[8..11] + 38 * a[12..15]
 * The overflow word of each sum is multiplied by 38 and added back, and a
 * final carry is folded in as one more 38. (2^256 = 38 mod 2^255-19.)
 */
static void red_eltfp25519_2w_adx(u64 *const c, const u64 *const a)
{
	asm volatile(
		/* ---- first element ---- */
		"movl $38, %%edx; " /* multiplier 38 stays in rdx throughout */
		"mulx 32(%1), %%r8, %%r10; " /* 38*a[4] */
		"xorl %%ebx, %%ebx ;" /* rbx = 0, clears CF and OF */
		"adox (%1), %%r8 ;"
		"mulx 40(%1), %%r9, %%r11; " /* 38*a[5] */
		"adcx %%r10, %%r9 ;"
		"adox 8(%1), %%r9 ;"
		"mulx 48(%1), %%r10, %%rax; " /* 38*a[6] */
		"adcx %%r11, %%r10 ;"
		"adox 16(%1), %%r10 ;"
		"mulx 56(%1), %%r11, %%rcx; " /* 38*a[7] */
		"adcx %%rax, %%r11 ;"
		"adox 24(%1), %%r11 ;"
		/* collect both carry chains into the overflow word rcx */
		"adcx %%rbx, %%rcx ;"
		"adox %%rbx, %%rcx ;"
		"imul %%rdx, %%rcx ;" /* 38 * overflow word */
		"adcx %%rcx, %%r8 ;"
		"adcx %%rbx, %%r9 ;"
		"movq %%r9, 8(%0) ;"
		"adcx %%rbx, %%r10 ;"
		"movq %%r10, 16(%0) ;"
		"adcx %%rbx, %%r11 ;"
		"movq %%r11, 24(%0) ;"
		/* a remaining carry is worth another 38 */
		"mov $0, %%ecx ;"
		"cmovc %%edx, %%ecx ;"
		"addq %%rcx, %%r8 ;"
		"movq %%r8, (%0) ;"

		/* ---- second element (rdx is still 38) ---- */
		"mulx 96(%1), %%r8, %%r10; " /* 38*a[12] */
		"xorl %%ebx, %%ebx ;" /* rbx = 0, clears CF and OF */
		"adox 64(%1), %%r8 ;"
		"mulx 104(%1), %%r9, %%r11; " /* 38*a[13] */
		"adcx %%r10, %%r9 ;"
		"adox 72(%1), %%r9 ;"
		"mulx 112(%1), %%r10, %%rax; " /* 38*a[14] */
		"adcx %%r11, %%r10 ;"
		"adox 80(%1), %%r10 ;"
		"mulx 120(%1), %%r11, %%rcx; " /* 38*a[15] */
		"adcx %%rax, %%r11 ;"
		"adox 88(%1), %%r11 ;"
		/* collect both carry chains into the overflow word rcx */
		"adcx %%rbx, %%rcx ;"
		"adox %%rbx, %%rcx ;"
		"imul %%rdx, %%rcx ;" /* 38 * overflow word */
		"adcx %%rcx, %%r8 ;"
		"adcx %%rbx, %%r9 ;"
		"movq %%r9, 40(%0) ;"
		"adcx %%rbx, %%r10 ;"
		"movq %%r10, 48(%0) ;"
		"adcx %%rbx, %%r11 ;"
		"movq %%r11, 56(%0) ;"
		/* a remaining carry is worth another 38 */
		"mov $0, %%ecx ;"
		"cmovc %%edx, %%ecx ;"
		"addq %%rcx, %%r8 ;"
		"movq %%r8, 32(%0) ;"
		:
		: "r"(c), "r"(a)
		: "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
		  "%r10", "%r11");
}
1218
/*
 * Reduce two 512-bit values to 256 bits each, using mulx and add/adc:
 *   c[0..3] <- a[0..3]  + 38 * a[4..7]
 *   c[4..7] <- a[8..11] + 38 * a[12..15]
 * The overflow word of each sum is multiplied by 38 and added back, and a
 * final carry is folded in as one more 38. (2^256 = 38 mod 2^255-19.)
 */
static void red_eltfp25519_2w_bmi2(u64 *const c, const u64 *const a)
{
	asm volatile(
		/* ---- first element ---- */
		"movl $38, %%edx ; " /* multiplier 38 stays in rdx throughout */
		"mulx 32(%1), %%r8, %%r10 ;" /* 38*a[4] */
		"mulx 40(%1), %%r9, %%r11 ;" /* 38*a[5] */
		"addq %%r10, %%r9 ;"
		"mulx 48(%1), %%r10, %%rax ;" /* 38*a[6] */
		"adcq %%r11, %%r10 ;"
		"mulx 56(%1), %%r11, %%rcx ;" /* 38*a[7] */
		"adcq %%rax, %%r11 ;"
		/* rcx collects the overflow word */
		"adcq $0, %%rcx ;"
		"addq (%1), %%r8 ;"
		"adcq 8(%1), %%r9 ;"
		"adcq 16(%1), %%r10 ;"
		"adcq 24(%1), %%r11 ;"
		"adcq $0, %%rcx ;"
		"imul %%rdx, %%rcx ;" /* 38 * overflow word */
		"addq %%rcx, %%r8 ;"
		"adcq $0, %%r9 ;"
		"movq %%r9, 8(%0) ;"
		"adcq $0, %%r10 ;"
		"movq %%r10, 16(%0) ;"
		"adcq $0, %%r11 ;"
		"movq %%r11, 24(%0) ;"
		/* a remaining carry is worth another 38 */
		"mov $0, %%ecx ;"
		"cmovc %%edx, %%ecx ;"
		"addq %%rcx, %%r8 ;"
		"movq %%r8, (%0) ;"

		/* ---- second element (rdx is still 38) ---- */
		"mulx 96(%1), %%r8, %%r10 ;" /* 38*a[12] */
		"mulx 104(%1), %%r9, %%r11 ;" /* 38*a[13] */
		"addq %%r10, %%r9 ;"
		"mulx 112(%1), %%r10, %%rax ;" /* 38*a[14] */
		"adcq %%r11, %%r10 ;"
		"mulx 120(%1), %%r11, %%rcx ;" /* 38*a[15] */
		"adcq %%rax, %%r11 ;"
		/* rcx collects the overflow word */
		"adcq $0, %%rcx ;"
		"addq 64(%1), %%r8 ;"
		"adcq 72(%1), %%r9 ;"
		"adcq 80(%1), %%r10 ;"
		"adcq 88(%1), %%r11 ;"
		"adcq $0, %%rcx ;"
		"imul %%rdx, %%rcx ;" /* 38 * overflow word */
		"addq %%rcx, %%r8 ;"
		"adcq $0, %%r9 ;"
		"movq %%r9, 40(%0) ;"
		"adcq $0, %%r10 ;"
		"movq %%r10, 48(%0) ;"
		"adcq $0, %%r11 ;"
		"movq %%r11, 56(%0) ;"
		/* a remaining carry is worth another 38 */
		"mov $0, %%ecx ;"
		"cmovc %%edx, %%ecx ;"
		"addq %%rcx, %%r8 ;"
		"movq %%r8, 32(%0) ;"
		:
		: "r"(c), "r"(a)
		: "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
		  "%r11");
}
1281
/*
 * 256x256 -> 512-bit integer multiplication using mulx/adcx/adox:
 *   c[0..7] = a[0..3] * b[0..3]
 * Partial results for the next row are kept in r15/r14/rax and in the
 * already-stored words of c, which are re-read (8(%0), 16(%0), 24(%0)).
 */
static void mul_256x256_integer_adx(u64 *const c, const u64 *const a,
				    const u64 *const b)
{
	asm volatile(
		/* ---- row a[0] ---- */
		"movq (%1), %%rdx; " /* a[0] */
		"mulx (%2), %%r8, %%r9; " /* a[0]*b[0] */
		"xorl %%r10d, %%r10d ;" /* clears CF and OF */
		"movq %%r8, (%0) ;"
		"mulx 8(%2), %%r10, %%r11; " /* a[0]*b[1] */
		"adox %%r9, %%r10 ;"
		"movq %%r10, 8(%0) ;"
		"mulx 16(%2), %%r15, %%r13; " /* a[0]*b[2] */
		"adox %%r11, %%r15 ;"
		"mulx 24(%2), %%r14, %%rdx; " /* a[0]*b[3] */
		"adox %%r13, %%r14 ;"
		"movq $0, %%rax ;"
		/* rax = top word of the row plus carry */
		"adox %%rdx, %%rax ;"

		/* ---- row a[1] ---- */
		"movq 8(%1), %%rdx; " /* a[1] */
		"mulx (%2), %%r8, %%r9; " /* a[1]*b[0] */
		"xorl %%r10d, %%r10d ;" /* clears CF and OF */
		"adcx 8(%0), %%r8 ;"
		"movq %%r8, 8(%0) ;"
		"mulx 8(%2), %%r10, %%r11; " /* a[1]*b[1] */
		"adox %%r9, %%r10 ;"
		"adcx %%r15, %%r10 ;"
		"movq %%r10, 16(%0) ;"
		"mulx 16(%2), %%r15, %%r13; " /* a[1]*b[2] */
		"adox %%r11, %%r15 ;"
		"adcx %%r14, %%r15 ;"
		"movq $0, %%r8 ;"
		"mulx 24(%2), %%r14, %%rdx; " /* a[1]*b[3] */
		"adox %%r13, %%r14 ;"
		"adcx %%rax, %%r14 ;"
		"movq $0, %%rax ;"
		/* rax = top word of the row plus both carries */
		"adox %%rdx, %%rax ;"
		"adcx %%r8, %%rax ;"

		/* ---- row a[2] ---- */
		"movq 16(%1), %%rdx; " /* a[2] */
		"mulx (%2), %%r8, %%r9; " /* a[2]*b[0] */
		"xorl %%r10d, %%r10d ;" /* clears CF and OF */
		"adcx 16(%0), %%r8 ;"
		"movq %%r8, 16(%0) ;"
		"mulx 8(%2), %%r10, %%r11; " /* a[2]*b[1] */
		"adox %%r9, %%r10 ;"
		"adcx %%r15, %%r10 ;"
		"movq %%r10, 24(%0) ;"
		"mulx 16(%2), %%r15, %%r13; " /* a[2]*b[2] */
		"adox %%r11, %%r15 ;"
		"adcx %%r14, %%r15 ;"
		"movq $0, %%r8 ;"
		"mulx 24(%2), %%r14, %%rdx; " /* a[2]*b[3] */
		"adox %%r13, %%r14 ;"
		"adcx %%rax, %%r14 ;"
		"movq $0, %%rax ;"
		/* rax = top word of the row plus both carries */
		"adox %%rdx, %%rax ;"
		"adcx %%r8, %%rax ;"

		/* ---- row a[3]: also stores the upper words ---- */
		"movq 24(%1), %%rdx; " /* a[3] */
		"mulx (%2), %%r8, %%r9; " /* a[3]*b[0] */
		"xorl %%r10d, %%r10d ;" /* clears CF and OF */
		"adcx 24(%0), %%r8 ;"
		"movq %%r8, 24(%0) ;"
		"mulx 8(%2), %%r10, %%r11; " /* a[3]*b[1] */
		"adox %%r9, %%r10 ;"
		"adcx %%r15, %%r10 ;"
		"movq %%r10, 32(%0) ;"
		"mulx 16(%2), %%r15, %%r13; " /* a[3]*b[2] */
		"adox %%r11, %%r15 ;"
		"adcx %%r14, %%r15 ;"
		"movq %%r15, 40(%0) ;"
		"movq $0, %%r8 ;"
		"mulx 24(%2), %%r14, %%rdx; " /* a[3]*b[3] */
		"adox %%r13, %%r14 ;"
		"adcx %%rax, %%r14 ;"
		"movq %%r14, 48(%0) ;"
		"movq $0, %%rax ;"
		/* rax = most-significant word of the product */
		"adox %%rdx, %%rax ;"
		"adcx %%r8, %%rax ;"
		"movq %%rax, 56(%0) ;"
		:
		: "r"(c), "r"(a), "r"(b)
		: "memory", "cc", "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11",
		  "%r13", "%r14", "%r15");
}
1371
/*
 * 256x256 -> 512-bit integer multiplication using mulx and a plain add/adc
 * carry chain (no adcx/adox):
 *   c[0..7] = a[0..3] * b[0..3]
 * Operands are arrays of 64-bit words, least-significant word first.
 */
static void mul_256x256_integer_bmi2(u64 *const c, const u64 *const a,
				     const u64 *const b)
{
	asm volatile(
		/* ---- row a[0] ---- */
		"movq (%1), %%rdx; " /* a[0] */
		"mulx (%2), %%r8, %%r15; " /* a[0]*b[0] */
		"movq %%r8, (%0) ;"
		"mulx 8(%2), %%r10, %%rax; " /* a[0]*b[1] */
		"addq %%r10, %%r15 ;"
		"mulx 16(%2), %%r8, %%rbx; " /* a[0]*b[2] */
		"adcq %%r8, %%rax ;"
		"mulx 24(%2), %%r10, %%rcx; " /* a[0]*b[3] */
		"adcq %%r10, %%rbx ;"
		/* fold the last carry into the top word of the row */
		"adcq $0, %%rcx ;"

		/* ---- row a[1] ---- */
		"movq 8(%1), %%rdx; " /* a[1] */
		"mulx (%2), %%r8, %%r9; " /* a[1]*b[0] */
		"addq %%r15, %%r8 ;"
		"movq %%r8, 8(%0) ;"
		"mulx 8(%2), %%r10, %%r11; " /* a[1]*b[1] */
		"adcq %%r10, %%r9 ;"
		"mulx 16(%2), %%r8, %%r13; " /* a[1]*b[2] */
		"adcq %%r8, %%r11 ;"
		"mulx 24(%2), %%r10, %%r15; " /* a[1]*b[3] */
		"adcq %%r10, %%r13 ;"

		"adcq $0, %%r15 ;"

		/* accumulate this row into the running sum */
		"addq %%r9, %%rax ;"
		"adcq %%r11, %%rbx ;"
		"adcq %%r13, %%rcx ;"
		"adcq $0, %%r15 ;"

		/* ---- row a[2] ---- */
		"movq 16(%1), %%rdx; " /* a[2] */
		"mulx (%2), %%r8, %%r9; " /* a[2]*b[0] */
		"addq %%rax, %%r8 ;"
		"movq %%r8, 16(%0) ;"
		"mulx 8(%2), %%r10, %%r11; " /* a[2]*b[1] */
		"adcq %%r10, %%r9 ;"
		"mulx 16(%2), %%r8, %%r13; " /* a[2]*b[2] */
		"adcq %%r8, %%r11 ;"
		"mulx 24(%2), %%r10, %%rax; " /* a[2]*b[3] */
		"adcq %%r10, %%r13 ;"

		"adcq $0, %%rax ;"

		"addq %%r9, %%rbx ;"
		"adcq %%r11, %%rcx ;"
		"adcq %%r13, %%r15 ;"
		"adcq $0, %%rax ;"

		/* ---- row a[3] ---- */
		"movq 24(%1), %%rdx; " /* a[3] */
		"mulx (%2), %%r8, %%r9; " /* a[3]*b[0] */
		"addq %%rbx, %%r8 ;"
		"movq %%r8, 24(%0) ;"
		"mulx 8(%2), %%r10, %%r11; " /* a[3]*b[1] */
		"adcq %%r10, %%r9 ;"
		"mulx 16(%2), %%r8, %%r13; " /* a[3]*b[2] */
		"adcq %%r8, %%r11 ;"
		"mulx 24(%2), %%r10, %%rbx; " /* a[3]*b[3] */
		"adcq %%r10, %%r13 ;"

		"adcq $0, %%rbx ;"

		/* store the upper four words of the product */
		"addq %%r9, %%rcx ;"
		"movq %%rcx, 32(%0) ;"
		"adcq %%r11, %%r15 ;"
		"movq %%r15, 40(%0) ;"
		"adcq %%r13, %%rax ;"
		"movq %%rax, 48(%0) ;"
		"adcq $0, %%rbx ;"
		"movq %%rbx, 56(%0) ;"
		:
		: "r"(c), "r"(a), "r"(b)
		: "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
		  "%r10", "%r11", "%r13", "%r15");
}
1450
/*
 * 256-bit squaring using mulx/adcx/adox:
 *   c[0..7] = a[0..3]^2
 * Sums the cross products a[i]*a[j] (i < j), doubles that sum, and then
 * adds the squares a[i]^2 while storing the result.
 */
static void sqr_256x256_integer_adx(u64 *const c, const u64 *const a)
{
	asm volatile(
		/* ---- cross products ---- */
		"movq (%1), %%rdx ;" /* a[0] */
		"mulx 8(%1), %%r8, %%r14 ;" /* a[1]*a[0] */
		"xorl %%r15d, %%r15d;" /* r15 = 0, clears CF and OF */
		"mulx 16(%1), %%r9, %%r10 ;" /* a[2]*a[0] */
		"adcx %%r14, %%r9 ;"
		"mulx 24(%1), %%rax, %%rcx ;" /* a[3]*a[0] */
		"adcx %%rax, %%r10 ;"
		"movq 24(%1), %%rdx ;" /* a[3] */
		"mulx 8(%1), %%r11, %%rbx ;" /* a[1]*a[3] */
		"adcx %%rcx, %%r11 ;"
		"mulx 16(%1), %%rax, %%r13 ;" /* a[2]*a[3] */
		"adcx %%rax, %%rbx ;"
		"movq 8(%1), %%rdx ;" /* a[1] */
		"adcx %%r15, %%r13 ;"
		"mulx 16(%1), %%rax, %%rcx ;" /* a[2]*a[1] */
		"movq $0, %%r14 ;"

		"adcx %%r15, %%r14 ;"

		/* add a[2]*a[1] (adox chain) while doubling (adcx chain) */
		"xorl %%r15d, %%r15d;"
		"adox %%rax, %%r10 ;"
		"adcx %%r8, %%r8 ;"
		"adox %%rcx, %%r11 ;"
		"adcx %%r9, %%r9 ;"
		"adox %%r15, %%rbx ;"
		"adcx %%r10, %%r10 ;"
		"adox %%r15, %%r13 ;"
		"adcx %%r11, %%r11 ;"
		"adox %%r15, %%r14 ;"
		"adcx %%rbx, %%rbx ;"
		"adcx %%r13, %%r13 ;"
		"adcx %%r14, %%r14 ;"

		/* add the squares and store c[0..7] */
		"movq (%1), %%rdx ;"
		"mulx %%rdx, %%rax, %%rcx ;" /* a[0]^2 */

		"movq %%rax, 0(%0) ;"
		"addq %%rcx, %%r8 ;"
		"movq %%r8, 8(%0) ;"
		"movq 8(%1), %%rdx ;"
		"mulx %%rdx, %%rax, %%rcx ;" /* a[1]^2 */
		"adcq %%rax, %%r9 ;"
		"movq %%r9, 16(%0) ;"
		"adcq %%rcx, %%r10 ;"
		"movq %%r10, 24(%0) ;"
		"movq 16(%1), %%rdx ;"
		"mulx %%rdx, %%rax, %%rcx ;" /* a[2]^2 */
		"adcq %%rax, %%r11 ;"
		"movq %%r11, 32(%0) ;"
		"adcq %%rcx, %%rbx ;"
		"movq %%rbx, 40(%0) ;"
		"movq 24(%1), %%rdx ;"
		"mulx %%rdx, %%rax, %%rcx ;" /* a[3]^2 */
		"adcq %%rax, %%r13 ;"
		"movq %%r13, 48(%0) ;"
		"adcq %%rcx, %%r14 ;"
		"movq %%r14, 56(%0) ;"
		:
		: "r"(c), "r"(a)
		: "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
		  "%r10", "%r11", "%r13", "%r14", "%r15");
}
1516
/*
 * 256-bit squaring using mulx with add/adc and shld:
 *   c[0..7] = a[0..3]^2
 * Sums the cross products a[i]*a[j] (i < j), doubles the sum with a one-bit
 * shld/shl chain, then adds the squares a[i]^2 while storing the result.
 */
static void sqr_256x256_integer_bmi2(u64 *const c, const u64 *const a)
{
	asm volatile(
		/* ---- cross products ---- */
		"movq 8(%1), %%rdx ;" /* a[1] */
		"mulx (%1), %%r8, %%r9 ;" /* a[0]*a[1] */
		"mulx 16(%1), %%r10, %%r11 ;" /* a[2]*a[1] */
		"mulx 24(%1), %%rcx, %%r14 ;" /* a[3]*a[1] */

		"movq 16(%1), %%rdx ;" /* a[2] */
		"mulx 24(%1), %%r15, %%r13 ;" /* a[3]*a[2] */
		"mulx (%1), %%rax, %%rdx ;" /* a[0]*a[2] */

		"addq %%rax, %%r9 ;"
		"adcq %%rdx, %%r10 ;"
		"adcq %%rcx, %%r11 ;"
		"adcq %%r14, %%r15 ;"
		"adcq $0, %%r13 ;"
		"movq $0, %%r14 ;"
		"adcq $0, %%r14 ;"

		"movq (%1), %%rdx ;" /* a[0] */
		"mulx 24(%1), %%rax, %%rcx ;" /* a[0]*a[3] */

		"addq %%rax, %%r10 ;"
		"adcq %%rcx, %%r11 ;"
		"adcq $0, %%r15 ;"
		"adcq $0, %%r13 ;"
		"adcq $0, %%r14 ;"

		/* double the cross-product sum (shift left by one bit) */
		"shldq $1, %%r13, %%r14 ;"
		"shldq $1, %%r15, %%r13 ;"
		"shldq $1, %%r11, %%r15 ;"
		"shldq $1, %%r10, %%r11 ;"
		"shldq $1, %%r9, %%r10 ;"
		"shldq $1, %%r8, %%r9 ;"
		"shlq $1, %%r8 ;"


		/* rdx still holds a[0] here */
		"mulx %%rdx, %%rax, %%rcx ;" /* a[0]^2 */

		"movq %%rax, 0(%0) ;"
		"addq %%rcx, %%r8 ;"
		"movq %%r8, 8(%0) ;"
		"movq 8(%1), %%rdx ;"
		"mulx %%rdx, %%rax, %%rcx ;" /* a[1]^2 */
		"adcq %%rax, %%r9 ;"
		"movq %%r9, 16(%0) ;"
		"adcq %%rcx, %%r10 ;"
		"movq %%r10, 24(%0) ;"
		"movq 16(%1), %%rdx ;"
		"mulx %%rdx, %%rax, %%rcx ;" /* a[2]^2 */
		"adcq %%rax, %%r11 ;"
		"movq %%r11, 32(%0) ;"
		"adcq %%rcx, %%r15 ;"
		"movq %%r15, 40(%0) ;"
		"movq 24(%1), %%rdx ;"
		"mulx %%rdx, %%rax, %%rcx ;" /* a[3]^2 */
		"adcq %%rax, %%r13 ;"
		"movq %%r13, 48(%0) ;"
		"adcq %%rcx, %%r14 ;"
		"movq %%r14, 56(%0) ;"
		:
		: "r"(c), "r"(a)
		: "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
		  "%r11", "%r13", "%r14", "%r15");
}
1583
/*
 * Reduce one 512-bit value to 256 bits, using mulx/adcx/adox:
 *   c[0..3] <- a[0..3] + 38 * a[4..7]
 * The overflow word of the sum is multiplied by 38 and added back, and a
 * final carry is folded in as one more 38. (2^256 = 38 mod 2^255-19.)
 */
static void red_eltfp25519_1w_adx(u64 *const c, const u64 *const a)
{
	asm volatile(
		"movl $38, %%edx ;" /* multiplier 38 stays in rdx */
		"mulx 32(%1), %%r8, %%r10 ;" /* 38*a[4] */
		"xorl %%ebx, %%ebx ;" /* rbx = 0, clears CF and OF */
		"adox (%1), %%r8 ;"
		"mulx 40(%1), %%r9, %%r11 ;" /* 38*a[5] */
		"adcx %%r10, %%r9 ;"
		"adox 8(%1), %%r9 ;"
		"mulx 48(%1), %%r10, %%rax ;" /* 38*a[6] */
		"adcx %%r11, %%r10 ;"
		"adox 16(%1), %%r10 ;"
		"mulx 56(%1), %%r11, %%rcx ;" /* 38*a[7] */
		"adcx %%rax, %%r11 ;"
		"adox 24(%1), %%r11 ;"
		/* collect both carry chains into the overflow word rcx */
		"adcx %%rbx, %%rcx ;"
		"adox %%rbx, %%rcx ;"
		"imul %%rdx, %%rcx ;" /* 38 * overflow word */
		"adcx %%rcx, %%r8 ;"
		"adcx %%rbx, %%r9 ;"
		"movq %%r9, 8(%0) ;"
		"adcx %%rbx, %%r10 ;"
		"movq %%r10, 16(%0) ;"
		"adcx %%rbx, %%r11 ;"
		"movq %%r11, 24(%0) ;"
		/* a remaining carry is worth another 38 */
		"mov $0, %%ecx ;"
		"cmovc %%edx, %%ecx ;"
		"addq %%rcx, %%r8 ;"
		"movq %%r8, (%0) ;"
		:
		: "r"(c), "r"(a)
		: "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
		  "%r10", "%r11");
}
1620
/*
 * Reduce one 512-bit value to 256 bits, using mulx and add/adc:
 *   c[0..3] <- a[0..3] + 38 * a[4..7]
 * The overflow word of the sum is multiplied by 38 and added back, and a
 * final carry is folded in as one more 38. (2^256 = 38 mod 2^255-19.)
 */
static void red_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a)
{
	asm volatile(
		"movl $38, %%edx ;" /* multiplier 38 stays in rdx */
		"mulx 32(%1), %%r8, %%r10 ;" /* 38*a[4] */
		"mulx 40(%1), %%r9, %%r11 ;" /* 38*a[5] */
		"addq %%r10, %%r9 ;"
		"mulx 48(%1), %%r10, %%rax ;" /* 38*a[6] */
		"adcq %%r11, %%r10 ;"
		"mulx 56(%1), %%r11, %%rcx ;" /* 38*a[7] */
		"adcq %%rax, %%r11 ;"
		/* rcx collects the overflow word */
		"adcq $0, %%rcx ;"
		"addq (%1), %%r8 ;"
		"adcq 8(%1), %%r9 ;"
		"adcq 16(%1), %%r10 ;"
		"adcq 24(%1), %%r11 ;"
		"adcq $0, %%rcx ;"
		"imul %%rdx, %%rcx ;" /* 38 * overflow word */
		"addq %%rcx, %%r8 ;"
		"adcq $0, %%r9 ;"
		"movq %%r9, 8(%0) ;"
		"adcq $0, %%r10 ;"
		"movq %%r10, 16(%0) ;"
		"adcq $0, %%r11 ;"
		"movq %%r11, 24(%0) ;"
		/* a remaining carry is worth another 38 */
		"mov $0, %%ecx ;"
		"cmovc %%edx, %%ecx ;"
		"addq %%rcx, %%r8 ;"
		"movq %%r8, (%0) ;"
		:
		: "r"(c), "r"(a)
		: "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
		  "%r11");
}
1656
/*
 * Field addition (adcx flavour): c[0..3] <- a[0..3] + b[0..3].
 * A carry out of the 256-bit sum is folded back in as +38, and a carry out
 * of that correction is folded in as a second +38 on the low word.
 */
static __always_inline void
add_eltfp25519_1w_adx(u64 *const c, const u64 *const a, const u64 *const b)
{
	asm volatile(
		"mov $38, %%eax ;"
		"xorl %%ecx, %%ecx ;" /* rcx = 0, clears CF */
		"movq (%2), %%r8 ;"
		"adcx (%1), %%r8 ;"
		"movq 8(%2), %%r9 ;"
		"adcx 8(%1), %%r9 ;"
		"movq 16(%2), %%r10 ;"
		"adcx 16(%1), %%r10 ;"
		"movq 24(%2), %%r11 ;"
		"adcx 24(%1), %%r11 ;"
		"cmovc %%eax, %%ecx ;" /* rcx = carry ? 38 : 0 */
		"xorl %%eax, %%eax ;" /* rax = 0, clears CF */
		"adcx %%rcx, %%r8 ;"
		"adcx %%rax, %%r9 ;"
		"movq %%r9, 8(%0) ;"
		"adcx %%rax, %%r10 ;"
		"movq %%r10, 16(%0) ;"
		"adcx %%rax, %%r11 ;"
		"movq %%r11, 24(%0) ;"
		"mov $38, %%ecx ;"
		"cmovc %%ecx, %%eax ;" /* rax = carry ? 38 : 0 */
		"addq %%rax, %%r8 ;"
		"movq %%r8, (%0) ;"
		:
		: "r"(c), "r"(a), "r"(b)
		: "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11");
}
1688
/*
 * Field addition (add/adc flavour): c[0..3] <- a[0..3] + b[0..3].
 * A carry out of the 256-bit sum is folded back in as +38, and a carry out
 * of that correction is folded in as a second +38 on the low word.
 */
static __always_inline void
add_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a, const u64 *const b)
{
	asm volatile(
		"mov $38, %%eax ;"
		"movq (%2), %%r8 ;"
		"addq (%1), %%r8 ;"
		"movq 8(%2), %%r9 ;"
		"adcq 8(%1), %%r9 ;"
		"movq 16(%2), %%r10 ;"
		"adcq 16(%1), %%r10 ;"
		"movq 24(%2), %%r11 ;"
		"adcq 24(%1), %%r11 ;"
		"mov $0, %%ecx ;" /* mov leaves the carry flag intact */
		"cmovc %%eax, %%ecx ;" /* rcx = carry ? 38 : 0 */
		"addq %%rcx, %%r8 ;"
		"adcq $0, %%r9 ;"
		"movq %%r9, 8(%0) ;"
		"adcq $0, %%r10 ;"
		"movq %%r10, 16(%0) ;"
		"adcq $0, %%r11 ;"
		"movq %%r11, 24(%0) ;"
		"mov $0, %%ecx ;"
		"cmovc %%eax, %%ecx ;" /* rcx = carry ? 38 : 0 */
		"addq %%rcx, %%r8 ;"
		"movq %%r8, (%0) ;"
		:
		: "r"(c), "r"(a), "r"(b)
		: "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11");
}
1719
/*
 * Field subtraction: c[0..3] <- a[0..3] - b[0..3].
 * A borrow out of the 256-bit difference is compensated by subtracting 38,
 * and a borrow out of that correction by subtracting 38 from the low word
 * once more.
 */
static __always_inline void
sub_eltfp25519_1w(u64 *const c, const u64 *const a, const u64 *const b)
{
	asm volatile(
		"mov $38, %%eax ;"
		"movq (%1), %%r8 ;"
		"subq (%2), %%r8 ;"
		"movq 8(%1), %%r9 ;"
		"sbbq 8(%2), %%r9 ;"
		"movq 16(%1), %%r10 ;"
		"sbbq 16(%2), %%r10 ;"
		"movq 24(%1), %%r11 ;"
		"sbbq 24(%2), %%r11 ;"
		"mov $0, %%ecx ;" /* mov leaves the carry flag intact */
		"cmovc %%eax, %%ecx ;" /* rcx = borrow ? 38 : 0 */
		"subq %%rcx, %%r8 ;"
		"sbbq $0, %%r9 ;"
		"movq %%r9, 8(%0) ;"
		"sbbq $0, %%r10 ;"
		"movq %%r10, 16(%0) ;"
		"sbbq $0, %%r11 ;"
		"movq %%r11, 24(%0) ;"
		"mov $0, %%ecx ;"
		"cmovc %%eax, %%ecx ;" /* rcx = borrow ? 38 : 0 */
		"subq %%rcx, %%r8 ;"
		"movq %%r8, (%0) ;"
		:
		: "r"(c), "r"(a), "r"(b)
		: "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11");
}
1750
1751
/*
 * Multiply a field element by the constant a24 = 121666:
 *   c[0..3] <- 121666 * a[0..3]
 * The fifth (overflow) word of the product is multiplied by 38 and added
 * back, and a final carry is folded in as one more 38.
 */
static __always_inline void
mul_a24_eltfp25519_1w(u64 *const c, const u64 *const a)
{
	const u64 a24 = 121666;
	asm volatile(
		"movq %2, %%rdx ;" /* rdx = a24 */
		"mulx (%1), %%r8, %%r10 ;" /* a24*a[0] */
		"mulx 8(%1), %%r9, %%r11 ;" /* a24*a[1] */
		"addq %%r10, %%r9 ;"
		"mulx 16(%1), %%r10, %%rax ;" /* a24*a[2] */
		"adcq %%r11, %%r10 ;"
		"mulx 24(%1), %%r11, %%rcx ;" /* a24*a[3] */
		"adcq %%rax, %%r11 ;"
		/* rcx = overflow word of the 5-word product */
		"adcq $0, %%rcx ;"
		"movl $38, %%edx ;"
		"imul %%rdx, %%rcx ;" /* 38 * overflow word */
		"addq %%rcx, %%r8 ;"
		"adcq $0, %%r9 ;"
		"movq %%r9, 8(%0) ;"
		"adcq $0, %%r10 ;"
		"movq %%r10, 16(%0) ;"
		"adcq $0, %%r11 ;"
		"movq %%r11, 24(%0) ;"
		/* a remaining carry is worth another 38 */
		"mov $0, %%ecx ;"
		"cmovc %%edx, %%ecx ;"
		"addq %%rcx, %%r8 ;"
		"movq %%r8, (%0) ;"
		:
		: "r"(c), "r"(a), "r"(a24)
		: "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
		  "%r11");
}
1785
1786static void inv_eltfp25519_1w_adx(u64 *const c, const u64 *const a)
1787{
1788 struct {
1789 eltfp25519_1w_buffer buffer;
1790 eltfp25519_1w x0, x1, x2;
1791 } __aligned(32) m;
1792 u64 *T[4];
1793
1794 T[0] = m.x0;
1795 T[1] = c;
1796 T[2] = m.x1;
1797 T[3] = m.x2;
1798
1799 copy_eltfp25519_1w(T[1], a);
1800 sqrn_eltfp25519_1w_adx(T[1], 1);
1801 copy_eltfp25519_1w(T[2], T[1]);
1802 sqrn_eltfp25519_1w_adx(T[2], 2);
1803 mul_eltfp25519_1w_adx(T[0], a, T[2]);
1804 mul_eltfp25519_1w_adx(T[1], T[1], T[0]);
1805 copy_eltfp25519_1w(T[2], T[1]);
1806 sqrn_eltfp25519_1w_adx(T[2], 1);
1807 mul_eltfp25519_1w_adx(T[0], T[0], T[2]);
1808 copy_eltfp25519_1w(T[2], T[0]);
1809 sqrn_eltfp25519_1w_adx(T[2], 5);
1810 mul_eltfp25519_1w_adx(T[0], T[0], T[2]);
1811 copy_eltfp25519_1w(T[2], T[0]);
1812 sqrn_eltfp25519_1w_adx(T[2], 10);
1813 mul_eltfp25519_1w_adx(T[2], T[2], T[0]);
1814 copy_eltfp25519_1w(T[3], T[2]);
1815 sqrn_eltfp25519_1w_adx(T[3], 20);
1816 mul_eltfp25519_1w_adx(T[3], T[3], T[2]);
1817 sqrn_eltfp25519_1w_adx(T[3], 10);
1818 mul_eltfp25519_1w_adx(T[3], T[3], T[0]);
1819 copy_eltfp25519_1w(T[0], T[3]);
1820 sqrn_eltfp25519_1w_adx(T[0], 50);
1821 mul_eltfp25519_1w_adx(T[0], T[0], T[3]);
1822 copy_eltfp25519_1w(T[2], T[0]);
1823 sqrn_eltfp25519_1w_adx(T[2], 100);
1824 mul_eltfp25519_1w_adx(T[2], T[2], T[0]);
1825 sqrn_eltfp25519_1w_adx(T[2], 50);
1826 mul_eltfp25519_1w_adx(T[2], T[2], T[3]);
1827 sqrn_eltfp25519_1w_adx(T[2], 5);
1828 mul_eltfp25519_1w_adx(T[1], T[1], T[2]);
1829
1830 memzero_explicit(&m, sizeof(m));
1831}
1832
1833static void inv_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a)
1834{
1835 struct {
1836 eltfp25519_1w_buffer buffer;
1837 eltfp25519_1w x0, x1, x2;
1838 } __aligned(32) m;
1839 u64 *T[5];
1840
1841 T[0] = m.x0;
1842 T[1] = c;
1843 T[2] = m.x1;
1844 T[3] = m.x2;
1845
1846 copy_eltfp25519_1w(T[1], a);
1847 sqrn_eltfp25519_1w_bmi2(T[1], 1);
1848 copy_eltfp25519_1w(T[2], T[1]);
1849 sqrn_eltfp25519_1w_bmi2(T[2], 2);
1850 mul_eltfp25519_1w_bmi2(T[0], a, T[2]);
1851 mul_eltfp25519_1w_bmi2(T[1], T[1], T[0]);
1852 copy_eltfp25519_1w(T[2], T[1]);
1853 sqrn_eltfp25519_1w_bmi2(T[2], 1);
1854 mul_eltfp25519_1w_bmi2(T[0], T[0], T[2]);
1855 copy_eltfp25519_1w(T[2], T[0]);
1856 sqrn_eltfp25519_1w_bmi2(T[2], 5);
1857 mul_eltfp25519_1w_bmi2(T[0], T[0], T[2]);
1858 copy_eltfp25519_1w(T[2], T[0]);
1859 sqrn_eltfp25519_1w_bmi2(T[2], 10);
1860 mul_eltfp25519_1w_bmi2(T[2], T[2], T[0]);
1861 copy_eltfp25519_1w(T[3], T[2]);
1862 sqrn_eltfp25519_1w_bmi2(T[3], 20);
1863 mul_eltfp25519_1w_bmi2(T[3], T[3], T[2]);
1864 sqrn_eltfp25519_1w_bmi2(T[3], 10);
1865 mul_eltfp25519_1w_bmi2(T[3], T[3], T[0]);
1866 copy_eltfp25519_1w(T[0], T[3]);
1867 sqrn_eltfp25519_1w_bmi2(T[0], 50);
1868 mul_eltfp25519_1w_bmi2(T[0], T[0], T[3]);
1869 copy_eltfp25519_1w(T[2], T[0]);
1870 sqrn_eltfp25519_1w_bmi2(T[2], 100);
1871 mul_eltfp25519_1w_bmi2(T[2], T[2], T[0]);
1872 sqrn_eltfp25519_1w_bmi2(T[2], 50);
1873 mul_eltfp25519_1w_bmi2(T[2], T[2], T[3]);
1874 sqrn_eltfp25519_1w_bmi2(T[2], 5);
1875 mul_eltfp25519_1w_bmi2(T[1], T[1], T[2]);
1876
1877 memzero_explicit(&m, sizeof(m));
1878}
1879
1880
1881
1882
/*
 * Final modular reduction of c[0..3] modulo 2^255-19, done in place and
 * without branches.
 *
 * Operands: %0..%3 are c[0..3], %4 is tmp0 (starts at 38), %5 is tmp1 (19).
 */
static __always_inline void fred_eltfp25519_1w(u64 *const c)
{
	u64 tmp0 = 38, tmp1 = 19;
	asm volatile(
		"btrq $63, %3 ;" /* CF = bit 255 of c, then clear that bit */
		"cmovncl %k5, %k4 ;" /* tmp0 = bit 255 was set ? 38 : 19 */

		/* add either 19 or 38 to c */
		"addq %4, %0 ;"
		"adcq $0, %1 ;"
		"adcq $0, %2 ;"
		"adcq $0, %3 ;"

		/* SF now reflects bit 255 of the sum (mov keeps the flags) */
		"movl $0, %k4 ;"
		"cmovnsl %k5, %k4 ;" /* tmp0 = bit 255 set ? 0 : 19 */
		"btrq $63, %3 ;" /* clear bit 255 */

		/* subtract the 19 again if bit 255 was not set */
		"subq %4, %0 ;"
		"sbbq $0, %1 ;"
		"sbbq $0, %2 ;"
		"sbbq $0, %3 ;"

		: "+r"(c[0]), "+r"(c[1]), "+r"(c[2]), "+r"(c[3]), "+r"(tmp0),
		  "+r"(tmp1)
		:
		: "memory", "cc");
}
1912
/*
 * Conditional swap of two 4-word values without branches: if bit is
 * non-zero, px[0..3] and py[0..3] are exchanged; otherwise both are left
 * unchanged. Selection is done with cmov on the zero flag set by "test".
 *
 * Operands: %0..%3 are px[0..3], %4..%7 are py[0..3], %8 is the scratch
 * register temp, %9 is bit.
 */
static __always_inline void cswap(u8 bit, u64 *const px, u64 *const py)
{
	u64 temp;
	asm volatile(
		"test %9, %9 ;" /* ZF = (bit == 0) */
		"movq %0, %8 ;" /* temp = px[0] */
		"cmovnzq %4, %0 ;" /* px[0] = py[0] if bit != 0 */
		"cmovnzq %8, %4 ;" /* py[0] = temp  if bit != 0 */
		"movq %1, %8 ;"
		"cmovnzq %5, %1 ;"
		"cmovnzq %8, %5 ;"
		"movq %2, %8 ;"
		"cmovnzq %6, %2 ;"
		"cmovnzq %8, %6 ;"
		"movq %3, %8 ;"
		"cmovnzq %7, %3 ;"
		"cmovnzq %8, %7 ;"
		: "+r"(px[0]), "+r"(px[1]), "+r"(px[2]), "+r"(px[3]),
		  "+r"(py[0]), "+r"(py[1]), "+r"(py[2]), "+r"(py[3]),
		  "=r"(temp)
		: "r"(bit)
		: "cc"
	);
}
1937
/*
 * Conditional copy of a 4-word value without branches: if bit is non-zero,
 * px[0..3] <- py[0..3]; otherwise px is left unchanged. py is never written.
 *
 * Operands: %0..%3 are px[0..3], %4 is bit, %5..%8 are py[0..3].
 */
static __always_inline void cselect(u8 bit, u64 *const px, const u64 *const py)
{
	asm volatile(
		"test %4, %4 ;" /* ZF = (bit == 0) */
		"cmovnzq %5, %0 ;"
		"cmovnzq %6, %1 ;"
		"cmovnzq %7, %2 ;"
		"cmovnzq %8, %3 ;"
		: "+r"(px[0]), "+r"(px[1]), "+r"(px[2]), "+r"(px[3])
		: "r"(bit), "rm"(py[0]), "rm"(py[1]), "rm"(py[2]), "rm"(py[3])
		: "cc"
	);
}
1951
1952static void curve25519_adx(u8 shared[CURVE25519_KEY_SIZE],
1953 const u8 private_key[CURVE25519_KEY_SIZE],
1954 const u8 session_key[CURVE25519_KEY_SIZE])
1955{
1956 struct {
1957 u64 buffer[4 * NUM_WORDS_ELTFP25519];
1958 u64 coordinates[4 * NUM_WORDS_ELTFP25519];
1959 u64 workspace[6 * NUM_WORDS_ELTFP25519];
1960 u8 session[CURVE25519_KEY_SIZE];
1961 u8 private[CURVE25519_KEY_SIZE];
1962 } __aligned(32) m;
1963
1964 int i = 0, j = 0;
1965 u64 prev = 0;
1966 u64 *const X1 = (u64 *)m.session;
1967 u64 *const key = (u64 *)m.private;
1968 u64 *const Px = m.coordinates + 0;
1969 u64 *const Pz = m.coordinates + 4;
1970 u64 *const Qx = m.coordinates + 8;
1971 u64 *const Qz = m.coordinates + 12;
1972 u64 *const X2 = Qx;
1973 u64 *const Z2 = Qz;
1974 u64 *const X3 = Px;
1975 u64 *const Z3 = Pz;
1976 u64 *const X2Z2 = Qx;
1977 u64 *const X3Z3 = Px;
1978
1979 u64 *const A = m.workspace + 0;
1980 u64 *const B = m.workspace + 4;
1981 u64 *const D = m.workspace + 8;
1982 u64 *const C = m.workspace + 12;
1983 u64 *const DA = m.workspace + 16;
1984 u64 *const CB = m.workspace + 20;
1985 u64 *const AB = A;
1986 u64 *const DC = D;
1987 u64 *const DACB = DA;
1988
1989 memcpy(m.private, private_key, sizeof(m.private));
1990 memcpy(m.session, session_key, sizeof(m.session));
1991
1992 curve25519_clamp_secret(m.private);
1993
1994
1995
1996
1997
1998
1999
2000
2001 m.session[CURVE25519_KEY_SIZE - 1] &= (1 << (255 % 8)) - 1;
2002
2003 copy_eltfp25519_1w(Px, X1);
2004 setzero_eltfp25519_1w(Pz);
2005 setzero_eltfp25519_1w(Qx);
2006 setzero_eltfp25519_1w(Qz);
2007
2008 Pz[0] = 1;
2009 Qx[0] = 1;
2010
2011
2012 prev = 0;
2013 j = 62;
2014 for (i = 3; i >= 0; --i) {
2015 while (j >= 0) {
2016 u64 bit = (key[i] >> j) & 0x1;
2017 u64 swap = bit ^ prev;
2018 prev = bit;
2019
2020 add_eltfp25519_1w_adx(A, X2, Z2);
2021 sub_eltfp25519_1w(B, X2, Z2);
2022 add_eltfp25519_1w_adx(C, X3, Z3);
2023 sub_eltfp25519_1w(D, X3, Z3);
2024 mul_eltfp25519_2w_adx(DACB, AB, DC);
2025
2026 cselect(swap, A, C);
2027 cselect(swap, B, D);
2028
2029 sqr_eltfp25519_2w_adx(AB);
2030 add_eltfp25519_1w_adx(X3, DA, CB);
2031 sub_eltfp25519_1w(Z3, DA, CB);
2032 sqr_eltfp25519_2w_adx(X3Z3);
2033
2034 copy_eltfp25519_1w(X2, B);
2035 sub_eltfp25519_1w(Z2, A, B);
2036
2037 mul_a24_eltfp25519_1w(B, Z2);
2038 add_eltfp25519_1w_adx(B, B, X2);
2039 mul_eltfp25519_2w_adx(X2Z2, X2Z2, AB);
2040 mul_eltfp25519_1w_adx(Z3, Z3, X1);
2041 --j;
2042 }
2043 j = 63;
2044 }
2045
2046 inv_eltfp25519_1w_adx(A, Qz);
2047 mul_eltfp25519_1w_adx((u64 *)shared, Qx, A);
2048 fred_eltfp25519_1w((u64 *)shared);
2049
2050 memzero_explicit(&m, sizeof(m));
2051}
2052
2053static void curve25519_adx_base(u8 session_key[CURVE25519_KEY_SIZE],
2054 const u8 private_key[CURVE25519_KEY_SIZE])
2055{
2056 struct {
2057 u64 buffer[4 * NUM_WORDS_ELTFP25519];
2058 u64 coordinates[4 * NUM_WORDS_ELTFP25519];
2059 u64 workspace[4 * NUM_WORDS_ELTFP25519];
2060 u8 private[CURVE25519_KEY_SIZE];
2061 } __aligned(32) m;
2062
2063 const int ite[4] = { 64, 64, 64, 63 };
2064 const int q = 3;
2065 u64 swap = 1;
2066
2067 int i = 0, j = 0, k = 0;
2068 u64 *const key = (u64 *)m.private;
2069 u64 *const Ur1 = m.coordinates + 0;
2070 u64 *const Zr1 = m.coordinates + 4;
2071 u64 *const Ur2 = m.coordinates + 8;
2072 u64 *const Zr2 = m.coordinates + 12;
2073
2074 u64 *const UZr1 = m.coordinates + 0;
2075 u64 *const ZUr2 = m.coordinates + 8;
2076
2077 u64 *const A = m.workspace + 0;
2078 u64 *const B = m.workspace + 4;
2079 u64 *const C = m.workspace + 8;
2080 u64 *const D = m.workspace + 12;
2081
2082 u64 *const AB = m.workspace + 0;
2083 u64 *const CD = m.workspace + 8;
2084
2085 const u64 *const P = table_ladder_8k;
2086
2087 memcpy(m.private, private_key, sizeof(m.private));
2088
2089 curve25519_clamp_secret(m.private);
2090
2091 setzero_eltfp25519_1w(Ur1);
2092 setzero_eltfp25519_1w(Zr1);
2093 setzero_eltfp25519_1w(Zr2);
2094 Ur1[0] = 1;
2095 Zr1[0] = 1;
2096 Zr2[0] = 1;
2097
2098
2099 Ur2[3] = 0x1eaecdeee27cab34UL;
2100 Ur2[2] = 0xadc7a0b9235d48e2UL;
2101 Ur2[1] = 0xbbf095ae14b2edf8UL;
2102 Ur2[0] = 0x7e94e1fec82faabdUL;
2103
2104
2105 j = q;
2106 for (i = 0; i < NUM_WORDS_ELTFP25519; ++i) {
2107 while (j < ite[i]) {
2108 u64 bit = (key[i] >> j) & 0x1;
2109 k = (64 * i + j - q);
2110 swap = swap ^ bit;
2111 cswap(swap, Ur1, Ur2);
2112 cswap(swap, Zr1, Zr2);
2113 swap = bit;
2114
2115 sub_eltfp25519_1w(B, Ur1, Zr1);
2116 add_eltfp25519_1w_adx(A, Ur1, Zr1);
2117 mul_eltfp25519_1w_adx(C, &P[4 * k], B);
2118 sub_eltfp25519_1w(B, A, C);
2119 add_eltfp25519_1w_adx(A, A, C);
2120 sqr_eltfp25519_2w_adx(AB);
2121 mul_eltfp25519_2w_adx(UZr1, ZUr2, AB);
2122 ++j;
2123 }
2124 j = 0;
2125 }
2126
2127
2128 for (i = 0; i < q; ++i) {
2129 add_eltfp25519_1w_adx(A, Ur1, Zr1);
2130 sub_eltfp25519_1w(B, Ur1, Zr1);
2131 sqr_eltfp25519_2w_adx(AB);
2132 copy_eltfp25519_1w(C, B);
2133 sub_eltfp25519_1w(B, A, B);
2134 mul_a24_eltfp25519_1w(D, B);
2135 add_eltfp25519_1w_adx(D, D, C);
2136 mul_eltfp25519_2w_adx(UZr1, AB, CD);
2137 }
2138
2139
2140 inv_eltfp25519_1w_adx(A, Zr1);
2141 mul_eltfp25519_1w_adx((u64 *)session_key, Ur1, A);
2142 fred_eltfp25519_1w((u64 *)session_key);
2143
2144 memzero_explicit(&m, sizeof(m));
2145}
2146
2147static void curve25519_bmi2(u8 shared[CURVE25519_KEY_SIZE],
2148 const u8 private_key[CURVE25519_KEY_SIZE],
2149 const u8 session_key[CURVE25519_KEY_SIZE])
2150{
2151 struct {
2152 u64 buffer[4 * NUM_WORDS_ELTFP25519];
2153 u64 coordinates[4 * NUM_WORDS_ELTFP25519];
2154 u64 workspace[6 * NUM_WORDS_ELTFP25519];
2155 u8 session[CURVE25519_KEY_SIZE];
2156 u8 private[CURVE25519_KEY_SIZE];
2157 } __aligned(32) m;
2158
2159 int i = 0, j = 0;
2160 u64 prev = 0;
2161 u64 *const X1 = (u64 *)m.session;
2162 u64 *const key = (u64 *)m.private;
2163 u64 *const Px = m.coordinates + 0;
2164 u64 *const Pz = m.coordinates + 4;
2165 u64 *const Qx = m.coordinates + 8;
2166 u64 *const Qz = m.coordinates + 12;
2167 u64 *const X2 = Qx;
2168 u64 *const Z2 = Qz;
2169 u64 *const X3 = Px;
2170 u64 *const Z3 = Pz;
2171 u64 *const X2Z2 = Qx;
2172 u64 *const X3Z3 = Px;
2173
2174 u64 *const A = m.workspace + 0;
2175 u64 *const B = m.workspace + 4;
2176 u64 *const D = m.workspace + 8;
2177 u64 *const C = m.workspace + 12;
2178 u64 *const DA = m.workspace + 16;
2179 u64 *const CB = m.workspace + 20;
2180 u64 *const AB = A;
2181 u64 *const DC = D;
2182 u64 *const DACB = DA;
2183
2184 memcpy(m.private, private_key, sizeof(m.private));
2185 memcpy(m.session, session_key, sizeof(m.session));
2186
2187 curve25519_clamp_secret(m.private);
2188
2189
2190
2191
2192
2193
2194
2195
2196 m.session[CURVE25519_KEY_SIZE - 1] &= (1 << (255 % 8)) - 1;
2197
2198 copy_eltfp25519_1w(Px, X1);
2199 setzero_eltfp25519_1w(Pz);
2200 setzero_eltfp25519_1w(Qx);
2201 setzero_eltfp25519_1w(Qz);
2202
2203 Pz[0] = 1;
2204 Qx[0] = 1;
2205
2206
2207 prev = 0;
2208 j = 62;
2209 for (i = 3; i >= 0; --i) {
2210 while (j >= 0) {
2211 u64 bit = (key[i] >> j) & 0x1;
2212 u64 swap = bit ^ prev;
2213 prev = bit;
2214
2215 add_eltfp25519_1w_bmi2(A, X2, Z2);
2216 sub_eltfp25519_1w(B, X2, Z2);
2217 add_eltfp25519_1w_bmi2(C, X3, Z3);
2218 sub_eltfp25519_1w(D, X3, Z3);
2219 mul_eltfp25519_2w_bmi2(DACB, AB, DC);
2220
2221 cselect(swap, A, C);
2222 cselect(swap, B, D);
2223
2224 sqr_eltfp25519_2w_bmi2(AB);
2225 add_eltfp25519_1w_bmi2(X3, DA, CB);
2226 sub_eltfp25519_1w(Z3, DA, CB);
2227 sqr_eltfp25519_2w_bmi2(X3Z3);
2228
2229 copy_eltfp25519_1w(X2, B);
2230 sub_eltfp25519_1w(Z2, A, B);
2231
2232 mul_a24_eltfp25519_1w(B, Z2);
2233 add_eltfp25519_1w_bmi2(B, B, X2);
2234 mul_eltfp25519_2w_bmi2(X2Z2, X2Z2, AB);
2235 mul_eltfp25519_1w_bmi2(Z3, Z3, X1);
2236 --j;
2237 }
2238 j = 63;
2239 }
2240
2241 inv_eltfp25519_1w_bmi2(A, Qz);
2242 mul_eltfp25519_1w_bmi2((u64 *)shared, Qx, A);
2243 fred_eltfp25519_1w((u64 *)shared);
2244
2245 memzero_explicit(&m, sizeof(m));
2246}
2247
2248static void curve25519_bmi2_base(u8 session_key[CURVE25519_KEY_SIZE],
2249 const u8 private_key[CURVE25519_KEY_SIZE])
2250{
2251 struct {
2252 u64 buffer[4 * NUM_WORDS_ELTFP25519];
2253 u64 coordinates[4 * NUM_WORDS_ELTFP25519];
2254 u64 workspace[4 * NUM_WORDS_ELTFP25519];
2255 u8 private[CURVE25519_KEY_SIZE];
2256 } __aligned(32) m;
2257
2258 const int ite[4] = { 64, 64, 64, 63 };
2259 const int q = 3;
2260 u64 swap = 1;
2261
2262 int i = 0, j = 0, k = 0;
2263 u64 *const key = (u64 *)m.private;
2264 u64 *const Ur1 = m.coordinates + 0;
2265 u64 *const Zr1 = m.coordinates + 4;
2266 u64 *const Ur2 = m.coordinates + 8;
2267 u64 *const Zr2 = m.coordinates + 12;
2268
2269 u64 *const UZr1 = m.coordinates + 0;
2270 u64 *const ZUr2 = m.coordinates + 8;
2271
2272 u64 *const A = m.workspace + 0;
2273 u64 *const B = m.workspace + 4;
2274 u64 *const C = m.workspace + 8;
2275 u64 *const D = m.workspace + 12;
2276
2277 u64 *const AB = m.workspace + 0;
2278 u64 *const CD = m.workspace + 8;
2279
2280 const u64 *const P = table_ladder_8k;
2281
2282 memcpy(m.private, private_key, sizeof(m.private));
2283
2284 curve25519_clamp_secret(m.private);
2285
2286 setzero_eltfp25519_1w(Ur1);
2287 setzero_eltfp25519_1w(Zr1);
2288 setzero_eltfp25519_1w(Zr2);
2289 Ur1[0] = 1;
2290 Zr1[0] = 1;
2291 Zr2[0] = 1;
2292
2293
2294 Ur2[3] = 0x1eaecdeee27cab34UL;
2295 Ur2[2] = 0xadc7a0b9235d48e2UL;
2296 Ur2[1] = 0xbbf095ae14b2edf8UL;
2297 Ur2[0] = 0x7e94e1fec82faabdUL;
2298
2299
2300 j = q;
2301 for (i = 0; i < NUM_WORDS_ELTFP25519; ++i) {
2302 while (j < ite[i]) {
2303 u64 bit = (key[i] >> j) & 0x1;
2304 k = (64 * i + j - q);
2305 swap = swap ^ bit;
2306 cswap(swap, Ur1, Ur2);
2307 cswap(swap, Zr1, Zr2);
2308 swap = bit;
2309
2310 sub_eltfp25519_1w(B, Ur1, Zr1);
2311 add_eltfp25519_1w_bmi2(A, Ur1, Zr1);
2312 mul_eltfp25519_1w_bmi2(C, &P[4 * k], B);
2313 sub_eltfp25519_1w(B, A, C);
2314 add_eltfp25519_1w_bmi2(A, A, C);
2315 sqr_eltfp25519_2w_bmi2(AB);
2316 mul_eltfp25519_2w_bmi2(UZr1, ZUr2, AB);
2317 ++j;
2318 }
2319 j = 0;
2320 }
2321
2322
2323 for (i = 0; i < q; ++i) {
2324 add_eltfp25519_1w_bmi2(A, Ur1, Zr1);
2325 sub_eltfp25519_1w(B, Ur1, Zr1);
2326 sqr_eltfp25519_2w_bmi2(AB);
2327 copy_eltfp25519_1w(C, B);
2328 sub_eltfp25519_1w(B, A, B);
2329 mul_a24_eltfp25519_1w(D, B);
2330 add_eltfp25519_1w_bmi2(D, D, C);
2331 mul_eltfp25519_2w_bmi2(UZr1, AB, CD);
2332 }
2333
2334
2335 inv_eltfp25519_1w_bmi2(A, Zr1);
2336 mul_eltfp25519_1w_bmi2((u64 *)session_key, Ur1, A);
2337 fred_eltfp25519_1w((u64 *)session_key);
2338
2339 memzero_explicit(&m, sizeof(m));
2340}
2341
2342void curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE],
2343 const u8 secret[CURVE25519_KEY_SIZE],
2344 const u8 basepoint[CURVE25519_KEY_SIZE])
2345{
2346 if (static_branch_likely(&curve25519_use_adx))
2347 curve25519_adx(mypublic, secret, basepoint);
2348 else if (static_branch_likely(&curve25519_use_bmi2))
2349 curve25519_bmi2(mypublic, secret, basepoint);
2350 else
2351 curve25519_generic(mypublic, secret, basepoint);
2352}
2353EXPORT_SYMBOL(curve25519_arch);
2354
2355void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE],
2356 const u8 secret[CURVE25519_KEY_SIZE])
2357{
2358 if (static_branch_likely(&curve25519_use_adx))
2359 curve25519_adx_base(pub, secret);
2360 else if (static_branch_likely(&curve25519_use_bmi2))
2361 curve25519_bmi2_base(pub, secret);
2362 else
2363 curve25519_generic(pub, secret, curve25519_base_point);
2364}
2365EXPORT_SYMBOL(curve25519_base_arch);
2366
2367static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf,
2368 unsigned int len)
2369{
2370 u8 *secret = kpp_tfm_ctx(tfm);
2371
2372 if (!len)
2373 curve25519_generate_secret(secret);
2374 else if (len == CURVE25519_KEY_SIZE &&
2375 crypto_memneq(buf, curve25519_null_point, CURVE25519_KEY_SIZE))
2376 memcpy(secret, buf, CURVE25519_KEY_SIZE);
2377 else
2378 return -EINVAL;
2379 return 0;
2380}
2381
2382static int curve25519_generate_public_key(struct kpp_request *req)
2383{
2384 struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
2385 const u8 *secret = kpp_tfm_ctx(tfm);
2386 u8 buf[CURVE25519_KEY_SIZE];
2387 int copied, nbytes;
2388
2389 if (req->src)
2390 return -EINVAL;
2391
2392 curve25519_base_arch(buf, secret);
2393
2394
2395 nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len);
2396 copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst,
2397 nbytes),
2398 buf, nbytes);
2399 if (copied != nbytes)
2400 return -EINVAL;
2401 return 0;
2402}
2403
2404static int curve25519_compute_shared_secret(struct kpp_request *req)
2405{
2406 struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
2407 const u8 *secret = kpp_tfm_ctx(tfm);
2408 u8 public_key[CURVE25519_KEY_SIZE];
2409 u8 buf[CURVE25519_KEY_SIZE];
2410 int copied, nbytes;
2411
2412 if (!req->src)
2413 return -EINVAL;
2414
2415 copied = sg_copy_to_buffer(req->src,
2416 sg_nents_for_len(req->src,
2417 CURVE25519_KEY_SIZE),
2418 public_key, CURVE25519_KEY_SIZE);
2419 if (copied != CURVE25519_KEY_SIZE)
2420 return -EINVAL;
2421
2422 curve25519_arch(buf, secret, public_key);
2423
2424
2425 nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len);
2426 copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst,
2427 nbytes),
2428 buf, nbytes);
2429 if (copied != nbytes)
2430 return -EINVAL;
2431 return 0;
2432}
2433
2434static unsigned int curve25519_max_size(struct crypto_kpp *tfm)
2435{
2436 return CURVE25519_KEY_SIZE;
2437}
2438
2439static struct kpp_alg curve25519_alg = {
2440 .base.cra_name = "curve25519",
2441 .base.cra_driver_name = "curve25519-x86",
2442 .base.cra_priority = 200,
2443 .base.cra_module = THIS_MODULE,
2444 .base.cra_ctxsize = CURVE25519_KEY_SIZE,
2445
2446 .set_secret = curve25519_set_secret,
2447 .generate_public_key = curve25519_generate_public_key,
2448 .compute_shared_secret = curve25519_compute_shared_secret,
2449 .max_size = curve25519_max_size,
2450};
2451
2452static int __init curve25519_mod_init(void)
2453{
2454 if (boot_cpu_has(X86_FEATURE_BMI2))
2455 static_branch_enable(&curve25519_use_bmi2);
2456 else if (boot_cpu_has(X86_FEATURE_ADX))
2457 static_branch_enable(&curve25519_use_adx);
2458 else
2459 return 0;
2460 return IS_REACHABLE(CONFIG_CRYPTO_KPP) ?
2461 crypto_register_kpp(&curve25519_alg) : 0;
2462}
2463
2464static void __exit curve25519_mod_exit(void)
2465{
2466 if (IS_REACHABLE(CONFIG_CRYPTO_KPP) &&
2467 (boot_cpu_has(X86_FEATURE_BMI2) || boot_cpu_has(X86_FEATURE_ADX)))
2468 crypto_unregister_kpp(&curve25519_alg);
2469}
2470
2471module_init(curve25519_mod_init);
2472module_exit(curve25519_mod_exit);
2473
2474MODULE_ALIAS_CRYPTO("curve25519");
2475MODULE_ALIAS_CRYPTO("curve25519-x86");
2476MODULE_LICENSE("GPL v2");
2477