1
2
3
4
5
6#include "tls.h"
7
8typedef uint8_t byte;
9typedef uint16_t word16;
10typedef uint32_t word32;
11#define XMEMSET memset
12
13#define F25519_SIZE CURVE25519_KEYSIZE
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43#if 0
44static void fprime_copy(byte *x, const byte *a)
45{
46 int i;
47 for (i = 0; i < F25519_SIZE; i++)
48 x[i] = a[i];
49}
50#endif
51
52static void lm_copy(byte* x, const byte* a)
53{
54 int i;
55 for (i = 0; i < F25519_SIZE; i++)
56 x[i] = a[i];
57}
58
59#if 0
60static void fprime_select(byte *dst, const byte *zero, const byte *one, byte condition)
61{
62 const byte mask = -condition;
63 int i;
64
65 for (i = 0; i < F25519_SIZE; i++)
66 dst[i] = zero[i] ^ (mask & (one[i] ^ zero[i]));
67}
68#endif
69
70static void fe_select(byte *dst,
71 const byte *zero, const byte *one,
72 byte condition)
73{
74 const byte mask = -condition;
75 int i;
76
77 for (i = 0; i < F25519_SIZE; i++)
78 dst[i] = zero[i] ^ (mask & (one[i] ^ zero[i]));
79}
80
81#if 0
82static void raw_add(byte *x, const byte *p)
83{
84 word16 c = 0;
85 int i;
86
87 for (i = 0; i < F25519_SIZE; i++) {
88 c += ((word16)x[i]) + ((word16)p[i]);
89 x[i] = (byte)c;
90 c >>= 8;
91 }
92}
93#endif
94
95#if 0
96static void raw_try_sub(byte *x, const byte *p)
97{
98 byte minusp[F25519_SIZE];
99 word16 c = 0;
100 int i;
101
102 for (i = 0; i < F25519_SIZE; i++) {
103 c = ((word16)x[i]) - ((word16)p[i]) - c;
104 minusp[i] = (byte)c;
105 c = (c >> 8) & 1;
106 }
107
108 fprime_select(x, minusp, x, (byte)c);
109}
110#endif
111
112#if 0
113static int prime_msb(const byte *p)
114{
115 int i;
116 byte x;
117 int shift = 1;
118 int z = F25519_SIZE - 1;
119
120
121
122
123
124 for (i = F25519_SIZE - 1; i >= 0; i--) {
125 shift &= ((shift ^ ((-p[i] | p[i]) >> 7)) & 1);
126 z -= shift;
127 }
128 x = p[z];
129 z <<= 3;
130 shift = 1;
131 for (i = 0; i < 8; i++) {
132 shift &= ((-(x >> i) | (x >> i)) >> (7 - i) & 1);
133 z += shift;
134 }
135
136 return z - 1;
137}
138#endif
139
140#if 0
141static void fprime_add(byte *r, const byte *a, const byte *modulus)
142{
143 raw_add(r, a);
144 raw_try_sub(r, modulus);
145}
146#endif
147
148#if 0
149static void fprime_sub(byte *r, const byte *a, const byte *modulus)
150{
151 raw_add(r, modulus);
152 raw_try_sub(r, a);
153 raw_try_sub(r, modulus);
154}
155#endif
156
157#if 0
158static void fprime_mul(byte *r, const byte *a, const byte *b,
159 const byte *modulus)
160{
161 word16 c = 0;
162 int i,j;
163
164 XMEMSET(r, 0, F25519_SIZE);
165
166 for (i = prime_msb(modulus); i >= 0; i--) {
167 const byte bit = (b[i >> 3] >> (i & 7)) & 1;
168 byte plusa[F25519_SIZE];
169
170 for (j = 0; j < F25519_SIZE; j++) {
171 c |= ((word16)r[j]) << 1;
172 r[j] = (byte)c;
173 c >>= 8;
174 }
175 raw_try_sub(r, modulus);
176
177 fprime_copy(plusa, r);
178 fprime_add(plusa, a, modulus);
179
180 fprime_select(r, r, plusa, bit);
181 }
182}
183#endif
184
185#if 0
186static void fe_load(byte *x, word32 c)
187{
188 word32 i;
189
190 for (i = 0; i < sizeof(c); i++) {
191 x[i] = c;
192 c >>= 8;
193 }
194
195 for (; i < F25519_SIZE; i++)
196 x[i] = 0;
197}
198#endif
199
200static void fe_normalize(byte *x)
201{
202 byte minusp[F25519_SIZE];
203 word16 c;
204 int i;
205
206
207 c = (x[31] >> 7) * 19;
208 x[31] &= 127;
209
210 for (i = 0; i < F25519_SIZE; i++) {
211 c += x[i];
212 x[i] = (byte)c;
213 c >>= 8;
214 }
215
216
217
218
219
220 c = 19;
221
222 for (i = 0; i + 1 < F25519_SIZE; i++) {
223 c += x[i];
224 minusp[i] = (byte)c;
225 c >>= 8;
226 }
227
228 c += ((word16)x[i]) - 128;
229 minusp[31] = (byte)c;
230
231
232 fe_select(x, minusp, x, (c >> 15) & 1);
233}
234
235static void lm_add(byte* r, const byte* a, const byte* b)
236{
237 word16 c = 0;
238 int i;
239
240
241 for (i = 0; i < F25519_SIZE; i++) {
242 c >>= 8;
243 c += ((word16)a[i]) + ((word16)b[i]);
244 r[i] = (byte)c;
245 }
246
247
248 r[31] &= 127;
249 c = (c >> 7) * 19;
250
251 for (i = 0; i < F25519_SIZE; i++) {
252 c += r[i];
253 r[i] = (byte)c;
254 c >>= 8;
255 }
256}
257
258static void lm_sub(byte* r, const byte* a, const byte* b)
259{
260 word32 c = 0;
261 int i;
262
263
264 c = 218;
265 for (i = 0; i + 1 < F25519_SIZE; i++) {
266 c += 65280 + ((word32)a[i]) - ((word32)b[i]);
267 r[i] = c;
268 c >>= 8;
269 }
270
271 c += ((word32)a[31]) - ((word32)b[31]);
272 r[31] = c & 127;
273 c = (c >> 7) * 19;
274
275 for (i = 0; i < F25519_SIZE; i++) {
276 c += r[i];
277 r[i] = c;
278 c >>= 8;
279 }
280}
281
282#if 0
283static void lm_neg(byte* r, const byte* a)
284{
285 word32 c = 0;
286 int i;
287
288
289 c = 218;
290 for (i = 0; i + 1 < F25519_SIZE; i++) {
291 c += 65280 - ((word32)a[i]);
292 r[i] = c;
293 c >>= 8;
294 }
295
296 c -= ((word32)a[31]);
297 r[31] = c & 127;
298 c = (c >> 7) * 19;
299
300 for (i = 0; i < F25519_SIZE; i++) {
301 c += r[i];
302 r[i] = c;
303 c >>= 8;
304 }
305}
306#endif
307
308static void fe_mul__distinct(byte *r, const byte *a, const byte *b)
309{
310 word32 c = 0;
311 int i;
312
313 for (i = 0; i < F25519_SIZE; i++) {
314 int j;
315
316 c >>= 8;
317 for (j = 0; j <= i; j++)
318 c += ((word32)a[j]) * ((word32)b[i - j]);
319
320 for (; j < F25519_SIZE; j++)
321 c += ((word32)a[j]) *
322 ((word32)b[i + F25519_SIZE - j]) * 38;
323
324 r[i] = c;
325 }
326
327 r[31] &= 127;
328 c = (c >> 7) * 19;
329
330 for (i = 0; i < F25519_SIZE; i++) {
331 c += r[i];
332 r[i] = c;
333 c >>= 8;
334 }
335}
336
337#if 0
338static void lm_mul(byte *r, const byte* a, const byte *b)
339{
340 byte tmp[F25519_SIZE];
341
342 fe_mul__distinct(tmp, a, b);
343 lm_copy(r, tmp);
344}
345#endif
346
347static void fe_mul_c(byte *r, const byte *a, word32 b)
348{
349 word32 c = 0;
350 int i;
351
352 for (i = 0; i < F25519_SIZE; i++) {
353 c >>= 8;
354 c += b * ((word32)a[i]);
355 r[i] = c;
356 }
357
358 r[31] &= 127;
359 c >>= 7;
360 c *= 19;
361
362 for (i = 0; i < F25519_SIZE; i++) {
363 c += r[i];
364 r[i] = c;
365 c >>= 8;
366 }
367}
368
369static void fe_inv__distinct(byte *r, const byte *x)
370{
371 byte s[F25519_SIZE];
372 int i;
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391 fe_mul__distinct(s, x, x);
392 fe_mul__distinct(r, s, x);
393
394
395 for (i = 0; i < 248; i++) {
396 fe_mul__distinct(s, r, r);
397 fe_mul__distinct(r, s, x);
398 }
399
400
401 fe_mul__distinct(s, r, r);
402
403
404 fe_mul__distinct(r, s, s);
405 fe_mul__distinct(s, r, x);
406
407
408 fe_mul__distinct(r, s, s);
409
410
411 fe_mul__distinct(s, r, r);
412 fe_mul__distinct(r, s, x);
413
414
415 fe_mul__distinct(s, r, r);
416 fe_mul__distinct(r, s, x);
417}
418
419#if 0
420static void lm_invert(byte *r, const byte *x)
421{
422 byte tmp[F25519_SIZE];
423
424 fe_inv__distinct(tmp, x);
425 lm_copy(r, tmp);
426}
427#endif
428
429#if 0
430
431
432
433static void exp2523(byte *r, const byte *x, byte *s)
434{
435 int i;
436
437
438
439
440
441
442
443 fe_mul__distinct(r, x, x);
444 fe_mul__distinct(s, r, x);
445
446
447 for (i = 0; i < 248; i++) {
448 fe_mul__distinct(r, s, s);
449 fe_mul__distinct(s, r, x);
450 }
451
452
453 fe_mul__distinct(r, s, s);
454
455
456 fe_mul__distinct(s, r, r);
457 fe_mul__distinct(r, s, x);
458}
459#endif
460
461#if 0
462static void fe_sqrt(byte *r, const byte *a)
463{
464 byte v[F25519_SIZE];
465 byte i[F25519_SIZE];
466 byte x[F25519_SIZE];
467 byte y[F25519_SIZE];
468
469
470 fe_mul_c(x, a, 2);
471 exp2523(v, x, y);
472
473
474 fe_mul__distinct(y, v, v);
475 fe_mul__distinct(i, x, y);
476 fe_load(y, 1);
477 lm_sub(i, i, y);
478
479
480 fe_mul__distinct(x, v, a);
481 fe_mul__distinct(r, x, i);
482}
483#endif
484
485
486static void xc_diffadd(byte *x5, byte *z5,
487 const byte *x1, const byte *z1,
488 const byte *x2, const byte *z2,
489 const byte *x3, const byte *z3)
490{
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505 byte da[F25519_SIZE];
506 byte cb[F25519_SIZE];
507 byte a[F25519_SIZE];
508 byte b[F25519_SIZE];
509
510 lm_add(a, x2, z2);
511 lm_sub(b, x3, z3);
512 fe_mul__distinct(da, a, b);
513
514 lm_sub(b, x2, z2);
515 lm_add(a, x3, z3);
516 fe_mul__distinct(cb, a, b);
517
518 lm_add(a, da, cb);
519 fe_mul__distinct(b, a, a);
520 fe_mul__distinct(x5, z1, b);
521
522 lm_sub(a, da, cb);
523 fe_mul__distinct(b, a, a);
524 fe_mul__distinct(z5, x1, b);
525}
526
527
528static void xc_double(byte *x3, byte *z3,
529 const byte *x1, const byte *z1)
530{
531
532
533
534
535
536
537
538 byte x1sq[F25519_SIZE];
539 byte z1sq[F25519_SIZE];
540 byte x1z1[F25519_SIZE];
541 byte a[F25519_SIZE];
542
543 fe_mul__distinct(x1sq, x1, x1);
544 fe_mul__distinct(z1sq, z1, z1);
545 fe_mul__distinct(x1z1, x1, z1);
546
547 lm_sub(a, x1sq, z1sq);
548 fe_mul__distinct(x3, a, a);
549
550 fe_mul_c(a, x1z1, 486662);
551 lm_add(a, x1sq, a);
552 lm_add(a, z1sq, a);
553 fe_mul__distinct(x1sq, x1z1, a);
554 fe_mul_c(z3, x1sq, 4);
555}
556
557void FAST_FUNC curve25519(byte *result, const byte *e, const byte *q)
558{
559 int i;
560
561 struct {
562
563 byte f25519_one[F25519_SIZE];
564
565
566 byte xm[F25519_SIZE];
567 byte zm[F25519_SIZE];
568
569 byte xm1[F25519_SIZE];
570 byte zm1[F25519_SIZE];
571 } z;
572#define f25519_one z.f25519_one
573#define xm z.xm
574#define zm z.zm
575#define xm1 z.xm1
576#define zm1 z.zm1
577 memset(&z, 0, sizeof(z));
578 f25519_one[0] = 1;
579 zm[0] = 1;
580 xm1[0] = 1;
581
582
583 lm_copy(xm, q);
584
585 for (i = 253; i >= 0; i--) {
586 const int bit = (e[i >> 3] >> (i & 7)) & 1;
587 byte xms[F25519_SIZE];
588 byte zms[F25519_SIZE];
589
590
591 xc_diffadd(xm1, zm1, q, f25519_one, xm, zm, xm1, zm1);
592 xc_double(xm, zm, xm, zm);
593
594
595 xc_diffadd(xms, zms, xm1, zm1, xm, zm, q, f25519_one);
596
597
598
599
600
601 fe_select(xm1, xm1, xm, bit);
602 fe_select(zm1, zm1, zm, bit);
603 fe_select(xm, xm, xms, bit);
604 fe_select(zm, zm, zms, bit);
605 }
606
607
608 fe_inv__distinct(zm1, zm);
609 fe_mul__distinct(result, zm1, xm);
610 fe_normalize(result);
611}
612