1
2
3
4
5
6#include "tls.h"
7
8typedef uint8_t byte;
9typedef uint16_t word16;
10typedef uint32_t word32;
11#define XMEMSET memset
12
13#define F25519_SIZE CURVE25519_KEYSIZE
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43#if 0
44static void fprime_copy(byte *x, const byte *a)
45{
46 memcpy(x, a, F25519_SIZE);
47}
48#endif
49
50static void lm_copy(byte* x, const byte* a)
51{
52 memcpy(x, a, F25519_SIZE);
53}
54
55#if 0
56static void fprime_select(byte *dst, const byte *zero, const byte *one, byte condition)
57{
58 const byte mask = -condition;
59 int i;
60
61 for (i = 0; i < F25519_SIZE; i++)
62 dst[i] = zero[i] ^ (mask & (one[i] ^ zero[i]));
63}
64#endif
65
66static void fe_select(byte *dst,
67 const byte *zero, const byte *one,
68 byte condition)
69{
70 const byte mask = -condition;
71 int i;
72
73 for (i = 0; i < F25519_SIZE; i++)
74 dst[i] = zero[i] ^ (mask & (one[i] ^ zero[i]));
75}
76
77#if 0
78static void raw_add(byte *x, const byte *p)
79{
80 word16 c = 0;
81 int i;
82
83 for (i = 0; i < F25519_SIZE; i++) {
84 c += ((word16)x[i]) + ((word16)p[i]);
85 x[i] = (byte)c;
86 c >>= 8;
87 }
88}
89#endif
90
91#if 0
92static void raw_try_sub(byte *x, const byte *p)
93{
94 byte minusp[F25519_SIZE];
95 word16 c = 0;
96 int i;
97
98 for (i = 0; i < F25519_SIZE; i++) {
99 c = ((word16)x[i]) - ((word16)p[i]) - c;
100 minusp[i] = (byte)c;
101 c = (c >> 8) & 1;
102 }
103
104 fprime_select(x, minusp, x, (byte)c);
105}
106#endif
107
108#if 0
109static int prime_msb(const byte *p)
110{
111 int i;
112 byte x;
113 int shift = 1;
114 int z = F25519_SIZE - 1;
115
116
117
118
119
120 for (i = F25519_SIZE - 1; i >= 0; i--) {
121 shift &= ((shift ^ ((-p[i] | p[i]) >> 7)) & 1);
122 z -= shift;
123 }
124 x = p[z];
125 z <<= 3;
126 shift = 1;
127 for (i = 0; i < 8; i++) {
128 shift &= ((-(x >> i) | (x >> i)) >> (7 - i) & 1);
129 z += shift;
130 }
131
132 return z - 1;
133}
134#endif
135
136#if 0
137static void fprime_add(byte *r, const byte *a, const byte *modulus)
138{
139 raw_add(r, a);
140 raw_try_sub(r, modulus);
141}
142#endif
143
144#if 0
145static void fprime_sub(byte *r, const byte *a, const byte *modulus)
146{
147 raw_add(r, modulus);
148 raw_try_sub(r, a);
149 raw_try_sub(r, modulus);
150}
151#endif
152
153#if 0
154static void fprime_mul(byte *r, const byte *a, const byte *b,
155 const byte *modulus)
156{
157 word16 c = 0;
158 int i,j;
159
160 XMEMSET(r, 0, F25519_SIZE);
161
162 for (i = prime_msb(modulus); i >= 0; i--) {
163 const byte bit = (b[i >> 3] >> (i & 7)) & 1;
164 byte plusa[F25519_SIZE];
165
166 for (j = 0; j < F25519_SIZE; j++) {
167 c |= ((word16)r[j]) << 1;
168 r[j] = (byte)c;
169 c >>= 8;
170 }
171 raw_try_sub(r, modulus);
172
173 fprime_copy(plusa, r);
174 fprime_add(plusa, a, modulus);
175
176 fprime_select(r, r, plusa, bit);
177 }
178}
179#endif
180
181#if 0
182static void fe_load(byte *x, word32 c)
183{
184 word32 i;
185
186 for (i = 0; i < sizeof(c); i++) {
187 x[i] = c;
188 c >>= 8;
189 }
190
191 for (; i < F25519_SIZE; i++)
192 x[i] = 0;
193}
194#endif
195
196static void fe_normalize(byte *x)
197{
198 byte minusp[F25519_SIZE];
199 unsigned c;
200 int i;
201
202
203 c = (x[31] >> 7) * 19;
204 x[31] &= 127;
205
206 for (i = 0; i < F25519_SIZE; i++) {
207 c += x[i];
208 x[i] = (byte)c;
209 c >>= 8;
210 }
211
212
213
214
215
216 c = 19;
217
218 for (i = 0; i < F25519_SIZE - 1; i++) {
219 c += x[i];
220 minusp[i] = (byte)c;
221 c >>= 8;
222 }
223
224 c += ((unsigned)x[i]) - 128;
225 minusp[31] = (byte)c;
226
227
228 fe_select(x, minusp, x, (c >> 15) & 1);
229}
230
231static void lm_add(byte* r, const byte* a, const byte* b)
232{
233 unsigned c = 0;
234 int i;
235
236
237 for (i = 0; i < F25519_SIZE; i++) {
238 c >>= 8;
239 c += ((unsigned)a[i]) + ((unsigned)b[i]);
240 r[i] = (byte)c;
241 }
242
243
244 r[31] &= 127;
245 c = (c >> 7) * 19;
246
247 for (i = 0; i < F25519_SIZE; i++) {
248 c += r[i];
249 r[i] = (byte)c;
250 c >>= 8;
251 }
252}
253
254static void lm_sub(byte* r, const byte* a, const byte* b)
255{
256 word32 c = 0;
257 int i;
258
259
260 c = 218;
261 for (i = 0; i + 1 < F25519_SIZE; i++) {
262 c += 65280 + ((word32)a[i]) - ((word32)b[i]);
263 r[i] = c;
264 c >>= 8;
265 }
266
267 c += ((word32)a[31]) - ((word32)b[31]);
268 r[31] = c & 127;
269 c = (c >> 7) * 19;
270
271 for (i = 0; i < F25519_SIZE; i++) {
272 c += r[i];
273 r[i] = c;
274 c >>= 8;
275 }
276}
277
278#if 0
279static void lm_neg(byte* r, const byte* a)
280{
281 word32 c = 0;
282 int i;
283
284
285 c = 218;
286 for (i = 0; i + 1 < F25519_SIZE; i++) {
287 c += 65280 - ((word32)a[i]);
288 r[i] = c;
289 c >>= 8;
290 }
291
292 c -= ((word32)a[31]);
293 r[31] = c & 127;
294 c = (c >> 7) * 19;
295
296 for (i = 0; i < F25519_SIZE; i++) {
297 c += r[i];
298 r[i] = c;
299 c >>= 8;
300 }
301}
302#endif
303
304static void fe_mul__distinct(byte *r, const byte *a, const byte *b)
305{
306 word32 c = 0;
307 int i;
308
309 for (i = 0; i < F25519_SIZE; i++) {
310 int j;
311
312 c >>= 8;
313 for (j = 0; j <= i; j++)
314 c += ((word32)a[j]) * ((word32)b[i - j]);
315
316 for (; j < F25519_SIZE; j++)
317 c += ((word32)a[j]) *
318 ((word32)b[i + F25519_SIZE - j]) * 38;
319
320 r[i] = c;
321 }
322
323 r[31] &= 127;
324 c = (c >> 7) * 19;
325
326 for (i = 0; i < F25519_SIZE; i++) {
327 c += r[i];
328 r[i] = c;
329 c >>= 8;
330 }
331}
332
333#if 0
334static void lm_mul(byte *r, const byte* a, const byte *b)
335{
336 byte tmp[F25519_SIZE];
337
338 fe_mul__distinct(tmp, a, b);
339 lm_copy(r, tmp);
340}
341#endif
342
343static void fe_mul_c(byte *r, const byte *a, word32 b)
344{
345 word32 c = 0;
346 int i;
347
348 for (i = 0; i < F25519_SIZE; i++) {
349 c >>= 8;
350 c += b * ((word32)a[i]);
351 r[i] = c;
352 }
353
354 r[31] &= 127;
355 c >>= 7;
356 c *= 19;
357
358 for (i = 0; i < F25519_SIZE; i++) {
359 c += r[i];
360 r[i] = c;
361 c >>= 8;
362 }
363}
364
365static void fe_inv__distinct(byte *r, const byte *x)
366{
367 byte s[F25519_SIZE];
368 int i;
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387 fe_mul__distinct(s, x, x);
388 fe_mul__distinct(r, s, x);
389
390
391 for (i = 0; i < 248; i++) {
392 fe_mul__distinct(s, r, r);
393 fe_mul__distinct(r, s, x);
394 }
395
396
397 fe_mul__distinct(s, r, r);
398
399
400 fe_mul__distinct(r, s, s);
401 fe_mul__distinct(s, r, x);
402
403
404 fe_mul__distinct(r, s, s);
405
406
407 fe_mul__distinct(s, r, r);
408 fe_mul__distinct(r, s, x);
409
410
411 fe_mul__distinct(s, r, r);
412 fe_mul__distinct(r, s, x);
413}
414
415#if 0
416static void lm_invert(byte *r, const byte *x)
417{
418 byte tmp[F25519_SIZE];
419
420 fe_inv__distinct(tmp, x);
421 lm_copy(r, tmp);
422}
423#endif
424
425#if 0
426
427
428
429static void exp2523(byte *r, const byte *x, byte *s)
430{
431 int i;
432
433
434
435
436
437
438
439 fe_mul__distinct(r, x, x);
440 fe_mul__distinct(s, r, x);
441
442
443 for (i = 0; i < 248; i++) {
444 fe_mul__distinct(r, s, s);
445 fe_mul__distinct(s, r, x);
446 }
447
448
449 fe_mul__distinct(r, s, s);
450
451
452 fe_mul__distinct(s, r, r);
453 fe_mul__distinct(r, s, x);
454}
455#endif
456
457#if 0
458static void fe_sqrt(byte *r, const byte *a)
459{
460 byte v[F25519_SIZE];
461 byte i[F25519_SIZE];
462 byte x[F25519_SIZE];
463 byte y[F25519_SIZE];
464
465
466 fe_mul_c(x, a, 2);
467 exp2523(v, x, y);
468
469
470 fe_mul__distinct(y, v, v);
471 fe_mul__distinct(i, x, y);
472 fe_load(y, 1);
473 lm_sub(i, i, y);
474
475
476 fe_mul__distinct(x, v, a);
477 fe_mul__distinct(r, x, i);
478}
479#endif
480
481
482static void xc_diffadd(byte *x5, byte *z5,
483 const byte *x1, const byte *z1,
484 const byte *x2, const byte *z2,
485 const byte *x3, const byte *z3)
486{
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501 byte da[F25519_SIZE];
502 byte cb[F25519_SIZE];
503 byte a[F25519_SIZE];
504 byte b[F25519_SIZE];
505
506 lm_add(a, x2, z2);
507 lm_sub(b, x3, z3);
508 fe_mul__distinct(da, a, b);
509
510 lm_sub(b, x2, z2);
511 lm_add(a, x3, z3);
512 fe_mul__distinct(cb, a, b);
513
514 lm_add(a, da, cb);
515 fe_mul__distinct(b, a, a);
516 fe_mul__distinct(x5, z1, b);
517
518 lm_sub(a, da, cb);
519 fe_mul__distinct(b, a, a);
520 fe_mul__distinct(z5, x1, b);
521}
522
523
524static void xc_double(byte *x3, byte *z3,
525 const byte *x1, const byte *z1)
526{
527
528
529
530
531
532
533
534 byte x1sq[F25519_SIZE];
535 byte z1sq[F25519_SIZE];
536 byte x1z1[F25519_SIZE];
537 byte a[F25519_SIZE];
538
539 fe_mul__distinct(x1sq, x1, x1);
540 fe_mul__distinct(z1sq, z1, z1);
541 fe_mul__distinct(x1z1, x1, z1);
542
543 lm_sub(a, x1sq, z1sq);
544 fe_mul__distinct(x3, a, a);
545
546 fe_mul_c(a, x1z1, 486662);
547 lm_add(a, x1sq, a);
548 lm_add(a, z1sq, a);
549 fe_mul__distinct(x1sq, x1z1, a);
550 fe_mul_c(z3, x1sq, 4);
551}
552
553void FAST_FUNC curve25519(byte *result, const byte *e, const byte *q)
554{
555 int i;
556
557 struct {
558
559 byte f25519_one[F25519_SIZE];
560
561
562 byte xm[F25519_SIZE];
563 byte zm[F25519_SIZE];
564
565 byte xm1[F25519_SIZE];
566 byte zm1[F25519_SIZE];
567 } z;
568#define f25519_one z.f25519_one
569#define xm z.xm
570#define zm z.zm
571#define xm1 z.xm1
572#define zm1 z.zm1
573 memset(&z, 0, sizeof(z));
574 f25519_one[0] = 1;
575 zm[0] = 1;
576 xm1[0] = 1;
577
578
579 lm_copy(xm, q);
580
581 for (i = 253; i >= 0; i--) {
582 const int bit = (e[i >> 3] >> (i & 7)) & 1;
583 byte xms[F25519_SIZE];
584 byte zms[F25519_SIZE];
585
586
587 xc_diffadd(xm1, zm1, q, f25519_one, xm, zm, xm1, zm1);
588 xc_double(xm, zm, xm, zm);
589
590
591 xc_diffadd(xms, zms, xm1, zm1, xm, zm, q, f25519_one);
592
593
594
595
596
597 fe_select(xm1, xm1, xm, bit);
598 fe_select(zm1, zm1, zm, bit);
599 fe_select(xm, xm, xms, bit);
600 fe_select(zm, zm, zms, bit);
601 }
602
603
604 fe_inv__distinct(zm1, zm);
605 fe_mul__distinct(result, zm1, xm);
606 fe_normalize(result);
607}
608