1
2
3
4
5
6#include "tls.h"
7
8typedef uint8_t byte;
9typedef uint16_t word16;
10typedef uint32_t word32;
11#define XMEMSET memset
12
13#define F25519_SIZE CURVE25519_KEYSIZE
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43#if 0
44static void fprime_copy(byte *x, const byte *a)
45{
46 memcpy(x, a, F25519_SIZE);
47}
48#endif
49
50static void lm_copy(byte* x, const byte* a)
51{
52 memcpy(x, a, F25519_SIZE);
53}
54
55#if 0
56static void fprime_select(byte *dst, const byte *zero, const byte *one, byte condition)
57{
58 const byte mask = -condition;
59 int i;
60
61 for (i = 0; i < F25519_SIZE; i++)
62 dst[i] = zero[i] ^ (mask & (one[i] ^ zero[i]));
63}
64#endif
65
66#if 0
67static void fe_select(byte *dst,
68 const byte *src,
69 byte condition)
70{
71 const byte mask = -condition;
72 int i;
73
74 for (i = 0; i < F25519_SIZE; i++)
75 dst[i] = dst[i] ^ (mask & (src[i] ^ dst[i]));
76}
77#else
78# define fe_select(dst, src, condition) do { \
79 if (condition) lm_copy(dst, src); \
80} while (0)
81#endif
82
83#if 0
84static void raw_add(byte *x, const byte *p)
85{
86 word16 c = 0;
87 int i;
88
89 for (i = 0; i < F25519_SIZE; i++) {
90 c += ((word16)x[i]) + ((word16)p[i]);
91 x[i] = (byte)c;
92 c >>= 8;
93 }
94}
95#endif
96
97#if 0
98static void raw_try_sub(byte *x, const byte *p)
99{
100 byte minusp[F25519_SIZE];
101 word16 c = 0;
102 int i;
103
104 for (i = 0; i < F25519_SIZE; i++) {
105 c = ((word16)x[i]) - ((word16)p[i]) - c;
106 minusp[i] = (byte)c;
107 c = (c >> 8) & 1;
108 }
109
110 fprime_select(x, minusp, x, (byte)c);
111}
112#endif
113
114#if 0
115static int prime_msb(const byte *p)
116{
117 int i;
118 byte x;
119 int shift = 1;
120 int z = F25519_SIZE - 1;
121
122
123
124
125
126 for (i = F25519_SIZE - 1; i >= 0; i--) {
127 shift &= ((shift ^ ((-p[i] | p[i]) >> 7)) & 1);
128 z -= shift;
129 }
130 x = p[z];
131 z <<= 3;
132 shift = 1;
133 for (i = 0; i < 8; i++) {
134 shift &= ((-(x >> i) | (x >> i)) >> (7 - i) & 1);
135 z += shift;
136 }
137
138 return z - 1;
139}
140#endif
141
142#if 0
143static void fprime_add(byte *r, const byte *a, const byte *modulus)
144{
145 raw_add(r, a);
146 raw_try_sub(r, modulus);
147}
148#endif
149
150#if 0
151static void fprime_sub(byte *r, const byte *a, const byte *modulus)
152{
153 raw_add(r, modulus);
154 raw_try_sub(r, a);
155 raw_try_sub(r, modulus);
156}
157#endif
158
159#if 0
160static void fprime_mul(byte *r, const byte *a, const byte *b,
161 const byte *modulus)
162{
163 word16 c = 0;
164 int i,j;
165
166 XMEMSET(r, 0, F25519_SIZE);
167
168 for (i = prime_msb(modulus); i >= 0; i--) {
169 const byte bit = (b[i >> 3] >> (i & 7)) & 1;
170 byte plusa[F25519_SIZE];
171
172 for (j = 0; j < F25519_SIZE; j++) {
173 c |= ((word16)r[j]) << 1;
174 r[j] = (byte)c;
175 c >>= 8;
176 }
177 raw_try_sub(r, modulus);
178
179 fprime_copy(plusa, r);
180 fprime_add(plusa, a, modulus);
181
182 fprime_select(r, r, plusa, bit);
183 }
184}
185#endif
186
187#if 0
188static void fe_load(byte *x, word32 c)
189{
190 int i;
191
192 for (i = 0; i < sizeof(c); i++) {
193 x[i] = c;
194 c >>= 8;
195 }
196
197 for (; i < F25519_SIZE; i++)
198 x[i] = 0;
199}
200#endif
201
202static void fe_reduce(byte *x, word32 c)
203{
204 int i;
205
206
207 x[31] = c & 127;
208 c = (c >> 7) * 19;
209
210 for (i = 0; i < F25519_SIZE; i++) {
211 c += x[i];
212 x[i] = (byte)c;
213 c >>= 8;
214 }
215}
216
217static void fe_normalize(byte *x)
218{
219 byte minusp[F25519_SIZE];
220 unsigned c;
221 int i;
222
223
224 fe_reduce(x, x[31]);
225
226
227
228
229
230 c = 19;
231
232 for (i = 0; i < F25519_SIZE - 1; i++) {
233 c += x[i];
234 minusp[i] = (byte)c;
235 c >>= 8;
236 }
237
238 c += ((unsigned)x[i]) - 128;
239 minusp[31] = (byte)c;
240
241
242 fe_select(x, minusp, !(c & (1<<15)));
243}
244
245static void lm_add(byte* r, const byte* a, const byte* b)
246{
247 unsigned c = 0;
248 int i;
249
250
251 for (i = 0; i < F25519_SIZE; i++) {
252 c >>= 8;
253 c += ((unsigned)a[i]) + ((unsigned)b[i]);
254 r[i] = (byte)c;
255 }
256
257
258 fe_reduce(r, c);
259}
260
261static void lm_sub(byte* r, const byte* a, const byte* b)
262{
263 word32 c = 0;
264 int i;
265
266
267 c = 218;
268 for (i = 0; i < F25519_SIZE - 1; i++) {
269 c += 65280 + ((word32)a[i]) - ((word32)b[i]);
270 r[i] = c;
271 c >>= 8;
272 }
273
274 c += ((word32)a[31]) - ((word32)b[31]);
275
276 fe_reduce(r, c);
277}
278
279#if 0
280static void lm_neg(byte* r, const byte* a)
281{
282 word32 c = 0;
283 int i;
284
285
286 c = 218;
287 for (i = 0; i < F25519_SIZE - 1; i++) {
288 c += 65280 - ((word32)a[i]);
289 r[i] = c;
290 c >>= 8;
291 }
292
293 c -= ((word32)a[31]);
294
295 fe_reduce(r, c);
296}
297#endif
298
299static void fe_mul__distinct(byte *r, const byte *a, const byte *b)
300{
301 word32 c = 0;
302 int i;
303
304 for (i = 0; i < F25519_SIZE; i++) {
305 int j;
306
307 c >>= 8;
308 for (j = 0; j <= i; j++)
309 c += ((word32)a[j]) * ((word32)b[i - j]);
310
311 for (; j < F25519_SIZE; j++)
312 c += ((word32)a[j]) *
313 ((word32)b[i + F25519_SIZE - j]) * 38;
314
315 r[i] = c;
316 }
317
318 fe_reduce(r, c);
319}
320
321#if 0
322static void lm_mul(byte *r, const byte* a, const byte *b)
323{
324 byte tmp[F25519_SIZE];
325
326 fe_mul__distinct(tmp, a, b);
327 lm_copy(r, tmp);
328}
329#endif
330
331static void fe_mul_c(byte *r, const byte *a, word32 b)
332{
333 word32 c = 0;
334 int i;
335
336 for (i = 0; i < F25519_SIZE; i++) {
337 c >>= 8;
338 c += b * ((word32)a[i]);
339 r[i] = c;
340 }
341
342 fe_reduce(r, c);
343}
344
345static void fe_inv__distinct(byte *r, const byte *x)
346{
347 byte s[F25519_SIZE];
348 int i;
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366 lm_copy(r, x);
367
368
369 for (i = 0; i < 249; i++) {
370 fe_mul__distinct(s, r, r);
371 fe_mul__distinct(r, s, x);
372 }
373
374
375 fe_mul__distinct(s, r, r);
376
377
378 fe_mul__distinct(r, s, s);
379 fe_mul__distinct(s, r, x);
380
381
382 fe_mul__distinct(r, s, s);
383
384
385 for (i = 0; i < 2; i++) {
386 fe_mul__distinct(s, r, r);
387 fe_mul__distinct(r, s, x);
388 }
389}
390
391#if 0
392static void lm_invert(byte *r, const byte *x)
393{
394 byte tmp[F25519_SIZE];
395
396 fe_inv__distinct(tmp, x);
397 lm_copy(r, tmp);
398}
399#endif
400
401#if 0
402
403
404
405static void exp2523(byte *r, const byte *x, byte *s)
406{
407 int i;
408
409
410
411
412
413
414 lm_copy(s, x);
415
416
417 for (i = 0; i < 249; i++) {
418 fe_mul__distinct(r, s, s);
419 fe_mul__distinct(s, r, x);
420 }
421
422
423 fe_mul__distinct(r, s, s);
424
425
426 fe_mul__distinct(s, r, r);
427 fe_mul__distinct(r, s, x);
428}
429#endif
430
431#if 0
432static void fe_sqrt(byte *r, const byte *a)
433{
434 byte v[F25519_SIZE];
435 byte i[F25519_SIZE];
436 byte x[F25519_SIZE];
437 byte y[F25519_SIZE];
438
439
440 fe_mul_c(x, a, 2);
441 exp2523(v, x, y);
442
443
444 fe_mul__distinct(y, v, v);
445 fe_mul__distinct(i, x, y);
446 fe_load(y, 1);
447 lm_sub(i, i, y);
448
449
450 fe_mul__distinct(x, v, a);
451 fe_mul__distinct(r, x, i);
452}
453#endif
454
455
456static void xc_diffadd(byte *x5, byte *z5,
457 const byte *x1, const byte *z1,
458 const byte *x2, const byte *z2,
459 const byte *x3, const byte *z3)
460{
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475 byte da[F25519_SIZE];
476 byte cb[F25519_SIZE];
477 byte a[F25519_SIZE];
478 byte b[F25519_SIZE];
479
480 lm_add(a, x2, z2);
481 lm_sub(b, x3, z3);
482 fe_mul__distinct(da, a, b);
483
484 lm_sub(b, x2, z2);
485 lm_add(a, x3, z3);
486 fe_mul__distinct(cb, a, b);
487
488 lm_add(a, da, cb);
489 fe_mul__distinct(b, a, a);
490 fe_mul__distinct(x5, z1, b);
491
492 lm_sub(a, da, cb);
493 fe_mul__distinct(b, a, a);
494 fe_mul__distinct(z5, x1, b);
495}
496
497
498static void xc_double(byte *x3, byte *z3,
499 const byte *x1, const byte *z1)
500{
501
502
503
504
505
506
507
508 byte x1sq[F25519_SIZE];
509 byte z1sq[F25519_SIZE];
510 byte x1z1[F25519_SIZE];
511 byte a[F25519_SIZE];
512
513 fe_mul__distinct(x1sq, x1, x1);
514 fe_mul__distinct(z1sq, z1, z1);
515 fe_mul__distinct(x1z1, x1, z1);
516
517 lm_sub(a, x1sq, z1sq);
518 fe_mul__distinct(x3, a, a);
519
520 fe_mul_c(a, x1z1, 486662);
521 lm_add(a, x1sq, a);
522 lm_add(a, z1sq, a);
523 fe_mul__distinct(x1sq, x1z1, a);
524 fe_mul_c(z3, x1sq, 4);
525}
526
527static void curve25519(byte *result, const byte *e, const byte *q)
528{
529 int i;
530
531 struct Z {
532
533 uint8_t basepoint9[CURVE25519_KEYSIZE];
534
535
536 byte f25519_one[F25519_SIZE];
537
538
539 byte xm1[F25519_SIZE];
540 byte zm1[F25519_SIZE];
541
542 byte xm[F25519_SIZE];
543 byte zm[F25519_SIZE];
544
545 byte xms[F25519_SIZE];
546 byte zms[F25519_SIZE];
547 } z;
548 uint8_t *XM1 = (uint8_t*)&z + offsetof(struct Z,xm1);
549#define basepoint9 z.basepoint9
550#define f25519_one z.f25519_one
551#define xm1 z.xm1
552#define zm1 z.zm1
553#define xm z.xm
554#define zm z.zm
555#define xms z.xms
556#define zms z.zms
557 memset(&z, 0, sizeof(z));
558 f25519_one[0] = 1;
559 zm[0] = 1;
560 xm1[0] = 1;
561
562 if (!q) {
563 basepoint9[0] = 9;
564 q = basepoint9;
565 }
566
567
568 lm_copy(xm, q);
569
570 for (i = 253; i >= 0; i--) {
571 const int bit = (e[i >> 3] >> (i & 7)) & 1;
572
573
574
575
576 xc_diffadd(xm1, zm1, q, f25519_one, xm, zm, xm1, zm1);
577 xc_double(xm, zm, xm, zm);
578
579
580 xc_diffadd(xms, zms, xm1, zm1, xm, zm, q, f25519_one);
581
582
583
584
585
586#if 0
587 fe_select(xm1, xm, bit);
588 fe_select(zm1, zm, bit);
589 fe_select(xm, xms, bit);
590 fe_select(zm, zms, bit);
591#else
592
593
594 if (bit) {
595
596
597
598
599 memmove(XM1, XM1 + 2 * F25519_SIZE, 4 * F25519_SIZE);
600 }
601#endif
602 }
603
604
605 fe_inv__distinct(zm1, zm);
606 fe_mul__distinct(result, zm1, xm);
607 fe_normalize(result);
608}
609
610
611
612void FAST_FUNC curve_x25519_compute_pubkey_and_premaster(
613 uint8_t *pubkey, uint8_t *premaster,
614 const uint8_t *peerkey32)
615{
616 uint8_t privkey[CURVE25519_KEYSIZE];
617
618
619 tls_get_random(privkey, sizeof(privkey));
620 privkey[0] &= 0xf8;
621 privkey[CURVE25519_KEYSIZE-1] = ((privkey[CURVE25519_KEYSIZE-1] & 0x7f) | 0x40);
622
623
624 curve25519(pubkey, privkey, NULL );
625
626
627 curve25519(premaster, privkey, peerkey32);
628}
629