LXR busybox/networking/tls

   1/*
   2 * Copyright (C) 2018 Denys Vlasenko
   3 *
   4 * Licensed under GPLv2, see file LICENSE in this source tree.
   5 */
   6#include "tls.h"
   7
   8typedef uint8_t  byte;
   9typedef uint16_t word16;
  10typedef uint32_t word32;
  11#define XMEMSET  memset
  12
  13#define F25519_SIZE CURVE25519_KEYSIZE
  14
  15/* The code below is taken from wolfssl-3.15.3/wolfcrypt/src/fe_low_mem.c
  16 * Header comment is kept intact:
  17 */
  18
  19/* fe_low_mem.c
  20 *
  21 * Copyright (C) 2006-2017 wolfSSL Inc.
  22 *
  23 * This file is part of wolfSSL.
  24 *
  25 * wolfSSL is free software; you can redistribute it and/or modify
  26 * it under the terms of the GNU General Public License as published by
  27 * the Free Software Foundation; either version 2 of the License, or
  28 * (at your option) any later version.
  29 *
  30 * wolfSSL is distributed in the hope that it will be useful,
  31 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  32 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  33 * GNU General Public License for more details.
  34 *
  35 * You should have received a copy of the GNU General Public License
  36 * along with this program; if not, write to the Free Software
  37 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
  38 */
  39
  40
  41/* Based from Daniel Beer's public domain work. */
  42
  43#if 0 //UNUSED
  44static void fprime_copy(byte *x, const byte *a)
  45{
  46        memcpy(x, a, F25519_SIZE);
  47}
  48#endif
  49
  50static void lm_copy(byte* x, const byte* a)
  51{
  52        memcpy(x, a, F25519_SIZE);
  53}
  54
  55#if 0 //UNUSED
  56static void fprime_select(byte *dst, const byte *zero, const byte *one, byte condition)
  57{
  58        const byte mask = -condition;
  59        int i;
  60
  61        for (i = 0; i < F25519_SIZE; i++)
  62                dst[i] = zero[i] ^ (mask & (one[i] ^ zero[i]));
  63}
  64#endif
  65
  66#if 0 /* constant-time */
  67static void fe_select(byte *dst,
  68                const byte *src,
  69                byte condition)
  70{
  71        const byte mask = -condition;
  72        int i;
  73
  74        for (i = 0; i < F25519_SIZE; i++)
  75                dst[i] = dst[i] ^ (mask & (src[i] ^ dst[i]));
  76}
  77#else
  78# define fe_select(dst, src, condition) do { \
  79        if (condition) lm_copy(dst, src); \
  80} while (0)
  81#endif
  82
  83#if 0 //UNUSED
  84static void raw_add(byte *x, const byte *p)
  85{
  86        word16 c = 0;
  87        int i;
  88
  89        for (i = 0; i < F25519_SIZE; i++) {
  90                c += ((word16)x[i]) + ((word16)p[i]);
  91                x[i] = (byte)c;
  92                c >>= 8;
  93        }
  94}
  95#endif
  96
  97#if 0 //UNUSED
  98static void raw_try_sub(byte *x, const byte *p)
  99{
 100        byte minusp[F25519_SIZE];
 101        word16 c = 0;
 102        int i;
 103
 104        for (i = 0; i < F25519_SIZE; i++) {
 105                c = ((word16)x[i]) - ((word16)p[i]) - c;
 106                minusp[i] = (byte)c;
 107                c = (c >> 8) & 1;
 108        }
 109
 110        fprime_select(x, minusp, x, (byte)c);
 111}
 112#endif
 113
 114#if 0 //UNUSED
 115static int prime_msb(const byte *p)
 116{
 117        int i;
 118        byte x;
 119        int shift = 1;
 120        int z     = F25519_SIZE - 1;
 121
 122        /*
 123            Test for any hot bits.
 124            As soon as one instance is encountered set shift to 0.
 125         */
 126        for (i = F25519_SIZE - 1; i >= 0; i--) {
 127                shift &= ((shift ^ ((-p[i] | p[i]) >> 7)) & 1);
 128                z -= shift;
 129        }
 130        x = p[z];
 131        z <<= 3;
 132        shift = 1;
 133        for (i = 0; i < 8; i++) {
 134                shift &= ((-(x >> i) | (x >> i)) >> (7 - i) & 1);
 135                z += shift;
 136        }
 137
 138        return z - 1;
 139}
 140#endif
 141
 142#if 0 //UNUSED
 143static void fprime_add(byte *r, const byte *a, const byte *modulus)
 144{
 145        raw_add(r, a);
 146        raw_try_sub(r, modulus);
 147}
 148#endif
 149
 150#if 0 //UNUSED
 151static void fprime_sub(byte *r, const byte *a, const byte *modulus)
 152{
 153        raw_add(r, modulus);
 154        raw_try_sub(r, a);
 155        raw_try_sub(r, modulus);
 156}
 157#endif
 158
 159#if 0 //UNUSED
 160static void fprime_mul(byte *r, const byte *a, const byte *b,
 161                const byte *modulus)
 162{
 163        word16 c = 0;
 164        int i,j;
 165
 166        XMEMSET(r, 0, F25519_SIZE);
 167
 168        for (i = prime_msb(modulus); i >= 0; i--) {
 169                const byte bit = (b[i >> 3] >> (i & 7)) & 1;
 170                byte plusa[F25519_SIZE];
 171
 172                for (j = 0; j < F25519_SIZE; j++) {
 173                        c |= ((word16)r[j]) << 1;
 174                        r[j] = (byte)c;
 175                        c >>= 8;
 176                }
 177                raw_try_sub(r, modulus);
 178
 179                fprime_copy(plusa, r);
 180                fprime_add(plusa, a, modulus);
 181
 182                fprime_select(r, r, plusa, bit);
 183        }
 184}
 185#endif
 186
 187#if 0 //UNUSED
 188static void fe_load(byte *x, word32 c)
 189{
 190        int i;
 191
 192        for (i = 0; i < sizeof(c); i++) {
 193                x[i] = c;
 194                c >>= 8;
 195        }
 196
 197        for (; i < F25519_SIZE; i++)
 198                x[i] = 0;
 199}
 200#endif
 201
 202static void fe_reduce(byte *x, word32 c)
 203{
 204        int i;
 205
 206        /* Reduce using 2^255 = 19 mod p */
 207        x[31] = c & 127;
 208        c = (c >> 7) * 19;
 209
 210        for (i = 0; i < F25519_SIZE; i++) {
 211                c += x[i];
 212                x[i] = (byte)c;
 213                c >>= 8;
 214        }
 215}
 216
 217static void fe_normalize(byte *x)
 218{
 219        byte minusp[F25519_SIZE];
 220        unsigned c;
 221        int i;
 222
 223        /* Reduce using 2^255 = 19 mod p */
 224        fe_reduce(x, x[31]);
 225
 226        /* The number is now less than 2^255 + 18, and therefore less than
 227         * 2p. Try subtracting p, and conditionally load the subtracted
 228         * value if underflow did not occur.
 229         */
 230        c = 19;
 231
 232        for (i = 0; i < F25519_SIZE - 1; i++) {
 233                c += x[i];
 234                minusp[i] = (byte)c;
 235                c >>= 8;
 236        }
 237
 238        c += ((unsigned)x[i]) - 128;
 239        minusp[31] = (byte)c;
 240
 241        /* Load x-p if no underflow */
 242        fe_select(x, minusp, !(c & (1<<15)));
 243}
 244
 245static void lm_add(byte* r, const byte* a, const byte* b)
 246{
 247        unsigned c = 0;
 248        int i;
 249
 250        /* Add */
 251        for (i = 0; i < F25519_SIZE; i++) {
 252                c >>= 8;
 253                c += ((unsigned)a[i]) + ((unsigned)b[i]);
 254                r[i] = (byte)c;
 255        }
 256
 257        /* Reduce with 2^255 = 19 mod p */
 258        fe_reduce(r, c);
 259}
 260
 261static void lm_sub(byte* r, const byte* a, const byte* b)
 262{
 263        word32 c = 0;
 264        int i;
 265
 266        /* Calculate a + 2p - b, to avoid underflow */
 267        c = 218;
 268        for (i = 0; i < F25519_SIZE - 1; i++) {
 269                c += 65280 + ((word32)a[i]) - ((word32)b[i]);
 270                r[i] = c;
 271                c >>= 8;
 272        }
 273
 274        c += ((word32)a[31]) - ((word32)b[31]);
 275
 276        fe_reduce(r, c);
 277}
 278
 279#if 0 //UNUSED
 280static void lm_neg(byte* r, const byte* a)
 281{
 282        word32 c = 0;
 283        int i;
 284
 285        /* Calculate 2p - a, to avoid underflow */
 286        c = 218;
 287        for (i = 0; i < F25519_SIZE - 1; i++) {
 288                c += 65280 - ((word32)a[i]);
 289                r[i] = c;
 290                c >>= 8;
 291        }
 292
 293        c -= ((word32)a[31]);
 294
 295        fe_reduce(r, c);
 296}
 297#endif
 298
 299static void fe_mul__distinct(byte *r, const byte *a, const byte *b)
 300{
 301        word32 c = 0;
 302        int i;
 303
 304        for (i = 0; i < F25519_SIZE; i++) {
 305                int j;
 306
 307                c >>= 8;
 308                for (j = 0; j <= i; j++)
 309                        c += ((word32)a[j]) * ((word32)b[i - j]);
 310
 311                for (; j < F25519_SIZE; j++)
 312                        c += ((word32)a[j]) *
 313                                ((word32)b[i + F25519_SIZE - j]) * 38;
 314
 315                r[i] = c;
 316        }
 317
 318        fe_reduce(r, c);
 319}
 320
 321#if 0 //UNUSED
 322static void lm_mul(byte *r, const byte* a, const byte *b)
 323{
 324        byte tmp[F25519_SIZE];
 325
 326        fe_mul__distinct(tmp, a, b);
 327        lm_copy(r, tmp);
 328}
 329#endif
 330
 331static void fe_mul_c(byte *r, const byte *a, word32 b)
 332{
 333        word32 c = 0;
 334        int i;
 335
 336        for (i = 0; i < F25519_SIZE; i++) {
 337                c >>= 8;
 338                c += b * ((word32)a[i]);
 339                r[i] = c;
 340        }
 341
 342        fe_reduce(r, c);
 343}
 344
 345static void fe_inv__distinct(byte *r, const byte *x)
 346{
 347        byte s[F25519_SIZE];
 348        int i;
 349
 350        /* This is a prime field, so by Fermat's little theorem:
 351         *
 352         *     x^(p-1) = 1 mod p
 353         *
 354         * Therefore, raise to (p-2) = 2^255-21 to get a multiplicative
 355         * inverse.
 356         *
 357         * This is a 255-bit binary number with the digits:
 358         *
 359         *     11111111... 01011
 360         *
 361         * We compute the result by the usual binary chain, but
 362         * alternate between keeping the accumulator in r and s, so as
 363         * to avoid copying temporaries.
 364         */
 365
 366        lm_copy(r, x);
 367
 368        /* 1, 1 x 249 */
 369        for (i = 0; i < 249; i++) {
 370                fe_mul__distinct(s, r, r);
 371                fe_mul__distinct(r, s, x);
 372        }
 373
 374        /* 0 */
 375        fe_mul__distinct(s, r, r);
 376
 377        /* 1 */
 378        fe_mul__distinct(r, s, s);
 379        fe_mul__distinct(s, r, x);
 380
 381        /* 0 */
 382        fe_mul__distinct(r, s, s);
 383
 384        /* 1, 1 */
 385        for (i = 0; i < 2; i++) {
 386                fe_mul__distinct(s, r, r);
 387                fe_mul__distinct(r, s, x);
 388        }
 389}
 390
 391#if 0 //UNUSED
 392static void lm_invert(byte *r, const byte *x)
 393{
 394        byte tmp[F25519_SIZE];
 395
 396        fe_inv__distinct(tmp, x);
 397        lm_copy(r, tmp);
 398}
 399#endif
 400
 401#if 0 //UNUSED
 402/* Raise x to the power of (p-5)/8 = 2^252-3, using s for temporary
 403 * storage.
 404 */
 405static void exp2523(byte *r, const byte *x, byte *s)
 406{
 407        int i;
 408
 409        /* This number is a 252-bit number with the binary expansion:
 410         *
 411         *     111111... 01
 412         */
 413
 414        lm_copy(s, x);
 415
 416        /* 1, 1 x 249 */
 417        for (i = 0; i < 249; i++) {
 418                fe_mul__distinct(r, s, s);
 419                fe_mul__distinct(s, r, x);
 420        }
 421
 422        /* 0 */
 423        fe_mul__distinct(r, s, s);
 424
 425        /* 1 */
 426        fe_mul__distinct(s, r, r);
 427        fe_mul__distinct(r, s, x);
 428}
 429#endif
 430
 431#if 0 //UNUSED
 432static void fe_sqrt(byte *r, const byte *a)
 433{
 434        byte v[F25519_SIZE];
 435        byte i[F25519_SIZE];
 436        byte x[F25519_SIZE];
 437        byte y[F25519_SIZE];
 438
 439        /* v = (2a)^((p-5)/8) [x = 2a] */
 440        fe_mul_c(x, a, 2);
 441        exp2523(v, x, y);
 442
 443        /* i = 2av^2 - 1 */
 444        fe_mul__distinct(y, v, v);
 445        fe_mul__distinct(i, x, y);
 446        fe_load(y, 1);
 447        lm_sub(i, i, y);
 448
 449        /* r = avi */
 450        fe_mul__distinct(x, v, a);
 451        fe_mul__distinct(r, x, i);
 452}
 453#endif
 454
 455/* Differential addition */
 456static void xc_diffadd(byte *x5, byte *z5,
 457                const byte *x1, const byte *z1,
 458                const byte *x2, const byte *z2,
 459                const byte *x3, const byte *z3)
 460{
 461        /* Explicit formulas database: dbl-1987-m3
 462         *
 463         * source 1987 Montgomery "Speeding the Pollard and elliptic curve
 464         *   methods of factorization", page 261, fifth display, plus
 465         *   common-subexpression elimination
 466         * compute A = X2+Z2
 467         * compute B = X2-Z2
 468         * compute C = X3+Z3
 469         * compute D = X3-Z3
 470         * compute DA = D A
 471         * compute CB = C B
 472         * compute X5 = Z1(DA+CB)^2
 473         * compute Z5 = X1(DA-CB)^2
 474         */
 475        byte da[F25519_SIZE];
 476        byte cb[F25519_SIZE];
 477        byte a[F25519_SIZE];
 478        byte b[F25519_SIZE];
 479
 480        lm_add(a, x2, z2);
 481        lm_sub(b, x3, z3); /* D */
 482        fe_mul__distinct(da, a, b);
 483
 484        lm_sub(b, x2, z2);
 485        lm_add(a, x3, z3); /* C */
 486        fe_mul__distinct(cb, a, b);
 487
 488        lm_add(a, da, cb);
 489        fe_mul__distinct(b, a, a);
 490        fe_mul__distinct(x5, z1, b);
 491
 492        lm_sub(a, da, cb);
 493        fe_mul__distinct(b, a, a);
 494        fe_mul__distinct(z5, x1, b);
 495}
 496
 497/* Double an X-coordinate */
 498static void xc_double(byte *x3, byte *z3,
 499                const byte *x1, const byte *z1)
 500{
 501        /* Explicit formulas database: dbl-1987-m
 502         *
 503         * source 1987 Montgomery "Speeding the Pollard and elliptic
 504         *   curve methods of factorization", page 261, fourth display
 505         * compute X3 = (X1^2-Z1^2)^2
 506         * compute Z3 = 4 X1 Z1 (X1^2 + a X1 Z1 + Z1^2)
 507         */
 508        byte x1sq[F25519_SIZE];
 509        byte z1sq[F25519_SIZE];
 510        byte x1z1[F25519_SIZE];
 511        byte a[F25519_SIZE];
 512
 513        fe_mul__distinct(x1sq, x1, x1);
 514        fe_mul__distinct(z1sq, z1, z1);
 515        fe_mul__distinct(x1z1, x1, z1);
 516
 517        lm_sub(a, x1sq, z1sq);
 518        fe_mul__distinct(x3, a, a);
 519
 520        fe_mul_c(a, x1z1, 486662);
 521        lm_add(a, x1sq, a);
 522        lm_add(a, z1sq, a);
 523        fe_mul__distinct(x1sq, x1z1, a);
 524        fe_mul_c(z3, x1sq, 4);
 525}
 526
 527static void curve25519(byte *result, const byte *e, const byte *q)
 528{
 529        int i;
 530
 531        struct Z {
 532                /* for bbox's special case of q == NULL meaning "use basepoint" */
 533                /*static const*/ uint8_t basepoint9[CURVE25519_KEYSIZE]; // = {9};
 534
 535                /* from wolfssl-3.15.3/wolfssl/wolfcrypt/fe_operations.h */
 536                /*static const*/ byte f25519_one[F25519_SIZE]; // = {1};
 537
 538                /* Predecessor: P_(m-1) */
 539                byte xm1[F25519_SIZE]; // = {1};
 540                byte zm1[F25519_SIZE]; // = {0};
 541                /* Current point: P_m */
 542                byte xm[F25519_SIZE];
 543                byte zm[F25519_SIZE]; // = {1};
 544                /* Temporaries */
 545                byte xms[F25519_SIZE];
 546                byte zms[F25519_SIZE];
 547        } z;
 548        uint8_t *XM1 = (uint8_t*)&z + offsetof(struct Z,xm1); // gcc 11.0.0 workaround
 549#define basepoint9 z.basepoint9
 550#define f25519_one z.f25519_one
 551#define xm1        z.xm1
 552#define zm1        z.zm1
 553#define xm         z.xm
 554#define zm         z.zm
 555#define xms        z.xms
 556#define zms        z.zms
 557        memset(&z, 0, sizeof(z));
 558        f25519_one[0] = 1;
 559        zm[0] = 1;
 560        xm1[0] = 1;
 561
 562        if (!q) {
 563                basepoint9[0] = 9;
 564                q = basepoint9;
 565        }
 566
 567        /* Note: bit 254 is assumed to be 1 */
 568        lm_copy(xm, q);
 569
 570        for (i = 253; i >= 0; i--) {
 571                const int bit = (e[i >> 3] >> (i & 7)) & 1;
 572//              byte xms[F25519_SIZE];
 573//              byte zms[F25519_SIZE];
 574
 575                /* From P_m and P_(m-1), compute P_(2m) and P_(2m-1) */
 576                xc_diffadd(xm1, zm1, q, f25519_one, xm, zm, xm1, zm1);
 577                xc_double(xm, zm, xm, zm);
 578
 579                /* Compute P_(2m+1) */
 580                xc_diffadd(xms, zms, xm1, zm1, xm, zm, q, f25519_one);
 581
 582                /* Select:
 583                 *   bit = 1 --> (P_(2m+1), P_(2m))
 584                 *   bit = 0 --> (P_(2m), P_(2m-1))
 585                 */
 586#if 0
 587                fe_select(xm1, xm, bit);
 588                fe_select(zm1, zm, bit);
 589                fe_select(xm, xms, bit);
 590                fe_select(zm, zms, bit);
 591#else
 592// same as above in about 50 bytes smaller code, but
 593// requires that in-memory order is exactly xm1,zm1,xm,zm,xms,zms
 594                if (bit) {
 595                        //memcpy(xm1, xm, 4 * F25519_SIZE);
 596                        //^^^ gcc 11.0.0 warns of overlapping memcpy
 597                        //memmove(xm1, xm, 4 * F25519_SIZE);
 598                        //^^^ gcc 11.0.0 warns of out-of-bounds access to xm1[]
 599                        memmove(XM1, XM1 + 2 * F25519_SIZE, 4 * F25519_SIZE);
 600                }
 601#endif
 602        }
 603
 604        /* Freeze out of projective coordinates */
 605        fe_inv__distinct(zm1, zm);
 606        fe_mul__distinct(result, zm1, xm);
 607        fe_normalize(result);
 608}
 609
 610/* interface to bbox's TLS code: */
 611
 612void FAST_FUNC curve_x25519_compute_pubkey_and_premaster(
 613                uint8_t *pubkey, uint8_t *premaster,
 614                const uint8_t *peerkey32)
 615{
 616        uint8_t privkey[CURVE25519_KEYSIZE]; //[32]
 617
 618        /* Generate random private key, see RFC 7748 */
 619        tls_get_random(privkey, sizeof(privkey));
 620        privkey[0] &= 0xf8;
 621        privkey[CURVE25519_KEYSIZE-1] = ((privkey[CURVE25519_KEYSIZE-1] & 0x7f) | 0x40);
 622
 623        /* Compute public key */
 624        curve25519(pubkey, privkey, NULL /* "use base point of x25519" */);
 625
 626        /* Compute premaster using peer's public key */
 627        curve25519(premaster, privkey, peerkey32);
 628}
 629