linux/lib/mpi/mpih-mul.c
<<
>>
Prefs
   1/* mpihelp-mul.c  -  MPI helper functions
   2 * Copyright (C) 1994, 1996, 1998, 1999,
   3 *               2000 Free Software Foundation, Inc.
   4 *
   5 * This file is part of GnuPG.
   6 *
   7 * GnuPG is free software; you can redistribute it and/or modify
   8 * it under the terms of the GNU General Public License as published by
   9 * the Free Software Foundation; either version 2 of the License, or
  10 * (at your option) any later version.
  11 *
  12 * GnuPG is distributed in the hope that it will be useful,
  13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 * GNU General Public License for more details.
  16 *
  17 * You should have received a copy of the GNU General Public License
  18 * along with this program; if not, write to the Free Software
  19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
  20 *
  21 * Note: This code is heavily based on the GNU MP Library.
  22 *       Actually it's the same code with only minor changes in the
  23 *       way the data is stored; this is to support the abstraction
  24 *       of an optional secure memory allocation which may be used
  25 *       to avoid revealing of sensitive data due to paging etc.
  26 *       The GNU MP Library itself is published under the LGPL;
  27 *       however I decided to publish this code under the plain GPL.
  28 */
  29
  30#include <linux/string.h>
  31#include "mpi-internal.h"
  32#include "longlong.h"
  33
  34#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace)          \
  35        do {                                                    \
  36                if ((size) < KARATSUBA_THRESHOLD)               \
  37                        mul_n_basecase(prodp, up, vp, size);    \
  38                else                                            \
  39                        mul_n(prodp, up, vp, size, tspace);     \
  40        } while (0);
  41
  42#define MPN_SQR_N_RECURSE(prodp, up, size, tspace)              \
  43        do {                                                    \
  44                if ((size) < KARATSUBA_THRESHOLD)               \
  45                        mpih_sqr_n_basecase(prodp, up, size);   \
  46                else                                            \
  47                        mpih_sqr_n(prodp, up, size, tspace);    \
  48        } while (0);
  49
  50/* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
  51 * both with SIZE limbs, and store the result at PRODP.  2 * SIZE limbs are
  52 * always stored.  Return the most significant limb.
  53 *
  54 * Argument constraints:
  55 * 1. PRODP != UP and PRODP != VP, i.e. the destination
  56 *    must be distinct from the multiplier and the multiplicand.
  57 *
  58 *
  59 * Handle simple cases with traditional multiplication.
  60 *
  61 * This is the most critical code of multiplication.  All multiplies rely
  62 * on this, both small and huge.  Small ones arrive here immediately.  Huge
  63 * ones arrive here as this is the base case for Karatsuba's recursive
  64 * algorithm below.
  65 */
  66
  67static mpi_limb_t
  68mul_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size)
  69{
  70        mpi_size_t i;
  71        mpi_limb_t cy;
  72        mpi_limb_t v_limb;
  73
  74        /* Multiply by the first limb in V separately, as the result can be
  75         * stored (not added) to PROD.  We also avoid a loop for zeroing.  */
  76        v_limb = vp[0];
  77        if (v_limb <= 1) {
  78                if (v_limb == 1)
  79                        MPN_COPY(prodp, up, size);
  80                else
  81                        MPN_ZERO(prodp, size);
  82                cy = 0;
  83        } else
  84                cy = mpihelp_mul_1(prodp, up, size, v_limb);
  85
  86        prodp[size] = cy;
  87        prodp++;
  88
  89        /* For each iteration in the outer loop, multiply one limb from
  90         * U with one limb from V, and add it to PROD.  */
  91        for (i = 1; i < size; i++) {
  92                v_limb = vp[i];
  93                if (v_limb <= 1) {
  94                        cy = 0;
  95                        if (v_limb == 1)
  96                                cy = mpihelp_add_n(prodp, prodp, up, size);
  97                } else
  98                        cy = mpihelp_addmul_1(prodp, up, size, v_limb);
  99
 100                prodp[size] = cy;
 101                prodp++;
 102        }
 103
 104        return cy;
 105}
 106
 107static void
 108mul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp,
 109                mpi_size_t size, mpi_ptr_t tspace)
 110{
 111        if (size & 1) {
 112                /* The size is odd, and the code below doesn't handle that.
 113                 * Multiply the least significant (size - 1) limbs with a recursive
 114                 * call, and handle the most significant limb of S1 and S2
 115                 * separately.
 116                 * A slightly faster way to do this would be to make the Karatsuba
 117                 * code below behave as if the size were even, and let it check for
 118                 * odd size in the end.  I.e., in essence move this code to the end.
 119                 * Doing so would save us a recursive call, and potentially make the
 120                 * stack grow a lot less.
 121                 */
 122                mpi_size_t esize = size - 1;    /* even size */
 123                mpi_limb_t cy_limb;
 124
 125                MPN_MUL_N_RECURSE(prodp, up, vp, esize, tspace);
 126                cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, vp[esize]);
 127                prodp[esize + esize] = cy_limb;
 128                cy_limb = mpihelp_addmul_1(prodp + esize, vp, size, up[esize]);
 129                prodp[esize + size] = cy_limb;
 130        } else {
 131                /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.
 132                 *
 133                 * Split U in two pieces, U1 and U0, such that
 134                 * U = U0 + U1*(B**n),
 135                 * and V in V1 and V0, such that
 136                 * V = V0 + V1*(B**n).
 137                 *
 138                 * UV is then computed recursively using the identity
 139                 *
 140                 *        2n   n          n                     n
 141                 * UV = (B  + B )U V  +  B (U -U )(V -V )  +  (B + 1)U V
 142                 *                1 1        1  0   0  1              0 0
 143                 *
 144                 * Where B = 2**BITS_PER_MP_LIMB.
 145                 */
 146                mpi_size_t hsize = size >> 1;
 147                mpi_limb_t cy;
 148                int negflg;
 149
 150                /* Product H.      ________________  ________________
 151                 *                |_____U1 x V1____||____U0 x V0_____|
 152                 * Put result in upper part of PROD and pass low part of TSPACE
 153                 * as new TSPACE.
 154                 */
 155                MPN_MUL_N_RECURSE(prodp + size, up + hsize, vp + hsize, hsize,
 156                                  tspace);
 157
 158                /* Product M.      ________________
 159                 *                |_(U1-U0)(V0-V1)_|
 160                 */
 161                if (mpihelp_cmp(up + hsize, up, hsize) >= 0) {
 162                        mpihelp_sub_n(prodp, up + hsize, up, hsize);
 163                        negflg = 0;
 164                } else {
 165                        mpihelp_sub_n(prodp, up, up + hsize, hsize);
 166                        negflg = 1;
 167                }
 168                if (mpihelp_cmp(vp + hsize, vp, hsize) >= 0) {
 169                        mpihelp_sub_n(prodp + hsize, vp + hsize, vp, hsize);
 170                        negflg ^= 1;
 171                } else {
 172                        mpihelp_sub_n(prodp + hsize, vp, vp + hsize, hsize);
 173                        /* No change of NEGFLG.  */
 174                }
 175                /* Read temporary operands from low part of PROD.
 176                 * Put result in low part of TSPACE using upper part of TSPACE
 177                 * as new TSPACE.
 178                 */
 179                MPN_MUL_N_RECURSE(tspace, prodp, prodp + hsize, hsize,
 180                                  tspace + size);
 181
 182                /* Add/copy product H. */
 183                MPN_COPY(prodp + hsize, prodp + size, hsize);
 184                cy = mpihelp_add_n(prodp + size, prodp + size,
 185                                   prodp + size + hsize, hsize);
 186
 187                /* Add product M (if NEGFLG M is a negative number) */
 188                if (negflg)
 189                        cy -=
 190                            mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace,
 191                                          size);
 192                else
 193                        cy +=
 194                            mpihelp_add_n(prodp + hsize, prodp + hsize, tspace,
 195                                          size);
 196
 197                /* Product L.      ________________  ________________
 198                 *                |________________||____U0 x V0_____|
 199                 * Read temporary operands from low part of PROD.
 200                 * Put result in low part of TSPACE using upper part of TSPACE
 201                 * as new TSPACE.
 202                 */
 203                MPN_MUL_N_RECURSE(tspace, up, vp, hsize, tspace + size);
 204
 205                /* Add/copy Product L (twice) */
 206
 207                cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
 208                if (cy)
 209                        mpihelp_add_1(prodp + hsize + size,
 210                                      prodp + hsize + size, hsize, cy);
 211
 212                MPN_COPY(prodp, tspace, hsize);
 213                cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize,
 214                                   hsize);
 215                if (cy)
 216                        mpihelp_add_1(prodp + size, prodp + size, size, 1);
 217        }
 218}
 219
 220void mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size)
 221{
 222        mpi_size_t i;
 223        mpi_limb_t cy_limb;
 224        mpi_limb_t v_limb;
 225
 226        /* Multiply by the first limb in V separately, as the result can be
 227         * stored (not added) to PROD.  We also avoid a loop for zeroing.  */
 228        v_limb = up[0];
 229        if (v_limb <= 1) {
 230                if (v_limb == 1)
 231                        MPN_COPY(prodp, up, size);
 232                else
 233                        MPN_ZERO(prodp, size);
 234                cy_limb = 0;
 235        } else
 236                cy_limb = mpihelp_mul_1(prodp, up, size, v_limb);
 237
 238        prodp[size] = cy_limb;
 239        prodp++;
 240
 241        /* For each iteration in the outer loop, multiply one limb from
 242         * U with one limb from V, and add it to PROD.  */
 243        for (i = 1; i < size; i++) {
 244                v_limb = up[i];
 245                if (v_limb <= 1) {
 246                        cy_limb = 0;
 247                        if (v_limb == 1)
 248                                cy_limb = mpihelp_add_n(prodp, prodp, up, size);
 249                } else
 250                        cy_limb = mpihelp_addmul_1(prodp, up, size, v_limb);
 251
 252                prodp[size] = cy_limb;
 253                prodp++;
 254        }
 255}
 256
 257void
 258mpih_sqr_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace)
 259{
 260        if (size & 1) {
 261                /* The size is odd, and the code below doesn't handle that.
 262                 * Multiply the least significant (size - 1) limbs with a recursive
 263                 * call, and handle the most significant limb of S1 and S2
 264                 * separately.
 265                 * A slightly faster way to do this would be to make the Karatsuba
 266                 * code below behave as if the size were even, and let it check for
 267                 * odd size in the end.  I.e., in essence move this code to the end.
 268                 * Doing so would save us a recursive call, and potentially make the
 269                 * stack grow a lot less.
 270                 */
 271                mpi_size_t esize = size - 1;    /* even size */
 272                mpi_limb_t cy_limb;
 273
 274                MPN_SQR_N_RECURSE(prodp, up, esize, tspace);
 275                cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, up[esize]);
 276                prodp[esize + esize] = cy_limb;
 277                cy_limb = mpihelp_addmul_1(prodp + esize, up, size, up[esize]);
 278
 279                prodp[esize + size] = cy_limb;
 280        } else {
 281                mpi_size_t hsize = size >> 1;
 282                mpi_limb_t cy;
 283
 284                /* Product H.      ________________  ________________
 285                 *                |_____U1 x U1____||____U0 x U0_____|
 286                 * Put result in upper part of PROD and pass low part of TSPACE
 287                 * as new TSPACE.
 288                 */
 289                MPN_SQR_N_RECURSE(prodp + size, up + hsize, hsize, tspace);
 290
 291                /* Product M.      ________________
 292                 *                |_(U1-U0)(U0-U1)_|
 293                 */
 294                if (mpihelp_cmp(up + hsize, up, hsize) >= 0)
 295                        mpihelp_sub_n(prodp, up + hsize, up, hsize);
 296                else
 297                        mpihelp_sub_n(prodp, up, up + hsize, hsize);
 298
 299                /* Read temporary operands from low part of PROD.
 300                 * Put result in low part of TSPACE using upper part of TSPACE
 301                 * as new TSPACE.  */
 302                MPN_SQR_N_RECURSE(tspace, prodp, hsize, tspace + size);
 303
 304                /* Add/copy product H  */
 305                MPN_COPY(prodp + hsize, prodp + size, hsize);
 306                cy = mpihelp_add_n(prodp + size, prodp + size,
 307                                   prodp + size + hsize, hsize);
 308
 309                /* Add product M (if NEGFLG M is a negative number).  */
 310                cy -= mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace, size);
 311
 312                /* Product L.      ________________  ________________
 313                 *                |________________||____U0 x U0_____|
 314                 * Read temporary operands from low part of PROD.
 315                 * Put result in low part of TSPACE using upper part of TSPACE
 316                 * as new TSPACE.  */
 317                MPN_SQR_N_RECURSE(tspace, up, hsize, tspace + size);
 318
 319                /* Add/copy Product L (twice).  */
 320                cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
 321                if (cy)
 322                        mpihelp_add_1(prodp + hsize + size,
 323                                      prodp + hsize + size, hsize, cy);
 324
 325                MPN_COPY(prodp, tspace, hsize);
 326                cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize,
 327                                   hsize);
 328                if (cy)
 329                        mpihelp_add_1(prodp + size, prodp + size, size, 1);
 330        }
 331}
 332
 333int
 334mpihelp_mul_karatsuba_case(mpi_ptr_t prodp,
 335                           mpi_ptr_t up, mpi_size_t usize,
 336                           mpi_ptr_t vp, mpi_size_t vsize,
 337                           struct karatsuba_ctx *ctx)
 338{
 339        mpi_limb_t cy;
 340
 341        if (!ctx->tspace || ctx->tspace_size < vsize) {
 342                if (ctx->tspace)
 343                        mpi_free_limb_space(ctx->tspace);
 344                ctx->tspace = mpi_alloc_limb_space(2 * vsize);
 345                if (!ctx->tspace)
 346                        return -ENOMEM;
 347                ctx->tspace_size = vsize;
 348        }
 349
 350        MPN_MUL_N_RECURSE(prodp, up, vp, vsize, ctx->tspace);
 351
 352        prodp += vsize;
 353        up += vsize;
 354        usize -= vsize;
 355        if (usize >= vsize) {
 356                if (!ctx->tp || ctx->tp_size < vsize) {
 357                        if (ctx->tp)
 358                                mpi_free_limb_space(ctx->tp);
 359                        ctx->tp = mpi_alloc_limb_space(2 * vsize);
 360                        if (!ctx->tp) {
 361                                if (ctx->tspace)
 362                                        mpi_free_limb_space(ctx->tspace);
 363                                ctx->tspace = NULL;
 364                                return -ENOMEM;
 365                        }
 366                        ctx->tp_size = vsize;
 367                }
 368
 369                do {
 370                        MPN_MUL_N_RECURSE(ctx->tp, up, vp, vsize, ctx->tspace);
 371                        cy = mpihelp_add_n(prodp, prodp, ctx->tp, vsize);
 372                        mpihelp_add_1(prodp + vsize, ctx->tp + vsize, vsize,
 373                                      cy);
 374                        prodp += vsize;
 375                        up += vsize;
 376                        usize -= vsize;
 377                } while (usize >= vsize);
 378        }
 379
 380        if (usize) {
 381                if (usize < KARATSUBA_THRESHOLD) {
 382                        mpi_limb_t tmp;
 383                        if (mpihelp_mul(ctx->tspace, vp, vsize, up, usize, &tmp)
 384                            < 0)
 385                                return -ENOMEM;
 386                } else {
 387                        if (!ctx->next) {
 388                                ctx->next = kzalloc(sizeof *ctx, GFP_KERNEL);
 389                                if (!ctx->next)
 390                                        return -ENOMEM;
 391                        }
 392                        if (mpihelp_mul_karatsuba_case(ctx->tspace,
 393                                                       vp, vsize,
 394                                                       up, usize,
 395                                                       ctx->next) < 0)
 396                                return -ENOMEM;
 397                }
 398
 399                cy = mpihelp_add_n(prodp, prodp, ctx->tspace, vsize);
 400                mpihelp_add_1(prodp + vsize, ctx->tspace + vsize, usize, cy);
 401        }
 402
 403        return 0;
 404}
 405
 406void mpihelp_release_karatsuba_ctx(struct karatsuba_ctx *ctx)
 407{
 408        struct karatsuba_ctx *ctx2;
 409
 410        if (ctx->tp)
 411                mpi_free_limb_space(ctx->tp);
 412        if (ctx->tspace)
 413                mpi_free_limb_space(ctx->tspace);
 414        for (ctx = ctx->next; ctx; ctx = ctx2) {
 415                ctx2 = ctx->next;
 416                if (ctx->tp)
 417                        mpi_free_limb_space(ctx->tp);
 418                if (ctx->tspace)
 419                        mpi_free_limb_space(ctx->tspace);
 420                kfree(ctx);
 421        }
 422}
 423
 424/* Multiply the natural numbers u (pointed to by UP, with USIZE limbs)
 425 * and v (pointed to by VP, with VSIZE limbs), and store the result at
 426 * PRODP.  USIZE + VSIZE limbs are always stored, but if the input
 427 * operands are normalized.  Return the most significant limb of the
 428 * result.
 429 *
 430 * NOTE: The space pointed to by PRODP is overwritten before finished
 431 * with U and V, so overlap is an error.
 432 *
 433 * Argument constraints:
 434 * 1. USIZE >= VSIZE.
 435 * 2. PRODP != UP and PRODP != VP, i.e. the destination
 436 *    must be distinct from the multiplier and the multiplicand.
 437 */
 438
 439int
 440mpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
 441            mpi_ptr_t vp, mpi_size_t vsize, mpi_limb_t *_result)
 442{
 443        mpi_ptr_t prod_endp = prodp + usize + vsize - 1;
 444        mpi_limb_t cy;
 445        struct karatsuba_ctx ctx;
 446
 447        if (vsize < KARATSUBA_THRESHOLD) {
 448                mpi_size_t i;
 449                mpi_limb_t v_limb;
 450
 451                if (!vsize) {
 452                        *_result = 0;
 453                        return 0;
 454                }
 455
 456                /* Multiply by the first limb in V separately, as the result can be
 457                 * stored (not added) to PROD.  We also avoid a loop for zeroing.  */
 458                v_limb = vp[0];
 459                if (v_limb <= 1) {
 460                        if (v_limb == 1)
 461                                MPN_COPY(prodp, up, usize);
 462                        else
 463                                MPN_ZERO(prodp, usize);
 464                        cy = 0;
 465                } else
 466                        cy = mpihelp_mul_1(prodp, up, usize, v_limb);
 467
 468                prodp[usize] = cy;
 469                prodp++;
 470
 471                /* For each iteration in the outer loop, multiply one limb from
 472                 * U with one limb from V, and add it to PROD.  */
 473                for (i = 1; i < vsize; i++) {
 474                        v_limb = vp[i];
 475                        if (v_limb <= 1) {
 476                                cy = 0;
 477                                if (v_limb == 1)
 478                                        cy = mpihelp_add_n(prodp, prodp, up,
 479                                                           usize);
 480                        } else
 481                                cy = mpihelp_addmul_1(prodp, up, usize, v_limb);
 482
 483                        prodp[usize] = cy;
 484                        prodp++;
 485                }
 486
 487                *_result = cy;
 488                return 0;
 489        }
 490
 491        memset(&ctx, 0, sizeof ctx);
 492        if (mpihelp_mul_karatsuba_case(prodp, up, usize, vp, vsize, &ctx) < 0)
 493                return -ENOMEM;
 494        mpihelp_release_karatsuba_ctx(&ctx);
 495        *_result = *prod_endp;
 496        return 0;
 497}
 498