busybox/libbb/hash_md5_sha.c
<<
>>
Prefs
   1/* vi: set sw=4 ts=4: */
   2/*
   3 * Utility routines.
   4 *
   5 * Copyright (C) 2010 Denys Vlasenko
   6 *
   7 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
   8 */
   9
  10#include "libbb.h"
  11
  12/* gcc 4.2.1 optimizes rotr64 better with inline than with macro
  13 * (for rotX32, there is no difference). Why? My guess is that
  14 * macro requires clever common subexpression elimination heuristics
  15 * in gcc, while inline basically forces it to happen.
  16 */
  17//#define rotl32(x,n) (((x) << (n)) | ((x) >> (32 - (n))))
  18static ALWAYS_INLINE uint32_t rotl32(uint32_t x, unsigned n)
  19{
  20        return (x << n) | (x >> (32 - n));
  21}
  22//#define rotr32(x,n) (((x) >> (n)) | ((x) << (32 - (n))))
  23static ALWAYS_INLINE uint32_t rotr32(uint32_t x, unsigned n)
  24{
  25        return (x >> n) | (x << (32 - n));
  26}
  27/* rotr64 in needed for sha512 only: */
  28//#define rotr64(x,n) (((x) >> (n)) | ((x) << (64 - (n))))
  29static ALWAYS_INLINE uint64_t rotr64(uint64_t x, unsigned n)
  30{
  31        return (x >> n) | (x << (64 - n));
  32}
  33
  34
  35/* Feed data through a temporary buffer.
  36 * The internal buffer remembers previous data until it has 64
  37 * bytes worth to pass on.
  38 */
  39static void FAST_FUNC common64_hash(md5_ctx_t *ctx, const void *buffer, size_t len)
  40{
  41        unsigned bufpos = ctx->total64 & 63;
  42
  43        ctx->total64 += len;
  44
  45        while (1) {
  46                unsigned remaining = 64 - bufpos;
  47                if (remaining > len)
  48                        remaining = len;
  49                /* Copy data into aligned buffer */
  50                memcpy(ctx->wbuffer + bufpos, buffer, remaining);
  51                len -= remaining;
  52                buffer = (const char *)buffer + remaining;
  53                bufpos += remaining;
  54                /* clever way to do "if (bufpos != 64) break; ... ; bufpos = 0;" */
  55                bufpos -= 64;
  56                if (bufpos != 0)
  57                        break;
  58                /* Buffer is filled up, process it */
  59                ctx->process_block(ctx);
  60                /*bufpos = 0; - already is */
  61        }
  62}
  63
  64/* Process the remaining bytes in the buffer */
  65static void FAST_FUNC common64_end(md5_ctx_t *ctx, int swap_needed)
  66{
  67        unsigned bufpos = ctx->total64 & 63;
  68        /* Pad the buffer to the next 64-byte boundary with 0x80,0,0,0... */
  69        ctx->wbuffer[bufpos++] = 0x80;
  70
  71        /* This loop iterates either once or twice, no more, no less */
  72        while (1) {
  73                unsigned remaining = 64 - bufpos;
  74                memset(ctx->wbuffer + bufpos, 0, remaining);
  75                /* Do we have enough space for the length count? */
  76                if (remaining >= 8) {
  77                        /* Store the 64-bit counter of bits in the buffer */
  78                        uint64_t t = ctx->total64 << 3;
  79                        if (swap_needed)
  80                                t = bb_bswap_64(t);
  81                        /* wbuffer is suitably aligned for this */
  82                        *(uint64_t *) (&ctx->wbuffer[64 - 8]) = t;
  83                }
  84                ctx->process_block(ctx);
  85                if (remaining >= 8)
  86                        break;
  87                bufpos = 0;
  88        }
  89}
  90
  91
  92/*
  93 * Compute MD5 checksum of strings according to the
  94 * definition of MD5 in RFC 1321 from April 1992.
  95 *
  96 * Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
  97 *
  98 * Copyright (C) 1995-1999 Free Software Foundation, Inc.
  99 * Copyright (C) 2001 Manuel Novoa III
 100 * Copyright (C) 2003 Glenn L. McGrath
 101 * Copyright (C) 2003 Erik Andersen
 102 *
 103 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
 104 */
 105
 106/* 0: fastest, 3: smallest */
 107#if CONFIG_MD5_SIZE_VS_SPEED < 0
 108# define MD5_SIZE_VS_SPEED 0
 109#elif CONFIG_MD5_SIZE_VS_SPEED > 3
 110# define MD5_SIZE_VS_SPEED 3
 111#else
 112# define MD5_SIZE_VS_SPEED CONFIG_MD5_SIZE_VS_SPEED
 113#endif
 114
 115/* These are the four functions used in the four steps of the MD5 algorithm
 116 * and defined in the RFC 1321.  The first function is a little bit optimized
 117 * (as found in Colin Plumbs public domain implementation).
 118 * #define FF(b, c, d) ((b & c) | (~b & d))
 119 */
 120#undef FF
 121#undef FG
 122#undef FH
 123#undef FI
 124#define FF(b, c, d) (d ^ (b & (c ^ d)))
 125#define FG(b, c, d) FF(d, b, c)
 126#define FH(b, c, d) (b ^ c ^ d)
 127#define FI(b, c, d) (c ^ (b | ~d))
 128
 129/* Hash a single block, 64 bytes long and 4-byte aligned */
 130static void FAST_FUNC md5_process_block64(md5_ctx_t *ctx)
 131{
 132#if MD5_SIZE_VS_SPEED > 0
 133        /* Before we start, one word to the strange constants.
 134           They are defined in RFC 1321 as
 135           T[i] = (int)(4294967296.0 * fabs(sin(i))), i=1..64
 136         */
 137        static const uint32_t C_array[] = {
 138                /* round 1 */
 139                0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
 140                0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
 141                0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
 142                0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
 143                /* round 2 */
 144                0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
 145                0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
 146                0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
 147                0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
 148                /* round 3 */
 149                0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
 150                0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
 151                0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x4881d05,
 152                0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
 153                /* round 4 */
 154                0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
 155                0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
 156                0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
 157                0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
 158        };
 159        static const char P_array[] ALIGN1 = {
 160# if MD5_SIZE_VS_SPEED > 1
 161                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, /* 1 */
 162# endif
 163                1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, /* 2 */
 164                5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, /* 3 */
 165                0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9  /* 4 */
 166        };
 167#endif
 168        uint32_t *words = (void*) ctx->wbuffer;
 169        uint32_t A = ctx->hash[0];
 170        uint32_t B = ctx->hash[1];
 171        uint32_t C = ctx->hash[2];
 172        uint32_t D = ctx->hash[3];
 173
 174#if MD5_SIZE_VS_SPEED >= 2  /* 2 or 3 */
 175
 176        static const char S_array[] ALIGN1 = {
 177                7, 12, 17, 22,
 178                5, 9, 14, 20,
 179                4, 11, 16, 23,
 180                6, 10, 15, 21
 181        };
 182        const uint32_t *pc;
 183        const char *pp;
 184        const char *ps;
 185        int i;
 186        uint32_t temp;
 187
 188# if BB_BIG_ENDIAN
 189        for (i = 0; i < 16; i++)
 190                words[i] = SWAP_LE32(words[i]);
 191# endif
 192
 193# if MD5_SIZE_VS_SPEED == 3
 194        pc = C_array;
 195        pp = P_array;
 196        ps = S_array - 4;
 197
 198        for (i = 0; i < 64; i++) {
 199                if ((i & 0x0f) == 0)
 200                        ps += 4;
 201                temp = A;
 202                switch (i >> 4) {
 203                case 0:
 204                        temp += FF(B, C, D);
 205                        break;
 206                case 1:
 207                        temp += FG(B, C, D);
 208                        break;
 209                case 2:
 210                        temp += FH(B, C, D);
 211                        break;
 212                case 3:
 213                        temp += FI(B, C, D);
 214                }
 215                temp += words[(int) (*pp++)] + *pc++;
 216                temp = rotl32(temp, ps[i & 3]);
 217                temp += B;
 218                A = D;
 219                D = C;
 220                C = B;
 221                B = temp;
 222        }
 223# else  /* MD5_SIZE_VS_SPEED == 2 */
 224        pc = C_array;
 225        pp = P_array;
 226        ps = S_array;
 227
 228        for (i = 0; i < 16; i++) {
 229                temp = A + FF(B, C, D) + words[(int) (*pp++)] + *pc++;
 230                temp = rotl32(temp, ps[i & 3]);
 231                temp += B;
 232                A = D;
 233                D = C;
 234                C = B;
 235                B = temp;
 236        }
 237        ps += 4;
 238        for (i = 0; i < 16; i++) {
 239                temp = A + FG(B, C, D) + words[(int) (*pp++)] + *pc++;
 240                temp = rotl32(temp, ps[i & 3]);
 241                temp += B;
 242                A = D;
 243                D = C;
 244                C = B;
 245                B = temp;
 246        }
 247        ps += 4;
 248        for (i = 0; i < 16; i++) {
 249                temp = A + FH(B, C, D) + words[(int) (*pp++)] + *pc++;
 250                temp = rotl32(temp, ps[i & 3]);
 251                temp += B;
 252                A = D;
 253                D = C;
 254                C = B;
 255                B = temp;
 256        }
 257        ps += 4;
 258        for (i = 0; i < 16; i++) {
 259                temp = A + FI(B, C, D) + words[(int) (*pp++)] + *pc++;
 260                temp = rotl32(temp, ps[i & 3]);
 261                temp += B;
 262                A = D;
 263                D = C;
 264                C = B;
 265                B = temp;
 266        }
 267# endif
 268        /* Add checksum to the starting values */
 269        ctx->hash[0] += A;
 270        ctx->hash[1] += B;
 271        ctx->hash[2] += C;
 272        ctx->hash[3] += D;
 273
 274#else  /* MD5_SIZE_VS_SPEED == 0 or 1 */
 275
 276        uint32_t A_save = A;
 277        uint32_t B_save = B;
 278        uint32_t C_save = C;
 279        uint32_t D_save = D;
 280# if MD5_SIZE_VS_SPEED == 1
 281        const uint32_t *pc;
 282        const char *pp;
 283        int i;
 284# endif
 285
 286        /* First round: using the given function, the context and a constant
 287           the next context is computed.  Because the algorithm's processing
 288           unit is a 32-bit word and it is determined to work on words in
 289           little endian byte order we perhaps have to change the byte order
 290           before the computation.  To reduce the work for the next steps
 291           we save swapped words in WORDS array.  */
 292# undef OP
 293# define OP(a, b, c, d, s, T) \
 294        do { \
 295                a += FF(b, c, d) + (*words IF_BIG_ENDIAN(= SWAP_LE32(*words))) + T; \
 296                words++; \
 297                a = rotl32(a, s); \
 298                a += b; \
 299        } while (0)
 300
 301        /* Round 1 */
 302# if MD5_SIZE_VS_SPEED == 1
 303        pc = C_array;
 304        for (i = 0; i < 4; i++) {
 305                OP(A, B, C, D, 7, *pc++);
 306                OP(D, A, B, C, 12, *pc++);
 307                OP(C, D, A, B, 17, *pc++);
 308                OP(B, C, D, A, 22, *pc++);
 309        }
 310# else
 311        OP(A, B, C, D, 7, 0xd76aa478);
 312        OP(D, A, B, C, 12, 0xe8c7b756);
 313        OP(C, D, A, B, 17, 0x242070db);
 314        OP(B, C, D, A, 22, 0xc1bdceee);
 315        OP(A, B, C, D, 7, 0xf57c0faf);
 316        OP(D, A, B, C, 12, 0x4787c62a);
 317        OP(C, D, A, B, 17, 0xa8304613);
 318        OP(B, C, D, A, 22, 0xfd469501);
 319        OP(A, B, C, D, 7, 0x698098d8);
 320        OP(D, A, B, C, 12, 0x8b44f7af);
 321        OP(C, D, A, B, 17, 0xffff5bb1);
 322        OP(B, C, D, A, 22, 0x895cd7be);
 323        OP(A, B, C, D, 7, 0x6b901122);
 324        OP(D, A, B, C, 12, 0xfd987193);
 325        OP(C, D, A, B, 17, 0xa679438e);
 326        OP(B, C, D, A, 22, 0x49b40821);
 327# endif
 328        words -= 16;
 329
 330        /* For the second to fourth round we have the possibly swapped words
 331           in WORDS.  Redefine the macro to take an additional first
 332           argument specifying the function to use.  */
 333# undef OP
 334# define OP(f, a, b, c, d, k, s, T) \
 335        do { \
 336                a += f(b, c, d) + words[k] + T; \
 337                a = rotl32(a, s); \
 338                a += b; \
 339        } while (0)
 340
 341        /* Round 2 */
 342# if MD5_SIZE_VS_SPEED == 1
 343        pp = P_array;
 344        for (i = 0; i < 4; i++) {
 345                OP(FG, A, B, C, D, (int) (*pp++), 5, *pc++);
 346                OP(FG, D, A, B, C, (int) (*pp++), 9, *pc++);
 347                OP(FG, C, D, A, B, (int) (*pp++), 14, *pc++);
 348                OP(FG, B, C, D, A, (int) (*pp++), 20, *pc++);
 349        }
 350# else
 351        OP(FG, A, B, C, D, 1, 5, 0xf61e2562);
 352        OP(FG, D, A, B, C, 6, 9, 0xc040b340);
 353        OP(FG, C, D, A, B, 11, 14, 0x265e5a51);
 354        OP(FG, B, C, D, A, 0, 20, 0xe9b6c7aa);
 355        OP(FG, A, B, C, D, 5, 5, 0xd62f105d);
 356        OP(FG, D, A, B, C, 10, 9, 0x02441453);
 357        OP(FG, C, D, A, B, 15, 14, 0xd8a1e681);
 358        OP(FG, B, C, D, A, 4, 20, 0xe7d3fbc8);
 359        OP(FG, A, B, C, D, 9, 5, 0x21e1cde6);
 360        OP(FG, D, A, B, C, 14, 9, 0xc33707d6);
 361        OP(FG, C, D, A, B, 3, 14, 0xf4d50d87);
 362        OP(FG, B, C, D, A, 8, 20, 0x455a14ed);
 363        OP(FG, A, B, C, D, 13, 5, 0xa9e3e905);
 364        OP(FG, D, A, B, C, 2, 9, 0xfcefa3f8);
 365        OP(FG, C, D, A, B, 7, 14, 0x676f02d9);
 366        OP(FG, B, C, D, A, 12, 20, 0x8d2a4c8a);
 367# endif
 368
 369        /* Round 3 */
 370# if MD5_SIZE_VS_SPEED == 1
 371        for (i = 0; i < 4; i++) {
 372                OP(FH, A, B, C, D, (int) (*pp++), 4, *pc++);
 373                OP(FH, D, A, B, C, (int) (*pp++), 11, *pc++);
 374                OP(FH, C, D, A, B, (int) (*pp++), 16, *pc++);
 375                OP(FH, B, C, D, A, (int) (*pp++), 23, *pc++);
 376        }
 377# else
 378        OP(FH, A, B, C, D, 5, 4, 0xfffa3942);
 379        OP(FH, D, A, B, C, 8, 11, 0x8771f681);
 380        OP(FH, C, D, A, B, 11, 16, 0x6d9d6122);
 381        OP(FH, B, C, D, A, 14, 23, 0xfde5380c);
 382        OP(FH, A, B, C, D, 1, 4, 0xa4beea44);
 383        OP(FH, D, A, B, C, 4, 11, 0x4bdecfa9);
 384        OP(FH, C, D, A, B, 7, 16, 0xf6bb4b60);
 385        OP(FH, B, C, D, A, 10, 23, 0xbebfbc70);
 386        OP(FH, A, B, C, D, 13, 4, 0x289b7ec6);
 387        OP(FH, D, A, B, C, 0, 11, 0xeaa127fa);
 388        OP(FH, C, D, A, B, 3, 16, 0xd4ef3085);
 389        OP(FH, B, C, D, A, 6, 23, 0x04881d05);
 390        OP(FH, A, B, C, D, 9, 4, 0xd9d4d039);
 391        OP(FH, D, A, B, C, 12, 11, 0xe6db99e5);
 392        OP(FH, C, D, A, B, 15, 16, 0x1fa27cf8);
 393        OP(FH, B, C, D, A, 2, 23, 0xc4ac5665);
 394# endif
 395
 396        /* Round 4 */
 397# if MD5_SIZE_VS_SPEED == 1
 398        for (i = 0; i < 4; i++) {
 399                OP(FI, A, B, C, D, (int) (*pp++), 6, *pc++);
 400                OP(FI, D, A, B, C, (int) (*pp++), 10, *pc++);
 401                OP(FI, C, D, A, B, (int) (*pp++), 15, *pc++);
 402                OP(FI, B, C, D, A, (int) (*pp++), 21, *pc++);
 403        }
 404# else
 405        OP(FI, A, B, C, D, 0, 6, 0xf4292244);
 406        OP(FI, D, A, B, C, 7, 10, 0x432aff97);
 407        OP(FI, C, D, A, B, 14, 15, 0xab9423a7);
 408        OP(FI, B, C, D, A, 5, 21, 0xfc93a039);
 409        OP(FI, A, B, C, D, 12, 6, 0x655b59c3);
 410        OP(FI, D, A, B, C, 3, 10, 0x8f0ccc92);
 411        OP(FI, C, D, A, B, 10, 15, 0xffeff47d);
 412        OP(FI, B, C, D, A, 1, 21, 0x85845dd1);
 413        OP(FI, A, B, C, D, 8, 6, 0x6fa87e4f);
 414        OP(FI, D, A, B, C, 15, 10, 0xfe2ce6e0);
 415        OP(FI, C, D, A, B, 6, 15, 0xa3014314);
 416        OP(FI, B, C, D, A, 13, 21, 0x4e0811a1);
 417        OP(FI, A, B, C, D, 4, 6, 0xf7537e82);
 418        OP(FI, D, A, B, C, 11, 10, 0xbd3af235);
 419        OP(FI, C, D, A, B, 2, 15, 0x2ad7d2bb);
 420        OP(FI, B, C, D, A, 9, 21, 0xeb86d391);
 421# undef OP
 422# endif
 423        /* Add checksum to the starting values */
 424        ctx->hash[0] = A_save + A;
 425        ctx->hash[1] = B_save + B;
 426        ctx->hash[2] = C_save + C;
 427        ctx->hash[3] = D_save + D;
 428#endif
 429}
 430#undef FF
 431#undef FG
 432#undef FH
 433#undef FI
 434
 435/* Initialize structure containing state of computation.
 436 * (RFC 1321, 3.3: Step 3)
 437 */
 438void FAST_FUNC md5_begin(md5_ctx_t *ctx)
 439{
 440        ctx->hash[0] = 0x67452301;
 441        ctx->hash[1] = 0xefcdab89;
 442        ctx->hash[2] = 0x98badcfe;
 443        ctx->hash[3] = 0x10325476;
 444        ctx->total64 = 0;
 445        ctx->process_block = md5_process_block64;
 446}
 447
 448/* Used also for sha1 and sha256 */
 449void FAST_FUNC md5_hash(md5_ctx_t *ctx, const void *buffer, size_t len)
 450{
 451        common64_hash(ctx, buffer, len);
 452}
 453
 454/* Process the remaining bytes in the buffer and put result from CTX
 455 * in first 16 bytes following RESBUF.  The result is always in little
 456 * endian byte order, so that a byte-wise output yields to the wanted
 457 * ASCII representation of the message digest.
 458 */
 459void FAST_FUNC md5_end(md5_ctx_t *ctx, void *resbuf)
 460{
 461        /* MD5 stores total in LE, need to swap on BE arches: */
 462        common64_end(ctx, /*swap_needed:*/ BB_BIG_ENDIAN);
 463
 464        /* The MD5 result is in little endian byte order */
 465#if BB_BIG_ENDIAN
 466        ctx->hash[0] = SWAP_LE32(ctx->hash[0]);
 467        ctx->hash[1] = SWAP_LE32(ctx->hash[1]);
 468        ctx->hash[2] = SWAP_LE32(ctx->hash[2]);
 469        ctx->hash[3] = SWAP_LE32(ctx->hash[3]);
 470#endif
 471        memcpy(resbuf, ctx->hash, sizeof(ctx->hash[0]) * 4);
 472}
 473
 474
 475/*
 476 * SHA1 part is:
 477 * Copyright 2007 Rob Landley <rob@landley.net>
 478 *
 479 * Based on the public domain SHA-1 in C by Steve Reid <steve@edmweb.com>
 480 * from http://www.mirrors.wiretapped.net/security/cryptography/hashes/sha1/
 481 *
 482 * Licensed under GPLv2, see file LICENSE in this source tree.
 483 *
 484 * ---------------------------------------------------------------------------
 485 *
 486 * SHA256 and SHA512 parts are:
 487 * Released into the Public Domain by Ulrich Drepper <drepper@redhat.com>.
 488 * Shrank by Denys Vlasenko.
 489 *
 490 * ---------------------------------------------------------------------------
 491 *
 492 * The best way to test random blocksizes is to go to coreutils/md5_sha1_sum.c
 493 * and replace "4096" with something like "2000 + time(NULL) % 2097",
 494 * then rebuild and compare "shaNNNsum bigfile" results.
 495 */
 496
 497static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx)
 498{
 499        static const uint32_t rconsts[] = {
 500                0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6
 501        };
 502        int i, j;
 503        int cnt;
 504        uint32_t W[16+16];
 505        uint32_t a, b, c, d, e;
 506
 507        /* On-stack work buffer frees up one register in the main loop
 508         * which otherwise will be needed to hold ctx pointer */
 509        for (i = 0; i < 16; i++)
 510                W[i] = W[i+16] = SWAP_BE32(((uint32_t*)ctx->wbuffer)[i]);
 511
 512        a = ctx->hash[0];
 513        b = ctx->hash[1];
 514        c = ctx->hash[2];
 515        d = ctx->hash[3];
 516        e = ctx->hash[4];
 517
 518        /* 4 rounds of 20 operations each */
 519        cnt = 0;
 520        for (i = 0; i < 4; i++) {
 521                j = 19;
 522                do {
 523                        uint32_t work;
 524
 525                        work = c ^ d;
 526                        if (i == 0) {
 527                                work = (work & b) ^ d;
 528                                if (j <= 3)
 529                                        goto ge16;
 530                                /* Used to do SWAP_BE32 here, but this
 531                                 * requires ctx (see comment above) */
 532                                work += W[cnt];
 533                        } else {
 534                                if (i == 2)
 535                                        work = ((b | c) & d) | (b & c);
 536                                else /* i = 1 or 3 */
 537                                        work ^= b;
 538 ge16:
 539                                W[cnt] = W[cnt+16] = rotl32(W[cnt+13] ^ W[cnt+8] ^ W[cnt+2] ^ W[cnt], 1);
 540                                work += W[cnt];
 541                        }
 542                        work += e + rotl32(a, 5) + rconsts[i];
 543
 544                        /* Rotate by one for next time */
 545                        e = d;
 546                        d = c;
 547                        c = /* b = */ rotl32(b, 30);
 548                        b = a;
 549                        a = work;
 550                        cnt = (cnt + 1) & 15;
 551                } while (--j >= 0);
 552        }
 553
 554        ctx->hash[0] += a;
 555        ctx->hash[1] += b;
 556        ctx->hash[2] += c;
 557        ctx->hash[3] += d;
 558        ctx->hash[4] += e;
 559}
 560
 561/* Constants for SHA512 from FIPS 180-2:4.2.3.
 562 * SHA256 constants from FIPS 180-2:4.2.2
 563 * are the most significant half of first 64 elements
 564 * of the same array.
 565 */
 566static const uint64_t sha_K[80] = {
 567        0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL,
 568        0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
 569        0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
 570        0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,
 571        0xd807aa98a3030242ULL, 0x12835b0145706fbeULL,
 572        0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
 573        0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL,
 574        0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,
 575        0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,
 576        0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,
 577        0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL,
 578        0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
 579        0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL,
 580        0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,
 581        0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,
 582        0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,
 583        0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL,
 584        0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
 585        0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL,
 586        0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,
 587        0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,
 588        0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,
 589        0xd192e819d6ef5218ULL, 0xd69906245565a910ULL,
 590        0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
 591        0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL,
 592        0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,
 593        0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,
 594        0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,
 595        0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL,
 596        0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
 597        0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL,
 598        0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,
 599        0xca273eceea26619cULL, 0xd186b8c721c0c207ULL, /* [64]+ are used for sha512 only */
 600        0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,
 601        0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL,
 602        0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
 603        0x28db77f523047d84ULL, 0x32caab7b40c72493ULL,
 604        0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,
 605        0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
 606        0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL
 607};
 608
 609#undef Ch
 610#undef Maj
 611#undef S0
 612#undef S1
 613#undef R0
 614#undef R1
 615
 616static void FAST_FUNC sha256_process_block64(sha256_ctx_t *ctx)
 617{
 618        unsigned t;
 619        uint32_t W[64], a, b, c, d, e, f, g, h;
 620        const uint32_t *words = (uint32_t*) ctx->wbuffer;
 621
 622        /* Operators defined in FIPS 180-2:4.1.2.  */
 623#define Ch(x, y, z) ((x & y) ^ (~x & z))
 624#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
 625#define S0(x) (rotr32(x, 2) ^ rotr32(x, 13) ^ rotr32(x, 22))
 626#define S1(x) (rotr32(x, 6) ^ rotr32(x, 11) ^ rotr32(x, 25))
 627#define R0(x) (rotr32(x, 7) ^ rotr32(x, 18) ^ (x >> 3))
 628#define R1(x) (rotr32(x, 17) ^ rotr32(x, 19) ^ (x >> 10))
 629
 630        /* Compute the message schedule according to FIPS 180-2:6.2.2 step 2.  */
 631        for (t = 0; t < 16; ++t)
 632                W[t] = SWAP_BE32(words[t]);
 633        for (/*t = 16*/; t < 64; ++t)
 634                W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16];
 635
 636        a = ctx->hash[0];
 637        b = ctx->hash[1];
 638        c = ctx->hash[2];
 639        d = ctx->hash[3];
 640        e = ctx->hash[4];
 641        f = ctx->hash[5];
 642        g = ctx->hash[6];
 643        h = ctx->hash[7];
 644
 645        /* The actual computation according to FIPS 180-2:6.2.2 step 3.  */
 646        for (t = 0; t < 64; ++t) {
 647                /* Need to fetch upper half of sha_K[t]
 648                 * (I hope compiler is clever enough to just fetch
 649                 * upper half)
 650                 */
 651                uint32_t K_t = sha_K[t] >> 32;
 652                uint32_t T1 = h + S1(e) + Ch(e, f, g) + K_t + W[t];
 653                uint32_t T2 = S0(a) + Maj(a, b, c);
 654                h = g;
 655                g = f;
 656                f = e;
 657                e = d + T1;
 658                d = c;
 659                c = b;
 660                b = a;
 661                a = T1 + T2;
 662        }
 663#undef Ch
 664#undef Maj
 665#undef S0
 666#undef S1
 667#undef R0
 668#undef R1
 669        /* Add the starting values of the context according to FIPS 180-2:6.2.2
 670           step 4.  */
 671        ctx->hash[0] += a;
 672        ctx->hash[1] += b;
 673        ctx->hash[2] += c;
 674        ctx->hash[3] += d;
 675        ctx->hash[4] += e;
 676        ctx->hash[5] += f;
 677        ctx->hash[6] += g;
 678        ctx->hash[7] += h;
 679}
 680
 681static void FAST_FUNC sha512_process_block128(sha512_ctx_t *ctx)
 682{
 683        unsigned t;
 684        uint64_t W[80];
 685        /* On i386, having assignments here (not later as sha256 does)
 686         * produces 99 bytes smaller code with gcc 4.3.1
 687         */
 688        uint64_t a = ctx->hash[0];
 689        uint64_t b = ctx->hash[1];
 690        uint64_t c = ctx->hash[2];
 691        uint64_t d = ctx->hash[3];
 692        uint64_t e = ctx->hash[4];
 693        uint64_t f = ctx->hash[5];
 694        uint64_t g = ctx->hash[6];
 695        uint64_t h = ctx->hash[7];
 696        const uint64_t *words = (uint64_t*) ctx->wbuffer;
 697
 698        /* Operators defined in FIPS 180-2:4.1.2.  */
 699#define Ch(x, y, z) ((x & y) ^ (~x & z))
 700#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
 701#define S0(x) (rotr64(x, 28) ^ rotr64(x, 34) ^ rotr64(x, 39))
 702#define S1(x) (rotr64(x, 14) ^ rotr64(x, 18) ^ rotr64(x, 41))
 703#define R0(x) (rotr64(x, 1) ^ rotr64(x, 8) ^ (x >> 7))
 704#define R1(x) (rotr64(x, 19) ^ rotr64(x, 61) ^ (x >> 6))
 705
 706        /* Compute the message schedule according to FIPS 180-2:6.3.2 step 2.  */
 707        for (t = 0; t < 16; ++t)
 708                W[t] = SWAP_BE64(words[t]);
 709        for (/*t = 16*/; t < 80; ++t)
 710                W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16];
 711
 712        /* The actual computation according to FIPS 180-2:6.3.2 step 3.  */
 713        for (t = 0; t < 80; ++t) {
 714                uint64_t T1 = h + S1(e) + Ch(e, f, g) + sha_K[t] + W[t];
 715                uint64_t T2 = S0(a) + Maj(a, b, c);
 716                h = g;
 717                g = f;
 718                f = e;
 719                e = d + T1;
 720                d = c;
 721                c = b;
 722                b = a;
 723                a = T1 + T2;
 724        }
 725#undef Ch
 726#undef Maj
 727#undef S0
 728#undef S1
 729#undef R0
 730#undef R1
 731        /* Add the starting values of the context according to FIPS 180-2:6.3.2
 732           step 4.  */
 733        ctx->hash[0] += a;
 734        ctx->hash[1] += b;
 735        ctx->hash[2] += c;
 736        ctx->hash[3] += d;
 737        ctx->hash[4] += e;
 738        ctx->hash[5] += f;
 739        ctx->hash[6] += g;
 740        ctx->hash[7] += h;
 741}
 742
 743
 744void FAST_FUNC sha1_begin(sha1_ctx_t *ctx)
 745{
 746        ctx->hash[0] = 0x67452301;
 747        ctx->hash[1] = 0xefcdab89;
 748        ctx->hash[2] = 0x98badcfe;
 749        ctx->hash[3] = 0x10325476;
 750        ctx->hash[4] = 0xc3d2e1f0;
 751        ctx->total64 = 0;
 752        ctx->process_block = sha1_process_block64;
 753}
 754
 755static const uint32_t init256[] = {
 756        0,
 757        0,
 758        0x6a09e667,
 759        0xbb67ae85,
 760        0x3c6ef372,
 761        0xa54ff53a,
 762        0x510e527f,
 763        0x9b05688c,
 764        0x1f83d9ab,
 765        0x5be0cd19,
 766};
 767static const uint32_t init512_lo[] = {
 768        0,
 769        0,
 770        0xf3bcc908,
 771        0x84caa73b,
 772        0xfe94f82b,
 773        0x5f1d36f1,
 774        0xade682d1,
 775        0x2b3e6c1f,
 776        0xfb41bd6b,
 777        0x137e2179,
 778};
 779
 780/* Initialize structure containing state of computation.
 781   (FIPS 180-2:5.3.2)  */
 782void FAST_FUNC sha256_begin(sha256_ctx_t *ctx)
 783{
 784        memcpy(&ctx->total64, init256, sizeof(init256));
 785        /*ctx->total64 = 0; - done by prepending two 32-bit zeros to init256 */
 786        ctx->process_block = sha256_process_block64;
 787}
 788
 789/* Initialize structure containing state of computation.
 790   (FIPS 180-2:5.3.3)  */
 791void FAST_FUNC sha512_begin(sha512_ctx_t *ctx)
 792{
 793        int i;
 794        /* Two extra iterations zero out ctx->total64[2] */
 795        uint64_t *tp = ctx->total64;
 796        for (i = 0; i < 2+8; i++)
 797                tp[i] = ((uint64_t)(init256[i]) << 32) + init512_lo[i];
 798        /*ctx->total64[0] = ctx->total64[1] = 0; - already done */
 799}
 800
 801void FAST_FUNC sha512_hash(sha512_ctx_t *ctx, const void *buffer, size_t len)
 802{
 803        unsigned bufpos = ctx->total64[0] & 127;
 804        unsigned remaining;
 805
 806        /* First increment the byte count.  FIPS 180-2 specifies the possible
 807           length of the file up to 2^128 _bits_.
 808           We compute the number of _bytes_ and convert to bits later.  */
 809        ctx->total64[0] += len;
 810        if (ctx->total64[0] < len)
 811                ctx->total64[1]++;
 812#if 0
 813        remaining = 128 - bufpos;
 814
 815        /* Hash whole blocks */
 816        while (len >= remaining) {
 817                memcpy(ctx->wbuffer + bufpos, buffer, remaining);
 818                buffer = (const char *)buffer + remaining;
 819                len -= remaining;
 820                remaining = 128;
 821                bufpos = 0;
 822                sha512_process_block128(ctx);
 823        }
 824
 825        /* Save last, partial blosk */
 826        memcpy(ctx->wbuffer + bufpos, buffer, len);
 827#else
 828        while (1) {
 829                remaining = 128 - bufpos;
 830                if (remaining > len)
 831                        remaining = len;
 832                /* Copy data into aligned buffer */
 833                memcpy(ctx->wbuffer + bufpos, buffer, remaining);
 834                len -= remaining;
 835                buffer = (const char *)buffer + remaining;
 836                bufpos += remaining;
 837                /* clever way to do "if (bufpos != 128) break; ... ; bufpos = 0;" */
 838                bufpos -= 128;
 839                if (bufpos != 0)
 840                        break;
 841                /* Buffer is filled up, process it */
 842                sha512_process_block128(ctx);
 843                /*bufpos = 0; - already is */
 844        }
 845#endif
 846}
 847
 848/* Used also for sha256 */
 849void FAST_FUNC sha1_end(sha1_ctx_t *ctx, void *resbuf)
 850{
 851        unsigned hash_size;
 852
 853        /* SHA stores total in BE, need to swap on LE arches: */
 854        common64_end(ctx, /*swap_needed:*/ BB_LITTLE_ENDIAN);
 855
 856        hash_size = (ctx->process_block == sha1_process_block64) ? 5 : 8;
 857        /* This way we do not impose alignment constraints on resbuf: */
 858        if (BB_LITTLE_ENDIAN) {
 859                unsigned i;
 860                for (i = 0; i < hash_size; ++i)
 861                        ctx->hash[i] = SWAP_BE32(ctx->hash[i]);
 862        }
 863        memcpy(resbuf, ctx->hash, sizeof(ctx->hash[0]) * hash_size);
 864}
 865
 866void FAST_FUNC sha512_end(sha512_ctx_t *ctx, void *resbuf)
 867{
 868        unsigned bufpos = ctx->total64[0] & 127;
 869
 870        /* Pad the buffer to the next 128-byte boundary with 0x80,0,0,0... */
 871        ctx->wbuffer[bufpos++] = 0x80;
 872
 873        while (1) {
 874                unsigned remaining = 128 - bufpos;
 875                memset(ctx->wbuffer + bufpos, 0, remaining);
 876                if (remaining >= 16) {
 877                        /* Store the 128-bit counter of bits in the buffer in BE format */
 878                        uint64_t t;
 879                        t = ctx->total64[0] << 3;
 880                        t = SWAP_BE64(t);
 881                        *(uint64_t *) (&ctx->wbuffer[128 - 8]) = t;
 882                        t = (ctx->total64[1] << 3) | (ctx->total64[0] >> 61);
 883                        t = SWAP_BE64(t);
 884                        *(uint64_t *) (&ctx->wbuffer[128 - 16]) = t;
 885                }
 886                sha512_process_block128(ctx);
 887                if (remaining >= 16)
 888                        break;
 889                bufpos = 0;
 890        }
 891
 892        if (BB_LITTLE_ENDIAN) {
 893                unsigned i;
 894                for (i = 0; i < ARRAY_SIZE(ctx->hash); ++i)
 895                        ctx->hash[i] = SWAP_BE64(ctx->hash[i]);
 896        }
 897        memcpy(resbuf, ctx->hash, sizeof(ctx->hash));
 898}
 899