/* linux/drivers/staging/skein/skein_block.c */
/*
 ***********************************************************************
 *
 * Implementation of the Skein block functions.
 *
 * Source code author: Doug Whiting, 2008.
 *
 * This algorithm and source code is released to the public domain.
 *
 * Compile-time switches:
 *
 *  SKEIN_USE_ASM  -- set bits (256/512/1024) to select which
 *                    versions use ASM code for block processing
 *                    [default: use C for all block sizes]
 *
 ***********************************************************************
 */
  18
  19#include <linux/string.h>
  20#include <linux/bitops.h>
  21#include "skein_base.h"
  22#include "skein_block.h"
  23
/*****************************  SKEIN_256 ******************************/
  25#if !(SKEIN_USE_ASM & 256)
  26void skein_256_process_block(struct skein_256_ctx *ctx, const u8 *blk_ptr,
  27                             size_t blk_cnt, size_t byte_cnt_add)
  28{ /* do it in C */
  29        enum {
  30                WCNT = SKEIN_256_STATE_WORDS
  31        };
  32        size_t r;
  33#if SKEIN_UNROLL_256
  34        /* key schedule: chaining vars + tweak + "rot"*/
  35        u64  kw[WCNT + 4 + (RCNT * 2)];
  36#else
  37        /* key schedule words : chaining vars + tweak */
  38        u64  kw[WCNT + 4];
  39#endif
  40        u64  X0, X1, X2, X3; /* local copy of context vars, for speed */
  41        u64  w[WCNT]; /* local copy of input block */
  42#ifdef SKEIN_DEBUG
  43        const u64 *X_ptr[4]; /* use for debugging (help cc put Xn in regs) */
  44
  45        X_ptr[0] = &X0;
  46        X_ptr[1] = &X1;
  47        X_ptr[2] = &X2;
  48        X_ptr[3] = &X3;
  49#endif
  50        skein_assert(blk_cnt != 0); /* never call with blk_cnt == 0! */
  51        ts[0] = ctx->h.tweak[0];
  52        ts[1] = ctx->h.tweak[1];
  53        do  {
  54                /*
  55                 * this implementation only supports 2**64 input bytes
  56                 * (no carry out here)
  57                 */
  58                ts[0] += byte_cnt_add; /* update processed length */
  59
  60                /* precompute the key schedule for this block */
  61                ks[0] = ctx->x[0];
  62                ks[1] = ctx->x[1];
  63                ks[2] = ctx->x[2];
  64                ks[3] = ctx->x[3];
  65                ks[4] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ SKEIN_KS_PARITY;
  66
  67                ts[2] = ts[0] ^ ts[1];
  68
  69                /* get input block in little-endian format */
  70                skein_get64_lsb_first(w, blk_ptr, WCNT);
  71                debug_save_tweak(ctx);
  72
  73                /* do the first full key injection */
  74                X0 = w[0] + ks[0];
  75                X1 = w[1] + ks[1] + ts[0];
  76                X2 = w[2] + ks[2] + ts[1];
  77                X3 = w[3] + ks[3];
  78
  79                blk_ptr += SKEIN_256_BLOCK_BYTES;
  80
  81                /* run the rounds */
  82                for (r = 1;
  83                        r < (SKEIN_UNROLL_256 ? 2 * RCNT : 2);
  84                        r += (SKEIN_UNROLL_256 ? 2 * SKEIN_UNROLL_256 : 1)) {
  85                        R256_8_ROUNDS(0);
  86#if   R256_UNROLL_R(1)
  87                        R256_8_ROUNDS(1);
  88#endif
  89#if   R256_UNROLL_R(2)
  90                        R256_8_ROUNDS(2);
  91#endif
  92#if   R256_UNROLL_R(3)
  93                        R256_8_ROUNDS(3);
  94#endif
  95#if   R256_UNROLL_R(4)
  96                        R256_8_ROUNDS(4);
  97#endif
  98#if   R256_UNROLL_R(5)
  99                        R256_8_ROUNDS(5);
 100#endif
 101#if   R256_UNROLL_R(6)
 102                        R256_8_ROUNDS(6);
 103#endif
 104#if   R256_UNROLL_R(7)
 105                        R256_8_ROUNDS(7);
 106#endif
 107#if   R256_UNROLL_R(8)
 108                        R256_8_ROUNDS(8);
 109#endif
 110#if   R256_UNROLL_R(9)
 111                        R256_8_ROUNDS(9);
 112#endif
 113#if   R256_UNROLL_R(10)
 114                        R256_8_ROUNDS(10);
 115#endif
 116#if   R256_UNROLL_R(11)
 117                        R256_8_ROUNDS(11);
 118#endif
 119#if   R256_UNROLL_R(12)
 120                        R256_8_ROUNDS(12);
 121#endif
 122#if   R256_UNROLL_R(13)
 123                        R256_8_ROUNDS(13);
 124#endif
 125#if   R256_UNROLL_R(14)
 126                        R256_8_ROUNDS(14);
 127#endif
 128                }
 129                /* do the final "feedforward" xor, update context chaining */
 130                ctx->x[0] = X0 ^ w[0];
 131                ctx->x[1] = X1 ^ w[1];
 132                ctx->x[2] = X2 ^ w[2];
 133                ctx->x[3] = X3 ^ w[3];
 134
 135                ts[1] &= ~SKEIN_T1_FLAG_FIRST;
 136        } while (--blk_cnt);
 137        ctx->h.tweak[0] = ts[0];
 138        ctx->h.tweak[1] = ts[1];
 139}
 140
 141#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
 142size_t skein_256_process_block_code_size(void)
 143{
 144        return ((u8 *)skein_256_process_block_code_size) -
 145                ((u8 *)skein_256_process_block);
 146}
 147
 148unsigned int skein_256_unroll_cnt(void)
 149{
 150        return SKEIN_UNROLL_256;
 151}
 152#endif
 153#endif
 154
/*****************************  SKEIN_512 ******************************/
 156#if !(SKEIN_USE_ASM & 512)
 157void skein_512_process_block(struct skein_512_ctx *ctx, const u8 *blk_ptr,
 158                             size_t blk_cnt, size_t byte_cnt_add)
 159{ /* do it in C */
 160        enum {
 161                WCNT = SKEIN_512_STATE_WORDS
 162        };
 163        size_t  r;
 164#if SKEIN_UNROLL_512
 165        /* key sched: chaining vars + tweak + "rot"*/
 166        u64  kw[WCNT + 4 + RCNT * 2];
 167#else
 168        /* key schedule words : chaining vars + tweak */
 169        u64  kw[WCNT + 4];
 170#endif
 171        u64  X0, X1, X2, X3, X4, X5, X6, X7; /* local copies, for speed */
 172        u64  w[WCNT]; /* local copy of input block */
 173#ifdef SKEIN_DEBUG
 174        const u64 *X_ptr[8]; /* use for debugging (help cc put Xn in regs) */
 175
 176        X_ptr[0] = &X0;
 177        X_ptr[1] = &X1;
 178        X_ptr[2] = &X2;
 179        X_ptr[3] = &X3;
 180        X_ptr[4] = &X4;
 181        X_ptr[5] = &X5;
 182        X_ptr[6] = &X6;
 183        X_ptr[7] = &X7;
 184#endif
 185
 186        skein_assert(blk_cnt != 0); /* never call with blk_cnt == 0! */
 187        ts[0] = ctx->h.tweak[0];
 188        ts[1] = ctx->h.tweak[1];
 189        do  {
 190                /*
 191                 * this implementation only supports 2**64 input bytes
 192                 * (no carry out here)
 193                 */
 194                ts[0] += byte_cnt_add; /* update processed length */
 195
 196                /* precompute the key schedule for this block */
 197                ks[0] = ctx->x[0];
 198                ks[1] = ctx->x[1];
 199                ks[2] = ctx->x[2];
 200                ks[3] = ctx->x[3];
 201                ks[4] = ctx->x[4];
 202                ks[5] = ctx->x[5];
 203                ks[6] = ctx->x[6];
 204                ks[7] = ctx->x[7];
 205                ks[8] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^
 206                        ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^ SKEIN_KS_PARITY;
 207
 208                ts[2] = ts[0] ^ ts[1];
 209
 210                /* get input block in little-endian format */
 211                skein_get64_lsb_first(w, blk_ptr, WCNT);
 212                debug_save_tweak(ctx);
 213
 214                /* do the first full key injection */
 215                X0 = w[0] + ks[0];
 216                X1 = w[1] + ks[1];
 217                X2 = w[2] + ks[2];
 218                X3 = w[3] + ks[3];
 219                X4 = w[4] + ks[4];
 220                X5 = w[5] + ks[5] + ts[0];
 221                X6 = w[6] + ks[6] + ts[1];
 222                X7 = w[7] + ks[7];
 223
 224                blk_ptr += SKEIN_512_BLOCK_BYTES;
 225
 226                /* run the rounds */
 227                for (r = 1;
 228                        r < (SKEIN_UNROLL_512 ? 2 * RCNT : 2);
 229                        r += (SKEIN_UNROLL_512 ? 2 * SKEIN_UNROLL_512 : 1)) {
 230                        R512_8_ROUNDS(0);
 231
 232#if   R512_UNROLL_R(1)
 233                        R512_8_ROUNDS(1);
 234#endif
 235#if   R512_UNROLL_R(2)
 236                        R512_8_ROUNDS(2);
 237#endif
 238#if   R512_UNROLL_R(3)
 239                        R512_8_ROUNDS(3);
 240#endif
 241#if   R512_UNROLL_R(4)
 242                        R512_8_ROUNDS(4);
 243#endif
 244#if   R512_UNROLL_R(5)
 245                        R512_8_ROUNDS(5);
 246#endif
 247#if   R512_UNROLL_R(6)
 248                        R512_8_ROUNDS(6);
 249#endif
 250#if   R512_UNROLL_R(7)
 251                        R512_8_ROUNDS(7);
 252#endif
 253#if   R512_UNROLL_R(8)
 254                        R512_8_ROUNDS(8);
 255#endif
 256#if   R512_UNROLL_R(9)
 257                        R512_8_ROUNDS(9);
 258#endif
 259#if   R512_UNROLL_R(10)
 260                        R512_8_ROUNDS(10);
 261#endif
 262#if   R512_UNROLL_R(11)
 263                        R512_8_ROUNDS(11);
 264#endif
 265#if   R512_UNROLL_R(12)
 266                        R512_8_ROUNDS(12);
 267#endif
 268#if   R512_UNROLL_R(13)
 269                        R512_8_ROUNDS(13);
 270#endif
 271#if   R512_UNROLL_R(14)
 272                        R512_8_ROUNDS(14);
 273#endif
 274                }
 275
 276                /* do the final "feedforward" xor, update context chaining */
 277                ctx->x[0] = X0 ^ w[0];
 278                ctx->x[1] = X1 ^ w[1];
 279                ctx->x[2] = X2 ^ w[2];
 280                ctx->x[3] = X3 ^ w[3];
 281                ctx->x[4] = X4 ^ w[4];
 282                ctx->x[5] = X5 ^ w[5];
 283                ctx->x[6] = X6 ^ w[6];
 284                ctx->x[7] = X7 ^ w[7];
 285
 286                ts[1] &= ~SKEIN_T1_FLAG_FIRST;
 287        } while (--blk_cnt);
 288        ctx->h.tweak[0] = ts[0];
 289        ctx->h.tweak[1] = ts[1];
 290}
 291
 292#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
 293size_t skein_512_process_block_code_size(void)
 294{
 295        return ((u8 *)skein_512_process_block_code_size) -
 296                ((u8 *)skein_512_process_block);
 297}
 298
 299unsigned int skein_512_unroll_cnt(void)
 300{
 301        return SKEIN_UNROLL_512;
 302}
 303#endif
 304#endif
 305
/*****************************  SKEIN_1024 ******************************/
 307#if !(SKEIN_USE_ASM & 1024)
 308void skein_1024_process_block(struct skein_1024_ctx *ctx, const u8 *blk_ptr,
 309                              size_t blk_cnt, size_t byte_cnt_add)
 310{ /* do it in C, always looping (unrolled is bigger AND slower!) */
 311        enum {
 312                WCNT = SKEIN_1024_STATE_WORDS
 313        };
 314        size_t  r;
 315#if (SKEIN_UNROLL_1024 != 0)
 316        /* key sched: chaining vars + tweak + "rot" */
 317        u64  kw[WCNT + 4 + (RCNT * 2)];
 318#else
 319        /* key schedule words : chaining vars + tweak */
 320        u64  kw[WCNT + 4];
 321#endif
 322
 323        /* local copy of vars, for speed */
 324        u64  X00, X01, X02, X03, X04, X05, X06, X07,
 325             X08, X09, X10, X11, X12, X13, X14, X15;
 326        u64  w[WCNT]; /* local copy of input block */
 327
 328        skein_assert(blk_cnt != 0); /* never call with blk_cnt == 0! */
 329        ts[0] = ctx->h.tweak[0];
 330        ts[1] = ctx->h.tweak[1];
 331        do  {
 332                /*
 333                 * this implementation only supports 2**64 input bytes
 334                 * (no carry out here)
 335                 */
 336                ts[0] += byte_cnt_add; /* update processed length */
 337
 338                /* precompute the key schedule for this block */
 339                ks[0]  = ctx->x[0];
 340                ks[1]  = ctx->x[1];
 341                ks[2]  = ctx->x[2];
 342                ks[3]  = ctx->x[3];
 343                ks[4]  = ctx->x[4];
 344                ks[5]  = ctx->x[5];
 345                ks[6]  = ctx->x[6];
 346                ks[7]  = ctx->x[7];
 347                ks[8]  = ctx->x[8];
 348                ks[9]  = ctx->x[9];
 349                ks[10] = ctx->x[10];
 350                ks[11] = ctx->x[11];
 351                ks[12] = ctx->x[12];
 352                ks[13] = ctx->x[13];
 353                ks[14] = ctx->x[14];
 354                ks[15] = ctx->x[15];
 355                ks[16] =  ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^
 356                          ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^
 357                          ks[8] ^ ks[9] ^ ks[10] ^ ks[11] ^
 358                          ks[12] ^ ks[13] ^ ks[14] ^ ks[15] ^ SKEIN_KS_PARITY;
 359
 360                ts[2] = ts[0] ^ ts[1];
 361
 362                /* get input block in little-endian format */
 363                skein_get64_lsb_first(w, blk_ptr, WCNT);
 364                debug_save_tweak(ctx);
 365
 366                /* do the first full key injection */
 367                X00 = w[0] + ks[0];
 368                X01 = w[1] + ks[1];
 369                X02 = w[2] + ks[2];
 370                X03 = w[3] + ks[3];
 371                X04 = w[4] + ks[4];
 372                X05 = w[5] + ks[5];
 373                X06 = w[6] + ks[6];
 374                X07 = w[7] + ks[7];
 375                X08 = w[8] + ks[8];
 376                X09 = w[9] + ks[9];
 377                X10 = w[10] + ks[10];
 378                X11 = w[11] + ks[11];
 379                X12 = w[12] + ks[12];
 380                X13 = w[13] + ks[13] + ts[0];
 381                X14 = w[14] + ks[14] + ts[1];
 382                X15 = w[15] + ks[15];
 383
 384                for (r = 1;
 385                        r < (SKEIN_UNROLL_1024 ? 2 * RCNT : 2);
 386                        r += (SKEIN_UNROLL_1024 ? 2 * SKEIN_UNROLL_1024 : 1)) {
 387                        R1024_8_ROUNDS(0);
 388#if   R1024_UNROLL_R(1)
 389                        R1024_8_ROUNDS(1);
 390#endif
 391#if   R1024_UNROLL_R(2)
 392                        R1024_8_ROUNDS(2);
 393#endif
 394#if   R1024_UNROLL_R(3)
 395                        R1024_8_ROUNDS(3);
 396#endif
 397#if   R1024_UNROLL_R(4)
 398                        R1024_8_ROUNDS(4);
 399#endif
 400#if   R1024_UNROLL_R(5)
 401                        R1024_8_ROUNDS(5);
 402#endif
 403#if   R1024_UNROLL_R(6)
 404                        R1024_8_ROUNDS(6);
 405#endif
 406#if   R1024_UNROLL_R(7)
 407                        R1024_8_ROUNDS(7);
 408#endif
 409#if   R1024_UNROLL_R(8)
 410                        R1024_8_ROUNDS(8);
 411#endif
 412#if   R1024_UNROLL_R(9)
 413                        R1024_8_ROUNDS(9);
 414#endif
 415#if   R1024_UNROLL_R(10)
 416                        R1024_8_ROUNDS(10);
 417#endif
 418#if   R1024_UNROLL_R(11)
 419                        R1024_8_ROUNDS(11);
 420#endif
 421#if   R1024_UNROLL_R(12)
 422                        R1024_8_ROUNDS(12);
 423#endif
 424#if   R1024_UNROLL_R(13)
 425                        R1024_8_ROUNDS(13);
 426#endif
 427#if   R1024_UNROLL_R(14)
 428                        R1024_8_ROUNDS(14);
 429#endif
 430                }
 431                /* do the final "feedforward" xor, update context chaining */
 432
 433                ctx->x[0] = X00 ^ w[0];
 434                ctx->x[1] = X01 ^ w[1];
 435                ctx->x[2] = X02 ^ w[2];
 436                ctx->x[3] = X03 ^ w[3];
 437                ctx->x[4] = X04 ^ w[4];
 438                ctx->x[5] = X05 ^ w[5];
 439                ctx->x[6] = X06 ^ w[6];
 440                ctx->x[7] = X07 ^ w[7];
 441                ctx->x[8] = X08 ^ w[8];
 442                ctx->x[9] = X09 ^ w[9];
 443                ctx->x[10] = X10 ^ w[10];
 444                ctx->x[11] = X11 ^ w[11];
 445                ctx->x[12] = X12 ^ w[12];
 446                ctx->x[13] = X13 ^ w[13];
 447                ctx->x[14] = X14 ^ w[14];
 448                ctx->x[15] = X15 ^ w[15];
 449
 450                ts[1] &= ~SKEIN_T1_FLAG_FIRST;
 451                blk_ptr += SKEIN_1024_BLOCK_BYTES;
 452        } while (--blk_cnt);
 453        ctx->h.tweak[0] = ts[0];
 454        ctx->h.tweak[1] = ts[1];
 455}
 456
 457#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
 458size_t skein_1024_process_block_code_size(void)
 459{
 460        return ((u8 *)skein_1024_process_block_code_size) -
 461                ((u8 *)skein_1024_process_block);
 462}
 463
 464unsigned int skein_1024_unroll_cnt(void)
 465{
 466        return SKEIN_UNROLL_1024;
 467}
 468#endif
 469#endif
 470