linux/arch/x86/crypto/sha256_ni_asm.S
<<
>>
Prefs
   1/*
   2 * Intel SHA Extensions optimized implementation of a SHA-256 update function
   3 *
   4 * This file is provided under a dual BSD/GPLv2 license.  When using or
   5 * redistributing this file, you may do so under either license.
   6 *
   7 * GPL LICENSE SUMMARY
   8 *
   9 * Copyright(c) 2015 Intel Corporation.
  10 *
  11 * This program is free software; you can redistribute it and/or modify
  12 * it under the terms of version 2 of the GNU General Public License as
  13 * published by the Free Software Foundation.
  14 *
  15 * This program is distributed in the hope that it will be useful, but
  16 * WITHOUT ANY WARRANTY; without even the implied warranty of
  17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18 * General Public License for more details.
  19 *
  20 * Contact Information:
  21 *      Sean Gulley <sean.m.gulley@intel.com>
  22 *      Tim Chen <tim.c.chen@linux.intel.com>
  23 *
  24 * BSD LICENSE
  25 *
  26 * Copyright(c) 2015 Intel Corporation.
  27 *
  28 * Redistribution and use in source and binary forms, with or without
  29 * modification, are permitted provided that the following conditions
  30 * are met:
  31 *
  32 *      * Redistributions of source code must retain the above copyright
  33 *        notice, this list of conditions and the following disclaimer.
  34 *      * Redistributions in binary form must reproduce the above copyright
  35 *        notice, this list of conditions and the following disclaimer in
  36 *        the documentation and/or other materials provided with the
  37 *        distribution.
  38 *      * Neither the name of Intel Corporation nor the names of its
  39 *        contributors may be used to endorse or promote products derived
  40 *        from this software without specific prior written permission.
  41 *
  42 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  43 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  44 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  45 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  46 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  47 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  48 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  49 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  50 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  51 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  52 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  53 *
  54 */
  55
  56#include <linux/linkage.h>
  57
  58#define DIGEST_PTR      %rdi    /* 1st arg: address of the 32-byte hash state (two 16-byte halves) */
  59#define DATA_PTR        %rsi    /* 2nd arg: current input position, advanced by 64 per block */
  60#define NUM_BLKS        %rdx    /* 3rd arg: block count; reused as the end-of-data address */
  61
  62#define SHA256CONSTANTS %rax    /* base address of the K256 table */
  63
  64#define MSG             %xmm0   /* W + K values fed to sha256rnds2 */
  65#define STATE0          %xmm1   /* working state, ABEF at the top of each block */
  66#define STATE1          %xmm2   /* working state, CDGH at the top of each block */
  67#define MSGTMP0         %xmm3   /* MSGTMP0-MSGTMP3: rotating window of message schedule words */
  68#define MSGTMP1         %xmm4
  69#define MSGTMP2         %xmm5
  70#define MSGTMP3         %xmm6
  71#define MSGTMP4         %xmm7   /* scratch for the palignr results and the state shuffles */
  72
  73#define SHUF_MASK       %xmm8   /* pshufb mask loaded from PSHUFFLE_BYTE_FLIP_MASK */
  74
  75#define ABEF_SAVE       %xmm9   /* copy of STATE0 taken at the top of each block */
  76#define CDGH_SAVE       %xmm10  /* copy of STATE1 taken at the top of each block */
  77
  78/*
  79 * Intel SHA Extensions optimized implementation of a SHA-256 update function
  80 *
  81 * The function takes a pointer to the current hash values, a pointer to the
  82 * input data, and a number of 64 byte blocks to process.  Once all blocks have
  83 * been processed, the digest buffer is updated with the resulting hash value.
  84 * The function only processes complete blocks, there is no functionality to
  85 * store partial blocks.  All message padding and hash value initialization must
  86 * be done outside the update function.
  87 *
  88 * The indented lines in the loop are instructions related to rounds processing.
  89 * The non-indented lines are instructions related to the message schedule.
  90 *
  91 * void sha256_ni_transform(uint32_t *digest, const void *data,
  92 *                          uint32_t numBlocks);
  93 * digest : pointer to digest
  94 * data: pointer to input data
  95 * numBlocks: Number of blocks to process
  96 */
  97
  98.text
  99.align 32
 100SYM_FUNC_START(sha256_ni_transform)
 101        /*
             * Arguments arrive in DIGEST_PTR (%rdi), DATA_PTR (%rsi) and NUM_BLKS
             * (%rdx).  The routine writes %rax, %rdx, %rsi, %xmm0-%xmm10 and the
             * flags; it makes no stack allocations.
             *
             * NOTE(review): the prototype comment documents numBlocks as uint32_t,
             * but the shift below operates on all 64 bits of %rdx -- confirm that
             * callers pass a value whose upper 32 bits are zero.
             */
 102        shl             $6, NUM_BLKS            /* blocks * 64 = byte count; ZF set if the result is 0 */
 103        jz              .Ldone_hash             /* nothing to hash: return with the digest untouched */
 104        add             DATA_PTR, NUM_BLKS      /* NUM_BLKS now holds the end-of-data address */
 105
 106        /*
 107         * Load the eight hash words (unaligned loads) and permute them from
 108         * memory order into the register layout the round instructions use:
 109         * DCBA, HGFE -> ABEF, CDGH
 110         */
 111        movdqu          0*16(DIGEST_PTR), STATE0
 112        movdqu          1*16(DIGEST_PTR), STATE1
 113
 114        pshufd          $0xB1, STATE0,  STATE0          /* CDAB */
 115        pshufd          $0x1B, STATE1,  STATE1          /* EFGH */
 116        movdqa          STATE0, MSGTMP4                 /* keep CDAB; palignr overwrites STATE0 */
 117        palignr         $8, STATE1,  STATE0             /* ABEF */
 118        pblendw         $0xF0, MSGTMP4, STATE1          /* CDGH */
 119
 120        movdqa          PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK        /* loop-invariant byte-swap mask */
 121        lea             K256(%rip), SHA256CONSTANTS                     /* loop-invariant constant table base */
 122
 123.Lloop0:        /* one iteration per 64-byte input block */
 124        /* Keep the incoming state so it can be added back after the 64 rounds */
 125        movdqa          STATE0, ABEF_SAVE
 126        movdqa          STATE1, CDGH_SAVE
 127
 128        /* Rounds 0-3: load W[0..3], byte-swap, keep a copy in MSGTMP0 */
 129        movdqu          0*16(DATA_PTR), MSG
 130        pshufb          SHUF_MASK, MSG          /* reverse the bytes inside each 32-bit word */
 131        movdqa          MSG, MSGTMP0
 132                paddd           0*16(SHA256CONSTANTS), MSG      /* MSG = W[0..3] + K[0..3] */
 133                sha256rnds2     STATE0, STATE1  /* two rounds; W+K come from the low half of MSG (%xmm0) */
 134                pshufd          $0x0E, MSG, MSG /* bring the high half of MSG down to the low half */
 135                sha256rnds2     STATE1, STATE0  /* next two rounds; the state registers swap roles */
 136
 137        /* Rounds 4-7: load W[4..7] into MSGTMP1 */
 138        movdqu          1*16(DATA_PTR), MSG
 139        pshufb          SHUF_MASK, MSG
 140        movdqa          MSG, MSGTMP1
 141                paddd           1*16(SHA256CONSTANTS), MSG
 142                sha256rnds2     STATE0, STATE1
 143                pshufd          $0x0E, MSG, MSG
 144                sha256rnds2     STATE1, STATE0
 145        sha256msg1      MSGTMP1, MSGTMP0        /* start W[16..19] in MSGTMP0 */
 146
 147        /* Rounds 8-11: load W[8..11] into MSGTMP2 */
 148        movdqu          2*16(DATA_PTR), MSG
 149        pshufb          SHUF_MASK, MSG
 150        movdqa          MSG, MSGTMP2
 151                paddd           2*16(SHA256CONSTANTS), MSG
 152                sha256rnds2     STATE0, STATE1
 153                pshufd          $0x0E, MSG, MSG
 154                sha256rnds2     STATE1, STATE0
 155        sha256msg1      MSGTMP2, MSGTMP1        /* start W[20..23] in MSGTMP1 */
 156
 157        /* Rounds 12-15: load W[12..15] into MSGTMP3; finish W[16..19] in MSGTMP0 */
 158        movdqu          3*16(DATA_PTR), MSG
 159        pshufb          SHUF_MASK, MSG
 160        movdqa          MSG, MSGTMP3
 161                paddd           3*16(SHA256CONSTANTS), MSG
 162                sha256rnds2     STATE0, STATE1
 163        movdqa          MSGTMP3, MSGTMP4
 164        palignr         $4, MSGTMP2, MSGTMP4    /* MSGTMP4 = W[9..12] */
 165        paddd           MSGTMP4, MSGTMP0        /* add the W[t-7] terms */
 166        sha256msg2      MSGTMP3, MSGTMP0        /* MSGTMP0 = W[16..19] */
 167                pshufd          $0x0E, MSG, MSG
 168                sha256rnds2     STATE1, STATE0
 169        sha256msg1      MSGTMP3, MSGTMP2        /* start W[24..27] in MSGTMP2 */
 170
 171        /* Rounds 16-19: consume W[16..19] (MSGTMP0); finish W[20..23] in MSGTMP1 */
 172        movdqa          MSGTMP0, MSG
 173                paddd           4*16(SHA256CONSTANTS), MSG
 174                sha256rnds2     STATE0, STATE1
 175        movdqa          MSGTMP0, MSGTMP4
 176        palignr         $4, MSGTMP3, MSGTMP4
 177        paddd           MSGTMP4, MSGTMP1
 178        sha256msg2      MSGTMP0, MSGTMP1
 179                pshufd          $0x0E, MSG, MSG
 180                sha256rnds2     STATE1, STATE0
 181        sha256msg1      MSGTMP0, MSGTMP3        /* start W[28..31] in MSGTMP3 */
 182
 183        /* Rounds 20-23: consume W[20..23] (MSGTMP1); finish W[24..27] in MSGTMP2 */
 184        movdqa          MSGTMP1, MSG
 185                paddd           5*16(SHA256CONSTANTS), MSG
 186                sha256rnds2     STATE0, STATE1
 187        movdqa          MSGTMP1, MSGTMP4
 188        palignr         $4, MSGTMP0, MSGTMP4
 189        paddd           MSGTMP4, MSGTMP2
 190        sha256msg2      MSGTMP1, MSGTMP2
 191                pshufd          $0x0E, MSG, MSG
 192                sha256rnds2     STATE1, STATE0
 193        sha256msg1      MSGTMP1, MSGTMP0        /* start W[32..35] in MSGTMP0 */
 194
 195        /* Rounds 24-27: consume W[24..27] (MSGTMP2); finish W[28..31] in MSGTMP3 */
 196        movdqa          MSGTMP2, MSG
 197                paddd           6*16(SHA256CONSTANTS), MSG
 198                sha256rnds2     STATE0, STATE1
 199        movdqa          MSGTMP2, MSGTMP4
 200        palignr         $4, MSGTMP1, MSGTMP4
 201        paddd           MSGTMP4, MSGTMP3
 202        sha256msg2      MSGTMP2, MSGTMP3
 203                pshufd          $0x0E, MSG, MSG
 204                sha256rnds2     STATE1, STATE0
 205        sha256msg1      MSGTMP2, MSGTMP1        /* start W[36..39] in MSGTMP1 */
 206
 207        /* Rounds 28-31: consume W[28..31] (MSGTMP3); finish W[32..35] in MSGTMP0 */
 208        movdqa          MSGTMP3, MSG
 209                paddd           7*16(SHA256CONSTANTS), MSG
 210                sha256rnds2     STATE0, STATE1
 211        movdqa          MSGTMP3, MSGTMP4
 212        palignr         $4, MSGTMP2, MSGTMP4
 213        paddd           MSGTMP4, MSGTMP0
 214        sha256msg2      MSGTMP3, MSGTMP0
 215                pshufd          $0x0E, MSG, MSG
 216                sha256rnds2     STATE1, STATE0
 217        sha256msg1      MSGTMP3, MSGTMP2        /* start W[40..43] in MSGTMP2 */
 218
 219        /* Rounds 32-35: consume W[32..35] (MSGTMP0); finish W[36..39] in MSGTMP1 */
 220        movdqa          MSGTMP0, MSG
 221                paddd           8*16(SHA256CONSTANTS), MSG
 222                sha256rnds2     STATE0, STATE1
 223        movdqa          MSGTMP0, MSGTMP4
 224        palignr         $4, MSGTMP3, MSGTMP4
 225        paddd           MSGTMP4, MSGTMP1
 226        sha256msg2      MSGTMP0, MSGTMP1
 227                pshufd          $0x0E, MSG, MSG
 228                sha256rnds2     STATE1, STATE0
 229        sha256msg1      MSGTMP0, MSGTMP3        /* start W[44..47] in MSGTMP3 */
 230
 231        /* Rounds 36-39: consume W[36..39] (MSGTMP1); finish W[40..43] in MSGTMP2 */
 232        movdqa          MSGTMP1, MSG
 233                paddd           9*16(SHA256CONSTANTS), MSG
 234                sha256rnds2     STATE0, STATE1
 235        movdqa          MSGTMP1, MSGTMP4
 236        palignr         $4, MSGTMP0, MSGTMP4
 237        paddd           MSGTMP4, MSGTMP2
 238        sha256msg2      MSGTMP1, MSGTMP2
 239                pshufd          $0x0E, MSG, MSG
 240                sha256rnds2     STATE1, STATE0
 241        sha256msg1      MSGTMP1, MSGTMP0        /* start W[48..51] in MSGTMP0 */
 242
 243        /* Rounds 40-43: consume W[40..43] (MSGTMP2); finish W[44..47] in MSGTMP3 */
 244        movdqa          MSGTMP2, MSG
 245                paddd           10*16(SHA256CONSTANTS), MSG
 246                sha256rnds2     STATE0, STATE1
 247        movdqa          MSGTMP2, MSGTMP4
 248        palignr         $4, MSGTMP1, MSGTMP4
 249        paddd           MSGTMP4, MSGTMP3
 250        sha256msg2      MSGTMP2, MSGTMP3
 251                pshufd          $0x0E, MSG, MSG
 252                sha256rnds2     STATE1, STATE0
 253        sha256msg1      MSGTMP2, MSGTMP1        /* start W[52..55] in MSGTMP1 */
 254
 255        /* Rounds 44-47: consume W[44..47] (MSGTMP3); finish W[48..51] in MSGTMP0 */
 256        movdqa          MSGTMP3, MSG
 257                paddd           11*16(SHA256CONSTANTS), MSG
 258                sha256rnds2     STATE0, STATE1
 259        movdqa          MSGTMP3, MSGTMP4
 260        palignr         $4, MSGTMP2, MSGTMP4
 261        paddd           MSGTMP4, MSGTMP0
 262        sha256msg2      MSGTMP3, MSGTMP0
 263                pshufd          $0x0E, MSG, MSG
 264                sha256rnds2     STATE1, STATE0
 265        sha256msg1      MSGTMP3, MSGTMP2        /* start W[56..59] in MSGTMP2 */
 266
 267        /* Rounds 48-51: consume W[48..51] (MSGTMP0); finish W[52..55] in MSGTMP1 */
 268        movdqa          MSGTMP0, MSG
 269                paddd           12*16(SHA256CONSTANTS), MSG
 270                sha256rnds2     STATE0, STATE1
 271        movdqa          MSGTMP0, MSGTMP4
 272        palignr         $4, MSGTMP3, MSGTMP4
 273        paddd           MSGTMP4, MSGTMP1
 274        sha256msg2      MSGTMP0, MSGTMP1
 275                pshufd          $0x0E, MSG, MSG
 276                sha256rnds2     STATE1, STATE0
 277        sha256msg1      MSGTMP0, MSGTMP3        /* start W[60..63] in MSGTMP3 */
 278
 279        /*
             * Rounds 52-55: consume W[52..55] (MSGTMP1); finish W[56..59] in MSGTMP2.
             * No sha256msg1 from here on: no schedule words beyond W[63] are needed.
             */
 280        movdqa          MSGTMP1, MSG
 281                paddd           13*16(SHA256CONSTANTS), MSG
 282                sha256rnds2     STATE0, STATE1
 283        movdqa          MSGTMP1, MSGTMP4
 284        palignr         $4, MSGTMP0, MSGTMP4
 285        paddd           MSGTMP4, MSGTMP2
 286        sha256msg2      MSGTMP1, MSGTMP2
 287                pshufd          $0x0E, MSG, MSG
 288                sha256rnds2     STATE1, STATE0
 289
 290        /* Rounds 56-59: consume W[56..59] (MSGTMP2); finish W[60..63] in MSGTMP3 */
 291        movdqa          MSGTMP2, MSG
 292                paddd           14*16(SHA256CONSTANTS), MSG
 293                sha256rnds2     STATE0, STATE1
 294        movdqa          MSGTMP2, MSGTMP4
 295        palignr         $4, MSGTMP1, MSGTMP4
 296        paddd           MSGTMP4, MSGTMP3
 297        sha256msg2      MSGTMP2, MSGTMP3
 298                pshufd          $0x0E, MSG, MSG
 299                sha256rnds2     STATE1, STATE0
 300
 301        /* Rounds 60-63: consume W[60..63] (MSGTMP3); message schedule is complete */
 302        movdqa          MSGTMP3, MSG
 303                paddd           15*16(SHA256CONSTANTS), MSG
 304                sha256rnds2     STATE0, STATE1
 305                pshufd          $0x0E, MSG, MSG
 306                sha256rnds2     STATE1, STATE0
 307
 308        /* Add the state saved at the top of the block to the result of the rounds */
 309        paddd           ABEF_SAVE, STATE0
 310        paddd           CDGH_SAVE, STATE1
 311
 312        /* Advance to the next 64-byte block; stop when the end address is reached */
 313        add             $64, DATA_PTR
 314        cmp             NUM_BLKS, DATA_PTR      /* NUM_BLKS holds the end-of-data address here */
 315        jne             .Lloop0
 316
 317        /* Undo the entry permutation (ABEF, CDGH -> DCBA, HGFE) and store the digest */
 318        pshufd          $0x1B, STATE0,  STATE0          /* FEBA */
 319        pshufd          $0xB1, STATE1,  STATE1          /* DCHG */
 320        movdqa          STATE0, MSGTMP4                 /* keep FEBA; pblendw overwrites STATE0 */
 321        pblendw         $0xF0, STATE1,  STATE0          /* DCBA */
 322        palignr         $8, MSGTMP4, STATE1             /* HGFE */
 323
 324        movdqu          STATE0, 0*16(DIGEST_PTR)
 325        movdqu          STATE1, 1*16(DIGEST_PTR)
 326
 327.Ldone_hash:    /* also reached directly from entry when the byte count is zero */
 328
 329        ret
 330SYM_FUNC_END(sha256_ni_transform)
 331
 332.section        .rodata.cst256.K256, "aM", @progbits, 256
 333.align 64
 334K256:
        /*
         * The 64 SHA-256 round constants, 64 * 4 = 256 bytes.  Each row below is
         * one 16-byte group of four constants; sha256_ni_transform adds row N to
         * the message words with "paddd N*16(SHA256CONSTANTS), MSG".  That memory
         * operand must be 16-byte aligned, which the .align above guarantees.
         */
 335        .long   0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
 336        .long   0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
 337        .long   0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
 338        .long   0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
 339        .long   0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
 340        .long   0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
 341        .long   0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
 342        .long   0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
 343        .long   0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
 344        .long   0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
 345        .long   0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
 346        .long   0xd192e819,0xd6990624,0xf40e3585,0x106aa070
 347        .long   0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
 348        .long   0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
 349        .long   0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
 350        .long   0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
 351
 352.section        .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
 353.align 16
 354PSHUFFLE_BYTE_FLIP_MASK:
        /*
         * pshufb control mask.  In memory order the bytes are 03 02 01 00,
         * 07 06 05 04, ..., so byte i of each 32-bit word is replaced by byte
         * 3-i of the same word: the bytes of every word are reversed.  It is
         * loaded with movdqa, hence the 16-byte alignment.
         */
 355        .octa 0x0c0d0e0f08090a0b0405060700010203
 356