/* linux/arch/x86/crypto/sha512_ssse3_glue.c */
/*
 * Cryptographic API.
 *
 * Glue code for the SHA512 Secure Hash Algorithm assembler
 * implementation using supplemental SSE3 / AVX / AVX2 instructions.
 *
 * This file is based on sha512_generic.c
 *
 * Copyright (C) 2013 Intel Corporation
 * Author: Tim Chen <tim.c.chen@linux.intel.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
  27
  28#define pr_fmt(fmt)     KBUILD_MODNAME ": " fmt
  29
  30#include <crypto/internal/hash.h>
  31#include <linux/init.h>
  32#include <linux/module.h>
  33#include <linux/mm.h>
  34#include <linux/cryptohash.h>
  35#include <linux/types.h>
  36#include <crypto/sha.h>
  37#include <asm/byteorder.h>
  38#include <asm/i387.h>
  39#include <asm/xcr.h>
  40#include <asm/xsave.h>
  41
  42#include <linux/string.h>
  43
/*
 * Assembler routines.  Each consumes 'rounds' consecutive
 * SHA512_BLOCK_SIZE-byte blocks from 'data' and updates the eight-word
 * 'digest' state in place.
 */
asmlinkage void sha512_transform_ssse3(const char *data, u64 *digest,
				     u64 rounds);
#ifdef CONFIG_AS_AVX
asmlinkage void sha512_transform_avx(const char *data, u64 *digest,
				     u64 rounds);
#endif
#ifdef CONFIG_AS_AVX2
/* RORX-based variant; only built when the assembler supports AVX2. */
asmlinkage void sha512_transform_rorx(const char *data, u64 *digest,
				     u64 rounds);
#endif

/* Best implementation for this CPU; chosen once in sha512_ssse3_mod_init(). */
static asmlinkage void (*sha512_transform_asm)(const char *, u64 *, u64);
  56
  57
  58static int sha512_ssse3_init(struct shash_desc *desc)
  59{
  60        struct sha512_state *sctx = shash_desc_ctx(desc);
  61
  62        sctx->state[0] = SHA512_H0;
  63        sctx->state[1] = SHA512_H1;
  64        sctx->state[2] = SHA512_H2;
  65        sctx->state[3] = SHA512_H3;
  66        sctx->state[4] = SHA512_H4;
  67        sctx->state[5] = SHA512_H5;
  68        sctx->state[6] = SHA512_H6;
  69        sctx->state[7] = SHA512_H7;
  70        sctx->count[0] = sctx->count[1] = 0;
  71
  72        return 0;
  73}
  74
  75static int __sha512_ssse3_update(struct shash_desc *desc, const u8 *data,
  76                               unsigned int len, unsigned int partial)
  77{
  78        struct sha512_state *sctx = shash_desc_ctx(desc);
  79        unsigned int done = 0;
  80
  81        sctx->count[0] += len;
  82        if (sctx->count[0] < len)
  83                sctx->count[1]++;
  84
  85        if (partial) {
  86                done = SHA512_BLOCK_SIZE - partial;
  87                memcpy(sctx->buf + partial, data, done);
  88                sha512_transform_asm(sctx->buf, sctx->state, 1);
  89        }
  90
  91        if (len - done >= SHA512_BLOCK_SIZE) {
  92                const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE;
  93
  94                sha512_transform_asm(data + done, sctx->state, (u64) rounds);
  95
  96                done += rounds * SHA512_BLOCK_SIZE;
  97        }
  98
  99        memcpy(sctx->buf, data + done, len - done);
 100
 101        return 0;
 102}
 103
 104static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data,
 105                             unsigned int len)
 106{
 107        struct sha512_state *sctx = shash_desc_ctx(desc);
 108        unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE;
 109        int res;
 110
 111        /* Handle the fast case right here */
 112        if (partial + len < SHA512_BLOCK_SIZE) {
 113                sctx->count[0] += len;
 114                if (sctx->count[0] < len)
 115                        sctx->count[1]++;
 116                memcpy(sctx->buf + partial, data, len);
 117
 118                return 0;
 119        }
 120
 121        if (!irq_fpu_usable()) {
 122                res = crypto_sha512_update(desc, data, len);
 123        } else {
 124                kernel_fpu_begin();
 125                res = __sha512_ssse3_update(desc, data, len, partial);
 126                kernel_fpu_end();
 127        }
 128
 129        return res;
 130}
 131
 132
 133/* Add padding and return the message digest. */
 134static int sha512_ssse3_final(struct shash_desc *desc, u8 *out)
 135{
 136        struct sha512_state *sctx = shash_desc_ctx(desc);
 137        unsigned int i, index, padlen;
 138        __be64 *dst = (__be64 *)out;
 139        __be64 bits[2];
 140        static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, };
 141
 142        /* save number of bits */
 143        bits[1] = cpu_to_be64(sctx->count[0] << 3);
 144        bits[0] = cpu_to_be64(sctx->count[1] << 3) | sctx->count[0] >> 61;
 145
 146        /* Pad out to 112 mod 128 and append length */
 147        index = sctx->count[0] & 0x7f;
 148        padlen = (index < 112) ? (112 - index) : ((128+112) - index);
 149
 150        if (!irq_fpu_usable()) {
 151                crypto_sha512_update(desc, padding, padlen);
 152                crypto_sha512_update(desc, (const u8 *)&bits, sizeof(bits));
 153        } else {
 154                kernel_fpu_begin();
 155                /* We need to fill a whole block for __sha512_ssse3_update() */
 156                if (padlen <= 112) {
 157                        sctx->count[0] += padlen;
 158                        if (sctx->count[0] < padlen)
 159                                sctx->count[1]++;
 160                        memcpy(sctx->buf + index, padding, padlen);
 161                } else {
 162                        __sha512_ssse3_update(desc, padding, padlen, index);
 163                }
 164                __sha512_ssse3_update(desc, (const u8 *)&bits,
 165                                        sizeof(bits), 112);
 166                kernel_fpu_end();
 167        }
 168
 169        /* Store state in digest */
 170        for (i = 0; i < 8; i++)
 171                dst[i] = cpu_to_be64(sctx->state[i]);
 172
 173        /* Wipe context */
 174        memset(sctx, 0, sizeof(*sctx));
 175
 176        return 0;
 177}
 178
 179static int sha512_ssse3_export(struct shash_desc *desc, void *out)
 180{
 181        struct sha512_state *sctx = shash_desc_ctx(desc);
 182
 183        memcpy(out, sctx, sizeof(*sctx));
 184
 185        return 0;
 186}
 187
 188static int sha512_ssse3_import(struct shash_desc *desc, const void *in)
 189{
 190        struct sha512_state *sctx = shash_desc_ctx(desc);
 191
 192        memcpy(sctx, in, sizeof(*sctx));
 193
 194        return 0;
 195}
 196
 197static int sha384_ssse3_init(struct shash_desc *desc)
 198{
 199        struct sha512_state *sctx = shash_desc_ctx(desc);
 200
 201        sctx->state[0] = SHA384_H0;
 202        sctx->state[1] = SHA384_H1;
 203        sctx->state[2] = SHA384_H2;
 204        sctx->state[3] = SHA384_H3;
 205        sctx->state[4] = SHA384_H4;
 206        sctx->state[5] = SHA384_H5;
 207        sctx->state[6] = SHA384_H6;
 208        sctx->state[7] = SHA384_H7;
 209
 210        sctx->count[0] = sctx->count[1] = 0;
 211
 212        return 0;
 213}
 214
 215static int sha384_ssse3_final(struct shash_desc *desc, u8 *hash)
 216{
 217        u8 D[SHA512_DIGEST_SIZE];
 218
 219        sha512_ssse3_final(desc, D);
 220
 221        memcpy(hash, D, SHA384_DIGEST_SIZE);
 222        memset(D, 0, SHA512_DIGEST_SIZE);
 223
 224        return 0;
 225}
 226
/*
 * Algorithm descriptors for the two hashes sharing this SHA-512 core.
 * cra_priority 150 ranks these drivers above the generic C
 * implementations so they are picked by default when usable.
 */
static struct shash_alg algs[] = { {
	.digestsize	=	SHA512_DIGEST_SIZE,
	.init		=	sha512_ssse3_init,
	.update		=	sha512_ssse3_update,
	.final		=	sha512_ssse3_final,
	.export		=	sha512_ssse3_export,
	.import		=	sha512_ssse3_import,
	.descsize	=	sizeof(struct sha512_state),
	.statesize	=	sizeof(struct sha512_state),
	.base		=	{
		.cra_name	=	"sha512",
		.cra_driver_name =	"sha512-ssse3",
		.cra_priority	=	150,
		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
		.cra_blocksize	=	SHA512_BLOCK_SIZE,
		.cra_module	=	THIS_MODULE,
	}
},  {
	/* SHA-384: same update path, different IV and truncated final. */
	.digestsize	=	SHA384_DIGEST_SIZE,
	.init		=	sha384_ssse3_init,
	.update		=	sha512_ssse3_update,
	.final		=	sha384_ssse3_final,
	.export		=	sha512_ssse3_export,
	.import		=	sha512_ssse3_import,
	.descsize	=	sizeof(struct sha512_state),
	.statesize	=	sizeof(struct sha512_state),
	.base		=	{
		.cra_name	=	"sha384",
		.cra_driver_name =	"sha384-ssse3",
		.cra_priority	=	150,
		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
		.cra_blocksize	=	SHA384_BLOCK_SIZE,
		.cra_module	=	THIS_MODULE,
	}
} };
 262
 263#ifdef CONFIG_AS_AVX
 264static bool __init avx_usable(void)
 265{
 266        u64 xcr0;
 267
 268        if (!cpu_has_avx || !cpu_has_osxsave)
 269                return false;
 270
 271        xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
 272        if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
 273                pr_info("AVX detected but unusable.\n");
 274
 275                return false;
 276        }
 277
 278        return true;
 279}
 280#endif
 281
/*
 * Module init: select the fastest transform the CPU and toolchain
 * support (SSSE3 < AVX < AVX2/RORX), then register both algorithms.
 * Returns -ENODEV when no accelerated implementation is usable.
 */
static int __init sha512_ssse3_mod_init(void)
{
	/* test for SSSE3 first */
	if (cpu_has_ssse3)
		sha512_transform_asm = sha512_transform_ssse3;

#ifdef CONFIG_AS_AVX
	/* allow AVX to override SSSE3, it's a little faster */
	if (avx_usable()) {
#ifdef CONFIG_AS_AVX2
		/* Prefer the RORX variant on CPUs that also have AVX2. */
		if (boot_cpu_has(X86_FEATURE_AVX2))
			sha512_transform_asm = sha512_transform_rorx;
		else
#endif
			sha512_transform_asm = sha512_transform_avx;
	}
#endif

	/* Register only if some implementation was selected above. */
	if (sha512_transform_asm) {
#ifdef CONFIG_AS_AVX
		if (sha512_transform_asm == sha512_transform_avx)
			pr_info("Using AVX optimized SHA-512 implementation\n");
#ifdef CONFIG_AS_AVX2
		else if (sha512_transform_asm == sha512_transform_rorx)
			pr_info("Using AVX2 optimized SHA-512 implementation\n");
#endif
		else
#endif
			pr_info("Using SSSE3 optimized SHA-512 implementation\n");
		return crypto_register_shashes(algs, ARRAY_SIZE(algs));
	}
	pr_info("Neither AVX nor SSSE3 is available/usable.\n");

	return -ENODEV;
}
 317
 318static void __exit sha512_ssse3_mod_fini(void)
 319{
 320        crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
 321}
 322
module_init(sha512_ssse3_mod_init);
module_exit(sha512_ssse3_mod_fini);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, Supplemental SSE3 accelerated");

/* Allow auto-loading when "sha512" or "sha384" is requested by name. */
MODULE_ALIAS("sha512");
MODULE_ALIAS("sha384");
 331