linux/drivers/crypto/padlock-sha.c
<<
>>
Prefs
   1/*
   2 * Cryptographic API.
   3 *
   4 * Support for VIA PadLock hardware crypto engine.
   5 *
   6 * Copyright (c) 2006  Michal Ludvig <michal@logix.cz>
   7 *
   8 * This program is free software; you can redistribute it and/or modify
   9 * it under the terms of the GNU General Public License as published by
  10 * the Free Software Foundation; either version 2 of the License, or
  11 * (at your option) any later version.
  12 *
  13 */
  14
  15#include <crypto/internal/hash.h>
  16#include <crypto/padlock.h>
  17#include <crypto/sha.h>
  18#include <linux/err.h>
  19#include <linux/module.h>
  20#include <linux/init.h>
  21#include <linux/errno.h>
  22#include <linux/interrupt.h>
  23#include <linux/kernel.h>
  24#include <linux/scatterlist.h>
  25#include <asm/cpu_device_id.h>
  26#include <asm/fpu/api.h>
  27
  28struct padlock_sha_desc {
  29        struct shash_desc fallback;
  30};
  31
  32struct padlock_sha_ctx {
  33        struct crypto_shash *fallback;
  34};
  35
  36static int padlock_sha_init(struct shash_desc *desc)
  37{
  38        struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
  39        struct padlock_sha_ctx *ctx = crypto_shash_ctx(desc->tfm);
  40
  41        dctx->fallback.tfm = ctx->fallback;
  42        dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
  43        return crypto_shash_init(&dctx->fallback);
  44}
  45
  46static int padlock_sha_update(struct shash_desc *desc,
  47                              const u8 *data, unsigned int length)
  48{
  49        struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
  50
  51        dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
  52        return crypto_shash_update(&dctx->fallback, data, length);
  53}
  54
  55static int padlock_sha_export(struct shash_desc *desc, void *out)
  56{
  57        struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
  58
  59        return crypto_shash_export(&dctx->fallback, out);
  60}
  61
  62static int padlock_sha_import(struct shash_desc *desc, const void *in)
  63{
  64        struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
  65        struct padlock_sha_ctx *ctx = crypto_shash_ctx(desc->tfm);
  66
  67        dctx->fallback.tfm = ctx->fallback;
  68        dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
  69        return crypto_shash_import(&dctx->fallback, in);
  70}
  71
  72static inline void padlock_output_block(uint32_t *src,
  73                        uint32_t *dst, size_t count)
  74{
  75        while (count--)
  76                *dst++ = swab32(*src++);
  77}
  78
  79static int padlock_sha1_finup(struct shash_desc *desc, const u8 *in,
  80                              unsigned int count, u8 *out)
  81{
  82        /* We can't store directly to *out as it may be unaligned. */
  83        /* BTW Don't reduce the buffer size below 128 Bytes!
  84         *     PadLock microcode needs it that big. */
  85        char buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
  86                ((aligned(STACK_ALIGN)));
  87        char *result = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
  88        struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
  89        struct sha1_state state;
  90        unsigned int space;
  91        unsigned int leftover;
  92        int ts_state;
  93        int err;
  94
  95        dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
  96        err = crypto_shash_export(&dctx->fallback, &state);
  97        if (err)
  98                goto out;
  99
 100        if (state.count + count > ULONG_MAX)
 101                return crypto_shash_finup(&dctx->fallback, in, count, out);
 102
 103        leftover = ((state.count - 1) & (SHA1_BLOCK_SIZE - 1)) + 1;
 104        space =  SHA1_BLOCK_SIZE - leftover;
 105        if (space) {
 106                if (count > space) {
 107                        err = crypto_shash_update(&dctx->fallback, in, space) ?:
 108                              crypto_shash_export(&dctx->fallback, &state);
 109                        if (err)
 110                                goto out;
 111                        count -= space;
 112                        in += space;
 113                } else {
 114                        memcpy(state.buffer + leftover, in, count);
 115                        in = state.buffer;
 116                        count += leftover;
 117                        state.count &= ~(SHA1_BLOCK_SIZE - 1);
 118                }
 119        }
 120
 121        memcpy(result, &state.state, SHA1_DIGEST_SIZE);
 122
 123        /* prevent taking the spurious DNA fault with padlock. */
 124        ts_state = irq_ts_save();
 125        asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" /* rep xsha1 */
 126                      : \
 127                      : "c"((unsigned long)state.count + count), \
 128                        "a"((unsigned long)state.count), \
 129                        "S"(in), "D"(result));
 130        irq_ts_restore(ts_state);
 131
 132        padlock_output_block((uint32_t *)result, (uint32_t *)out, 5);
 133
 134out:
 135        return err;
 136}
 137
 138static int padlock_sha1_final(struct shash_desc *desc, u8 *out)
 139{
 140        u8 buf[4];
 141
 142        return padlock_sha1_finup(desc, buf, 0, out);
 143}
 144
 145static int padlock_sha256_finup(struct shash_desc *desc, const u8 *in,
 146                                unsigned int count, u8 *out)
 147{
 148        /* We can't store directly to *out as it may be unaligned. */
 149        /* BTW Don't reduce the buffer size below 128 Bytes!
 150         *     PadLock microcode needs it that big. */
 151        char buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
 152                ((aligned(STACK_ALIGN)));
 153        char *result = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
 154        struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
 155        struct sha256_state state;
 156        unsigned int space;
 157        unsigned int leftover;
 158        int ts_state;
 159        int err;
 160
 161        dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
 162        err = crypto_shash_export(&dctx->fallback, &state);
 163        if (err)
 164                goto out;
 165
 166        if (state.count + count > ULONG_MAX)
 167                return crypto_shash_finup(&dctx->fallback, in, count, out);
 168
 169        leftover = ((state.count - 1) & (SHA256_BLOCK_SIZE - 1)) + 1;
 170        space =  SHA256_BLOCK_SIZE - leftover;
 171        if (space) {
 172                if (count > space) {
 173                        err = crypto_shash_update(&dctx->fallback, in, space) ?:
 174                              crypto_shash_export(&dctx->fallback, &state);
 175                        if (err)
 176                                goto out;
 177                        count -= space;
 178                        in += space;
 179                } else {
 180                        memcpy(state.buf + leftover, in, count);
 181                        in = state.buf;
 182                        count += leftover;
 183                        state.count &= ~(SHA1_BLOCK_SIZE - 1);
 184                }
 185        }
 186
 187        memcpy(result, &state.state, SHA256_DIGEST_SIZE);
 188
 189        /* prevent taking the spurious DNA fault with padlock. */
 190        ts_state = irq_ts_save();
 191        asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" /* rep xsha256 */
 192                      : \
 193                      : "c"((unsigned long)state.count + count), \
 194                        "a"((unsigned long)state.count), \
 195                        "S"(in), "D"(result));
 196        irq_ts_restore(ts_state);
 197
 198        padlock_output_block((uint32_t *)result, (uint32_t *)out, 8);
 199
 200out:
 201        return err;
 202}
 203
 204static int padlock_sha256_final(struct shash_desc *desc, u8 *out)
 205{
 206        u8 buf[4];
 207
 208        return padlock_sha256_finup(desc, buf, 0, out);
 209}
 210
 211static int padlock_cra_init(struct crypto_tfm *tfm)
 212{
 213        struct crypto_shash *hash = __crypto_shash_cast(tfm);
 214        const char *fallback_driver_name = crypto_tfm_alg_name(tfm);
 215        struct padlock_sha_ctx *ctx = crypto_tfm_ctx(tfm);
 216        struct crypto_shash *fallback_tfm;
 217        int err = -ENOMEM;
 218
 219        /* Allocate a fallback and abort if it failed. */
 220        fallback_tfm = crypto_alloc_shash(fallback_driver_name, 0,
 221                                          CRYPTO_ALG_NEED_FALLBACK);
 222        if (IS_ERR(fallback_tfm)) {
 223                printk(KERN_WARNING PFX "Fallback driver '%s' could not be loaded!\n",
 224                       fallback_driver_name);
 225                err = PTR_ERR(fallback_tfm);
 226                goto out;
 227        }
 228
 229        ctx->fallback = fallback_tfm;
 230        hash->descsize += crypto_shash_descsize(fallback_tfm);
 231        return 0;
 232
 233out:
 234        return err;
 235}
 236
 237static void padlock_cra_exit(struct crypto_tfm *tfm)
 238{
 239        struct padlock_sha_ctx *ctx = crypto_tfm_ctx(tfm);
 240
 241        crypto_free_shash(ctx->fallback);
 242}
 243
 244static struct shash_alg sha1_alg = {
 245        .digestsize     =       SHA1_DIGEST_SIZE,
 246        .init           =       padlock_sha_init,
 247        .update         =       padlock_sha_update,
 248        .finup          =       padlock_sha1_finup,
 249        .final          =       padlock_sha1_final,
 250        .export         =       padlock_sha_export,
 251        .import         =       padlock_sha_import,
 252        .descsize       =       sizeof(struct padlock_sha_desc),
 253        .statesize      =       sizeof(struct sha1_state),
 254        .base           =       {
 255                .cra_name               =       "sha1",
 256                .cra_driver_name        =       "sha1-padlock",
 257                .cra_priority           =       PADLOCK_CRA_PRIORITY,
 258                .cra_flags              =       CRYPTO_ALG_TYPE_SHASH |
 259                                                CRYPTO_ALG_NEED_FALLBACK,
 260                .cra_blocksize          =       SHA1_BLOCK_SIZE,
 261                .cra_ctxsize            =       sizeof(struct padlock_sha_ctx),
 262                .cra_module             =       THIS_MODULE,
 263                .cra_init               =       padlock_cra_init,
 264                .cra_exit               =       padlock_cra_exit,
 265        }
 266};
 267
 268static struct shash_alg sha256_alg = {
 269        .digestsize     =       SHA256_DIGEST_SIZE,
 270        .init           =       padlock_sha_init,
 271        .update         =       padlock_sha_update,
 272        .finup          =       padlock_sha256_finup,
 273        .final          =       padlock_sha256_final,
 274        .export         =       padlock_sha_export,
 275        .import         =       padlock_sha_import,
 276        .descsize       =       sizeof(struct padlock_sha_desc),
 277        .statesize      =       sizeof(struct sha256_state),
 278        .base           =       {
 279                .cra_name               =       "sha256",
 280                .cra_driver_name        =       "sha256-padlock",
 281                .cra_priority           =       PADLOCK_CRA_PRIORITY,
 282                .cra_flags              =       CRYPTO_ALG_TYPE_SHASH |
 283                                                CRYPTO_ALG_NEED_FALLBACK,
 284                .cra_blocksize          =       SHA256_BLOCK_SIZE,
 285                .cra_ctxsize            =       sizeof(struct padlock_sha_ctx),
 286                .cra_module             =       THIS_MODULE,
 287                .cra_init               =       padlock_cra_init,
 288                .cra_exit               =       padlock_cra_exit,
 289        }
 290};
 291
/*
 * Two additional shash_alg instances for the hardware-implemented
 * multi-part hashing supported by the VIA Nano processor.
 */
 294static int padlock_sha1_init_nano(struct shash_desc *desc)
 295{
 296        struct sha1_state *sctx = shash_desc_ctx(desc);
 297
 298        *sctx = (struct sha1_state){
 299                .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
 300        };
 301
 302        return 0;
 303}
 304
 305static int padlock_sha1_update_nano(struct shash_desc *desc,
 306                        const u8 *data, unsigned int len)
 307{
 308        struct sha1_state *sctx = shash_desc_ctx(desc);
 309        unsigned int partial, done;
 310        const u8 *src;
 311        /*The PHE require the out buffer must 128 bytes and 16-bytes aligned*/
 312        u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
 313                ((aligned(STACK_ALIGN)));
 314        u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
 315        int ts_state;
 316
 317        partial = sctx->count & 0x3f;
 318        sctx->count += len;
 319        done = 0;
 320        src = data;
 321        memcpy(dst, (u8 *)(sctx->state), SHA1_DIGEST_SIZE);
 322
 323        if ((partial + len) >= SHA1_BLOCK_SIZE) {
 324
 325                /* Append the bytes in state's buffer to a block to handle */
 326                if (partial) {
 327                        done = -partial;
 328                        memcpy(sctx->buffer + partial, data,
 329                                done + SHA1_BLOCK_SIZE);
 330                        src = sctx->buffer;
 331                        ts_state = irq_ts_save();
 332                        asm volatile (".byte 0xf3,0x0f,0xa6,0xc8"
 333                        : "+S"(src), "+D"(dst) \
 334                        : "a"((long)-1), "c"((unsigned long)1));
 335                        irq_ts_restore(ts_state);
 336                        done += SHA1_BLOCK_SIZE;
 337                        src = data + done;
 338                }
 339
 340                /* Process the left bytes from the input data */
 341                if (len - done >= SHA1_BLOCK_SIZE) {
 342                        ts_state = irq_ts_save();
 343                        asm volatile (".byte 0xf3,0x0f,0xa6,0xc8"
 344                        : "+S"(src), "+D"(dst)
 345                        : "a"((long)-1),
 346                        "c"((unsigned long)((len - done) / SHA1_BLOCK_SIZE)));
 347                        irq_ts_restore(ts_state);
 348                        done += ((len - done) - (len - done) % SHA1_BLOCK_SIZE);
 349                        src = data + done;
 350                }
 351                partial = 0;
 352        }
 353        memcpy((u8 *)(sctx->state), dst, SHA1_DIGEST_SIZE);
 354        memcpy(sctx->buffer + partial, src, len - done);
 355
 356        return 0;
 357}
 358
 359static int padlock_sha1_final_nano(struct shash_desc *desc, u8 *out)
 360{
 361        struct sha1_state *state = (struct sha1_state *)shash_desc_ctx(desc);
 362        unsigned int partial, padlen;
 363        __be64 bits;
 364        static const u8 padding[64] = { 0x80, };
 365
 366        bits = cpu_to_be64(state->count << 3);
 367
 368        /* Pad out to 56 mod 64 */
 369        partial = state->count & 0x3f;
 370        padlen = (partial < 56) ? (56 - partial) : ((64+56) - partial);
 371        padlock_sha1_update_nano(desc, padding, padlen);
 372
 373        /* Append length field bytes */
 374        padlock_sha1_update_nano(desc, (const u8 *)&bits, sizeof(bits));
 375
 376        /* Swap to output */
 377        padlock_output_block((uint32_t *)(state->state), (uint32_t *)out, 5);
 378
 379        return 0;
 380}
 381
 382static int padlock_sha256_init_nano(struct shash_desc *desc)
 383{
 384        struct sha256_state *sctx = shash_desc_ctx(desc);
 385
 386        *sctx = (struct sha256_state){
 387                .state = { SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, \
 388                                SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7},
 389        };
 390
 391        return 0;
 392}
 393
 394static int padlock_sha256_update_nano(struct shash_desc *desc, const u8 *data,
 395                          unsigned int len)
 396{
 397        struct sha256_state *sctx = shash_desc_ctx(desc);
 398        unsigned int partial, done;
 399        const u8 *src;
 400        /*The PHE require the out buffer must 128 bytes and 16-bytes aligned*/
 401        u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
 402                ((aligned(STACK_ALIGN)));
 403        u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
 404        int ts_state;
 405
 406        partial = sctx->count & 0x3f;
 407        sctx->count += len;
 408        done = 0;
 409        src = data;
 410        memcpy(dst, (u8 *)(sctx->state), SHA256_DIGEST_SIZE);
 411
 412        if ((partial + len) >= SHA256_BLOCK_SIZE) {
 413
 414                /* Append the bytes in state's buffer to a block to handle */
 415                if (partial) {
 416                        done = -partial;
 417                        memcpy(sctx->buf + partial, data,
 418                                done + SHA256_BLOCK_SIZE);
 419                        src = sctx->buf;
 420                        ts_state = irq_ts_save();
 421                        asm volatile (".byte 0xf3,0x0f,0xa6,0xd0"
 422                        : "+S"(src), "+D"(dst)
 423                        : "a"((long)-1), "c"((unsigned long)1));
 424                        irq_ts_restore(ts_state);
 425                        done += SHA256_BLOCK_SIZE;
 426                        src = data + done;
 427                }
 428
 429                /* Process the left bytes from input data*/
 430                if (len - done >= SHA256_BLOCK_SIZE) {
 431                        ts_state = irq_ts_save();
 432                        asm volatile (".byte 0xf3,0x0f,0xa6,0xd0"
 433                        : "+S"(src), "+D"(dst)
 434                        : "a"((long)-1),
 435                        "c"((unsigned long)((len - done) / 64)));
 436                        irq_ts_restore(ts_state);
 437                        done += ((len - done) - (len - done) % 64);
 438                        src = data + done;
 439                }
 440                partial = 0;
 441        }
 442        memcpy((u8 *)(sctx->state), dst, SHA256_DIGEST_SIZE);
 443        memcpy(sctx->buf + partial, src, len - done);
 444
 445        return 0;
 446}
 447
 448static int padlock_sha256_final_nano(struct shash_desc *desc, u8 *out)
 449{
 450        struct sha256_state *state =
 451                (struct sha256_state *)shash_desc_ctx(desc);
 452        unsigned int partial, padlen;
 453        __be64 bits;
 454        static const u8 padding[64] = { 0x80, };
 455
 456        bits = cpu_to_be64(state->count << 3);
 457
 458        /* Pad out to 56 mod 64 */
 459        partial = state->count & 0x3f;
 460        padlen = (partial < 56) ? (56 - partial) : ((64+56) - partial);
 461        padlock_sha256_update_nano(desc, padding, padlen);
 462
 463        /* Append length field bytes */
 464        padlock_sha256_update_nano(desc, (const u8 *)&bits, sizeof(bits));
 465
 466        /* Swap to output */
 467        padlock_output_block((uint32_t *)(state->state), (uint32_t *)out, 8);
 468
 469        return 0;
 470}
 471
 472static int padlock_sha_export_nano(struct shash_desc *desc,
 473                                void *out)
 474{
 475        int statesize = crypto_shash_statesize(desc->tfm);
 476        void *sctx = shash_desc_ctx(desc);
 477
 478        memcpy(out, sctx, statesize);
 479        return 0;
 480}
 481
 482static int padlock_sha_import_nano(struct shash_desc *desc,
 483                                const void *in)
 484{
 485        int statesize = crypto_shash_statesize(desc->tfm);
 486        void *sctx = shash_desc_ctx(desc);
 487
 488        memcpy(sctx, in, statesize);
 489        return 0;
 490}
 491
 492static struct shash_alg sha1_alg_nano = {
 493        .digestsize     =       SHA1_DIGEST_SIZE,
 494        .init           =       padlock_sha1_init_nano,
 495        .update         =       padlock_sha1_update_nano,
 496        .final          =       padlock_sha1_final_nano,
 497        .export         =       padlock_sha_export_nano,
 498        .import         =       padlock_sha_import_nano,
 499        .descsize       =       sizeof(struct sha1_state),
 500        .statesize      =       sizeof(struct sha1_state),
 501        .base           =       {
 502                .cra_name               =       "sha1",
 503                .cra_driver_name        =       "sha1-padlock-nano",
 504                .cra_priority           =       PADLOCK_CRA_PRIORITY,
 505                .cra_flags              =       CRYPTO_ALG_TYPE_SHASH,
 506                .cra_blocksize          =       SHA1_BLOCK_SIZE,
 507                .cra_module             =       THIS_MODULE,
 508        }
 509};
 510
 511static struct shash_alg sha256_alg_nano = {
 512        .digestsize     =       SHA256_DIGEST_SIZE,
 513        .init           =       padlock_sha256_init_nano,
 514        .update         =       padlock_sha256_update_nano,
 515        .final          =       padlock_sha256_final_nano,
 516        .export         =       padlock_sha_export_nano,
 517        .import         =       padlock_sha_import_nano,
 518        .descsize       =       sizeof(struct sha256_state),
 519        .statesize      =       sizeof(struct sha256_state),
 520        .base           =       {
 521                .cra_name               =       "sha256",
 522                .cra_driver_name        =       "sha256-padlock-nano",
 523                .cra_priority           =       PADLOCK_CRA_PRIORITY,
 524                .cra_flags              =       CRYPTO_ALG_TYPE_SHASH,
 525                .cra_blocksize          =       SHA256_BLOCK_SIZE,
 526                .cra_module             =       THIS_MODULE,
 527        }
 528};
 529
 530static struct x86_cpu_id padlock_sha_ids[] = {
 531        X86_FEATURE_MATCH(X86_FEATURE_PHE),
 532        {}
 533};
 534MODULE_DEVICE_TABLE(x86cpu, padlock_sha_ids);
 535
 536static int __init padlock_init(void)
 537{
 538        int rc = -ENODEV;
 539        struct cpuinfo_x86 *c = &cpu_data(0);
 540        struct shash_alg *sha1;
 541        struct shash_alg *sha256;
 542
 543        if (!x86_match_cpu(padlock_sha_ids) || !cpu_has_phe_enabled)
 544                return -ENODEV;
 545
 546        /* Register the newly added algorithm module if on *
 547        * VIA Nano processor, or else just do as before */
 548        if (c->x86_model < 0x0f) {
 549                sha1 = &sha1_alg;
 550                sha256 = &sha256_alg;
 551        } else {
 552                sha1 = &sha1_alg_nano;
 553                sha256 = &sha256_alg_nano;
 554        }
 555
 556        rc = crypto_register_shash(sha1);
 557        if (rc)
 558                goto out;
 559
 560        rc = crypto_register_shash(sha256);
 561        if (rc)
 562                goto out_unreg1;
 563
 564        printk(KERN_NOTICE PFX "Using VIA PadLock ACE for SHA1/SHA256 algorithms.\n");
 565
 566        return 0;
 567
 568out_unreg1:
 569        crypto_unregister_shash(sha1);
 570
 571out:
 572        printk(KERN_ERR PFX "VIA PadLock SHA1/SHA256 initialization failed.\n");
 573        return rc;
 574}
 575
 576static void __exit padlock_fini(void)
 577{
 578        struct cpuinfo_x86 *c = &cpu_data(0);
 579
 580        if (c->x86_model >= 0x0f) {
 581                crypto_unregister_shash(&sha1_alg_nano);
 582                crypto_unregister_shash(&sha256_alg_nano);
 583        } else {
 584                crypto_unregister_shash(&sha1_alg);
 585                crypto_unregister_shash(&sha256_alg);
 586        }
 587}
 588
 589module_init(padlock_init);
 590module_exit(padlock_fini);
 591
 592MODULE_DESCRIPTION("VIA PadLock SHA1/SHA256 algorithms support.");
 593MODULE_LICENSE("GPL");
 594MODULE_AUTHOR("Michal Ludvig");
 595
 596MODULE_ALIAS_CRYPTO("sha1-all");
 597MODULE_ALIAS_CRYPTO("sha256-all");
 598MODULE_ALIAS_CRYPTO("sha1-padlock");
 599MODULE_ALIAS_CRYPTO("sha256-padlock");
 600