linux/arch/x86/crypto/chacha_glue.c
/*
 * x64 SIMD accelerated ChaCha and XChaCha stream ciphers,
 * including ChaCha20 (RFC7539)
 *
 * Copyright (C) 2015 Martin Willi
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

#include <crypto/algapi.h>
#include <crypto/chacha.h>
#include <crypto/internal/skcipher.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <asm/fpu/api.h>
#include <asm/simd.h>

#define CHACHA_STATE_ALIGN 16

asmlinkage void chacha_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
                                       unsigned int len, int nrounds);
asmlinkage void chacha_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
                                        unsigned int len, int nrounds);
asmlinkage void hchacha_block_ssse3(const u32 *state, u32 *out, int nrounds);
#ifdef CONFIG_AS_AVX2
asmlinkage void chacha_2block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
                                       unsigned int len, int nrounds);
asmlinkage void chacha_4block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
                                       unsigned int len, int nrounds);
asmlinkage void chacha_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
                                       unsigned int len, int nrounds);
static bool chacha_use_avx2;
#ifdef CONFIG_AS_AVX512
asmlinkage void chacha_2block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
                                           unsigned int len, int nrounds);
asmlinkage void chacha_4block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
                                           unsigned int len, int nrounds);
asmlinkage void chacha_8block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
                                           unsigned int len, int nrounds);
static bool chacha_use_avx512vl;
#endif
#endif

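/*
 * Number of 64-byte blocks the counter must advance after encrypting
 * @len bytes with a routine that handles up to @maxblocks blocks at
 * once.  A partial final block still consumes one counter value.
 */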
static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks)
{
        len = min(len, maxblocks * CHACHA_BLOCK_SIZE);
        return round_up(len, CHACHA_BLOCK_SIZE) / CHACHA_BLOCK_SIZE;
}

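/*
 * XOR @bytes bytes of keystream into dst, dispatching to the widest
 * implementation the CPU supports first.  Each *block_xor_* routine
 * tolerates a partial final block; state[12] is the block counter and
 * is advanced by the number of blocks consumed.  The AVX-512VL branch
 * handles any remaining tail itself, while the AVX2 branch leaves a
 * final block of at most 64 bytes to the single-block SSSE3 routine.
 */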
static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
                          unsigned int bytes, int nrounds)
{
#ifdef CONFIG_AS_AVX2
#ifdef CONFIG_AS_AVX512
        if (chacha_use_avx512vl) {
                while (bytes >= CHACHA_BLOCK_SIZE * 8) {
                        chacha_8block_xor_avx512vl(state, dst, src, bytes,
                                                   nrounds);
                        bytes -= CHACHA_BLOCK_SIZE * 8;
                        src += CHACHA_BLOCK_SIZE * 8;
                        dst += CHACHA_BLOCK_SIZE * 8;
                        state[12] += 8;
                }
                if (bytes > CHACHA_BLOCK_SIZE * 4) {
                        chacha_8block_xor_avx512vl(state, dst, src, bytes,
                                                   nrounds);
                        state[12] += chacha_advance(bytes, 8);
                        return;
                }
                if (bytes > CHACHA_BLOCK_SIZE * 2) {
                        chacha_4block_xor_avx512vl(state, dst, src, bytes,
                                                   nrounds);
                        state[12] += chacha_advance(bytes, 4);
                        return;
                }
                if (bytes) {
                        chacha_2block_xor_avx512vl(state, dst, src, bytes,
                                                   nrounds);
                        state[12] += chacha_advance(bytes, 2);
                        return;
                }
        }
#endif
        if (chacha_use_avx2) {
                while (bytes >= CHACHA_BLOCK_SIZE * 8) {
                        chacha_8block_xor_avx2(state, dst, src, bytes, nrounds);
                        bytes -= CHACHA_BLOCK_SIZE * 8;
                        src += CHACHA_BLOCK_SIZE * 8;
                        dst += CHACHA_BLOCK_SIZE * 8;
                        state[12] += 8;
                }
                if (bytes > CHACHA_BLOCK_SIZE * 4) {
                        chacha_8block_xor_avx2(state, dst, src, bytes, nrounds);
                        state[12] += chacha_advance(bytes, 8);
                        return;
                }
                if (bytes > CHACHA_BLOCK_SIZE * 2) {
                        chacha_4block_xor_avx2(state, dst, src, bytes, nrounds);
                        state[12] += chacha_advance(bytes, 4);
                        return;
                }
                if (bytes > CHACHA_BLOCK_SIZE) {
                        chacha_2block_xor_avx2(state, dst, src, bytes, nrounds);
                        state[12] += chacha_advance(bytes, 2);
                        return;
                }
        }
#endif
        while (bytes >= CHACHA_BLOCK_SIZE * 4) {
                chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds);
                bytes -= CHACHA_BLOCK_SIZE * 4;
                src += CHACHA_BLOCK_SIZE * 4;
                dst += CHACHA_BLOCK_SIZE * 4;
                state[12] += 4;
        }
        if (bytes > CHACHA_BLOCK_SIZE) {
                chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds);
                state[12] += chacha_advance(bytes, 4);
                return;
        }
        if (bytes) {
                chacha_block_xor_ssse3(state, dst, src, bytes, nrounds);
                state[12]++;
        }
}

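/*
 * Walk the request and XOR the keystream into it.  Must be called with
 * the FPU enabled (between kernel_fpu_begin() and kernel_fpu_end()); the
 * FPU is briefly released about every 4096 bytes so preemption latency
 * stays bounded on long requests.  The state buffer is over-allocated
 * and aligned by hand because the stack is only guaranteed to be 8-byte
 * aligned here, which __aligned() on a stack variable cannot exceed.
 */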
static int chacha_simd_stream_xor(struct skcipher_walk *walk,
                                  struct chacha_ctx *ctx, u8 *iv)
{
        u32 *state, state_buf[16 + 2] __aligned(8);
        int next_yield = 4096; /* bytes until next FPU yield */
        int err = 0;

        BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16);
        state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN);

        crypto_chacha_init(state, ctx, iv);

        while (walk->nbytes > 0) {
                unsigned int nbytes = walk->nbytes;

                if (nbytes < walk->total) {
                        nbytes = round_down(nbytes, walk->stride);
                        next_yield -= nbytes;
                }

                chacha_dosimd(state, walk->dst.virt.addr, walk->src.virt.addr,
                              nbytes, ctx->nrounds);

                if (next_yield <= 0) {
                        /* temporarily allow preemption */
                        kernel_fpu_end();
                        kernel_fpu_begin();
                        next_yield = 4096;
                }

                err = skcipher_walk_done(walk, walk->nbytes - nbytes);
        }

        return err;
}

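/*
 * Fall back to the generic implementation for requests of at most one
 * block, where the SIMD setup cost is not worthwhile, and in contexts
 * where the FPU cannot be used (irq_fpu_usable() is false).
 */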
static int chacha_simd(struct skcipher_request *req)
{
        struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
        struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
        struct skcipher_walk walk;
        int err;

        if (req->cryptlen <= CHACHA_BLOCK_SIZE || !irq_fpu_usable())
                return crypto_chacha_crypt(req);

        err = skcipher_walk_virt(&walk, req, true);
        if (err)
                return err;

        kernel_fpu_begin();
        err = chacha_simd_stream_xor(&walk, ctx, req->iv);
        kernel_fpu_end();
        return err;
}

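/*
 * XChaCha: use HChaCha to derive a subkey from the key and the first
 * 128 bits of the 192-bit nonce, then run ordinary ChaCha with that
 * subkey, the remaining 64 nonce bits, and the 64-bit stream position
 * taken from the end of the 32-byte IV.
 */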
static int xchacha_simd(struct skcipher_request *req)
{
        struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
        struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
        struct skcipher_walk walk;
        struct chacha_ctx subctx;
        u32 *state, state_buf[16 + 2] __aligned(8);
        u8 real_iv[16];
        int err;

        if (req->cryptlen <= CHACHA_BLOCK_SIZE || !irq_fpu_usable())
                return crypto_xchacha_crypt(req);

        err = skcipher_walk_virt(&walk, req, true);
        if (err)
                return err;

        BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16);
        state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN);
        crypto_chacha_init(state, ctx, req->iv);

        kernel_fpu_begin();

        hchacha_block_ssse3(state, subctx.key, ctx->nrounds);
        subctx.nrounds = ctx->nrounds;

        /* Build the real IV */
        memcpy(&real_iv[0], req->iv + 24, 8); /* stream position */
        memcpy(&real_iv[8], req->iv + 16, 8); /* remaining 64 nonce bits */
        err = chacha_simd_stream_xor(&walk, &subctx, real_iv);

        kernel_fpu_end();

        return err;
}

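/* Priority 300 registers these ahead of the lower-priority generic C code. */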
static struct skcipher_alg algs[] = {
        {
                .base.cra_name          = "chacha20",
                .base.cra_driver_name   = "chacha20-simd",
                .base.cra_priority      = 300,
                .base.cra_blocksize     = 1,
                .base.cra_ctxsize       = sizeof(struct chacha_ctx),
                .base.cra_module        = THIS_MODULE,

                .min_keysize            = CHACHA_KEY_SIZE,
                .max_keysize            = CHACHA_KEY_SIZE,
                .ivsize                 = CHACHA_IV_SIZE,
                .chunksize              = CHACHA_BLOCK_SIZE,
                .setkey                 = crypto_chacha20_setkey,
                .encrypt                = chacha_simd,
                .decrypt                = chacha_simd,
        }, {
                .base.cra_name          = "xchacha20",
                .base.cra_driver_name   = "xchacha20-simd",
                .base.cra_priority      = 300,
                .base.cra_blocksize     = 1,
                .base.cra_ctxsize       = sizeof(struct chacha_ctx),
                .base.cra_module        = THIS_MODULE,

                .min_keysize            = CHACHA_KEY_SIZE,
                .max_keysize            = CHACHA_KEY_SIZE,
                .ivsize                 = XCHACHA_IV_SIZE,
                .chunksize              = CHACHA_BLOCK_SIZE,
                .setkey                 = crypto_chacha20_setkey,
                .encrypt                = xchacha_simd,
                .decrypt                = xchacha_simd,
        }, {
                .base.cra_name          = "xchacha12",
                .base.cra_driver_name   = "xchacha12-simd",
                .base.cra_priority      = 300,
                .base.cra_blocksize     = 1,
                .base.cra_ctxsize       = sizeof(struct chacha_ctx),
                .base.cra_module        = THIS_MODULE,

                .min_keysize            = CHACHA_KEY_SIZE,
                .max_keysize            = CHACHA_KEY_SIZE,
                .ivsize                 = XCHACHA_IV_SIZE,
                .chunksize              = CHACHA_BLOCK_SIZE,
                .setkey                 = crypto_chacha12_setkey,
                .encrypt                = xchacha_simd,
                .decrypt                = xchacha_simd,
        },
};

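/*
 * SSSE3 is the baseline requirement.  The AVX2 routines additionally
 * require the OS to save/restore YMM register state (cpu_has_xfeatures),
 * and the AVX-512VL routines also need AVX512BW for kmovq.
 */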
static int __init chacha_simd_mod_init(void)
{
        if (!boot_cpu_has(X86_FEATURE_SSSE3))
                return -ENODEV;

#ifdef CONFIG_AS_AVX2
        chacha_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
                          boot_cpu_has(X86_FEATURE_AVX2) &&
                          cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
#ifdef CONFIG_AS_AVX512
        chacha_use_avx512vl = chacha_use_avx2 &&
                              boot_cpu_has(X86_FEATURE_AVX512VL) &&
                              boot_cpu_has(X86_FEATURE_AVX512BW); /* kmovq */
#endif
#endif
        return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
}

static void __exit chacha_simd_mod_fini(void)
{
        crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
}

module_init(chacha_simd_mod_init);
module_exit(chacha_simd_mod_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (x64 SIMD accelerated)");
MODULE_ALIAS_CRYPTO("chacha20");
MODULE_ALIAS_CRYPTO("chacha20-simd");
MODULE_ALIAS_CRYPTO("xchacha20");
MODULE_ALIAS_CRYPTO("xchacha20-simd");
MODULE_ALIAS_CRYPTO("xchacha12");
MODULE_ALIAS_CRYPTO("xchacha12-simd");