linux/arch/x86/crypto/chacha20_glue.c
<<
>>
Prefs
   1/*
   2 * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
   3 *
   4 * Copyright (C) 2015 Martin Willi
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License as published by
   8 * the Free Software Foundation; either version 2 of the License, or
   9 * (at your option) any later version.
  10 */
  11
  12#include <crypto/algapi.h>
  13#include <crypto/chacha20.h>
  14#include <crypto/internal/skcipher.h>
  15#include <linux/kernel.h>
  16#include <linux/module.h>
  17#include <asm/fpu/api.h>
  18#include <asm/simd.h>
  19
  20#define CHACHA20_STATE_ALIGN 16
  21
  22asmlinkage void chacha20_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src);
  23asmlinkage void chacha20_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src);
  24#ifdef CONFIG_AS_AVX2
  25asmlinkage void chacha20_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src);
  26static bool chacha20_use_avx2;
  27#endif
  28
  29static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
  30                            unsigned int bytes)
  31{
  32        u8 buf[CHACHA20_BLOCK_SIZE];
  33
  34#ifdef CONFIG_AS_AVX2
  35        if (chacha20_use_avx2) {
  36                while (bytes >= CHACHA20_BLOCK_SIZE * 8) {
  37                        chacha20_8block_xor_avx2(state, dst, src);
  38                        bytes -= CHACHA20_BLOCK_SIZE * 8;
  39                        src += CHACHA20_BLOCK_SIZE * 8;
  40                        dst += CHACHA20_BLOCK_SIZE * 8;
  41                        state[12] += 8;
  42                }
  43        }
  44#endif
  45        while (bytes >= CHACHA20_BLOCK_SIZE * 4) {
  46                chacha20_4block_xor_ssse3(state, dst, src);
  47                bytes -= CHACHA20_BLOCK_SIZE * 4;
  48                src += CHACHA20_BLOCK_SIZE * 4;
  49                dst += CHACHA20_BLOCK_SIZE * 4;
  50                state[12] += 4;
  51        }
  52        while (bytes >= CHACHA20_BLOCK_SIZE) {
  53                chacha20_block_xor_ssse3(state, dst, src);
  54                bytes -= CHACHA20_BLOCK_SIZE;
  55                src += CHACHA20_BLOCK_SIZE;
  56                dst += CHACHA20_BLOCK_SIZE;
  57                state[12]++;
  58        }
  59        if (bytes) {
  60                memcpy(buf, src, bytes);
  61                chacha20_block_xor_ssse3(state, buf, buf);
  62                memcpy(dst, buf, bytes);
  63        }
  64}
  65
  66static int chacha20_simd(struct skcipher_request *req)
  67{
  68        struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
  69        struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
  70        u32 *state, state_buf[16 + 2] __aligned(8);
  71        struct skcipher_walk walk;
  72        int err;
  73
  74        BUILD_BUG_ON(CHACHA20_STATE_ALIGN != 16);
  75        state = PTR_ALIGN(state_buf + 0, CHACHA20_STATE_ALIGN);
  76
  77        if (req->cryptlen <= CHACHA20_BLOCK_SIZE || !may_use_simd())
  78                return crypto_chacha20_crypt(req);
  79
  80        err = skcipher_walk_virt(&walk, req, true);
  81
  82        crypto_chacha20_init(state, ctx, walk.iv);
  83
  84        kernel_fpu_begin();
  85
  86        while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
  87                chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
  88                                rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE));
  89                err = skcipher_walk_done(&walk,
  90                                         walk.nbytes % CHACHA20_BLOCK_SIZE);
  91        }
  92
  93        if (walk.nbytes) {
  94                chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
  95                                walk.nbytes);
  96                err = skcipher_walk_done(&walk, 0);
  97        }
  98
  99        kernel_fpu_end();
 100
 101        return err;
 102}
 103
 104static struct skcipher_alg alg = {
 105        .base.cra_name          = "chacha20",
 106        .base.cra_driver_name   = "chacha20-simd",
 107        .base.cra_priority      = 300,
 108        .base.cra_blocksize     = 1,
 109        .base.cra_ctxsize       = sizeof(struct chacha20_ctx),
 110        .base.cra_module        = THIS_MODULE,
 111
 112        .min_keysize            = CHACHA20_KEY_SIZE,
 113        .max_keysize            = CHACHA20_KEY_SIZE,
 114        .ivsize                 = CHACHA20_IV_SIZE,
 115        .chunksize              = CHACHA20_BLOCK_SIZE,
 116        .setkey                 = crypto_chacha20_setkey,
 117        .encrypt                = chacha20_simd,
 118        .decrypt                = chacha20_simd,
 119};
 120
 121static int __init chacha20_simd_mod_init(void)
 122{
 123        if (!boot_cpu_has(X86_FEATURE_SSSE3))
 124                return -ENODEV;
 125
 126#ifdef CONFIG_AS_AVX2
 127        chacha20_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
 128                            boot_cpu_has(X86_FEATURE_AVX2) &&
 129                            cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
 130#endif
 131        return crypto_register_skcipher(&alg);
 132}
 133
 134static void __exit chacha20_simd_mod_fini(void)
 135{
 136        crypto_unregister_skcipher(&alg);
 137}
 138
 139module_init(chacha20_simd_mod_init);
 140module_exit(chacha20_simd_mod_fini);
 141
 142MODULE_LICENSE("GPL");
 143MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
 144MODULE_DESCRIPTION("chacha20 cipher algorithm, SIMD accelerated");
 145MODULE_ALIAS_CRYPTO("chacha20");
 146MODULE_ALIAS_CRYPTO("chacha20-simd");
 147