linux/arch/arm64/crypto/chacha-neon-glue.c
<<
>>
Prefs
   1/*
   2 * ARM NEON and scalar accelerated ChaCha and XChaCha stream ciphers,
   3 * including ChaCha20 (RFC7539)
   4 *
   5 * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org>
   6 *
   7 * This program is free software; you can redistribute it and/or modify
   8 * it under the terms of the GNU General Public License version 2 as
   9 * published by the Free Software Foundation.
  10 *
  11 * Based on:
  12 * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
  13 *
  14 * Copyright (C) 2015 Martin Willi
  15 *
  16 * This program is free software; you can redistribute it and/or modify
  17 * it under the terms of the GNU General Public License as published by
  18 * the Free Software Foundation; either version 2 of the License, or
  19 * (at your option) any later version.
  20 */
  21
  22#include <crypto/algapi.h>
  23#include <crypto/internal/chacha.h>
  24#include <crypto/internal/simd.h>
  25#include <crypto/internal/skcipher.h>
  26#include <linux/jump_label.h>
  27#include <linux/kernel.h>
  28#include <linux/module.h>
  29
  30#include <asm/hwcap.h>
  31#include <asm/neon.h>
  32#include <asm/simd.h>
  33
  34asmlinkage void chacha_block_xor_neon(u32 *state, u8 *dst, const u8 *src,
  35                                      int nrounds);
  36asmlinkage void chacha_4block_xor_neon(u32 *state, u8 *dst, const u8 *src,
  37                                       int nrounds, int bytes);
  38asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
  39
  40static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
  41
  42static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
  43                          int bytes, int nrounds)
  44{
  45        while (bytes > 0) {
  46                int l = min(bytes, CHACHA_BLOCK_SIZE * 5);
  47
  48                if (l <= CHACHA_BLOCK_SIZE) {
  49                        u8 buf[CHACHA_BLOCK_SIZE];
  50
  51                        memcpy(buf, src, l);
  52                        chacha_block_xor_neon(state, buf, buf, nrounds);
  53                        memcpy(dst, buf, l);
  54                        state[12] += 1;
  55                        break;
  56                }
  57                chacha_4block_xor_neon(state, dst, src, nrounds, l);
  58                bytes -= l;
  59                src += l;
  60                dst += l;
  61                state[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE);
  62        }
  63}
  64
  65void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
  66{
  67        if (!static_branch_likely(&have_neon) || !crypto_simd_usable()) {
  68                hchacha_block_generic(state, stream, nrounds);
  69        } else {
  70                kernel_neon_begin();
  71                hchacha_block_neon(state, stream, nrounds);
  72                kernel_neon_end();
  73        }
  74}
  75EXPORT_SYMBOL(hchacha_block_arch);
  76
  77void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
  78{
  79        chacha_init_generic(state, key, iv);
  80}
  81EXPORT_SYMBOL(chacha_init_arch);
  82
  83void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
  84                       int nrounds)
  85{
  86        if (!static_branch_likely(&have_neon) || bytes <= CHACHA_BLOCK_SIZE ||
  87            !crypto_simd_usable())
  88                return chacha_crypt_generic(state, dst, src, bytes, nrounds);
  89
  90        do {
  91                unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
  92
  93                kernel_neon_begin();
  94                chacha_doneon(state, dst, src, todo, nrounds);
  95                kernel_neon_end();
  96
  97                bytes -= todo;
  98                src += todo;
  99                dst += todo;
 100        } while (bytes);
 101}
 102EXPORT_SYMBOL(chacha_crypt_arch);
 103
 104static int chacha_neon_stream_xor(struct skcipher_request *req,
 105                                  const struct chacha_ctx *ctx, const u8 *iv)
 106{
 107        struct skcipher_walk walk;
 108        u32 state[16];
 109        int err;
 110
 111        err = skcipher_walk_virt(&walk, req, false);
 112
 113        chacha_init_generic(state, ctx->key, iv);
 114
 115        while (walk.nbytes > 0) {
 116                unsigned int nbytes = walk.nbytes;
 117
 118                if (nbytes < walk.total)
 119                        nbytes = rounddown(nbytes, walk.stride);
 120
 121                if (!static_branch_likely(&have_neon) ||
 122                    !crypto_simd_usable()) {
 123                        chacha_crypt_generic(state, walk.dst.virt.addr,
 124                                             walk.src.virt.addr, nbytes,
 125                                             ctx->nrounds);
 126                } else {
 127                        kernel_neon_begin();
 128                        chacha_doneon(state, walk.dst.virt.addr,
 129                                      walk.src.virt.addr, nbytes, ctx->nrounds);
 130                        kernel_neon_end();
 131                }
 132                err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
 133        }
 134
 135        return err;
 136}
 137
 138static int chacha_neon(struct skcipher_request *req)
 139{
 140        struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 141        struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
 142
 143        return chacha_neon_stream_xor(req, ctx, req->iv);
 144}
 145
 146static int xchacha_neon(struct skcipher_request *req)
 147{
 148        struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 149        struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
 150        struct chacha_ctx subctx;
 151        u32 state[16];
 152        u8 real_iv[16];
 153
 154        chacha_init_generic(state, ctx->key, req->iv);
 155        hchacha_block_arch(state, subctx.key, ctx->nrounds);
 156        subctx.nrounds = ctx->nrounds;
 157
 158        memcpy(&real_iv[0], req->iv + 24, 8);
 159        memcpy(&real_iv[8], req->iv + 16, 8);
 160        return chacha_neon_stream_xor(req, &subctx, real_iv);
 161}
 162
 163static struct skcipher_alg algs[] = {
 164        {
 165                .base.cra_name          = "chacha20",
 166                .base.cra_driver_name   = "chacha20-neon",
 167                .base.cra_priority      = 300,
 168                .base.cra_blocksize     = 1,
 169                .base.cra_ctxsize       = sizeof(struct chacha_ctx),
 170                .base.cra_module        = THIS_MODULE,
 171
 172                .min_keysize            = CHACHA_KEY_SIZE,
 173                .max_keysize            = CHACHA_KEY_SIZE,
 174                .ivsize                 = CHACHA_IV_SIZE,
 175                .chunksize              = CHACHA_BLOCK_SIZE,
 176                .walksize               = 5 * CHACHA_BLOCK_SIZE,
 177                .setkey                 = chacha20_setkey,
 178                .encrypt                = chacha_neon,
 179                .decrypt                = chacha_neon,
 180        }, {
 181                .base.cra_name          = "xchacha20",
 182                .base.cra_driver_name   = "xchacha20-neon",
 183                .base.cra_priority      = 300,
 184                .base.cra_blocksize     = 1,
 185                .base.cra_ctxsize       = sizeof(struct chacha_ctx),
 186                .base.cra_module        = THIS_MODULE,
 187
 188                .min_keysize            = CHACHA_KEY_SIZE,
 189                .max_keysize            = CHACHA_KEY_SIZE,
 190                .ivsize                 = XCHACHA_IV_SIZE,
 191                .chunksize              = CHACHA_BLOCK_SIZE,
 192                .walksize               = 5 * CHACHA_BLOCK_SIZE,
 193                .setkey                 = chacha20_setkey,
 194                .encrypt                = xchacha_neon,
 195                .decrypt                = xchacha_neon,
 196        }, {
 197                .base.cra_name          = "xchacha12",
 198                .base.cra_driver_name   = "xchacha12-neon",
 199                .base.cra_priority      = 300,
 200                .base.cra_blocksize     = 1,
 201                .base.cra_ctxsize       = sizeof(struct chacha_ctx),
 202                .base.cra_module        = THIS_MODULE,
 203
 204                .min_keysize            = CHACHA_KEY_SIZE,
 205                .max_keysize            = CHACHA_KEY_SIZE,
 206                .ivsize                 = XCHACHA_IV_SIZE,
 207                .chunksize              = CHACHA_BLOCK_SIZE,
 208                .walksize               = 5 * CHACHA_BLOCK_SIZE,
 209                .setkey                 = chacha12_setkey,
 210                .encrypt                = xchacha_neon,
 211                .decrypt                = xchacha_neon,
 212        }
 213};
 214
 215static int __init chacha_simd_mod_init(void)
 216{
 217        if (!cpu_have_named_feature(ASIMD))
 218                return 0;
 219
 220        static_branch_enable(&have_neon);
 221
 222        return IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) ?
 223                crypto_register_skciphers(algs, ARRAY_SIZE(algs)) : 0;
 224}
 225
 226static void __exit chacha_simd_mod_fini(void)
 227{
 228        if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) && cpu_have_named_feature(ASIMD))
 229                crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
 230}
 231
 232module_init(chacha_simd_mod_init);
 233module_exit(chacha_simd_mod_fini);
 234
 235MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)");
 236MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 237MODULE_LICENSE("GPL v2");
 238MODULE_ALIAS_CRYPTO("chacha20");
 239MODULE_ALIAS_CRYPTO("chacha20-neon");
 240MODULE_ALIAS_CRYPTO("xchacha20");
 241MODULE_ALIAS_CRYPTO("xchacha20-neon");
 242MODULE_ALIAS_CRYPTO("xchacha12");
 243MODULE_ALIAS_CRYPTO("xchacha12-neon");
 244