#include <crypto/algapi.h>
#include <crypto/chacha20.h>
#include <crypto/internal/skcipher.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <asm/fpu/api.h>
#include <asm/simd.h>

#define CHACHA20_STATE_ALIGN 16

asmlinkage void chacha20_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src);
asmlinkage void chacha20_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src);
#ifdef CONFIG_AS_AVX2
asmlinkage void chacha20_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src);
static bool chacha20_use_avx2;
#endif

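/*
 * Encrypt/decrypt a contiguous run of bytes with the SIMD block routines:
 * the widest routine first (8 blocks with AVX2 when available), then the
 * 4-block and single-block SSSE3 versions, bouncing any partial final
 * block through a stack buffer.  state[12] is the block counter and is
 * advanced by the number of blocks consumed at each step.
 */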
static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
			    unsigned int bytes)
{
	u8 buf[CHACHA20_BLOCK_SIZE];

#ifdef CONFIG_AS_AVX2
	if (chacha20_use_avx2) {
		while (bytes >= CHACHA20_BLOCK_SIZE * 8) {
			chacha20_8block_xor_avx2(state, dst, src);
			bytes -= CHACHA20_BLOCK_SIZE * 8;
			src += CHACHA20_BLOCK_SIZE * 8;
			dst += CHACHA20_BLOCK_SIZE * 8;
			state[12] += 8;
		}
	}
#endif
	while (bytes >= CHACHA20_BLOCK_SIZE * 4) {
		chacha20_4block_xor_ssse3(state, dst, src);
		bytes -= CHACHA20_BLOCK_SIZE * 4;
		src += CHACHA20_BLOCK_SIZE * 4;
		dst += CHACHA20_BLOCK_SIZE * 4;
		state[12] += 4;
	}
	while (bytes >= CHACHA20_BLOCK_SIZE) {
		chacha20_block_xor_ssse3(state, dst, src);
		bytes -= CHACHA20_BLOCK_SIZE;
		src += CHACHA20_BLOCK_SIZE;
		dst += CHACHA20_BLOCK_SIZE;
		state[12]++;
	}
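	/*
	 * Partial final block: copy it through a full-size stack buffer so
	 * the asm routine can always read and write CHACHA20_BLOCK_SIZE
	 * bytes, then copy only the valid bytes back to the destination.
	 */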
	if (bytes) {
		memcpy(buf, src, bytes);
		chacha20_block_xor_ssse3(state, buf, buf);
		memcpy(dst, buf, bytes);
	}
}

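/*
 * skcipher .encrypt/.decrypt handler.  Requests of at most one block, and
 * contexts where the FPU must not be touched, fall back to the generic C
 * implementation; everything else runs under kernel_fpu_begin()/end().
 */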
static int chacha20_simd(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
	u32 *state, state_buf[16 + 2] __aligned(8);
	struct skcipher_walk walk;
	int err;

	BUILD_BUG_ON(CHACHA20_STATE_ALIGN != 16);
	state = PTR_ALIGN(state_buf + 0, CHACHA20_STATE_ALIGN);

	if (req->cryptlen <= CHACHA20_BLOCK_SIZE || !may_use_simd())
		return crypto_chacha20_crypt(req);

	err = skcipher_walk_virt(&walk, req, true);

	crypto_chacha20_init(state, ctx, walk.iv);

	kernel_fpu_begin();

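	/*
	 * Walk the request and feed each chunk to the SIMD routines a whole
	 * number of blocks at a time; any sub-block remainder shows up as
	 * the final walk step and is handled below.
	 */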
	while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
		chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
				rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE));
		err = skcipher_walk_done(&walk,
					 walk.nbytes % CHACHA20_BLOCK_SIZE);
	}

	if (walk.nbytes) {
		chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
				walk.nbytes);
		err = skcipher_walk_done(&walk, 0);
	}

	kernel_fpu_end();

	return err;
}

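/*
 * ChaCha20 is a stream cipher, so the algorithm advertises a block size of
 * one byte; .chunksize records the 64-byte ChaCha block in which the
 * counter advances.
 */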
static struct skcipher_alg alg = {
	.base.cra_name		= "chacha20",
	.base.cra_driver_name	= "chacha20-simd",
	.base.cra_priority	= 300,
	.base.cra_blocksize	= 1,
	.base.cra_ctxsize	= sizeof(struct chacha20_ctx),
	.base.cra_module	= THIS_MODULE,

	.min_keysize		= CHACHA20_KEY_SIZE,
	.max_keysize		= CHACHA20_KEY_SIZE,
	.ivsize			= CHACHA20_IV_SIZE,
	.chunksize		= CHACHA20_BLOCK_SIZE,
	.setkey			= crypto_chacha20_setkey,
	.encrypt		= chacha20_simd,
	.decrypt		= chacha20_simd,
};

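/*
 * SSSE3 is the baseline requirement.  The AVX2 path is enabled only when
 * the CPU advertises AVX and AVX2 and the kernel's xsave configuration
 * covers the SSE and YMM register state.
 */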
static int __init chacha20_simd_mod_init(void)
{
	if (!boot_cpu_has(X86_FEATURE_SSSE3))
		return -ENODEV;

#ifdef CONFIG_AS_AVX2
	chacha20_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
			    boot_cpu_has(X86_FEATURE_AVX2) &&
			    cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
#endif
	return crypto_register_skcipher(&alg);
}

static void __exit chacha20_simd_mod_fini(void)
{
	crypto_unregister_skcipher(&alg);
}

module_init(chacha20_simd_mod_init);
module_exit(chacha20_simd_mod_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
MODULE_DESCRIPTION("chacha20 cipher algorithm, SIMD accelerated");
MODULE_ALIAS_CRYPTO("chacha20");
MODULE_ALIAS_CRYPTO("chacha20-simd");