1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24#include <linux/module.h>
25#include <linux/hardirq.h>
26#include <linux/types.h>
27#include <linux/crypto.h>
28#include <linux/err.h>
29#include <crypto/algapi.h>
30#include <crypto/cast5.h>
31#include <crypto/cryptd.h>
32#include <crypto/ctr.h>
33#include <asm/xcr.h>
34#include <asm/xsave.h>
35#include <asm/crypto/ablk_helper.h>
36#include <asm/crypto/glue_helper.h>
37
38#define CAST5_PARALLEL_BLOCKS 16
39
40asmlinkage void cast5_ecb_enc_16way(struct cast5_ctx *ctx, u8 *dst,
41 const u8 *src);
42asmlinkage void cast5_ecb_dec_16way(struct cast5_ctx *ctx, u8 *dst,
43 const u8 *src);
44asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst,
45 const u8 *src);
46asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src,
47 __be64 *iv);
48
49static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes)
50{
51 return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
52 NULL, fpu_enabled, nbytes);
53}
54
55static inline void cast5_fpu_end(bool fpu_enabled)
56{
57 return glue_fpu_end(fpu_enabled);
58}
59
60static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
61 bool enc)
62{
63 bool fpu_enabled = false;
64 struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
65 const unsigned int bsize = CAST5_BLOCK_SIZE;
66 unsigned int nbytes;
67 void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
68 int err;
69
70 fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;
71
72 err = blkcipher_walk_virt(desc, walk);
73 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
74
75 while ((nbytes = walk->nbytes)) {
76 u8 *wsrc = walk->src.virt.addr;
77 u8 *wdst = walk->dst.virt.addr;
78
79 fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
80
81
82 if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
83 do {
84 fn(ctx, wdst, wsrc);
85
86 wsrc += bsize * CAST5_PARALLEL_BLOCKS;
87 wdst += bsize * CAST5_PARALLEL_BLOCKS;
88 nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
89 } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
90
91 if (nbytes < bsize)
92 goto done;
93 }
94
95 fn = (enc) ? __cast5_encrypt : __cast5_decrypt;
96
97
98 do {
99 fn(ctx, wdst, wsrc);
100
101 wsrc += bsize;
102 wdst += bsize;
103 nbytes -= bsize;
104 } while (nbytes >= bsize);
105
106done:
107 err = blkcipher_walk_done(desc, walk, nbytes);
108 }
109
110 cast5_fpu_end(fpu_enabled);
111 return err;
112}
113
114static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
115 struct scatterlist *src, unsigned int nbytes)
116{
117 struct blkcipher_walk walk;
118
119 blkcipher_walk_init(&walk, dst, src, nbytes);
120 return ecb_crypt(desc, &walk, true);
121}
122
123static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
124 struct scatterlist *src, unsigned int nbytes)
125{
126 struct blkcipher_walk walk;
127
128 blkcipher_walk_init(&walk, dst, src, nbytes);
129 return ecb_crypt(desc, &walk, false);
130}
131
132static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
133 struct blkcipher_walk *walk)
134{
135 struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
136 const unsigned int bsize = CAST5_BLOCK_SIZE;
137 unsigned int nbytes = walk->nbytes;
138 u64 *src = (u64 *)walk->src.virt.addr;
139 u64 *dst = (u64 *)walk->dst.virt.addr;
140 u64 *iv = (u64 *)walk->iv;
141
142 do {
143 *dst = *src ^ *iv;
144 __cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
145 iv = dst;
146
147 src += 1;
148 dst += 1;
149 nbytes -= bsize;
150 } while (nbytes >= bsize);
151
152 *(u64 *)walk->iv = *iv;
153 return nbytes;
154}
155
156static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
157 struct scatterlist *src, unsigned int nbytes)
158{
159 struct blkcipher_walk walk;
160 int err;
161
162 blkcipher_walk_init(&walk, dst, src, nbytes);
163 err = blkcipher_walk_virt(desc, &walk);
164
165 while ((nbytes = walk.nbytes)) {
166 nbytes = __cbc_encrypt(desc, &walk);
167 err = blkcipher_walk_done(desc, &walk, nbytes);
168 }
169
170 return err;
171}
172
173static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
174 struct blkcipher_walk *walk)
175{
176 struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
177 const unsigned int bsize = CAST5_BLOCK_SIZE;
178 unsigned int nbytes = walk->nbytes;
179 u64 *src = (u64 *)walk->src.virt.addr;
180 u64 *dst = (u64 *)walk->dst.virt.addr;
181 u64 last_iv;
182
183
184 src += nbytes / bsize - 1;
185 dst += nbytes / bsize - 1;
186
187 last_iv = *src;
188
189
190 if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
191 do {
192 nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1);
193 src -= CAST5_PARALLEL_BLOCKS - 1;
194 dst -= CAST5_PARALLEL_BLOCKS - 1;
195
196 cast5_cbc_dec_16way(ctx, (u8 *)dst, (u8 *)src);
197
198 nbytes -= bsize;
199 if (nbytes < bsize)
200 goto done;
201
202 *dst ^= *(src - 1);
203 src -= 1;
204 dst -= 1;
205 } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
206
207 if (nbytes < bsize)
208 goto done;
209 }
210
211
212 for (;;) {
213 __cast5_decrypt(ctx, (u8 *)dst, (u8 *)src);
214
215 nbytes -= bsize;
216 if (nbytes < bsize)
217 break;
218
219 *dst ^= *(src - 1);
220 src -= 1;
221 dst -= 1;
222 }
223
224done:
225 *dst ^= *(u64 *)walk->iv;
226 *(u64 *)walk->iv = last_iv;
227
228 return nbytes;
229}
230
231static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
232 struct scatterlist *src, unsigned int nbytes)
233{
234 bool fpu_enabled = false;
235 struct blkcipher_walk walk;
236 int err;
237
238 blkcipher_walk_init(&walk, dst, src, nbytes);
239 err = blkcipher_walk_virt(desc, &walk);
240 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
241
242 while ((nbytes = walk.nbytes)) {
243 fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
244 nbytes = __cbc_decrypt(desc, &walk);
245 err = blkcipher_walk_done(desc, &walk, nbytes);
246 }
247
248 cast5_fpu_end(fpu_enabled);
249 return err;
250}
251
252static void ctr_crypt_final(struct blkcipher_desc *desc,
253 struct blkcipher_walk *walk)
254{
255 struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
256 u8 *ctrblk = walk->iv;
257 u8 keystream[CAST5_BLOCK_SIZE];
258 u8 *src = walk->src.virt.addr;
259 u8 *dst = walk->dst.virt.addr;
260 unsigned int nbytes = walk->nbytes;
261
262 __cast5_encrypt(ctx, keystream, ctrblk);
263 crypto_xor(keystream, src, nbytes);
264 memcpy(dst, keystream, nbytes);
265
266 crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
267}
268
269static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
270 struct blkcipher_walk *walk)
271{
272 struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
273 const unsigned int bsize = CAST5_BLOCK_SIZE;
274 unsigned int nbytes = walk->nbytes;
275 u64 *src = (u64 *)walk->src.virt.addr;
276 u64 *dst = (u64 *)walk->dst.virt.addr;
277
278
279 if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
280 do {
281 cast5_ctr_16way(ctx, (u8 *)dst, (u8 *)src,
282 (__be64 *)walk->iv);
283
284 src += CAST5_PARALLEL_BLOCKS;
285 dst += CAST5_PARALLEL_BLOCKS;
286 nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
287 } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
288
289 if (nbytes < bsize)
290 goto done;
291 }
292
293
294 do {
295 u64 ctrblk;
296
297 if (dst != src)
298 *dst = *src;
299
300 ctrblk = *(u64 *)walk->iv;
301 be64_add_cpu((__be64 *)walk->iv, 1);
302
303 __cast5_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
304 *dst ^= ctrblk;
305
306 src += 1;
307 dst += 1;
308 nbytes -= bsize;
309 } while (nbytes >= bsize);
310
311done:
312 return nbytes;
313}
314
315static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
316 struct scatterlist *src, unsigned int nbytes)
317{
318 bool fpu_enabled = false;
319 struct blkcipher_walk walk;
320 int err;
321
322 blkcipher_walk_init(&walk, dst, src, nbytes);
323 err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE);
324 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
325
326 while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
327 fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
328 nbytes = __ctr_crypt(desc, &walk);
329 err = blkcipher_walk_done(desc, &walk, nbytes);
330 }
331
332 cast5_fpu_end(fpu_enabled);
333
334 if (walk.nbytes) {
335 ctr_crypt_final(desc, &walk);
336 err = blkcipher_walk_done(desc, &walk, 0);
337 }
338
339 return err;
340}
341
342
343static struct crypto_alg cast5_algs[6] = { {
344 .cra_name = "__ecb-cast5-avx",
345 .cra_driver_name = "__driver-ecb-cast5-avx",
346 .cra_priority = 0,
347 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
348 .cra_blocksize = CAST5_BLOCK_SIZE,
349 .cra_ctxsize = sizeof(struct cast5_ctx),
350 .cra_alignmask = 0,
351 .cra_type = &crypto_blkcipher_type,
352 .cra_module = THIS_MODULE,
353 .cra_u = {
354 .blkcipher = {
355 .min_keysize = CAST5_MIN_KEY_SIZE,
356 .max_keysize = CAST5_MAX_KEY_SIZE,
357 .setkey = cast5_setkey,
358 .encrypt = ecb_encrypt,
359 .decrypt = ecb_decrypt,
360 },
361 },
362}, {
363 .cra_name = "__cbc-cast5-avx",
364 .cra_driver_name = "__driver-cbc-cast5-avx",
365 .cra_priority = 0,
366 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
367 .cra_blocksize = CAST5_BLOCK_SIZE,
368 .cra_ctxsize = sizeof(struct cast5_ctx),
369 .cra_alignmask = 0,
370 .cra_type = &crypto_blkcipher_type,
371 .cra_module = THIS_MODULE,
372 .cra_u = {
373 .blkcipher = {
374 .min_keysize = CAST5_MIN_KEY_SIZE,
375 .max_keysize = CAST5_MAX_KEY_SIZE,
376 .setkey = cast5_setkey,
377 .encrypt = cbc_encrypt,
378 .decrypt = cbc_decrypt,
379 },
380 },
381}, {
382 .cra_name = "__ctr-cast5-avx",
383 .cra_driver_name = "__driver-ctr-cast5-avx",
384 .cra_priority = 0,
385 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
386 .cra_blocksize = 1,
387 .cra_ctxsize = sizeof(struct cast5_ctx),
388 .cra_alignmask = 0,
389 .cra_type = &crypto_blkcipher_type,
390 .cra_module = THIS_MODULE,
391 .cra_u = {
392 .blkcipher = {
393 .min_keysize = CAST5_MIN_KEY_SIZE,
394 .max_keysize = CAST5_MAX_KEY_SIZE,
395 .ivsize = CAST5_BLOCK_SIZE,
396 .setkey = cast5_setkey,
397 .encrypt = ctr_crypt,
398 .decrypt = ctr_crypt,
399 },
400 },
401}, {
402 .cra_name = "ecb(cast5)",
403 .cra_driver_name = "ecb-cast5-avx",
404 .cra_priority = 200,
405 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
406 .cra_blocksize = CAST5_BLOCK_SIZE,
407 .cra_ctxsize = sizeof(struct async_helper_ctx),
408 .cra_alignmask = 0,
409 .cra_type = &crypto_ablkcipher_type,
410 .cra_module = THIS_MODULE,
411 .cra_init = ablk_init,
412 .cra_exit = ablk_exit,
413 .cra_u = {
414 .ablkcipher = {
415 .min_keysize = CAST5_MIN_KEY_SIZE,
416 .max_keysize = CAST5_MAX_KEY_SIZE,
417 .setkey = ablk_set_key,
418 .encrypt = ablk_encrypt,
419 .decrypt = ablk_decrypt,
420 },
421 },
422}, {
423 .cra_name = "cbc(cast5)",
424 .cra_driver_name = "cbc-cast5-avx",
425 .cra_priority = 200,
426 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
427 .cra_blocksize = CAST5_BLOCK_SIZE,
428 .cra_ctxsize = sizeof(struct async_helper_ctx),
429 .cra_alignmask = 0,
430 .cra_type = &crypto_ablkcipher_type,
431 .cra_module = THIS_MODULE,
432 .cra_init = ablk_init,
433 .cra_exit = ablk_exit,
434 .cra_u = {
435 .ablkcipher = {
436 .min_keysize = CAST5_MIN_KEY_SIZE,
437 .max_keysize = CAST5_MAX_KEY_SIZE,
438 .ivsize = CAST5_BLOCK_SIZE,
439 .setkey = ablk_set_key,
440 .encrypt = __ablk_encrypt,
441 .decrypt = ablk_decrypt,
442 },
443 },
444}, {
445 .cra_name = "ctr(cast5)",
446 .cra_driver_name = "ctr-cast5-avx",
447 .cra_priority = 200,
448 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
449 .cra_blocksize = 1,
450 .cra_ctxsize = sizeof(struct async_helper_ctx),
451 .cra_alignmask = 0,
452 .cra_type = &crypto_ablkcipher_type,
453 .cra_module = THIS_MODULE,
454 .cra_init = ablk_init,
455 .cra_exit = ablk_exit,
456 .cra_u = {
457 .ablkcipher = {
458 .min_keysize = CAST5_MIN_KEY_SIZE,
459 .max_keysize = CAST5_MAX_KEY_SIZE,
460 .ivsize = CAST5_BLOCK_SIZE,
461 .setkey = ablk_set_key,
462 .encrypt = ablk_encrypt,
463 .decrypt = ablk_encrypt,
464 .geniv = "chainiv",
465 },
466 },
467} };
468
469static int __init cast5_init(void)
470{
471 u64 xcr0;
472
473 if (!cpu_has_avx || !cpu_has_osxsave) {
474 pr_info("AVX instructions are not detected.\n");
475 return -ENODEV;
476 }
477
478 xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
479 if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
480 pr_info("AVX detected but unusable.\n");
481 return -ENODEV;
482 }
483
484 return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
485}
486
487static void __exit cast5_exit(void)
488{
489 crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
490}
491
492module_init(cast5_init);
493module_exit(cast5_exit);
494
495MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized");
496MODULE_LICENSE("GPL");
497MODULE_ALIAS("cast5");
498