linux/arch/x86/crypto/cast5_avx_glue.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Glue Code for the AVX assembler implementation of the Cast5 Cipher
 *
 * Copyright (C) 2012 Johannes Goetzfried
 *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
 */

#include <asm/crypto/glue_helper.h>
#include <crypto/algapi.h>
#include <crypto/cast5.h>
#include <crypto/internal/simd.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/types.h>

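/* Number of CAST5 blocks processed by one call into the AVX assembler. */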
#define CAST5_PARALLEL_BLOCKS 16

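/*
 * 16-way parallel routines implemented in AVX assembler
 * (arch/x86/crypto/cast5-avx-x86_64-asm_64.S). Each call handles
 * CAST5_PARALLEL_BLOCKS contiguous blocks; the CBC-decrypt and CTR
 * variants also take care of chaining/counter updates within a batch.
 */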
asmlinkage void cast5_ecb_enc_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_ecb_dec_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src,
				__be64 *iv);

static int cast5_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
				 unsigned int keylen)
{
	return cast5_setkey(&tfm->base, key, keylen);
}

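/*
 * SIMD registers may only be touched between kernel_fpu_begin() and
 * kernel_fpu_end(). glue_fpu_begin() only enables the FPU once enough
 * data for a full 16-block batch is queued, so short requests skip the
 * state save/restore overhead entirely.
 */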
static inline bool cast5_fpu_begin(bool fpu_enabled, struct skcipher_walk *walk,
				   unsigned int nbytes)
{
	return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
			      walk, fpu_enabled, nbytes);
}

static inline void cast5_fpu_end(bool fpu_enabled)
{
	return glue_fpu_end(fpu_enabled);
}

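/*
 * ECB worker shared by encryption and decryption: consume the request
 * in 16-block batches via the assembler routines, then fall back to
 * the generic C cipher (__cast5_encrypt/__cast5_decrypt) for whatever
 * blocks remain.
 */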
static int ecb_crypt(struct skcipher_request *req, bool enc)
{
	bool fpu_enabled = false;
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes;
	void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes)) {
		u8 *wsrc = walk.src.virt.addr;
		u8 *wdst = walk.dst.virt.addr;

		fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);

		/* Process multi-block batch */
		if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
			fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;
			do {
				fn(ctx, wdst, wsrc);

				wsrc += bsize * CAST5_PARALLEL_BLOCKS;
				wdst += bsize * CAST5_PARALLEL_BLOCKS;
				nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
			} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

			if (nbytes < bsize)
				goto done;
		}

		fn = (enc) ? __cast5_encrypt : __cast5_decrypt;

		/* Handle leftovers */
		do {
			fn(ctx, wdst, wsrc);

			wsrc += bsize;
			wdst += bsize;
			nbytes -= bsize;
		} while (nbytes >= bsize);

done:
		err = skcipher_walk_done(&walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);
	return err;
}

static int ecb_encrypt(struct skcipher_request *req)
{
	return ecb_crypt(req, true);
}

static int ecb_decrypt(struct skcipher_request *req)
{
	return ecb_crypt(req, false);
}

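/*
 * CBC encryption is inherently serial (each block is chained to the
 * previous ciphertext block), so no parallel fast path is possible;
 * every block goes through the scalar C implementation.
 */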
static int cbc_encrypt(struct skcipher_request *req)
{
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes)) {
		u64 *src = (u64 *)walk.src.virt.addr;
		u64 *dst = (u64 *)walk.dst.virt.addr;
		u64 *iv = (u64 *)walk.iv;

		do {
			*dst = *src ^ *iv;
			__cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
			iv = dst;
			src++;
			dst++;
			nbytes -= bsize;
		} while (nbytes >= bsize);

		*(u64 *)walk.iv = *iv;
		err = skcipher_walk_done(&walk, nbytes);
	}

	return err;
}

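/*
 * CBC decryption parallelizes because each plaintext block depends on
 * only two ciphertext blocks. The buffer is processed back to front so
 * that in-place operation does not clobber ciphertext still needed for
 * chaining; the last ciphertext block is captured first to become the
 * next IV.
 */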
static unsigned int __cbc_decrypt(struct cast5_ctx *ctx,
				  struct skcipher_walk *walk)
{
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;
	u64 last_iv;

	/* Start of the last block. */
	src += nbytes / bsize - 1;
	dst += nbytes / bsize - 1;

	last_iv = *src;

	/* Process multi-block batch */
	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
		do {
			nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1);
			src -= CAST5_PARALLEL_BLOCKS - 1;
			dst -= CAST5_PARALLEL_BLOCKS - 1;

			cast5_cbc_dec_16way(ctx, (u8 *)dst, (u8 *)src);

			nbytes -= bsize;
			if (nbytes < bsize)
				goto done;

			*dst ^= *(src - 1);
			src -= 1;
			dst -= 1;
		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
	}

	/* Handle leftovers */
	for (;;) {
		__cast5_decrypt(ctx, (u8 *)dst, (u8 *)src);

		nbytes -= bsize;
		if (nbytes < bsize)
			break;

		*dst ^= *(src - 1);
		src -= 1;
		dst -= 1;
	}

done:
	*dst ^= *(u64 *)walk->iv;
	*(u64 *)walk->iv = last_iv;

	return nbytes;
}

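/* Top-level CBC decrypt: one FPU section may span several walk steps. */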
static int cbc_decrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
	bool fpu_enabled = false;
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes)) {
		fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
		nbytes = __cbc_decrypt(ctx, &walk);
		err = skcipher_walk_done(&walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);
	return err;
}

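/*
 * Final partial block for CTR mode: generate one block of keystream
 * from the counter and XOR just the remaining nbytes into the output.
 */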
static void ctr_crypt_final(struct skcipher_walk *walk, struct cast5_ctx *ctx)
{
	u8 *ctrblk = walk->iv;
	u8 keystream[CAST5_BLOCK_SIZE];
	u8 *src = walk->src.virt.addr;
	u8 *dst = walk->dst.virt.addr;
	unsigned int nbytes = walk->nbytes;

	__cast5_encrypt(ctx, keystream, ctrblk);
	crypto_xor_cpy(dst, keystream, src, nbytes);

	crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
}

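/*
 * CTR worker: 16-block batches go through the assembler routine (which
 * advances the counter itself), remaining full blocks are handled one
 * at a time. The counter lives big-endian in walk->iv, matching the
 * generic ctr(cast5) keystream.
 */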
static unsigned int __ctr_crypt(struct skcipher_walk *walk,
				struct cast5_ctx *ctx)
{
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;

	/* Process multi-block batch */
	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
		do {
			cast5_ctr_16way(ctx, (u8 *)dst, (u8 *)src,
					(__be64 *)walk->iv);

			src += CAST5_PARALLEL_BLOCKS;
			dst += CAST5_PARALLEL_BLOCKS;
			nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

		if (nbytes < bsize)
			goto done;
	}

	/* Handle leftovers */
	do {
		u64 ctrblk;

		if (dst != src)
			*dst = *src;

		ctrblk = *(u64 *)walk->iv;
		be64_add_cpu((__be64 *)walk->iv, 1);

		__cast5_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
		*dst ^= ctrblk;

		src += 1;
		dst += 1;
		nbytes -= bsize;
	} while (nbytes >= bsize);

done:
	return nbytes;
}

static int ctr_crypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
	bool fpu_enabled = false;
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
		fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
		nbytes = __ctr_crypt(&walk, ctx);
		err = skcipher_walk_done(&walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);

	if (walk.nbytes) {
		ctr_crypt_final(&walk, ctx);
		err = skcipher_walk_done(&walk, 0);
	}

	return err;
}

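/*
 * The "__" driver names plus CRYPTO_ALG_INTERNAL mark these algorithms
 * as internal-only: they use SIMD state and must not run in contexts
 * where the FPU is unavailable. The simd helper registered below wraps
 * them and falls back to cryptd where necessary.
 */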
static struct skcipher_alg cast5_algs[] = {
	{
		.base.cra_name		= "__ecb(cast5)",
		.base.cra_driver_name	= "__ecb-cast5-avx",
		.base.cra_priority	= 200,
		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
		.base.cra_blocksize	= CAST5_BLOCK_SIZE,
		.base.cra_ctxsize	= sizeof(struct cast5_ctx),
		.base.cra_module	= THIS_MODULE,
		.min_keysize		= CAST5_MIN_KEY_SIZE,
		.max_keysize		= CAST5_MAX_KEY_SIZE,
		.setkey			= cast5_setkey_skcipher,
		.encrypt		= ecb_encrypt,
		.decrypt		= ecb_decrypt,
	}, {
		.base.cra_name		= "__cbc(cast5)",
		.base.cra_driver_name	= "__cbc-cast5-avx",
		.base.cra_priority	= 200,
		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
		.base.cra_blocksize	= CAST5_BLOCK_SIZE,
		.base.cra_ctxsize	= sizeof(struct cast5_ctx),
		.base.cra_module	= THIS_MODULE,
		.min_keysize		= CAST5_MIN_KEY_SIZE,
		.max_keysize		= CAST5_MAX_KEY_SIZE,
		.ivsize			= CAST5_BLOCK_SIZE,
		.setkey			= cast5_setkey_skcipher,
		.encrypt		= cbc_encrypt,
		.decrypt		= cbc_decrypt,
	}, {
		.base.cra_name		= "__ctr(cast5)",
		.base.cra_driver_name	= "__ctr-cast5-avx",
		.base.cra_priority	= 200,
		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct cast5_ctx),
		.base.cra_module	= THIS_MODULE,
		.min_keysize		= CAST5_MIN_KEY_SIZE,
		.max_keysize		= CAST5_MAX_KEY_SIZE,
		.ivsize			= CAST5_BLOCK_SIZE,
		.chunksize		= CAST5_BLOCK_SIZE,
		.setkey			= cast5_setkey_skcipher,
		.encrypt		= ctr_crypt,
		.decrypt		= ctr_crypt,
	}
};

static struct simd_skcipher_alg *cast5_simd_algs[ARRAY_SIZE(cast5_algs)];

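/*
 * Register only when the CPU and kernel support the XSAVE-managed SSE
 * and YMM state that the AVX implementation depends on.
 */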
static int __init cast5_init(void)
{
	const char *feature_name;

	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
				&feature_name)) {
		pr_info("CPU feature '%s' is not supported.\n", feature_name);
		return -ENODEV;
	}

	return simd_register_skciphers_compat(cast5_algs,
					      ARRAY_SIZE(cast5_algs),
					      cast5_simd_algs);
}

static void __exit cast5_exit(void)
{
	simd_unregister_skciphers(cast5_algs, ARRAY_SIZE(cast5_algs),
				  cast5_simd_algs);
}

module_init(cast5_init);
module_exit(cast5_exit);

MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("cast5");