linux/arch/x86/crypto/twofish_glue_3way.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * Glue Code for 3-way parallel assembler optimized version of Twofish
   4 *
   5 * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
   6 */
   7
   8#include <asm/crypto/glue_helper.h>
   9#include <asm/crypto/twofish.h>
  10#include <crypto/algapi.h>
  11#include <crypto/b128ops.h>
  12#include <crypto/internal/skcipher.h>
  13#include <crypto/twofish.h>
  14#include <linux/crypto.h>
  15#include <linux/init.h>
  16#include <linux/module.h>
  17#include <linux/types.h>
  18
  19EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way);
  20EXPORT_SYMBOL_GPL(twofish_dec_blk_3way);
  21
  22static int twofish_setkey_skcipher(struct crypto_skcipher *tfm,
  23                                   const u8 *key, unsigned int keylen)
  24{
  25        return twofish_setkey(&tfm->base, key, keylen);
  26}
  27
  28static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
  29                                        const u8 *src)
  30{
  31        __twofish_enc_blk_3way(ctx, dst, src, false);
  32}
  33
  34static inline void twofish_enc_blk_xor_3way(struct twofish_ctx *ctx, u8 *dst,
  35                                            const u8 *src)
  36{
  37        __twofish_enc_blk_3way(ctx, dst, src, true);
  38}
  39
  40void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src)
  41{
  42        u128 ivs[2];
  43
  44        ivs[0] = src[0];
  45        ivs[1] = src[1];
  46
  47        twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src);
  48
  49        u128_xor(&dst[1], &dst[1], &ivs[0]);
  50        u128_xor(&dst[2], &dst[2], &ivs[1]);
  51}
  52EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way);
  53
  54void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
  55{
  56        be128 ctrblk;
  57
  58        if (dst != src)
  59                *dst = *src;
  60
  61        le128_to_be128(&ctrblk, iv);
  62        le128_inc(iv);
  63
  64        twofish_enc_blk(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
  65        u128_xor(dst, dst, (u128 *)&ctrblk);
  66}
  67EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr);
  68
  69void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
  70                              le128 *iv)
  71{
  72        be128 ctrblks[3];
  73
  74        if (dst != src) {
  75                dst[0] = src[0];
  76                dst[1] = src[1];
  77                dst[2] = src[2];
  78        }
  79
  80        le128_to_be128(&ctrblks[0], iv);
  81        le128_inc(iv);
  82        le128_to_be128(&ctrblks[1], iv);
  83        le128_inc(iv);
  84        le128_to_be128(&ctrblks[2], iv);
  85        le128_inc(iv);
  86
  87        twofish_enc_blk_xor_3way(ctx, (u8 *)dst, (u8 *)ctrblks);
  88}
  89EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr_3way);
  90
  91static const struct common_glue_ctx twofish_enc = {
  92        .num_funcs = 2,
  93        .fpu_blocks_limit = -1,
  94
  95        .funcs = { {
  96                .num_blocks = 3,
  97                .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) }
  98        }, {
  99                .num_blocks = 1,
 100                .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) }
 101        } }
 102};
 103
 104static const struct common_glue_ctx twofish_ctr = {
 105        .num_funcs = 2,
 106        .fpu_blocks_limit = -1,
 107
 108        .funcs = { {
 109                .num_blocks = 3,
 110                .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr_3way) }
 111        }, {
 112                .num_blocks = 1,
 113                .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr) }
 114        } }
 115};
 116
 117static const struct common_glue_ctx twofish_dec = {
 118        .num_funcs = 2,
 119        .fpu_blocks_limit = -1,
 120
 121        .funcs = { {
 122                .num_blocks = 3,
 123                .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) }
 124        }, {
 125                .num_blocks = 1,
 126                .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) }
 127        } }
 128};
 129
 130static const struct common_glue_ctx twofish_dec_cbc = {
 131        .num_funcs = 2,
 132        .fpu_blocks_limit = -1,
 133
 134        .funcs = { {
 135                .num_blocks = 3,
 136                .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) }
 137        }, {
 138                .num_blocks = 1,
 139                .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) }
 140        } }
 141};
 142
 143static int ecb_encrypt(struct skcipher_request *req)
 144{
 145        return glue_ecb_req_128bit(&twofish_enc, req);
 146}
 147
 148static int ecb_decrypt(struct skcipher_request *req)
 149{
 150        return glue_ecb_req_128bit(&twofish_dec, req);
 151}
 152
 153static int cbc_encrypt(struct skcipher_request *req)
 154{
 155        return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(twofish_enc_blk),
 156                                           req);
 157}
 158
 159static int cbc_decrypt(struct skcipher_request *req)
 160{
 161        return glue_cbc_decrypt_req_128bit(&twofish_dec_cbc, req);
 162}
 163
 164static int ctr_crypt(struct skcipher_request *req)
 165{
 166        return glue_ctr_req_128bit(&twofish_ctr, req);
 167}
 168
 169static struct skcipher_alg tf_skciphers[] = {
 170        {
 171                .base.cra_name          = "ecb(twofish)",
 172                .base.cra_driver_name   = "ecb-twofish-3way",
 173                .base.cra_priority      = 300,
 174                .base.cra_blocksize     = TF_BLOCK_SIZE,
 175                .base.cra_ctxsize       = sizeof(struct twofish_ctx),
 176                .base.cra_module        = THIS_MODULE,
 177                .min_keysize            = TF_MIN_KEY_SIZE,
 178                .max_keysize            = TF_MAX_KEY_SIZE,
 179                .setkey                 = twofish_setkey_skcipher,
 180                .encrypt                = ecb_encrypt,
 181                .decrypt                = ecb_decrypt,
 182        }, {
 183                .base.cra_name          = "cbc(twofish)",
 184                .base.cra_driver_name   = "cbc-twofish-3way",
 185                .base.cra_priority      = 300,
 186                .base.cra_blocksize     = TF_BLOCK_SIZE,
 187                .base.cra_ctxsize       = sizeof(struct twofish_ctx),
 188                .base.cra_module        = THIS_MODULE,
 189                .min_keysize            = TF_MIN_KEY_SIZE,
 190                .max_keysize            = TF_MAX_KEY_SIZE,
 191                .ivsize                 = TF_BLOCK_SIZE,
 192                .setkey                 = twofish_setkey_skcipher,
 193                .encrypt                = cbc_encrypt,
 194                .decrypt                = cbc_decrypt,
 195        }, {
 196                .base.cra_name          = "ctr(twofish)",
 197                .base.cra_driver_name   = "ctr-twofish-3way",
 198                .base.cra_priority      = 300,
 199                .base.cra_blocksize     = 1,
 200                .base.cra_ctxsize       = sizeof(struct twofish_ctx),
 201                .base.cra_module        = THIS_MODULE,
 202                .min_keysize            = TF_MIN_KEY_SIZE,
 203                .max_keysize            = TF_MAX_KEY_SIZE,
 204                .ivsize                 = TF_BLOCK_SIZE,
 205                .chunksize              = TF_BLOCK_SIZE,
 206                .setkey                 = twofish_setkey_skcipher,
 207                .encrypt                = ctr_crypt,
 208                .decrypt                = ctr_crypt,
 209        },
 210};
 211
 212static bool is_blacklisted_cpu(void)
 213{
 214        if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
 215                return false;
 216
 217        if (boot_cpu_data.x86 == 0x06 &&
 218                (boot_cpu_data.x86_model == 0x1c ||
 219                 boot_cpu_data.x86_model == 0x26 ||
 220                 boot_cpu_data.x86_model == 0x36)) {
 221                /*
 222                 * On Atom, twofish-3way is slower than original assembler
 223                 * implementation. Twofish-3way trades off some performance in
 224                 * storing blocks in 64bit registers to allow three blocks to
 225                 * be processed parallel. Parallel operation then allows gaining
 226                 * more performance than was trade off, on out-of-order CPUs.
 227                 * However Atom does not benefit from this parallellism and
 228                 * should be blacklisted.
 229                 */
 230                return true;
 231        }
 232
 233        if (boot_cpu_data.x86 == 0x0f) {
 234                /*
 235                 * On Pentium 4, twofish-3way is slower than original assembler
 236                 * implementation because excessive uses of 64bit rotate and
 237                 * left-shifts (which are really slow on P4) needed to store and
 238                 * handle 128bit block in two 64bit registers.
 239                 */
 240                return true;
 241        }
 242
 243        return false;
 244}
 245
 246static int force;
 247module_param(force, int, 0);
 248MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
 249
 250static int __init init(void)
 251{
 252        if (!force && is_blacklisted_cpu()) {
 253                printk(KERN_INFO
 254                        "twofish-x86_64-3way: performance on this CPU "
 255                        "would be suboptimal: disabling "
 256                        "twofish-x86_64-3way.\n");
 257                return -ENODEV;
 258        }
 259
 260        return crypto_register_skciphers(tf_skciphers,
 261                                         ARRAY_SIZE(tf_skciphers));
 262}
 263
 264static void __exit fini(void)
 265{
 266        crypto_unregister_skciphers(tf_skciphers, ARRAY_SIZE(tf_skciphers));
 267}
 268
 269module_init(init);
 270module_exit(fini);
 271
 272MODULE_LICENSE("GPL");
 273MODULE_DESCRIPTION("Twofish Cipher Algorithm, 3-way parallel asm optimized");
 274MODULE_ALIAS_CRYPTO("twofish");
 275MODULE_ALIAS_CRYPTO("twofish-asm");
 276