linux/arch/x86/crypto/twofish_glue_3way.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * Glue Code for 3-way parallel assembler optimized version of Twofish
   4 *
   5 * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
   6 */
   7
   8#include <asm/crypto/glue_helper.h>
   9#include <asm/crypto/twofish.h>
  10#include <crypto/algapi.h>
  11#include <crypto/b128ops.h>
  12#include <crypto/internal/skcipher.h>
  13#include <crypto/twofish.h>
  14#include <linux/crypto.h>
  15#include <linux/init.h>
  16#include <linux/module.h>
  17#include <linux/types.h>
  18
  19EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way);
  20EXPORT_SYMBOL_GPL(twofish_dec_blk_3way);
  21
  22static int twofish_setkey_skcipher(struct crypto_skcipher *tfm,
  23                                   const u8 *key, unsigned int keylen)
  24{
  25        return twofish_setkey(&tfm->base, key, keylen);
  26}
  27
  28static inline void twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src)
  29{
  30        __twofish_enc_blk_3way(ctx, dst, src, false);
  31}
  32
  33static inline void twofish_enc_blk_xor_3way(const void *ctx, u8 *dst,
  34                                            const u8 *src)
  35{
  36        __twofish_enc_blk_3way(ctx, dst, src, true);
  37}
  38
  39void twofish_dec_blk_cbc_3way(const void *ctx, u8 *d, const u8 *s)
  40{
  41        u128 ivs[2];
  42        u128 *dst = (u128 *)d;
  43        const u128 *src = (const u128 *)s;
  44
  45        ivs[0] = src[0];
  46        ivs[1] = src[1];
  47
  48        twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src);
  49
  50        u128_xor(&dst[1], &dst[1], &ivs[0]);
  51        u128_xor(&dst[2], &dst[2], &ivs[1]);
  52}
  53EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way);
  54
  55void twofish_enc_blk_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv)
  56{
  57        be128 ctrblk;
  58        u128 *dst = (u128 *)d;
  59        const u128 *src = (const u128 *)s;
  60
  61        if (dst != src)
  62                *dst = *src;
  63
  64        le128_to_be128(&ctrblk, iv);
  65        le128_inc(iv);
  66
  67        twofish_enc_blk(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
  68        u128_xor(dst, dst, (u128 *)&ctrblk);
  69}
  70EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr);
  71
  72void twofish_enc_blk_ctr_3way(const void *ctx, u8 *d, const u8 *s, le128 *iv)
  73{
  74        be128 ctrblks[3];
  75        u128 *dst = (u128 *)d;
  76        const u128 *src = (const u128 *)s;
  77
  78        if (dst != src) {
  79                dst[0] = src[0];
  80                dst[1] = src[1];
  81                dst[2] = src[2];
  82        }
  83
  84        le128_to_be128(&ctrblks[0], iv);
  85        le128_inc(iv);
  86        le128_to_be128(&ctrblks[1], iv);
  87        le128_inc(iv);
  88        le128_to_be128(&ctrblks[2], iv);
  89        le128_inc(iv);
  90
  91        twofish_enc_blk_xor_3way(ctx, (u8 *)dst, (u8 *)ctrblks);
  92}
  93EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr_3way);
  94
  95static const struct common_glue_ctx twofish_enc = {
  96        .num_funcs = 2,
  97        .fpu_blocks_limit = -1,
  98
  99        .funcs = { {
 100                .num_blocks = 3,
 101                .fn_u = { .ecb = twofish_enc_blk_3way }
 102        }, {
 103                .num_blocks = 1,
 104                .fn_u = { .ecb = twofish_enc_blk }
 105        } }
 106};
 107
 108static const struct common_glue_ctx twofish_ctr = {
 109        .num_funcs = 2,
 110        .fpu_blocks_limit = -1,
 111
 112        .funcs = { {
 113                .num_blocks = 3,
 114                .fn_u = { .ctr = twofish_enc_blk_ctr_3way }
 115        }, {
 116                .num_blocks = 1,
 117                .fn_u = { .ctr = twofish_enc_blk_ctr }
 118        } }
 119};
 120
 121static const struct common_glue_ctx twofish_dec = {
 122        .num_funcs = 2,
 123        .fpu_blocks_limit = -1,
 124
 125        .funcs = { {
 126                .num_blocks = 3,
 127                .fn_u = { .ecb = twofish_dec_blk_3way }
 128        }, {
 129                .num_blocks = 1,
 130                .fn_u = { .ecb = twofish_dec_blk }
 131        } }
 132};
 133
 134static const struct common_glue_ctx twofish_dec_cbc = {
 135        .num_funcs = 2,
 136        .fpu_blocks_limit = -1,
 137
 138        .funcs = { {
 139                .num_blocks = 3,
 140                .fn_u = { .cbc = twofish_dec_blk_cbc_3way }
 141        }, {
 142                .num_blocks = 1,
 143                .fn_u = { .cbc = twofish_dec_blk }
 144        } }
 145};
 146
 147static int ecb_encrypt(struct skcipher_request *req)
 148{
 149        return glue_ecb_req_128bit(&twofish_enc, req);
 150}
 151
 152static int ecb_decrypt(struct skcipher_request *req)
 153{
 154        return glue_ecb_req_128bit(&twofish_dec, req);
 155}
 156
 157static int cbc_encrypt(struct skcipher_request *req)
 158{
 159        return glue_cbc_encrypt_req_128bit(twofish_enc_blk, req);
 160}
 161
 162static int cbc_decrypt(struct skcipher_request *req)
 163{
 164        return glue_cbc_decrypt_req_128bit(&twofish_dec_cbc, req);
 165}
 166
 167static int ctr_crypt(struct skcipher_request *req)
 168{
 169        return glue_ctr_req_128bit(&twofish_ctr, req);
 170}
 171
 172static struct skcipher_alg tf_skciphers[] = {
 173        {
 174                .base.cra_name          = "ecb(twofish)",
 175                .base.cra_driver_name   = "ecb-twofish-3way",
 176                .base.cra_priority      = 300,
 177                .base.cra_blocksize     = TF_BLOCK_SIZE,
 178                .base.cra_ctxsize       = sizeof(struct twofish_ctx),
 179                .base.cra_module        = THIS_MODULE,
 180                .min_keysize            = TF_MIN_KEY_SIZE,
 181                .max_keysize            = TF_MAX_KEY_SIZE,
 182                .setkey                 = twofish_setkey_skcipher,
 183                .encrypt                = ecb_encrypt,
 184                .decrypt                = ecb_decrypt,
 185        }, {
 186                .base.cra_name          = "cbc(twofish)",
 187                .base.cra_driver_name   = "cbc-twofish-3way",
 188                .base.cra_priority      = 300,
 189                .base.cra_blocksize     = TF_BLOCK_SIZE,
 190                .base.cra_ctxsize       = sizeof(struct twofish_ctx),
 191                .base.cra_module        = THIS_MODULE,
 192                .min_keysize            = TF_MIN_KEY_SIZE,
 193                .max_keysize            = TF_MAX_KEY_SIZE,
 194                .ivsize                 = TF_BLOCK_SIZE,
 195                .setkey                 = twofish_setkey_skcipher,
 196                .encrypt                = cbc_encrypt,
 197                .decrypt                = cbc_decrypt,
 198        }, {
 199                .base.cra_name          = "ctr(twofish)",
 200                .base.cra_driver_name   = "ctr-twofish-3way",
 201                .base.cra_priority      = 300,
 202                .base.cra_blocksize     = 1,
 203                .base.cra_ctxsize       = sizeof(struct twofish_ctx),
 204                .base.cra_module        = THIS_MODULE,
 205                .min_keysize            = TF_MIN_KEY_SIZE,
 206                .max_keysize            = TF_MAX_KEY_SIZE,
 207                .ivsize                 = TF_BLOCK_SIZE,
 208                .chunksize              = TF_BLOCK_SIZE,
 209                .setkey                 = twofish_setkey_skcipher,
 210                .encrypt                = ctr_crypt,
 211                .decrypt                = ctr_crypt,
 212        },
 213};
 214
 215static bool is_blacklisted_cpu(void)
 216{
 217        if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
 218                return false;
 219
 220        if (boot_cpu_data.x86 == 0x06 &&
 221                (boot_cpu_data.x86_model == 0x1c ||
 222                 boot_cpu_data.x86_model == 0x26 ||
 223                 boot_cpu_data.x86_model == 0x36)) {
 224                /*
 225                 * On Atom, twofish-3way is slower than original assembler
 226                 * implementation. Twofish-3way trades off some performance in
 227                 * storing blocks in 64bit registers to allow three blocks to
 228                 * be processed parallel. Parallel operation then allows gaining
 229                 * more performance than was trade off, on out-of-order CPUs.
 230                 * However Atom does not benefit from this parallellism and
 231                 * should be blacklisted.
 232                 */
 233                return true;
 234        }
 235
 236        if (boot_cpu_data.x86 == 0x0f) {
 237                /*
 238                 * On Pentium 4, twofish-3way is slower than original assembler
 239                 * implementation because excessive uses of 64bit rotate and
 240                 * left-shifts (which are really slow on P4) needed to store and
 241                 * handle 128bit block in two 64bit registers.
 242                 */
 243                return true;
 244        }
 245
 246        return false;
 247}
 248
 249static int force;
 250module_param(force, int, 0);
 251MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
 252
 253static int __init init(void)
 254{
 255        if (!force && is_blacklisted_cpu()) {
 256                printk(KERN_INFO
 257                        "twofish-x86_64-3way: performance on this CPU "
 258                        "would be suboptimal: disabling "
 259                        "twofish-x86_64-3way.\n");
 260                return -ENODEV;
 261        }
 262
 263        return crypto_register_skciphers(tf_skciphers,
 264                                         ARRAY_SIZE(tf_skciphers));
 265}
 266
 267static void __exit fini(void)
 268{
 269        crypto_unregister_skciphers(tf_skciphers, ARRAY_SIZE(tf_skciphers));
 270}
 271
 272module_init(init);
 273module_exit(fini);
 274
 275MODULE_LICENSE("GPL");
 276MODULE_DESCRIPTION("Twofish Cipher Algorithm, 3-way parallel asm optimized");
 277MODULE_ALIAS_CRYPTO("twofish");
 278MODULE_ALIAS_CRYPTO("twofish-asm");
 279