linux/arch/x86/crypto/twofish-i586-asm_32.S
<<
>>
Prefs
   1/***************************************************************************
   2*   Copyright (C) 2006 by Joachim Fritschi, <jfritschi@freenet.de>        *
   3*                                                                         *
   4*   This program is free software; you can redistribute it and/or modify  *
   5*   it under the terms of the GNU General Public License as published by  *
   6*   the Free Software Foundation; either version 2 of the License, or     *
   7*   (at your option) any later version.                                   *
   8*                                                                         *
   9*   This program is distributed in the hope that it will be useful,       *
  10*   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
  11*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
  12*   GNU General Public License for more details.                          *
  13*                                                                         *
  14*   You should have received a copy of the GNU General Public License     *
  15*   along with this program; if not, write to the                         *
  16*   Free Software Foundation, Inc.,                                       *
  17*   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
  18***************************************************************************/
  19
  20.file "twofish-i586-asm.S"
  21.text
  22
  23#include <linux/linkage.h>
  24#include <asm/asm-offsets.h>
  25
   26/* return address at 0
 *
 * Stack offsets of the three parameters, measured from %esp as it is on
 * function entry.  Both entry points in this file push four registers
 * (16 bytes) before reading a parameter, which is why every use of these
 * offsets adds 16.
 */
   27
   28#define in_blk    12  /* input byte array address parameter */
   29#define out_blk   8  /* output byte array address parameter */
   30#define ctx       4  /* Twofish context structure address parameter */
  31
   32#define a_offset        0       /* byte offset of 32-bit word a in a block */
   33#define b_offset        4       /* byte offset of 32-bit word b in a block */
   34#define c_offset        8       /* byte offset of 32-bit word c in a block */
   35#define d_offset        12      /* byte offset of 32-bit word d; the same four
                                      * offsets also select a whitening word in the
                                      * whitening macros */
  36
   37/* Structure of the crypto context struct
 *
 * All values are byte offsets from the context address, which the entry
 * points keep in %ebp.  Each S array holds 256 words of 4 bytes, hence the
 * 1024-byte spacing; the 8 whitening words take 32 bytes, so the round
 * keys start at 4096 + 32.
 * s0 is listed for completeness only: the round macros address the first
 * array with a bare (%ebp,%edi,4).
 * NOTE(review): presumably this layout has to mirror the C-side context
 * structure -- confirm against its declaration.
 */
   38
   39#define s0      0       /* S0 Array 256 Words each */
   40#define s1      1024    /* S1 Array */
   41#define s2      2048    /* S2 Array */
   42#define s3      3072    /* S3 Array */
   43#define w       4096    /* 8 whitening keys (word) */
   44#define k       4128    /* key 1-32 ( word ) */
  45
   46/* define a few register aliases to allow macro substitution
 *
 * The round macros receive a bare stem (R0..R3) and paste a suffix onto it
 * with ##:
 *   D - the full 32-bit register
 *   B - its low byte (bits 0-7)
 *   H - its second byte (bits 8-15)
 * Passing the stems in a different order therefore changes which physical
 * register plays which role without moving any data.
 */
   47
   48#define R0D    %eax
   49#define R0B    %al
   50#define R0H    %ah
   51
   52#define R1D    %ebx
   53#define R1B    %bl
   54#define R1H    %bh
   55
   56#define R2D    %ecx
   57#define R2B    %cl
   58#define R2H    %ch
   59
   60#define R3D    %edx
   61#define R3B    %dl
   62#define R3H    %dh
  63
  64
   65/* performs input whitening
 *
 * src     - register to be whitened (modified in place)
 * context - register holding the context address
 * offset  - byte offset selecting one of the first four whitening words
 *           (a_offset .. d_offset)
 * XORs the word at w + offset from the context into src.
 */
   66#define input_whitening(src,context,offset)\
   67        xor     w+offset(context),      src;
  68
   69/* performs output whitening
 *
 * src     - register to be whitened (modified in place)
 * context - register holding the context address
 * offset  - byte offset selecting one of the last four whitening words
 *           (a_offset .. d_offset)
 * XORs the word at w + 16 + offset from the context into src, i.e. it uses
 * the second half of the 8 whitening words.
 */
   70#define output_whitening(src,context,offset)\
   71        xor     w+16+offset(context),   src;
  72
   73/*
 * encrypt_round(a,b,c,d,round): one encryption round.
 * a..d are register alias stems (R0..R3); round is the byte offset of this
 * round's pair of key words inside the k array.  %ebp must hold the
 * context address.
 *
   74 * a input register containing a (rotated 16)
   75 * b input register containing b
   76 * c input register containing c
   77 * d input register containing d (already rol $1)
   78 * operations on a and b are interleaved to increase performance
 *
 * d is saved on the stack so that its register can accumulate the four
 * table lookups indexed by the bytes of b (call the result Tb); %esi
 * accumulates the four lookups indexed by the bytes of a (Ta).  The saved
 * d is popped into %edi and XORed into the result at the end.
 *
 * On exit:
 *   a  rotated by 16 twice, i.e. back to its entry value
 *   b  rotated right by 16 + 15 = 31 bits, i.e. left by 1
 *   c  = rol(c ^ (Ta + Tb + key word at k+round), 15)
 *   d  = saved d ^ (Ta + 2*Tb + key word at k+4+round)
 * Clobbers %esi, %edi and the flags; uses one temporary stack slot.
   79 */
   80#define encrypt_round(a,b,c,d,round)\
   81        push    d ## D;\
   82        movzx   b ## B,         %edi;\
   83        mov     s1(%ebp,%edi,4),d ## D;\
   84        movzx   a ## B,         %edi;\
   85        mov     s2(%ebp,%edi,4),%esi;\
   86        movzx   b ## H,         %edi;\
   87        ror     $16,            b ## D;\
   88        xor     s2(%ebp,%edi,4),d ## D;\
   89        movzx   a ## H,         %edi;\
   90        ror     $16,            a ## D;\
   91        xor     s3(%ebp,%edi,4),%esi;\
   92        movzx   b ## B,         %edi;\
   93        xor     s3(%ebp,%edi,4),d ## D;\
   94        movzx   a ## B,         %edi;\
   95        xor     (%ebp,%edi,4),  %esi;\
   96        movzx   b ## H,         %edi;\
   97        ror     $15,            b ## D;\
   98        xor     (%ebp,%edi,4),  d ## D;\
   99        movzx   a ## H,         %edi;\
  100        xor     s1(%ebp,%edi,4),%esi;\
  101        pop     %edi;\
  102        add     d ## D,         %esi;\
  103        add     %esi,           d ## D;\
  104        add     k+round(%ebp),  %esi;\
  105        xor     %esi,           c ## D;\
  106        rol     $15,            c ## D;\
  107        add     k+4+round(%ebp),d ## D;\
  108        xor     %edi,           d ## D;
 109
  110/*
 * encrypt_last_round(a,b,c,d,round): final encryption round.
 * Same parameters, table lookups, key additions and clobbers as
 * encrypt_round; only two rotations differ.
 *
  111 * a input register containing a (rotated 16)
  112 * b input register containing b
  113 * c input register containing c
  114 * d input register containing d (already rol $1)
  115 * operations on a and b are interleaved to increase performance
  116 * last round has different rotations for the output preparation
 *
 * Differences from encrypt_round:
 *   b  is rotated by 16 twice, so it leaves with its entry value
 *      (encrypt_round rotates it by 16 and then by 15)
 *   c  = ror(c ^ (Ta + Tb + key word at k+round), 1)
 *      (encrypt_round uses rol 15 here)
 * where Ta / Tb are the lookup results indexed by the bytes of a / b.
 * Clobbers %esi, %edi and the flags; uses one temporary stack slot.
  117 */
  118#define encrypt_last_round(a,b,c,d,round)\
  119        push    d ## D;\
  120        movzx   b ## B,         %edi;\
  121        mov     s1(%ebp,%edi,4),d ## D;\
  122        movzx   a ## B,         %edi;\
  123        mov     s2(%ebp,%edi,4),%esi;\
  124        movzx   b ## H,         %edi;\
  125        ror     $16,            b ## D;\
  126        xor     s2(%ebp,%edi,4),d ## D;\
  127        movzx   a ## H,         %edi;\
  128        ror     $16,            a ## D;\
  129        xor     s3(%ebp,%edi,4),%esi;\
  130        movzx   b ## B,         %edi;\
  131        xor     s3(%ebp,%edi,4),d ## D;\
  132        movzx   a ## B,         %edi;\
  133        xor     (%ebp,%edi,4),  %esi;\
  134        movzx   b ## H,         %edi;\
  135        ror     $16,            b ## D;\
  136        xor     (%ebp,%edi,4),  d ## D;\
  137        movzx   a ## H,         %edi;\
  138        xor     s1(%ebp,%edi,4),%esi;\
  139        pop     %edi;\
  140        add     d ## D,         %esi;\
  141        add     %esi,           d ## D;\
  142        add     k+round(%ebp),  %esi;\
  143        xor     %esi,           c ## D;\
  144        ror     $1,             c ## D;\
  145        add     k+4+round(%ebp),d ## D;\
  146        xor     %edi,           d ## D;
 147
  148/*
 * decrypt_round(a,b,c,d,round): one decryption round.
 * a..d are register alias stems (R0..R3); round is the byte offset of this
 * round's pair of key words inside the k array.  %ebp must hold the
 * context address.
 *
  149 * a input register containing a
  150 * b input register containing b (rotated 16)
  151 * c input register containing c (already rol $1)
  152 * d input register containing d
  153 * operations on a and b are interleaved to increase performance
 *
 * c is saved on the stack so that its register can accumulate the four
 * table lookups indexed by the bytes of a (call the result Ta); %esi
 * accumulates the four lookups indexed by the bytes of b (Tb).  The saved
 * c is popped into %edi and XORed in without any further rotation.
 *
 * On exit:
 *   a  rotated right by 16 + 15 = 31 bits, i.e. left by 1
 *   b  rotated by 16 once relative to its entry value
 *   c  = saved c ^ (Ta + Tb + key word at k+round)
 *   d  = rol(d ^ (Ta + 2*Tb + key word at k+4+round), 15)
 * Clobbers %esi, %edi and the flags; uses one temporary stack slot.
  154 */
  155#define decrypt_round(a,b,c,d,round)\
  156        push    c ## D;\
  157        movzx   a ## B,         %edi;\
  158        mov     (%ebp,%edi,4),  c ## D;\
  159        movzx   b ## B,         %edi;\
  160        mov     s3(%ebp,%edi,4),%esi;\
  161        movzx   a ## H,         %edi;\
  162        ror     $16,            a ## D;\
  163        xor     s1(%ebp,%edi,4),c ## D;\
  164        movzx   b ## H,         %edi;\
  165        ror     $16,            b ## D;\
  166        xor     (%ebp,%edi,4),  %esi;\
  167        movzx   a ## B,         %edi;\
  168        xor     s2(%ebp,%edi,4),c ## D;\
  169        movzx   b ## B,         %edi;\
  170        xor     s1(%ebp,%edi,4),%esi;\
  171        movzx   a ## H,         %edi;\
  172        ror     $15,            a ## D;\
  173        xor     s3(%ebp,%edi,4),c ## D;\
  174        movzx   b ## H,         %edi;\
  175        xor     s2(%ebp,%edi,4),%esi;\
  176        pop     %edi;\
  177        add     %esi,           c ## D;\
  178        add     c ## D,         %esi;\
  179        add     k+round(%ebp),  c ## D;\
  180        xor     %edi,           c ## D;\
  181        add     k+4+round(%ebp),%esi;\
  182        xor     %esi,           d ## D;\
  183        rol     $15,            d ## D;
 184
  185/*
 * decrypt_last_round(a,b,c,d,round): final decryption round.
 * Same parameters, table lookups, key additions and clobbers as
 * decrypt_round; only two rotations differ.
 *
  186 * a input register containing a
  187 * b input register containing b (rotated 16)
  188 * c input register containing c (already rol $1)
  189 * d input register containing d
  190 * operations on a and b are interleaved to increase performance
  191 * last round has different rotations for the output preparation
 *
 * Differences from decrypt_round:
 *   a  is rotated by 16 twice, so it leaves with its entry value
 *      (decrypt_round rotates it by 16 and then by 15)
 *   d  = ror(d ^ (Ta + 2*Tb + key word at k+4+round), 1)
 *      (decrypt_round uses rol 15 here)
 * where Ta / Tb are the lookup results indexed by the bytes of a / b.
 * Clobbers %esi, %edi and the flags; uses one temporary stack slot.
  192 */
  193#define decrypt_last_round(a,b,c,d,round)\
  194        push    c ## D;\
  195        movzx   a ## B,         %edi;\
  196        mov     (%ebp,%edi,4),  c ## D;\
  197        movzx   b ## B,         %edi;\
  198        mov     s3(%ebp,%edi,4),%esi;\
  199        movzx   a ## H,         %edi;\
  200        ror     $16,            a ## D;\
  201        xor     s1(%ebp,%edi,4),c ## D;\
  202        movzx   b ## H,         %edi;\
  203        ror     $16,            b ## D;\
  204        xor     (%ebp,%edi,4),  %esi;\
  205        movzx   a ## B,         %edi;\
  206        xor     s2(%ebp,%edi,4),c ## D;\
  207        movzx   b ## B,         %edi;\
  208        xor     s1(%ebp,%edi,4),%esi;\
  209        movzx   a ## H,         %edi;\
  210        ror     $16,            a ## D;\
  211        xor     s3(%ebp,%edi,4),c ## D;\
  212        movzx   b ## H,         %edi;\
  213        xor     s2(%ebp,%edi,4),%esi;\
  214        pop     %edi;\
  215        add     %esi,           c ## D;\
  216        add     c ## D,         %esi;\
  217        add     k+round(%ebp),  c ## D;\
  218        xor     %edi,           c ## D;\
  219        add     k+4+round(%ebp),%esi;\
  220        xor     %esi,           d ## D;\
  221        ror     $1,             d ## D;
 222
  223ENTRY(twofish_enc_blk)
/*
 * Encrypts one block of four 32-bit words.
 * Parameters are read from the stack at the ctx, out_blk and in_blk
 * offsets: context address, output address, input address.
 * %ebp, %ebx, %esi and %edi are saved and restored; %ecx and %edx are
 * overwritten; %eax is set to 1 on return.
 * Flow: load a..d, apply input whitening, pre-rotate a and d into the form
 * the round macros expect, run 16 rounds, apply output whitening, store.
 */
  224        push    %ebp                    /* save registers according to calling convention*/
  225        push    %ebx
  226        push    %esi
  227        push    %edi
  228
  229        mov     ctx + 16(%esp), %ebp    /* abuse the base pointer: set new base
  230                                         * pointer to the ctx address */
  231        mov     in_blk+16(%esp),%edi    /* input address in edi */
  232
  233        mov     (%edi),         %eax    /* eax = input word a */
  234        mov     b_offset(%edi), %ebx    /* ebx = input word b */
  235        mov     c_offset(%edi), %ecx    /* ecx = input word c */
  236        mov     d_offset(%edi), %edx    /* edx = input word d */
  237        input_whitening(%eax,%ebp,a_offset)
  238        ror     $16,    %eax            /* encrypt_round expects a rotated by 16 */
  239        input_whitening(%ebx,%ebp,b_offset)
  240        input_whitening(%ecx,%ebp,c_offset)
  241        input_whitening(%edx,%ebp,d_offset)
  242        rol     $1,     %edx            /* encrypt_round expects d already rol $1 */
  243
        /*
         * 16 rounds.  The last argument is a byte offset into k and advances
         * by 8 (two key words) per round.  Alternating the stem order
         * (R0,R1,R2,R3) / (R2,R3,R0,R1) swaps the register roles each round
         * instead of moving data between registers.
         */
  244        encrypt_round(R0,R1,R2,R3,0);
  245        encrypt_round(R2,R3,R0,R1,8);
  246        encrypt_round(R0,R1,R2,R3,2*8);
  247        encrypt_round(R2,R3,R0,R1,3*8);
  248        encrypt_round(R0,R1,R2,R3,4*8);
  249        encrypt_round(R2,R3,R0,R1,5*8);
  250        encrypt_round(R0,R1,R2,R3,6*8);
  251        encrypt_round(R2,R3,R0,R1,7*8);
  252        encrypt_round(R0,R1,R2,R3,8*8);
  253        encrypt_round(R2,R3,R0,R1,9*8);
  254        encrypt_round(R0,R1,R2,R3,10*8);
  255        encrypt_round(R2,R3,R0,R1,11*8);
  256        encrypt_round(R0,R1,R2,R3,12*8);
  257        encrypt_round(R2,R3,R0,R1,13*8);
  258        encrypt_round(R0,R1,R2,R3,14*8);
  259        encrypt_last_round(R2,R3,R0,R1,15*8);
  260
        /*
         * eax/ebx are whitened with, and stored to, the c/d slots while
         * ecx/edx go to the a/b slots.
         * NOTE(review): presumably this pairing accounts for the register
         * roles left by the final round -- confirm against the cipher spec.
         */
  261        output_whitening(%eax,%ebp,c_offset)
  262        output_whitening(%ebx,%ebp,d_offset)
  263        output_whitening(%ecx,%ebp,a_offset)
  264        output_whitening(%edx,%ebp,b_offset)
  265        mov     out_blk+16(%esp),%edi;  /* output address in edi */
  266        mov     %eax,           c_offset(%edi)
  267        mov     %ebx,           d_offset(%edi)
  268        mov     %ecx,           (%edi)
  269        mov     %edx,           b_offset(%edi)
  270
  271        pop     %edi                    /* restore in reverse push order */
  272        pop     %esi
  273        pop     %ebx
  274        pop     %ebp
  275        mov     $1,     %eax            /* return value is always 1 */
  276        ret
  277ENDPROC(twofish_enc_blk)
 278
  279ENTRY(twofish_dec_blk)
/*
 * Decrypts one block of four 32-bit words.
 * Parameters are read from the stack at the ctx, out_blk and in_blk
 * offsets: context address, output address, input address.
 * %ebp, %ebx, %esi and %edi are saved and restored; %ecx and %edx are
 * overwritten; %eax is set to 1 on return.
 * Flow: load a..d, undo the output whitening, pre-rotate b and c into the
 * form the round macros expect, run the 16 rounds with the key offsets in
 * descending order, undo the input whitening, store.
 */
  280        push    %ebp                    /* save registers according to calling convention*/
  281        push    %ebx
  282        push    %esi
  283        push    %edi
  284
  285
  286        mov     ctx + 16(%esp), %ebp    /* abuse the base pointer: set new base
  287                                         * pointer to the ctx address */
  288        mov     in_blk+16(%esp),%edi    /* input address in edi */
  289
  290        mov     (%edi),         %eax    /* eax = input word a */
  291        mov     b_offset(%edi), %ebx    /* ebx = input word b */
  292        mov     c_offset(%edi), %ecx    /* ecx = input word c */
  293        mov     d_offset(%edi), %edx    /* edx = input word d */
  294        output_whitening(%eax,%ebp,a_offset)
  295        output_whitening(%ebx,%ebp,b_offset)
  296        ror     $16,    %ebx            /* decrypt_round expects b rotated by 16 */
  297        output_whitening(%ecx,%ebp,c_offset)
  298        output_whitening(%edx,%ebp,d_offset)
  299        rol     $1,     %ecx            /* decrypt_round expects c already rol $1 */
  300
        /*
         * 16 rounds, walking the key offsets from 15*8 down to 0 (8 bytes,
         * i.e. two key words, per round).  Alternating the stem order
         * (R0,R1,R2,R3) / (R2,R3,R0,R1) swaps the register roles each round
         * instead of moving data between registers.
         */
  301        decrypt_round(R0,R1,R2,R3,15*8);
  302        decrypt_round(R2,R3,R0,R1,14*8);
  303        decrypt_round(R0,R1,R2,R3,13*8);
  304        decrypt_round(R2,R3,R0,R1,12*8);
  305        decrypt_round(R0,R1,R2,R3,11*8);
  306        decrypt_round(R2,R3,R0,R1,10*8);
  307        decrypt_round(R0,R1,R2,R3,9*8);
  308        decrypt_round(R2,R3,R0,R1,8*8);
  309        decrypt_round(R0,R1,R2,R3,7*8);
  310        decrypt_round(R2,R3,R0,R1,6*8);
  311        decrypt_round(R0,R1,R2,R3,5*8);
  312        decrypt_round(R2,R3,R0,R1,4*8);
  313        decrypt_round(R0,R1,R2,R3,3*8);
  314        decrypt_round(R2,R3,R0,R1,2*8);
  315        decrypt_round(R0,R1,R2,R3,1*8);
  316        decrypt_last_round(R2,R3,R0,R1,0);
  317
        /*
         * eax/ebx are whitened with, and stored to, the c/d slots while
         * ecx/edx go to the a/b slots.
         * NOTE(review): presumably this pairing accounts for the register
         * roles left by the final round -- confirm against the cipher spec.
         */
  318        input_whitening(%eax,%ebp,c_offset)
  319        input_whitening(%ebx,%ebp,d_offset)
  320        input_whitening(%ecx,%ebp,a_offset)
  321        input_whitening(%edx,%ebp,b_offset)
  322        mov     out_blk+16(%esp),%edi;  /* output address in edi */
  323        mov     %eax,           c_offset(%edi)
  324        mov     %ebx,           d_offset(%edi)
  325        mov     %ecx,           (%edi)
  326        mov     %edx,           b_offset(%edi)
  327
  328        pop     %edi                    /* restore in reverse push order */
  329        pop     %esi
  330        pop     %ebx
  331        pop     %ebp
  332        mov     $1,     %eax            /* return value is always 1 */
  333        ret
  334ENDPROC(twofish_dec_blk)
 335