linux/arch/x86/crypto/aes-i586-asm_32.S
<<
>>
Prefs
   1// -------------------------------------------------------------------------
   2// Copyright (c) 2001, Dr Brian Gladman <                 >, Worcester, UK.
   3// All rights reserved.
   4//
   5// LICENSE TERMS
   6//
   7// The free distribution and use of this software in both source and binary 
   8// form is allowed (with or without changes) provided that:
   9//
  10//   1. distributions of this source code include the above copyright 
  11//      notice, this list of conditions and the following disclaimer//
  12//
  13//   2. distributions in binary form include the above copyright
  14//      notice, this list of conditions and the following disclaimer
  15//      in the documentation and/or other associated materials//
  16//
  17//   3. the copyright holder's name is not used to endorse products 
  18//      built using this software without specific written permission.
  19//
  20//
  21// ALTERNATIVELY, provided that this notice is retained in full, this product
  22// may be distributed under the terms of the GNU General Public License (GPL),
  23// in which case the provisions of the GPL apply INSTEAD OF those given above.
  24//
  25// Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org>
  26// Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
  27
  28// DISCLAIMER
  29//
  30// This software is provided 'as is' with no explicit or implied warranties
  31// in respect of its properties including, but not limited to, correctness 
  32// and fitness for purpose.
  33// -------------------------------------------------------------------------
  34// Issue Date: 29/07/2002
  35
  36.file "aes-i586-asm.S"        // source file name recorded by the assembler
  37.text                          // assemble everything below into the code section
  38
  39#include <linux/linkage.h>
  40#include <asm/asm-offsets.h>
  41
  42#define tlen 1024   // byte length of each of 4 'xor' arrays (256 32-bit words); used as the stride from one table to the next
  43
  44/* Offsets from %esp to the stack-passed parameters, valid when exactly one
     * register has been pushed since entry.  Code that has pushed more adds
     * 4 per extra push at the point of use, e.g. in_blk+4(%esp) after two
     * pushes and out_blk+12(%esp) after four. */
  45#define ctx 8           // 1st parameter: context pointer
  46#define out_blk 12      // 2nd parameter: output block pointer
  47#define in_blk 16       // 3rd parameter: input block pointer
  48
  49/* Byte offsets of the fields used here inside the crypto_aes_ctx structure.
     * NOTE(review): these are hard-coded and the structure is not declared in
     * this file -- confirm they still match its definition. */
  50#define klen (480)      // key length field; loaded as the "key size" and compared with 24
  51#define ekey (0)        // start of the key schedule walked by aes_enc_blk
  52#define dkey (240)      // start of the key schedule walked by aes_dec_blk
  53
  54// register mapping for encrypt and decrypt subroutines
    // (the code writes %r0 .. %r5; the preprocessor replaces the name that
    // follows the '%' with the real register)
  55
  56#define r0  eax       // column 0 on entry to *_rnd1; index register inside *_rnd1
  57#define r1  ebx       // column 1
  58#define r2  ecx       // input block pointer at function entry; column 0 on entry to *_rnd2
  59#define r3  edx       // key size at function entry; scratch (tmp) inside the rounds
  60#define r4  esi       // column 2
  61#define r5  edi       // column 3
  62
  63#define eaxl  al      // <reg>l / <reg>h give the byte register holding bits 0-7 /
  64#define eaxh  ah      // bits 8-15 of a 32-bit register.  Only eax, ebx, ecx and edx
  65#define ebxl  bl      // (r0-r3) have entries, so only those can be handed to l()
  66#define ebxh  bh      // and h() below.
  67#define ecxl  cl
  68#define ecxh  ch
  69#define edxl  dl
  70#define edxh  dh
  71
    // h(reg) / l(reg): name of the high / low byte register of reg.  The
    // extra level of indirection lets an argument such as r0 be expanded to
    // eax before ## pastes the suffix, so h(r0) -> eaxh -> ah.
  72#define _h(reg) reg##h
  73#define h(reg) _h(reg)
  74
  75#define _l(reg) reg##l
  76#define l(reg) _l(reg)
  77
  78// do_col: takes a 32-bit word representing a column (in idx) and uses
  79// each of its four bytes to index into four consecutive tables of 256
  80// 32-bit words; each value looked up is xored into one of the four
  81// output registers: byte 0 -> a1, byte 1 -> a2, byte 2 -> a3, byte 3 -> a4.
  82
  83// Parameters:
  84//   table  base address of the first table; the others follow at +tlen, +2*tlen, +3*tlen
  85//   a1     output register xored with the table 0 entry for byte 0 of idx
  86//   a2     output register xored with the table 1 entry for byte 1 of idx
  87//   a3     output register xored with the table 2 entry for byte 2 of idx
  88//   a4     output register xored with the table 3 entry for byte 3 of idx
  89//   idx    input register for the round (destroyed); needs l()/h() byte names
  90//   tmp    scratch register for the round (destroyed)
  91// do_col itself takes no key schedule operand; flags are modified by xor/shr.
  92
  93#define do_col(table, a1,a2,a3,a4, idx, tmp)    \
  94        movzx   %l(idx),%tmp;                   /* tmp = byte 0 of idx        */ \
  95        xor     table(,%tmp,4),%a1;             /* a1 ^= table 0 [byte 0]     */ \
  96        movzx   %h(idx),%tmp;                   /* tmp = byte 1 of idx        */ \
  97        shr     $16,%idx;                       /* move bytes 2,3 to bits 0-15 */ \
  98        xor     table+tlen(,%tmp,4),%a2;        /* a2 ^= table 1 [byte 1]     */ \
  99        movzx   %l(idx),%tmp;                   /* tmp = original byte 2      */ \
 100        movzx   %h(idx),%idx;                   /* idx = original byte 3      */ \
 101        xor     table+2*tlen(,%tmp,4),%a3;      /* a3 ^= table 2 [byte 2]     */ \
 102        xor     table+3*tlen(,%idx,4),%a4;      /* a4 ^= table 3 [byte 3]     */
 103
 104// do_fcol: variant of the four-table column lookup used for the first
    // column of a forward round.  Instead of accumulating into existing
    // values it first initialises the output registers from the key schedule:
    //   a1 = word at sched+0,  a4 = word at sched+4,
    //   a3 = word at sched+8,  a2 = word at sched+12
    // and then xors in the table entries selected by the bytes of idx
    // (byte 0 -> a1, byte 1 -> a2, byte 2 -> a3, byte 3 -> a4).
    // 'sched' is pasted directly after the numeric offset, so it must be a
    // bare (%reg) or begin with an explicit sign, e.g. +16(%ebp) or -64(%ebp).
    // tmp is scratch; flags are modified.
 105// NB1: original value of a3 is in idx on exit
 106// NB2: original values of a1,a2,a4 aren't used
 107#define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched) \
 108        mov     0 sched,%a1;                    /* a1 = key word at +0        */ \
 109        movzx   %l(idx),%tmp;                   /* tmp = byte 0 of idx        */ \
 110        mov     12 sched,%a2;                   /* a2 = key word at +12       */ \
 111        xor     table(,%tmp,4),%a1;             /* a1 ^= table 0 [byte 0]     */ \
 112        mov     4 sched,%a4;                    /* a4 = key word at +4        */ \
 113        movzx   %h(idx),%tmp;                   /* tmp = byte 1 of idx        */ \
 114        shr     $16,%idx;                       /* move bytes 2,3 to bits 0-15 */ \
 115        xor     table+tlen(,%tmp,4),%a2;        /* a2 ^= table 1 [byte 1]     */ \
 116        movzx   %l(idx),%tmp;                   /* tmp = original byte 2      */ \
 117        movzx   %h(idx),%idx;                   /* idx = original byte 3      */ \
 118        xor     table+3*tlen(,%idx,4),%a4;      /* a4 ^= table 3 [byte 3]     */ \
 119        mov     %a3,%idx;                       /* keep old a3 in idx (NB1)   */ \
 120        mov     8 sched,%a3;                    /* a3 = key word at +8        */ \
 121        xor     table+2*tlen(,%tmp,4),%a3;      /* a3 ^= table 2 [byte 2]     */
 122
 123// do_icol: variant of the four-table column lookup used for the first
    // column of an inverse round.  It initialises the output registers from
    // the key schedule:
    //   a1 = word at sched+0,  a2 = word at sched+4,
    //   a3 = word at sched+8,  a4 = word at sched+12
    // and then xors in the table entries selected by the bytes of idx
    // (byte 0 -> a1, byte 1 -> a2, byte 2 -> a3, byte 3 -> a4).
    // 'sched' is pasted directly after the numeric offset, so it must be a
    // bare (%reg) or begin with an explicit sign, e.g. +16(%ebp) or -64(%ebp).
    // tmp is scratch; flags are modified.
 124// NB1: original value of a3 is in idx on exit
 125// NB2: original values of a1,a2,a4 aren't used
 126#define do_icol(table, a1,a2,a3,a4, idx, tmp, sched) \
 127        mov     0 sched,%a1;                    /* a1 = key word at +0        */ \
 128        movzx   %l(idx),%tmp;                   /* tmp = byte 0 of idx        */ \
 129        mov     4 sched,%a2;                    /* a2 = key word at +4        */ \
 130        xor     table(,%tmp,4),%a1;             /* a1 ^= table 0 [byte 0]     */ \
 131        mov     12 sched,%a4;                   /* a4 = key word at +12       */ \
 132        movzx   %h(idx),%tmp;                   /* tmp = byte 1 of idx        */ \
 133        shr     $16,%idx;                       /* move bytes 2,3 to bits 0-15 */ \
 134        xor     table+tlen(,%tmp,4),%a2;        /* a2 ^= table 1 [byte 1]     */ \
 135        movzx   %l(idx),%tmp;                   /* tmp = original byte 2      */ \
 136        movzx   %h(idx),%idx;                   /* idx = original byte 3      */ \
 137        xor     table+3*tlen(,%idx,4),%a4;      /* a4 ^= table 3 [byte 3]     */ \
 138        mov     %a3,%idx;                       /* keep old a3 in idx (NB1)   */ \
 139        mov     8 sched,%a3;                    /* a3 = key word at +8        */ \
 140        xor     table+2*tlen(,%tmp,4),%a3;      /* a3 ^= table 2 [byte 2]     */
 141
 142
 143// original Gladman had conditional saves to MMX regs.
    // Here both macros move a register to or from a 4-byte slot addressed
    // from %esp, so the slots must already be reserved on the stack.
    // Note the argument order differs between the two:
    //   save(slot, reg)     stores reg into slot
    //   restore(reg, slot)  loads reg from slot
 144#define save(a1, a2)            /* a1 = slot number, a2 = source register */ \
 145        mov     %a2,4*a1(%esp)
 146
 147#define restore(a1, a2)         /* a1 = destination register, a2 = slot number */ \
 148        mov     4*a2(%esp),%a1
 149
 150// These macros perform a forward encryption cycle, one round each. They
 151// are used in pairs: fwd_rnd1 is entered with the previous round's column
 152// values in r0,r1,r4,r5 and leaves the new ones in r2,r1,r4,r5; fwd_rnd2
 153// moves column 0 back from r2 to r0. Stack slots 0 and 1 are used
    // for temporary storage and r3 is scratch.
    //   arg    operand addressing this round's key (passed to do_fcol as sched)
    //   table  base address of the four lookup tables used for this round
 154
 155// round column values
 156// on entry: r0,r1,r4,r5
 157// on exit:  r2,r1,r4,r5
 158#define fwd_rnd1(arg, table)                                            \
 159        save   (0,r1);                                  /* slot 0 = old r1 */ \
 160        save   (1,r5);                                  /* slot 1 = old r5 */ \
 161                                                                        \
 162        /* compute new column values */                                 \
 163        do_fcol(table, r2,r5,r4,r1, r0,r3, arg);        /* idx=r0; old r4 is left in r0 */ \
 164        do_col (table, r4,r1,r2,r5, r0,r3);             /* idx=r4 (old value) */ \
 165        restore(r0,0);                                  /* r0 = old r1 */ \
 166        do_col (table, r1,r2,r5,r4, r0,r3);             /* idx=r1 (old value) */ \
 167        restore(r0,1);                                  /* r0 = old r5 */ \
 168        do_col (table, r5,r4,r1,r2, r0,r3);             /* idx=r5 (old value) */
 169
 170// fwd_rnd2: one forward round with column 0 entering in r2 and leaving in
    // r0; r1, r4 and r5 carry columns 1-3 in and out. Uses stack slots 0 and 1
    // for temporary storage and r3 as scratch.
    //   arg    operand addressing this round's key (passed to do_fcol as sched)
    //   table  base address of the four lookup tables used for this round
    // round column values
 171// on entry: r2,r1,r4,r5
 172// on exit:  r0,r1,r4,r5
 173#define fwd_rnd2(arg, table)                                            \
 174        save   (0,r1);                                  /* slot 0 = old r1 */ \
 175        save   (1,r5);                                  /* slot 1 = old r5 */ \
 176                                                                        \
 177        /* compute new column values */                                 \
 178        do_fcol(table, r0,r5,r4,r1, r2,r3, arg);        /* idx=r2; old r4 is left in r2 */ \
 179        do_col (table, r4,r1,r0,r5, r2,r3);             /* idx=r4 (old value) */ \
 180        restore(r2,0);                                  /* r2 = old r1 */ \
 181        do_col (table, r1,r0,r5,r4, r2,r3);             /* idx=r1 (old value) */ \
 182        restore(r2,1);                                  /* r2 = old r5 */ \
 183        do_col (table, r5,r4,r1,r0, r2,r3);             /* idx=r5 (old value) */
 184
 185// These macros perform an inverse encryption cycle, one round each. They
 186// are used in pairs: inv_rnd1 is entered with the previous round's column
 187// values in r0,r1,r4,r5 and leaves the new ones in r2,r1,r4,r5; inv_rnd2
 188// moves column 0 back from r2 to r0. Stack slots 0 and 1 are used
    // for temporary storage and r3 is scratch.
    //   arg    operand addressing this round's key (passed to do_icol as sched)
    //   table  base address of the four lookup tables used for this round
 189
 190// round column values
 191// on entry: r0,r1,r4,r5
 192// on exit:  r2,r1,r4,r5
 193#define inv_rnd1(arg, table)                                            \
 194        save    (0,r1);                                 /* slot 0 = old r1 */ \
 195        save    (1,r5);                                 /* slot 1 = old r5 */ \
 196                                                                        \
 197        /* compute new column values */                                 \
 198        do_icol(table, r2,r1,r4,r5, r0,r3, arg);        /* idx=r0; old r4 is left in r0 */ \
 199        do_col (table, r4,r5,r2,r1, r0,r3);             /* idx=r4 (old value) */ \
 200        restore(r0,0);                                  /* r0 = old r1 */ \
 201        do_col (table, r1,r4,r5,r2, r0,r3);             /* idx=r1 (old value) */ \
 202        restore(r0,1);                                  /* r0 = old r5 */ \
 203        do_col (table, r5,r2,r1,r4, r0,r3);             /* idx=r5 (old value) */
 204
 205// inv_rnd2: one inverse round with column 0 entering in r2 and leaving in
    // r0; r1, r4 and r5 carry columns 1-3 in and out. Uses stack slots 0 and 1
    // for temporary storage and r3 as scratch.
    //   arg    operand addressing this round's key (passed to do_icol as sched)
    //   table  base address of the four lookup tables used for this round
    // round column values
 206// on entry: r2,r1,r4,r5
 207// on exit:  r0,r1,r4,r5
 208#define inv_rnd2(arg, table)                                            \
 209        save    (0,r1);                                 /* slot 0 = old r1 */ \
 210        save    (1,r5);                                 /* slot 1 = old r5 */ \
 211                                                                        \
 212        /* compute new column values */                                 \
 213        do_icol(table, r0,r1,r4,r5, r2,r3, arg);        /* idx=r2; old r4 is left in r2 */ \
 214        do_col (table, r4,r5,r0,r1, r2,r3);             /* idx=r4 (old value) */ \
 215        restore(r2,0);                                  /* r2 = old r1 */ \
 216        do_col (table, r1,r4,r5,r0, r2,r3);             /* idx=r1 (old value) */ \
 217        restore(r2,1);                                  /* r2 = old r5 */ \
 218        do_col (table, r5,r0,r1,r4, r2,r3);             /* idx=r5 (old value) */
 219
 220// AES (Rijndael) Encryption Subroutine
 221/* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
    //
    // Loads four 32-bit words from in_blk, xors in the first round key from
    // the key schedule at ctx+ekey, runs 10, 12 or 14 table-driven rounds
    // chosen by the key size read from ctx+klen (below 24, equal to 24,
    // above 24) and stores the four resulting words to out_blk.
    // All three arguments are read from the stack.  %ebp, %ebx, %esi and %edi
    // are pushed on entry and popped on exit; %eax, %ecx, %edx and the flags
    // are overwritten.  Eight bytes of stack are used for the round macros'
    // two save slots.
 222
 223.extern  crypto_ft_tab          // tables used for every round except the last
 224.extern  crypto_fl_tab          // tables used for the last round
 225
 226ENTRY(aes_enc_blk)
 227        push    %ebp
 228        mov     ctx(%esp),%ebp          // ebp = ctx (one register pushed so far)
 229
 230// CAUTION: the order and the values used in these assigns
 231// rely on the register mappings
 232
 2331:      push    %ebx
 234        mov     in_blk+4(%esp),%r2      // r2 = in_blk; +4 accounts for the second push
 235        push    %esi
 236        mov     klen(%ebp),%r3   // key size
 237        push    %edi
 238#if ekey != 0
 239        lea     ekey(%ebp),%ebp  // key pointer (only assembled when ekey is non-zero)
 240#endif
 241
 242// input four columns and xor in first round key
 243
 244        mov     (%r2),%r0
 245        mov     4(%r2),%r1
 246        mov     8(%r2),%r4
 247        mov     12(%r2),%r5
 248        xor     (%ebp),%r0
 249        xor     4(%ebp),%r1
 250        xor     8(%ebp),%r4
 251        xor     12(%ebp),%r5
 252
 253        sub     $8,%esp         // space for register saves on stack
 254        add     $16,%ebp        // increment to next round key
 255        cmp     $24,%r3         // flags from this compare drive both jb and je below
 256        jb      4f              // 10 rounds for 128-bit key
 257        lea     32(%ebp),%ebp   // skip two round keys; lea leaves the flags untouched
 258        je      3f              // 12 rounds for 192-bit key
 259        lea     32(%ebp),%ebp   // skip two more round keys for the longest key
 260
    // ebp has been advanced so that the final ten rounds always use offsets
    // 0 .. +144; longer keys enter earlier, at negative offsets.
 2612:      fwd_rnd1( -64(%ebp), crypto_ft_tab)     // 14 rounds for 256-bit key
 262        fwd_rnd2( -48(%ebp), crypto_ft_tab)
 2633:      fwd_rnd1( -32(%ebp), crypto_ft_tab)     // 12 rounds for 192-bit key
 264        fwd_rnd2( -16(%ebp), crypto_ft_tab)
 2654:      fwd_rnd1(    (%ebp), crypto_ft_tab)     // 10 rounds for 128-bit key
 266        fwd_rnd2( +16(%ebp), crypto_ft_tab)
 267        fwd_rnd1( +32(%ebp), crypto_ft_tab)
 268        fwd_rnd2( +48(%ebp), crypto_ft_tab)
 269        fwd_rnd1( +64(%ebp), crypto_ft_tab)
 270        fwd_rnd2( +80(%ebp), crypto_ft_tab)
 271        fwd_rnd1( +96(%ebp), crypto_ft_tab)
 272        fwd_rnd2(+112(%ebp), crypto_ft_tab)
 273        fwd_rnd1(+128(%ebp), crypto_ft_tab)
 274        fwd_rnd2(+144(%ebp), crypto_fl_tab)     // last round uses a different table
 275
 276// move final values to the output array.  CAUTION: the
 277// order of these assigns relies on the register mappings
    // (r5, r4 and r1 are edi, esi and ebx, so each is stored before the
    // matching pop overwrites it)
 278
 279        add     $8,%esp                 // release the two save slots
 280        mov     out_blk+12(%esp),%ebp   // ebp = out_blk; +12 accounts for the three extra pushes
 281        mov     %r5,12(%ebp)
 282        pop     %edi
 283        mov     %r4,8(%ebp)
 284        pop     %esi
 285        mov     %r1,4(%ebp)
 286        pop     %ebx
 287        mov     %r0,(%ebp)
 288        pop     %ebp
 289        ret
 290ENDPROC(aes_enc_blk)
 291
 292// AES (Rijndael) Decryption Subroutine
 293/* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
    //
    // Loads four 32-bit words from in_blk, xors in the first round key from
    // the key schedule at ctx+dkey, runs 10, 12 or 14 table-driven inverse
    // rounds chosen by the key size read from ctx+klen (below 24, equal to
    // 24, above 24) and stores the four resulting words to out_blk.
    // All three arguments are read from the stack.  %ebp, %ebx, %esi and %edi
    // are pushed on entry and popped on exit; %eax, %ecx, %edx and the flags
    // are overwritten.  Eight bytes of stack are used for the round macros'
    // two save slots.
 294
 295.extern  crypto_it_tab          // tables used for every round except the last
 296.extern  crypto_il_tab          // tables used for the last round
 297
 298ENTRY(aes_dec_blk)
 299        push    %ebp
 300        mov     ctx(%esp),%ebp          // ebp = ctx (one register pushed so far)
 301
 302// CAUTION: the order and the values used in these assigns
 303// rely on the register mappings
 304
 3051:      push    %ebx
 306        mov     in_blk+4(%esp),%r2      // r2 = in_blk; +4 accounts for the second push
 307        push    %esi
 308        mov     klen(%ebp),%r3   // key size
 309        push    %edi
 310#if dkey != 0
 311        lea     dkey(%ebp),%ebp  // key pointer (assembled because dkey is non-zero)
 312#endif
 313
 314// input four columns and xor in first round key
 315
 316        mov     (%r2),%r0
 317        mov     4(%r2),%r1
 318        mov     8(%r2),%r4
 319        mov     12(%r2),%r5
 320        xor     (%ebp),%r0
 321        xor     4(%ebp),%r1
 322        xor     8(%ebp),%r4
 323        xor     12(%ebp),%r5
 324
 325        sub     $8,%esp         // space for register saves on stack
 326        add     $16,%ebp        // increment to next round key
 327        cmp     $24,%r3         // flags from this compare drive both jb and je below
 328        jb      4f              // 10 rounds for 128-bit key
 329        lea     32(%ebp),%ebp   // skip two round keys; lea leaves the flags untouched
 330        je      3f              // 12 rounds for 192-bit key
 331        lea     32(%ebp),%ebp   // skip two more round keys for the longest key
 332
    // ebp has been advanced so that the final ten rounds always use offsets
    // 0 .. +144; longer keys enter earlier, at negative offsets.
 3332:      inv_rnd1( -64(%ebp), crypto_it_tab)     // 14 rounds for 256-bit key
 334        inv_rnd2( -48(%ebp), crypto_it_tab)
 3353:      inv_rnd1( -32(%ebp), crypto_it_tab)     // 12 rounds for 192-bit key
 336        inv_rnd2( -16(%ebp), crypto_it_tab)
 3374:      inv_rnd1(    (%ebp), crypto_it_tab)     // 10 rounds for 128-bit key
 338        inv_rnd2( +16(%ebp), crypto_it_tab)
 339        inv_rnd1( +32(%ebp), crypto_it_tab)
 340        inv_rnd2( +48(%ebp), crypto_it_tab)
 341        inv_rnd1( +64(%ebp), crypto_it_tab)
 342        inv_rnd2( +80(%ebp), crypto_it_tab)
 343        inv_rnd1( +96(%ebp), crypto_it_tab)
 344        inv_rnd2(+112(%ebp), crypto_it_tab)
 345        inv_rnd1(+128(%ebp), crypto_it_tab)
 346        inv_rnd2(+144(%ebp), crypto_il_tab)     // last round uses a different table
 347
 348// move final values to the output array.  CAUTION: the
 349// order of these assigns relies on the register mappings
    // (r5, r4 and r1 are edi, esi and ebx, so each is stored before the
    // matching pop overwrites it)
 350
 351        add     $8,%esp                 // release the two save slots
 352        mov     out_blk+12(%esp),%ebp   // ebp = out_blk; +12 accounts for the three extra pushes
 353        mov     %r5,12(%ebp)
 354        pop     %edi
 355        mov     %r4,8(%ebp)
 356        pop     %esi
 357        mov     %r1,4(%ebp)
 358        pop     %ebx
 359        mov     %r0,(%ebp)
 360        pop     %ebp
 361        ret
 362ENDPROC(aes_dec_blk)
 363