linux/arch/x86/crypto/aes-x86_64-asm_64.S
<<
>>
Prefs
   1/* AES (Rijndael) implementation (FIPS PUB 197) for x86_64
   2 *
   3 * Copyright (C) 2005 Andreas Steinmetz, <ast@domdv.de>
   4 *
   5 * License:
   6 * This code can be distributed under the terms of the GNU General Public
   7 * License (GPL) Version 2 provided that the above header down to and
   8 * including this sentence is retained in full.
   9 */
  10
  11.extern crypto_ft_tab
  12.extern crypto_it_tab
  13.extern crypto_fl_tab
  14.extern crypto_il_tab
  15
  16.text
  17
  18#include <asm/asm-offsets.h>
  19
  /*
   * Register aliases used by the macros below.
   *
   * R1..R4 (%rax, %rbx, %rcx, %rdx) each get five names: the 64-bit
   * register plus E (32-bit), X (16-bit), H (bits 8-15) and L (bits 0-7)
   * variants.  The macros build operand names by pasting one of these
   * suffixes onto a macro argument (e.g. r2 ## H), so any register passed
   * in a position that is pasted with H, L or X must be one of R1..R4.
   *
   * R5..R7 (%rsi, %rdi, %rbp) only have 64-bit and E names, and
   * R8..R11 (%r8..%r11) only their 64-bit names.
   */
  20#define R1      %rax
  21#define R1E     %eax
  22#define R1X     %ax
  23#define R1H     %ah
  24#define R1L     %al
  25#define R2      %rbx
  26#define R2E     %ebx
  27#define R2X     %bx
  28#define R2H     %bh
  29#define R2L     %bl
  30#define R3      %rcx
  31#define R3E     %ecx
  32#define R3X     %cx
  33#define R3H     %ch
  34#define R3L     %cl
  35#define R4      %rdx
  36#define R4E     %edx
  37#define R4X     %dx
  38#define R4H     %dh
  39#define R4L     %dl
  40#define R5      %rsi
  41#define R5E     %esi
  42#define R6      %rdi
  43#define R6E     %edi
  44#define R7      %rbp
  45#define R7E     %ebp
  46#define R8      %r8
  47#define R9      %r9
  48#define R10     %r10
  49#define R11     %r11
  50
  /*
   * prologue - define the global function symbol FUNC and emit the code
   * that runs before the first round.
   *
   * Arguments:
   *   FUNC        symbol to emit (.global, type @function, .align 8)
   *   KEY         byte offset from r8 of the round-key array to use
   *   B128, B192  labels to branch to for the two shorter key sizes
   *   r1, r2      r1 is copied (64-bit) into r2 before r1 is overwritten
   *   r3, r4      r3 is copied (64-bit) into r4
   *   r5,r1,r6,r7 receive, as 32-bit registers, the words at 0, 4, 8 and
   *               12 bytes from the input pointer
   *   r7          on entry the input pointer; it is loaded last because
   *               the load of word 3 overwrites it
   *   r8          base pointer for KEY and for the 32-bit value at 480
   *   r9          set to r8 + KEY + 48 and used as the round-key pointer
   *   r10, r11    r10 is copied (64-bit) into r11, then r10 ## E is
   *               reused to hold the value loaded from 480(r8)
   *
   * After the loads, the four words are XORed with the 16 bytes at
   * -48(r9)..-36(r9), i.e. the first 16 bytes of the key array at
   * KEY(r8).
   *
   * Dispatch on the value loaded from 480(r8):
   *   below 24   jump to B128 with r9 unchanged
   *   equal 24   jump to B192 with r9 advanced by 32
   *   otherwise  fall through with r9 advanced by 64
   * The first leaq sits between cmpl and je; leaq does not modify the
   * flags, so je still tests the result of the cmpl.
   *
   * NOTE(review): 480 is a hard-coded structure offset; the value there
   * is presumably the key length in bytes (16/24/32) - confirm against
   * the C definition of the context structure passed in r8.
   */
  51#define prologue(FUNC,KEY,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \
  52        .global FUNC;                   \
  53        .type   FUNC,@function;         \
  54        .align  8;                      \
  55FUNC:   movq    r1,r2;                  \
  56        movq    r3,r4;                  \
  57        leaq    KEY+48(r8),r9;          \
  58        movq    r10,r11;                \
  59        movl    (r7),r5 ## E;           \
  60        movl    4(r7),r1 ## E;          \
  61        movl    8(r7),r6 ## E;          \
  62        movl    12(r7),r7 ## E;         \
  63        movl    480(r8),r10 ## E;       \
  64        xorl    -48(r9),r5 ## E;        \
  65        xorl    -44(r9),r1 ## E;        \
  66        xorl    -40(r9),r6 ## E;        \
  67        xorl    -36(r9),r7 ## E;        \
  68        cmpl    $24,r10 ## E;           \
  69        jb      B128;                   \
  70        leaq    32(r9),r9;              \
  71        je      B192;                   \
  72        leaq    32(r9),r9;
  73
  /*
   * epilogue - restore saved registers, store the result block, return.
   *
   * Arguments:
   *   r1, r2   r1 is copied (64-bit) into r2
   *   r3, r4   r3 is copied (64-bit) into r4
   *   r5..r8   the four 32-bit result words, stored in that order at
   *            byte offsets 0, 4, 8 and 12 from r9
   *   r9       pointer to the 16-byte output buffer
   *
   * No stack is used; the macro ends with ret.
   */
  74#define epilogue(r1,r2,r3,r4,r5,r6,r7,r8,r9) \
  75        movq    r1,r2;                  \
  76        movq    r3,r4;                  \
  77        movl    r5 ## E,(r9);           \
  78        movl    r6 ## E,4(r9);          \
  79        movl    r7 ## E,8(r9);          \
  80        movl    r8 ## E,12(r9);         \
  81        ret;
  82
  /*
   * round - one table-driven cipher round on a four-word state.
   *
   * Arguments:
   *   TAB       base symbol of the lookup tables.  Four tables are
   *             addressed, at TAB, TAB+1024, TAB+2048 and TAB+3072, each
   *             indexed by a single byte scaled by 4.
   *   OFFSET    displacement from r8 of the 16 bytes of round key.
   *   r1..r4    the four input words.  Their H, L, X and E aliases are
   *             all used, so they must be chosen from R1..R4.
   *   r5, r6    receive two of the output words; also used as 64-bit
   *             index registers right after movzbl (a 32-bit write
   *             clears the upper half of the 64-bit register).
   *   r7        scratch index register.
   *   r8        pointer that OFFSET is relative to.
   *   ra..rd    registers XORed with the key words at OFFSET, OFFSET+4,
   *             OFFSET+8 and OFFSET+12 respectively.
   *
   * Result: the output words are left in r5, r6, r3 and r4 (r3 and r4
   * are overwritten in place).  r1, r2 and r7 are destroyed.  Each output
   * register accumulates exactly one lookup from each of the four
   * tables.
   *
   * Byte extraction: only the low two bytes of a register can be reached
   * through the H/L aliases.  After the low bytes of r2 (resp. r1) have
   * been read, the low 16 bits of r4 (resp. r3) are moved into it with
   * movw and the register is rotated by 16, so that the remaining bytes
   * of both words can later be read through r2 (resp. r1) while r4
   * (resp. r3) is shifted right by 16 and then reused as an output.
   *
   * Key mixing: the XORs for ra/rb are issued after r5 and r6 have been
   * loaded with movl but before r3/r4 are; the XORs for rc/rd come near
   * the end.  ra/rb are therefore expected to name r5/r6 and rc/rd to
   * name r3/r4 (in either order), which is how both users in this file
   * invoke the macro.
   *
   * The statement order is deliberate (inputs are consumed and then
   * overwritten); do not reorder without re-checking every register.
   */
  83#define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \
  84        movzbl  r2 ## H,r5 ## E;        \
  85        movzbl  r2 ## L,r6 ## E;        \
  86        movl    TAB+1024(,r5,4),r5 ## E;\
  87        movw    r4 ## X,r2 ## X;        \
  88        movl    TAB(,r6,4),r6 ## E;     \
  89        roll    $16,r2 ## E;            \
  90        shrl    $16,r4 ## E;            \
  91        movzbl  r4 ## H,r7 ## E;        \
  92        movzbl  r4 ## L,r4 ## E;        \
  93        xorl    OFFSET(r8),ra ## E;     \
  94        xorl    OFFSET+4(r8),rb ## E;   \
  95        xorl    TAB+3072(,r7,4),r5 ## E;\
  96        xorl    TAB+2048(,r4,4),r6 ## E;\
  97        movzbl  r1 ## L,r7 ## E;        \
  98        movzbl  r1 ## H,r4 ## E;        \
  99        movl    TAB+1024(,r4,4),r4 ## E;\
 100        movw    r3 ## X,r1 ## X;        \
 101        roll    $16,r1 ## E;            \
 102        shrl    $16,r3 ## E;            \
 103        xorl    TAB(,r7,4),r5 ## E;     \
 104        movzbl  r3 ## H,r7 ## E;        \
 105        movzbl  r3 ## L,r3 ## E;        \
 106        xorl    TAB+3072(,r7,4),r4 ## E;\
 107        xorl    TAB+2048(,r3,4),r5 ## E;\
 108        movzbl  r1 ## H,r7 ## E;        \
 109        movzbl  r1 ## L,r3 ## E;        \
 110        shrl    $16,r1 ## E;            \
 111        xorl    TAB+3072(,r7,4),r6 ## E;\
 112        movl    TAB+2048(,r3,4),r3 ## E;\
 113        movzbl  r1 ## H,r7 ## E;        \
 114        movzbl  r1 ## L,r1 ## E;        \
 115        xorl    TAB+1024(,r7,4),r6 ## E;\
 116        xorl    TAB(,r1,4),r3 ## E;     \
 117        movzbl  r2 ## H,r1 ## E;        \
 118        movzbl  r2 ## L,r7 ## E;        \
 119        shrl    $16,r2 ## E;            \
 120        xorl    TAB+3072(,r1,4),r3 ## E;\
 121        xorl    TAB+2048(,r7,4),r4 ## E;\
 122        movzbl  r2 ## H,r1 ## E;        \
 123        movzbl  r2 ## L,r2 ## E;        \
 124        xorl    OFFSET+8(r8),rc ## E;   \
 125        xorl    OFFSET+12(r8),rd ## E;  \
 126        xorl    TAB+1024(,r1,4),r3 ## E;\
 127        xorl    TAB(,r2,4),r4 ## E;
 128
 /*
  * move_regs - copy two 32-bit registers: r3 into r1 and r4 into r2.
  * Used after a round to bring two of the output words back into the
  * registers the next round reads its input from.
  */
 129#define move_regs(r1,r2,r3,r4) \
 130        movl    r3 ## E,r1 ## E;        \
 131        movl    r4 ## E,r2 ## E;
 132
 /*
  * entry - instantiate prologue() with this file's register assignment.
  *
  *   R2 (%rbx) is saved in R8 and R7 (%rbp) in R9; both are overwritten
  *   by the round code and copied back by the `return` macro.
  *   R4 (%rdx) is the input pointer, R6 (%rdi) the base pointer used
  *   with KEY and the 480 offset, and R5 (%rsi) is copied to R11.  In
  *   the System V AMD64 calling convention these three registers carry
  *   the third, first and second integer arguments respectively.
  *   The four input words end up in R1E, R2E, R3E and R4E, and R10
  *   becomes the round-key pointer.
  */
 133#define entry(FUNC,KEY,B128,B192) \
 134        prologue(FUNC,KEY,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11)
 135
 /*
  * return - instantiate epilogue(): copy R8 back to R2 (%rbx) and R9 back
  * to R7 (%rbp), store R5E, R6E, R3E and R4E as the four output words at
  * offsets 0, 4, 8 and 12 from R11, then ret.
  */
 136#define return epilogue(R8,R2,R9,R7,R5,R6,R3,R4,R11)
 137
 /*
  * encrypt_round - one round() with the state read from R1..R4 and the
  * round key addressed as OFFSET(R10); R7 is scratch.  round() leaves its
  * output in R5, R6, R3 and R4, so move_regs then copies R5 to R1 and R6
  * to R2, putting the state back in R1..R4 for the next round.
  */
 138#define encrypt_round(TAB,OFFSET) \
 139        round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
 140        move_regs(R1,R2,R5,R6)
 141
 /*
  * encrypt_final - same round() invocation as encrypt_round but without
  * the trailing move_regs: the result stays in R5, R6, R3 and R4, which
  * are the registers the `return` macro stores to the output buffer.
  */
 142#define encrypt_final(TAB,OFFSET) \
 143        round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4)
 144
 /*
  * decrypt_round - one round() with the register pairs swapped relative
  * to encrypt_round (R2,R1,R4,R3 as inputs and R6,R5 as the first two
  * outputs), while the key words are still XORed into R5, R6, R3 and R4
  * in that order.  move_regs then copies R5 to R1 and R6 to R2 so the
  * state is back in R1..R4 for the next round.
  */
 145#define decrypt_round(TAB,OFFSET) \
 146        round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4) \
 147        move_regs(R1,R2,R5,R6)
 148
 /*
  * decrypt_final - same round() invocation as decrypt_round but without
  * the trailing move_regs: the result stays in R5, R6, R3 and R4, which
  * are the registers the `return` macro stores to the output buffer.
  */
 149#define decrypt_final(TAB,OFFSET) \
 150        round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4)
 151
 152/* void aes_enc_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in)
  *
  * Encrypts the 16-byte block at `in` and writes 16 bytes to `out`.
  * The round keys are read starting at offset 0 from the first argument
  * (KEY = 0).  Regular rounds use crypto_ft_tab, the last round uses
  * crypto_fl_tab.
  *
  * The rounds are fully unrolled.  entry() branches into the sequence
  * according to the 32-bit value at offset 480 from the first argument,
  * after adjusting the round-key pointer so that the first round
  * executed always uses the key words 16 bytes into the key array:
  *   fall through  13 rounds + final = 14
  *   enc192        11 rounds + final = 12
  *   enc128         9 rounds + final = 10
  *
  * No stack is used; %rbx and %rbp are kept in %r8/%r9 for the duration
  * and restored by `return`.
  *
  * NOTE(review): the code indexes the key schedule directly from the
  * first argument, so it is presumably a pointer to the AES context
  * rather than to the tfm itself - confirm against the C prototype.
  */
 153
 154        entry(aes_enc_blk,0,enc128,enc192)
 155        encrypt_round(crypto_ft_tab,-96)
 156        encrypt_round(crypto_ft_tab,-80)
        /* entered here when the value at 480 equals 24 */
 157enc192: encrypt_round(crypto_ft_tab,-64)
 158        encrypt_round(crypto_ft_tab,-48)
        /* entered here when the value at 480 is below 24 */
 159enc128: encrypt_round(crypto_ft_tab,-32)
 160        encrypt_round(crypto_ft_tab,-16)
 161        encrypt_round(crypto_ft_tab,  0)
 162        encrypt_round(crypto_ft_tab, 16)
 163        encrypt_round(crypto_ft_tab, 32)
 164        encrypt_round(crypto_ft_tab, 48)
 165        encrypt_round(crypto_ft_tab, 64)
 166        encrypt_round(crypto_ft_tab, 80)
 167        encrypt_round(crypto_ft_tab, 96)
 168        encrypt_final(crypto_fl_tab,112)
 169        return
 170
 171/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in)
  *
  * Decrypts the 16-byte block at `in` and writes 16 bytes to `out`.
  * The round keys are read starting at offset 240 from the first
  * argument (KEY = 240).  Regular rounds use crypto_it_tab, the last
  * round uses crypto_il_tab.
  *
  * The rounds are fully unrolled.  entry() branches into the sequence
  * according to the 32-bit value at offset 480 from the first argument,
  * after adjusting the round-key pointer so that the first round
  * executed always uses the key words 16 bytes into the key array:
  *   fall through  13 rounds + final = 14
  *   dec192        11 rounds + final = 12
  *   dec128         9 rounds + final = 10
  *
  * No stack is used; %rbx and %rbp are kept in %r8/%r9 for the duration
  * and restored by `return`.
  *
  * NOTE(review): the code indexes the key schedule directly from the
  * first argument, so it is presumably a pointer to the AES context
  * rather than to the tfm itself - confirm against the C prototype.
  */
 172
 173        entry(aes_dec_blk,240,dec128,dec192)
 174        decrypt_round(crypto_it_tab,-96)
 175        decrypt_round(crypto_it_tab,-80)
        /* entered here when the value at 480 equals 24 */
 176dec192: decrypt_round(crypto_it_tab,-64)
 177        decrypt_round(crypto_it_tab,-48)
        /* entered here when the value at 480 is below 24 */
 178dec128: decrypt_round(crypto_it_tab,-32)
 179        decrypt_round(crypto_it_tab,-16)
 180        decrypt_round(crypto_it_tab,  0)
 181        decrypt_round(crypto_it_tab, 16)
 182        decrypt_round(crypto_it_tab, 32)
 183        decrypt_round(crypto_it_tab, 48)
 184        decrypt_round(crypto_it_tab, 64)
 185        decrypt_round(crypto_it_tab, 80)
 186        decrypt_round(crypto_it_tab, 96)
 187        decrypt_final(crypto_il_tab,112)
 188        return
 189