linux/arch/powerpc/crypto/aes-spe-modes.S
<<
>>
Prefs
   1/*
   2 * AES modes (ECB/CBC/CTR/XTS) for PPC AES implementation
   3 *
   4 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
   5 *
   6 * This program is free software; you can redistribute it and/or modify it
   7 * under the terms of the GNU General Public License as published by the Free
   8 * Software Foundation; either version 2 of the License, or (at your option)
   9 * any later version.
  10 *
  11 */
  12
  13#include <asm/ppc_asm.h>
  14#include "aes-spe-regs.h"
  15
   /*
    * Endian dependent accessors for data (rSP/rDP) and IV (rIP) words.
    *
    * Big endian: plain lwz/stw with displacement "off"; the pointers are left
    * untouched per word and NEXT_BLOCK advances rSP/rDP by 16 per block.
    *
    * Little endian: byte reversed lwbrx/stwbrx in indexed form, so "off" is
    * ignored and the pointer is advanced by 4 after every word. Four accesses
    * move the pointer a full block, hence NEXT_BLOCK is empty, START_IV
    * rewinds rIP by 16 and CBC_DEC/CTR_DEC include the extra 16 bytes.
    */
   16#ifdef __BIG_ENDIAN__                   /* Macros for big endian builds */
   17
   18#define LOAD_DATA(reg, off) \
   19        lwz             reg,off(rSP);   /* load with offset             */
   20#define SAVE_DATA(reg, off) \
   21        stw             reg,off(rDP);   /* save with offset             */
   22#define NEXT_BLOCK \
   23        addi            rSP,rSP,16;     /* increment pointers per block */ \
   24        addi            rDP,rDP,16;
   25#define LOAD_IV(reg, off) \
   26        lwz             reg,off(rIP);   /* IV loading with offset       */
   27#define SAVE_IV(reg, off) \
   28        stw             reg,off(rIP);   /* IV saving with offset        */
   29#define START_IV                        /* nothing to reset             */
   30#define CBC_DEC 16                      /* CBC decrement per block      */
   31#define CTR_DEC 1                       /* CTR decrement one byte       */
   32
   33#else                                   /* Macros for little endian     */
   34
   35#define LOAD_DATA(reg, off) \
   36        lwbrx           reg,0,rSP;      /* load reversed                */ \
   37        addi            rSP,rSP,4;      /* and increment pointer        */
   38#define SAVE_DATA(reg, off) \
   39        stwbrx          reg,0,rDP;      /* save reversed                */ \
   40        addi            rDP,rDP,4;      /* and increment pointer        */
   41#define NEXT_BLOCK                      /* nothing to do                */
   42#define LOAD_IV(reg, off) \
   43        lwbrx           reg,0,rIP;      /* load reversed                */ \
   44        addi            rIP,rIP,4;      /* and increment pointer        */
   45#define SAVE_IV(reg, off) \
   46        stwbrx          reg,0,rIP;      /* save reversed                */ \
   47        addi            rIP,rIP,4;      /* and increment pointer        */
   48#define START_IV \
   49        subi            rIP,rIP,16;     /* must reset pointer           */
   50#define CBC_DEC 32                      /* 2 blocks because of incs     */
   51#define CTR_DEC 17                      /* 1 block because of incs      */
   52
   53#endif
  54
   /*
    * Spill/reload helpers for the extra 32 bit registers used by a mode.
    * INITIALIZE_CRYPT/FINALIZE_CRYPT paste their nr32bitregs argument into
    * SAVE_<n>_REGS/LOAD_<n>_REGS, so only 0, 4 and 8 are valid counts.
    * Slots: rI0-rI3 at 96..108(r1), rG0-rG3 at 112..124(r1).
    */
   55#define SAVE_0_REGS
   56#define LOAD_0_REGS
   57
   58#define SAVE_4_REGS \
   59        stw             rI0,96(r1);     /* save 32 bit registers        */ \
   60        stw             rI1,100(r1);                                       \
   61        stw             rI2,104(r1);                                       \
   62        stw             rI3,108(r1);
   63
   64#define LOAD_4_REGS \
   65        lwz             rI0,96(r1);     /* restore 32 bit registers     */ \
   66        lwz             rI1,100(r1);                                       \
   67        lwz             rI2,104(r1);                                       \
   68        lwz             rI3,108(r1);
   69
   70#define SAVE_8_REGS \
   71        SAVE_4_REGS                                                        \
   72        stw             rG0,112(r1);    /* save 32 bit registers        */ \
   73        stw             rG1,116(r1);                                       \
   74        stw             rG2,120(r1);                                       \
   75        stw             rG3,124(r1);
   76
   77#define LOAD_8_REGS \
   78        LOAD_4_REGS                                                        \
   79        lwz             rG0,112(r1);    /* restore 32 bit registers     */ \
   80        lwz             rG1,116(r1);                                       \
   81        lwz             rG2,120(r1);                                       \
   82        lwz             rG3,124(r1);
  83
   /*
    * INITIALIZE_CRYPT(tab, nr32bitregs) - common function prologue.
    *
    * Allocates a 160 byte stack frame, stores LR at 8(r1), saves r14-r23 as
    * 64 bit values (evstdw) at 16..88(r1), loads the address of "tab" into
    * rT0, copies the key pointer rKP to rKS and finally spills nr32bitregs
    * (0, 4 or 8) extra registers via SAVE_<n>_REGS. Clobbers r0.
    */
   84#define INITIALIZE_CRYPT(tab,nr32bitregs) \
   85        mflr            r0;                                                \
   86        stwu            r1,-160(r1);    /* create stack frame           */ \
   87        lis             rT0,tab@h;      /* en-/decryption table pointer */ \
   88        stw             r0,8(r1);       /* save link register           */ \
   89        ori             rT0,rT0,tab@l;                                     \
   90        evstdw          r14,16(r1);                                        \
   91        mr              rKS,rKP;                                           \
   92        evstdw          r15,24(r1);     /* We must save non volatile    */ \
   93        evstdw          r16,32(r1);     /* registers. Take the chance   */ \
   94        evstdw          r17,40(r1);     /* and save the SPE part too    */ \
   95        evstdw          r18,48(r1);                                        \
   96        evstdw          r19,56(r1);                                        \
   97        evstdw          r20,64(r1);                                        \
   98        evstdw          r21,72(r1);                                        \
   99        evstdw          r22,80(r1);                                        \
  100        evstdw          r23,88(r1);                                        \
  101        SAVE_##nr32bitregs##_REGS
 102
  /*
   * FINALIZE_CRYPT(nr32bitregs) - common function epilogue.
   *
   * Reverses INITIALIZE_CRYPT: reloads the saved LR value, r14-r23 (evldw)
   * and the nr32bitregs extra registers, then overwrites the first word of
   * each of the ten 8 byte save slots at 16..88(r1) with zero and releases
   * the 160 byte frame. It does not return; the user of the macro issues
   * blr. r0 is left as 0.
   * NOTE(review): only the word at the start of every slot is cleared,
   * presumably because that is the SPE half that carried cipher state -
   * confirm.
   */
  103#define FINALIZE_CRYPT(nr32bitregs) \
  104        lwz             r0,8(r1);                                          \
  105        evldw           r14,16(r1);     /* restore SPE registers        */ \
  106        evldw           r15,24(r1);                                        \
  107        evldw           r16,32(r1);                                        \
  108        evldw           r17,40(r1);                                        \
  109        evldw           r18,48(r1);                                        \
  110        evldw           r19,56(r1);                                        \
  111        evldw           r20,64(r1);                                        \
  112        evldw           r21,72(r1);                                        \
  113        evldw           r22,80(r1);                                        \
  114        evldw           r23,88(r1);                                        \
  115        LOAD_##nr32bitregs##_REGS                                          \
  116        mtlr            r0;             /* restore link register        */ \
  117        xor             r0,r0,r0;                                          \
  118        stw             r0,16(r1);      /* delete sensitive data        */ \
  119        stw             r0,24(r1);      /* that we might have pushed    */ \
  120        stw             r0,32(r1);      /* from other context that runs */ \
  121        stw             r0,40(r1);      /* the same code                */ \
  122        stw             r0,48(r1);                                         \
  123        stw             r0,56(r1);                                         \
  124        stw             r0,64(r1);                                         \
  125        stw             r0,72(r1);                                         \
  126        stw             r0,80(r1);                                         \
  127        stw             r0,88(r1);                                         \
  128        addi            r1,r1,160;      /* cleanup stack frame          */
 129
  /*
   * ENDIAN_SWAP(t0, t1, s0, s1) - byte reverse two words:
   * t0 = byte-reversed s0, t1 = byte-reversed s1.
   *
   * The rotate right by 8 already yields two of the four result bytes; the
   * rlwimi pairs insert the remaining two (bits 8-15 and 24-31) taken from
   * the source rotated left by 8. The sources are read after the targets
   * have been written, so t0/t1 must not alias s0/s1.
   */
  130#define ENDIAN_SWAP(t0, t1, s0, s1) \
  131        rotrwi          t0,s0,8;        /* swap endianness for 2 GPRs   */ \
  132        rotrwi          t1,s1,8;                                           \
  133        rlwimi          t0,s0,8,8,15;                                      \
  134        rlwimi          t1,s1,8,8,15;                                      \
  135        rlwimi          t0,s0,8,24,31;                                     \
  136        rlwimi          t1,s1,8,24,31;
 137
  /*
   * GF128_MUL(d0, d1, d2, d3, t0) - multiply the 128 bit value d3:d2:d1:d0
   * (d0 = least significant word) by x in GF(2^128): shift left by one bit
   * and, when the top bit of d3 was set, xor 0x87 into d0.
   *
   * cmpwi/iselgt select t0 = 0 if d3 > -1 (top bit clear), else t0 = 0x87.
   * Each rlwimi copies the top bit of the next lower word over the top bit
   * of the word above; the following rotlwi by 1 then moves it into the
   * least significant bit. Clobbers t0 and CR0.
   */
  138#define GF128_MUL(d0, d1, d2, d3, t0) \
  139        li              t0,0x87;        /* multiplication in GF128      */ \
  140        cmpwi           d3,-1;                                             \
  141        iselgt          t0,0,t0;                                           \
  142        rlwimi          d3,d2,0,0,0;    /* propagate "carry" bits       */ \
  143        rotlwi          d3,d3,1;                                           \
  144        rlwimi          d2,d1,0,0,0;                                       \
  145        rotlwi          d2,d2,1;                                           \
  146        rlwimi          d1,d0,0,0,0;                                       \
  147        slwi            d0,d0,1;        /* shift left 128 bit           */ \
  148        rotlwi          d1,d1,1;                                           \
  149        xor             d0,d0,t0;
 150
  /*
   * START_KEY(d0, d1, d2, d3) - initial key addition.
   *
   * Loads four key words from 0..12(rKP) into rW0-rW3, sets rD0-rD3 to
   * d0-d3 xor those words and moves rRR into CTR (presumably the round
   * loop count consumed by ppc_encrypt_block/ppc_decrypt_block - confirm).
   */
  151#define START_KEY(d0, d1, d2, d3) \
  152        lwz             rW0,0(rKP);                                        \
  153        mtctr           rRR;                                               \
  154        lwz             rW1,4(rKP);                                        \
  155        lwz             rW2,8(rKP);                                        \
  156        lwz             rW3,12(rKP);                                       \
  157        xor             rD0,d0,rW0;                                        \
  158        xor             rD1,d1,rW1;                                        \
  159        xor             rD2,d2,rW2;                                        \
  160        xor             rD3,d3,rW3;
 161
  162/*
  163 * ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc,
  164 *                 u32 rounds)
  165 *
  166 * called from glue layer to encrypt a single 16 byte block
  167 * round values are AES128 = 4, AES192 = 5, AES256 = 6
  168 *
      * The block is read through rSP and written through rDP, the key is
      * read through rKP and rRR goes to CTR. These are register aliases
      * from aes-spe-regs.h; presumably they map to in/out/key_enc/rounds -
      * confirm against that header. The rounds themselves are done by
      * ppc_encrypt_block, which is defined outside this file.
  169 */
  170_GLOBAL(ppc_encrypt_aes)
  171        INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
  172        LOAD_DATA(rD0, 0)               /* fetch the plaintext block    */
  173        LOAD_DATA(rD1, 4)
  174        LOAD_DATA(rD2, 8)
  175        LOAD_DATA(rD3, 12)
  176        START_KEY(rD0, rD1, rD2, rD3)   /* xor with first 4 key words   */
  177        bl              ppc_encrypt_block
  178        xor             rD0,rD0,rW0     /* xor with rW0-rW3, presumably */
  179        SAVE_DATA(rD0, 0)               /* the last round key left by   */
  180        xor             rD1,rD1,rW1     /* ppc_encrypt_block - confirm  */
  181        SAVE_DATA(rD1, 4)
  182        xor             rD2,rD2,rW2
  183        SAVE_DATA(rD2, 8)
  184        xor             rD3,rD3,rW3
  185        SAVE_DATA(rD3, 12)
  186        FINALIZE_CRYPT(0)
  187        blr
 188
  189/*
  190 * ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec,
  191 *                 u32 rounds)
  192 *
  193 * called from glue layer to decrypt a single 16 byte block
  194 * round values are AES128 = 4, AES192 = 5, AES256 = 6
  195 *
      * Same flow as ppc_encrypt_aes, but with the decryption table in rT0
      * and ppc_decrypt_block (defined outside this file) doing the rounds.
      * rT1 is additionally set to rT0 + 4096; presumably a second lookup
      * table located behind the 4K decryption table - confirm.
  196 */
  197_GLOBAL(ppc_decrypt_aes)
  198        INITIALIZE_CRYPT(PPC_AES_4K_DECTAB,0)
  199        LOAD_DATA(rD0, 0)               /* fetch the ciphertext block   */
  200        addi            rT1,rT0,4096    /* second table pointer         */
  201        LOAD_DATA(rD1, 4)
  202        LOAD_DATA(rD2, 8)
  203        LOAD_DATA(rD3, 12)
  204        START_KEY(rD0, rD1, rD2, rD3)   /* xor with first 4 key words   */
  205        bl              ppc_decrypt_block
  206        xor             rD0,rD0,rW0     /* xor with rW0-rW3, presumably */
  207        SAVE_DATA(rD0, 0)               /* the last round key left by   */
  208        xor             rD1,rD1,rW1     /* ppc_decrypt_block - confirm  */
  209        SAVE_DATA(rD1, 4)
  210        xor             rD2,rD2,rW2
  211        SAVE_DATA(rD2, 8)
  212        xor             rD3,rD3,rW3
  213        SAVE_DATA(rD3, 12)
  214        FINALIZE_CRYPT(0)
  215        blr
 216
  217/*
  218 * ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc,
  219 *                 u32 rounds, u32 bytes);
  220 *
  221 * called from glue layer to encrypt multiple blocks via ECB
  222 * Bytes must be larger or equal 16 and only whole blocks are
  223 * processed. round values are AES128 = 4, AES192 = 5 and
  224 * AES256 = 6
  225 *
      * rLN holds the remaining byte count. It is reduced by 16 per pass and
      * compared against 15 early in the loop body; the result in CR0 is
      * only consumed by the branch at the loop end, so the code relies on
      * CR0 surviving the call to ppc_encrypt_block. The first block is
      * always processed, which is why bytes must be at least 16.
  226 */
  227_GLOBAL(ppc_encrypt_ecb)
  228        INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
  229ppc_encrypt_ecb_loop:
  230        LOAD_DATA(rD0, 0)
  231        mr              rKP,rKS         /* key pointer copy from prologue */
  232        LOAD_DATA(rD1, 4)
  233        subi            rLN,rLN,16      /* account for this block       */
  234        LOAD_DATA(rD2, 8)
  235        cmpwi           rLN,15          /* gt: another whole block left */
  236        LOAD_DATA(rD3, 12)
  237        START_KEY(rD0, rD1, rD2, rD3)
  238        bl              ppc_encrypt_block
  239        xor             rD0,rD0,rW0
  240        SAVE_DATA(rD0, 0)
  241        xor             rD1,rD1,rW1
  242        SAVE_DATA(rD1, 4)
  243        xor             rD2,rD2,rW2
  244        SAVE_DATA(rD2, 8)
  245        xor             rD3,rD3,rW3
  246        SAVE_DATA(rD3, 12)
  247        NEXT_BLOCK
  248        bt              gt,ppc_encrypt_ecb_loop /* uses cmpwi from above */
  249        FINALIZE_CRYPT(0)
  250        blr
 251
  252/*
  253 * ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec,
  254 *                 u32 rounds, u32 bytes);
  255 *
  256 * called from glue layer to decrypt multiple blocks via ECB
  257 * Bytes must be larger or equal 16 and only whole blocks are
  258 * processed. round values are AES128 = 4, AES192 = 5 and
  259 * AES256 = 6
  260 *
      * Loop structure is the same as in ppc_encrypt_ecb: rLN is the
      * remaining byte count and the compare against 15 is consumed by the
      * branch at the loop end, across the call to ppc_decrypt_block.
      * rT1 = rT0 + 4096 is set up once before the loop; presumably a second
      * lookup table behind the 4K decryption table - confirm.
  261 */
  262_GLOBAL(ppc_decrypt_ecb)
  263        INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 0)
  264        addi            rT1,rT0,4096    /* second table pointer         */
  265ppc_decrypt_ecb_loop:
  266        LOAD_DATA(rD0, 0)
  267        mr              rKP,rKS         /* key pointer copy from prologue */
  268        LOAD_DATA(rD1, 4)
  269        subi            rLN,rLN,16      /* account for this block       */
  270        LOAD_DATA(rD2, 8)
  271        cmpwi           rLN,15          /* gt: another whole block left */
  272        LOAD_DATA(rD3, 12)
  273        START_KEY(rD0, rD1, rD2, rD3)
  274        bl              ppc_decrypt_block
  275        xor             rD0,rD0,rW0
  276        SAVE_DATA(rD0, 0)
  277        xor             rD1,rD1,rW1
  278        SAVE_DATA(rD1, 4)
  279        xor             rD2,rD2,rW2
  280        SAVE_DATA(rD2, 8)
  281        xor             rD3,rD3,rW3
  282        SAVE_DATA(rD3, 12)
  283        NEXT_BLOCK
  284        bt              gt,ppc_decrypt_ecb_loop /* uses cmpwi from above */
  285        FINALIZE_CRYPT(0)
  286        blr
 287
  288/*
  289 * ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc,
  290 *                 u32 rounds, u32 bytes, u8 *iv);
  291 *
  292 * called from glue layer to encrypt multiple blocks via CBC
  293 * Bytes must be larger or equal 16 and only whole blocks are
  294 * processed. round values are AES128 = 4, AES192 = 5 and
  295 * AES256 = 6
  296 *
      * rI0-rI3 carry the chaining value: they start as the IV read through
      * rIP, are xored into every plaintext block before encryption and are
      * then replaced by the ciphertext just produced. After the last block
      * they are written back through rIP, so the IV buffer ends up holding
      * the last ciphertext block.
  297 */
  298_GLOBAL(ppc_encrypt_cbc)
  299        INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
  300        LOAD_IV(rI0, 0)
  301        LOAD_IV(rI1, 4)
  302        LOAD_IV(rI2, 8)
  303        LOAD_IV(rI3, 12)
  304ppc_encrypt_cbc_loop:
  305        LOAD_DATA(rD0, 0)
  306        mr              rKP,rKS         /* key pointer copy from prologue */
  307        LOAD_DATA(rD1, 4)
  308        subi            rLN,rLN,16      /* account for this block       */
  309        LOAD_DATA(rD2, 8)
  310        cmpwi           rLN,15          /* gt: another whole block left */
  311        LOAD_DATA(rD3, 12)
  312        xor             rD0,rD0,rI0     /* chain in IV / previous block */
  313        xor             rD1,rD1,rI1
  314        xor             rD2,rD2,rI2
  315        xor             rD3,rD3,rI3
  316        START_KEY(rD0, rD1, rD2, rD3)
  317        bl              ppc_encrypt_block
  318        xor             rI0,rD0,rW0     /* ciphertext is the next       */
  319        SAVE_DATA(rI0, 0)               /* chaining value               */
  320        xor             rI1,rD1,rW1
  321        SAVE_DATA(rI1, 4)
  322        xor             rI2,rD2,rW2
  323        SAVE_DATA(rI2, 8)
  324        xor             rI3,rD3,rW3
  325        SAVE_DATA(rI3, 12)
  326        NEXT_BLOCK
  327        bt              gt,ppc_encrypt_cbc_loop /* uses cmpwi from above */
  328        START_IV                        /* rewind rIP (little endian)   */
  329        SAVE_IV(rI0, 0)                 /* hand back last ciphertext    */
  330        SAVE_IV(rI1, 4)
  331        SAVE_IV(rI2, 8)
  332        SAVE_IV(rI3, 12)
  333        FINALIZE_CRYPT(4)
  334        blr
 335
  336/*
  337 * ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec,
  338 *                 u32 rounds, u32 bytes, u8 *iv);
  339 *
  340 * called from glue layer to decrypt multiple blocks via CBC
  341 * round values are AES128 = 4, AES192 = 5, AES256 = 6
  342 *
      * Blocks are processed from the last to the first. rLN is rounded down
      * to a multiple of 16 and reduced by 16, giving the offset of the last
      * block, which is added to rSP and rDP. The old IV is kept in rI0-rI3
      * and the last ciphertext block is stored through rIP as the new IV
      * before anything is written through rDP.
      *
      * Loop: decrypt the block held in rD0-rD3, step rSP back to the
      * previous block, load it and xor it (together with rW0-rW3) onto the
      * decrypted state to form the plaintext. The block loaded for the xor
      * is the one decrypted in the next pass. The first block is handled
      * at ppc_decrypt_cbc_end, where the old IV is used for the xor.
  343 */
  344_GLOBAL(ppc_decrypt_cbc)
  345        INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 4)
  346        li              rT1,15
  347        LOAD_IV(rI0, 0)
  348        andc            rLN,rLN,rT1     /* round down to whole blocks   */
  349        LOAD_IV(rI1, 4)
  350        subi            rLN,rLN,16      /* offset of the last block     */
  351        LOAD_IV(rI2, 8)
  352        add             rSP,rSP,rLN     /* reverse processing           */
  353        LOAD_IV(rI3, 12)
  354        add             rDP,rDP,rLN
  355        LOAD_DATA(rD0, 0)               /* last ciphertext block        */
  356        addi            rT1,rT0,4096    /* second table pointer         */
  357        LOAD_DATA(rD1, 4)
  358        LOAD_DATA(rD2, 8)
  359        LOAD_DATA(rD3, 12)
  360        START_IV                        /* rewind rIP (little endian)   */
  361        SAVE_IV(rD0, 0)                 /* last ciphertext = new IV     */
  362        SAVE_IV(rD1, 4)
  363        SAVE_IV(rD2, 8)
  364        cmpwi           rLN,16
  365        SAVE_IV(rD3, 12)
  366        bt              lt,ppc_decrypt_cbc_end  /* only one block       */
  367ppc_decrypt_cbc_loop:
  368        mr              rKP,rKS         /* key pointer copy from prologue */
  369        START_KEY(rD0, rD1, rD2, rD3)
  370        bl              ppc_decrypt_block
  371        subi            rLN,rLN,16
  372        subi            rSP,rSP,CBC_DEC /* back to the previous block   */
  373        xor             rW0,rD0,rW0
  374        LOAD_DATA(rD0, 0)               /* previous ciphertext block    */
  375        xor             rW1,rD1,rW1
  376        LOAD_DATA(rD1, 4)
  377        xor             rW2,rD2,rW2
  378        LOAD_DATA(rD2, 8)
  379        xor             rW3,rD3,rW3
  380        LOAD_DATA(rD3, 12)
  381        xor             rW0,rW0,rD0     /* chain with previous block    */
  382        SAVE_DATA(rW0, 0)
  383        xor             rW1,rW1,rD1
  384        SAVE_DATA(rW1, 4)
  385        xor             rW2,rW2,rD2
  386        SAVE_DATA(rW2, 8)
  387        xor             rW3,rW3,rD3
  388        SAVE_DATA(rW3, 12)
  389        cmpwi           rLN,15          /* gt: more than first block left */
  390        subi            rDP,rDP,CBC_DEC /* output one block back        */
  391        bt              gt,ppc_decrypt_cbc_loop
  392ppc_decrypt_cbc_end:
  393        mr              rKP,rKS
  394        START_KEY(rD0, rD1, rD2, rD3)
  395        bl              ppc_decrypt_block
  396        xor             rW0,rW0,rD0
  397        xor             rW1,rW1,rD1
  398        xor             rW2,rW2,rD2
  399        xor             rW3,rW3,rD3
  400        xor             rW0,rW0,rI0     /* decrypt with initial IV      */
  401        SAVE_DATA(rW0, 0)
  402        xor             rW1,rW1,rI1
  403        SAVE_DATA(rW1, 4)
  404        xor             rW2,rW2,rI2
  405        SAVE_DATA(rW2, 8)
  406        xor             rW3,rW3,rI3
  407        SAVE_DATA(rW3, 12)
  408        FINALIZE_CRYPT(4)
  409        blr
 410
  411/*
  412 * ppc_crypt_ctr(u8 *out, const u8 *in, u32 *key_enc,
  413 *               u32 rounds, u32 bytes, u8 *iv);
  414 *
  415 * called from glue layer to encrypt/decrypt multiple blocks
  416 * via CTR. Number of bytes does not need to be a multiple of
  417 * 16. Round values are AES128 = 4, AES192 = 5, AES256 = 6
  418 *
      * rI0-rI3 hold the counter block read through rIP; rI3 is the least
      * significant word (addic on rI3, carry propagated with addze up to
      * rI0). Whole blocks are handled in ppc_crypt_ctr_loop. A trailing
      * partial block is handled bytewise: the key stream block is stored
      * into the IV buffer as scratch space, rLN bytes of it are xored with
      * the input, and afterwards the incremented counter overwrites the
      * buffer again at ppc_crypt_ctr_end.
  419 */
  420_GLOBAL(ppc_crypt_ctr)
  421        INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
  422        LOAD_IV(rI0, 0)
  423        LOAD_IV(rI1, 4)
  424        LOAD_IV(rI2, 8)
  425        cmpwi           rLN,16
  426        LOAD_IV(rI3, 12)
  427        START_IV                        /* rewind rIP (little endian)   */
  428        bt              lt,ppc_crypt_ctr_partial /* less than one block */
  429ppc_crypt_ctr_loop:
  430        mr              rKP,rKS         /* key pointer copy from prologue */
  431        START_KEY(rI0, rI1, rI2, rI3)   /* encrypt the counter block    */
  432        bl              ppc_encrypt_block
  433        xor             rW0,rD0,rW0     /* rW0-rW3 = key stream         */
  434        xor             rW1,rD1,rW1
  435        xor             rW2,rD2,rW2
  436        xor             rW3,rD3,rW3
  437        LOAD_DATA(rD0, 0)
  438        subi            rLN,rLN,16
  439        LOAD_DATA(rD1, 4)
  440        LOAD_DATA(rD2, 8)
  441        LOAD_DATA(rD3, 12)
  442        xor             rD0,rD0,rW0     /* data xor key stream          */
  443        SAVE_DATA(rD0, 0)
  444        xor             rD1,rD1,rW1
  445        SAVE_DATA(rD1, 4)
  446        xor             rD2,rD2,rW2
  447        SAVE_DATA(rD2, 8)
  448        xor             rD3,rD3,rW3
  449        SAVE_DATA(rD3, 12)
  450        addic           rI3,rI3,1       /* increase counter                     */
  451        addze           rI2,rI2
  452        addze           rI1,rI1
  453        addze           rI0,rI0
  454        NEXT_BLOCK
  455        cmpwi           rLN,15          /* gt: another whole block left */
  456        bt              gt,ppc_crypt_ctr_loop
  457ppc_crypt_ctr_partial:
  458        cmpwi           rLN,0
  459        bt              eq,ppc_crypt_ctr_end    /* no trailing bytes    */
  460        mr              rKP,rKS
  461        START_KEY(rI0, rI1, rI2, rI3)
  462        bl              ppc_encrypt_block
  463        xor             rW0,rD0,rW0
  464        SAVE_IV(rW0, 0)                 /* park key stream in IV buffer */
  465        xor             rW1,rD1,rW1
  466        SAVE_IV(rW1, 4)
  467        xor             rW2,rD2,rW2
  468        SAVE_IV(rW2, 8)
  469        xor             rW3,rD3,rW3
  470        SAVE_IV(rW3, 12)
  471        mtctr           rLN             /* one pass per remaining byte  */
  472        subi            rIP,rIP,CTR_DEC /* one byte before the buffer   */
  473        subi            rSP,rSP,1       /* bias by -1 for the update    */
  474        subi            rDP,rDP,1       /* form accesses below          */
  475ppc_crypt_ctr_xorbyte:
  476        lbzu            rW4,1(rIP)      /* bytewise xor for partial block       */
  477        lbzu            rW5,1(rSP)
  478        xor             rW4,rW4,rW5
  479        stbu            rW4,1(rDP)
  480        bdnz            ppc_crypt_ctr_xorbyte
  481        subf            rIP,rLN,rIP     /* rIP back to the start of the */
  482        addi            rIP,rIP,1       /* IV buffer                    */
  483        addic           rI3,rI3,1       /* increase counter             */
  484        addze           rI2,rI2
  485        addze           rI1,rI1
  486        addze           rI0,rI0
  487ppc_crypt_ctr_end:
  488        SAVE_IV(rI0, 0)                 /* hand back updated counter    */
  489        SAVE_IV(rI1, 4)
  490        SAVE_IV(rI2, 8)
  491        SAVE_IV(rI3, 12)
  492        FINALIZE_CRYPT(4)
  493        blr
 494
  495/*
  496 * ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc,
  497 *                 u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
  498 *
  499 * called from glue layer to encrypt multiple blocks via XTS
  500 * If key_twk is given, the initial IV encryption will be
  501 * processed too. Round values are AES128 = 4, AES192 = 5,
  502 * AES256 = 6
  503 *
      * The tweak is kept twice: rI0-rI3 in the form that is xored onto the
      * data, and rG0-rG3 as the byte swapped copy that GF128_MUL advances
      * after every block. When rKT is non-zero the IV is first encrypted
      * with the key at rKT to obtain the initial tweak. The current tweak is
      * written back through rIP at the end.
      *
      * NOTE(review): the loop continues while rLN > 0 after subtracting 16
      * (ECB/CBC compare against 15), so a trailing partial block would be
      * processed as a whole block. Presumably the glue layer only passes
      * multiples of 16 - confirm.
  504 */
  505_GLOBAL(ppc_encrypt_xts)
  506        INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 8)
  507        LOAD_IV(rI0, 0)
  508        LOAD_IV(rI1, 4)
  509        LOAD_IV(rI2, 8)
  510        cmpwi           rKT,0           /* tweak key supplied?          */
  511        LOAD_IV(rI3, 12)
  512        bt              eq,ppc_encrypt_xts_notweak
  513        mr              rKP,rKT         /* encrypt IV with tweak key    */
  514        START_KEY(rI0, rI1, rI2, rI3)
  515        bl              ppc_encrypt_block
  516        xor             rI0,rD0,rW0     /* rI0-rI3 = initial tweak      */
  517        xor             rI1,rD1,rW1
  518        xor             rI2,rD2,rW2
  519        xor             rI3,rD3,rW3
  520ppc_encrypt_xts_notweak:
  521        ENDIAN_SWAP(rG0, rG1, rI0, rI1) /* rG = byte swapped tweak      */
  522        ENDIAN_SWAP(rG2, rG3, rI2, rI3)
  523ppc_encrypt_xts_loop:
  524        LOAD_DATA(rD0, 0)
  525        mr              rKP,rKS         /* key pointer copy from prologue */
  526        LOAD_DATA(rD1, 4)
  527        subi            rLN,rLN,16      /* account for this block       */
  528        LOAD_DATA(rD2, 8)
  529        LOAD_DATA(rD3, 12)
  530        xor             rD0,rD0,rI0     /* tweak before encryption      */
  531        xor             rD1,rD1,rI1
  532        xor             rD2,rD2,rI2
  533        xor             rD3,rD3,rI3
  534        START_KEY(rD0, rD1, rD2, rD3)
  535        bl              ppc_encrypt_block
  536        xor             rD0,rD0,rW0
  537        xor             rD1,rD1,rW1
  538        xor             rD2,rD2,rW2
  539        xor             rD3,rD3,rW3
  540        xor             rD0,rD0,rI0     /* tweak after encryption       */
  541        SAVE_DATA(rD0, 0)
  542        xor             rD1,rD1,rI1
  543        SAVE_DATA(rD1, 4)
  544        xor             rD2,rD2,rI2
  545        SAVE_DATA(rD2, 8)
  546        xor             rD3,rD3,rI3
  547        SAVE_DATA(rD3, 12)
  548        GF128_MUL(rG0, rG1, rG2, rG3, rW0) /* next tweak, clobbers CR0 */
  549        ENDIAN_SWAP(rI0, rI1, rG0, rG1)
  550        ENDIAN_SWAP(rI2, rI3, rG2, rG3)
  551        cmpwi           rLN,0           /* gt: bytes left to process    */
  552        NEXT_BLOCK
  553        bt              gt,ppc_encrypt_xts_loop
  554        START_IV                        /* rewind rIP (little endian)   */
  555        SAVE_IV(rI0, 0)                 /* hand back current tweak      */
  556        SAVE_IV(rI1, 4)
  557        SAVE_IV(rI2, 8)
  558        SAVE_IV(rI3, 12)
  559        FINALIZE_CRYPT(8)
  560        blr
 561
  562/*
  563 * ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec,
  564 *                 u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
  565 *
  566 * called from glue layer to decrypt multiple blocks via XTS
  567 * If key_twk is given, the initial IV encryption will be
  568 * processed too. Round values are AES128 = 4, AES192 = 5,
  569 * AES256 = 6
  570 *
      * Mirrors ppc_encrypt_xts with ppc_decrypt_block for the data. The
      * initial tweak is still produced with ppc_encrypt_block; for that call
      * rT0 is temporarily moved down by 4096 and restored afterwards,
      * presumably because the encryption table sits directly in front of
      * the decryption table - confirm. rT1 = decryption table + 4096 is set
      * up once at entry.
      *
      * NOTE(review): as in ppc_encrypt_xts the loop runs while rLN > 0
      * after subtracting 16, so the byte count is presumably always a
      * multiple of 16 - confirm.
  571 */
  572_GLOBAL(ppc_decrypt_xts)
  573        INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 8)
  574        LOAD_IV(rI0, 0)
  575        addi            rT1,rT0,4096    /* second table pointer         */
  576        LOAD_IV(rI1, 4)
  577        LOAD_IV(rI2, 8)
  578        cmpwi           rKT,0           /* tweak key supplied?          */
  579        LOAD_IV(rI3, 12)
  580        bt              eq,ppc_decrypt_xts_notweak
  581        subi            rT0,rT0,4096    /* table for the encrypt call   */
  582        mr              rKP,rKT         /* encrypt IV with tweak key    */
  583        START_KEY(rI0, rI1, rI2, rI3)
  584        bl              ppc_encrypt_block
  585        xor             rI0,rD0,rW0     /* rI0-rI3 = initial tweak      */
  586        xor             rI1,rD1,rW1
  587        xor             rI2,rD2,rW2
  588        xor             rI3,rD3,rW3
  589        addi            rT0,rT0,4096    /* back to the decryption table */
  590ppc_decrypt_xts_notweak:
  591        ENDIAN_SWAP(rG0, rG1, rI0, rI1) /* rG = byte swapped tweak      */
  592        ENDIAN_SWAP(rG2, rG3, rI2, rI3)
  593ppc_decrypt_xts_loop:
  594        LOAD_DATA(rD0, 0)
  595        mr              rKP,rKS         /* key pointer copy from prologue */
  596        LOAD_DATA(rD1, 4)
  597        subi            rLN,rLN,16      /* account for this block       */
  598        LOAD_DATA(rD2, 8)
  599        LOAD_DATA(rD3, 12)
  600        xor             rD0,rD0,rI0     /* tweak before decryption      */
  601        xor             rD1,rD1,rI1
  602        xor             rD2,rD2,rI2
  603        xor             rD3,rD3,rI3
  604        START_KEY(rD0, rD1, rD2, rD3)
  605        bl              ppc_decrypt_block
  606        xor             rD0,rD0,rW0
  607        xor             rD1,rD1,rW1
  608        xor             rD2,rD2,rW2
  609        xor             rD3,rD3,rW3
  610        xor             rD0,rD0,rI0     /* tweak after decryption       */
  611        SAVE_DATA(rD0, 0)
  612        xor             rD1,rD1,rI1
  613        SAVE_DATA(rD1, 4)
  614        xor             rD2,rD2,rI2
  615        SAVE_DATA(rD2, 8)
  616        xor             rD3,rD3,rI3
  617        SAVE_DATA(rD3, 12)
  618        GF128_MUL(rG0, rG1, rG2, rG3, rW0) /* next tweak, clobbers CR0 */
  619        ENDIAN_SWAP(rI0, rI1, rG0, rG1)
  620        ENDIAN_SWAP(rI2, rI3, rG2, rG3)
  621        cmpwi           rLN,0           /* gt: bytes left to process    */
  622        NEXT_BLOCK
  623        bt              gt,ppc_decrypt_xts_loop
  624        START_IV                        /* rewind rIP (little endian)   */
  625        SAVE_IV(rI0, 0)                 /* hand back current tweak      */
  626        SAVE_IV(rI1, 4)
  627        SAVE_IV(rI2, 8)
  628        SAVE_IV(rI3, 12)
  629        FINALIZE_CRYPT(8)
  630        blr
 631