linux/arch/powerpc/crypto/aes-spe-modes.S
<<
>>
Prefs
   1/* SPDX-License-Identifier: GPL-2.0-or-later */
   2/*
   3 * AES modes (ECB/CBC/CTR/XTS) for PPC AES implementation
   4 *
   5 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
   6 */
   7
   8#include <asm/ppc_asm.h>
   9#include "aes-spe-regs.h"
  10
   /*
    * Endian specific accessors for data and IV words. rSP, rDP and rIP are
    * register aliases from aes-spe-regs.h (not shown here); these macros only
    * ever read through rSP, write through rDP and read/write through rIP.
    *
    * Big endian: plain lwz/stw at displacement "off". The accessors leave the
    * pointers alone, so NEXT_BLOCK moves rSP/rDP on by 16 and START_IV has
    * nothing to undo.
    *
    * Little endian: lwbrx/stwbrx (byte reversed) take no displacement, so
    * "off" is ignored and every access bumps the pointer by 4. Four accesses
    * therefore advance it by 16: NEXT_BLOCK is empty and START_IV rewinds rIP
    * by 16 before the IV is accessed again.
    *
    * CBC_DEC and CTR_DEC are the amounts the CBC decrypt and CTR partial block
    * code subtract from their pointers; the little endian values are 16
    * larger to compensate for the auto increment.
    */
   11#ifdef __BIG_ENDIAN__                   /* Macros for big endian builds */
   12
   13#define LOAD_DATA(reg, off) \
   14        lwz             reg,off(rSP);   /* load with offset             */
   15#define SAVE_DATA(reg, off) \
   16        stw             reg,off(rDP);   /* save with offset             */
   17#define NEXT_BLOCK \
   18        addi            rSP,rSP,16;     /* increment pointers per block */ \
   19        addi            rDP,rDP,16;
   20#define LOAD_IV(reg, off) \
   21        lwz             reg,off(rIP);   /* IV loading with offset       */
   22#define SAVE_IV(reg, off) \
   23        stw             reg,off(rIP);   /* IV saving with offset        */
   24#define START_IV                        /* nothing to reset             */
   25#define CBC_DEC 16                      /* CBC decrement per block      */
   26#define CTR_DEC 1                       /* CTR decrement one byte       */
   27
   28#else                                   /* Macros for little endian     */
   29
   30#define LOAD_DATA(reg, off) \
   31        lwbrx           reg,0,rSP;      /* load reversed                */ \
   32        addi            rSP,rSP,4;      /* and increment pointer        */
   33#define SAVE_DATA(reg, off) \
   34        stwbrx          reg,0,rDP;      /* save reversed                */ \
   35        addi            rDP,rDP,4;      /* and increment pointer        */
   36#define NEXT_BLOCK                      /* nothing to do                */
   37#define LOAD_IV(reg, off) \
   38        lwbrx           reg,0,rIP;      /* load reversed                */ \
   39        addi            rIP,rIP,4;      /* and increment pointer        */
   40#define SAVE_IV(reg, off) \
   41        stwbrx          reg,0,rIP;      /* save reversed                */ \
   42        addi            rIP,rIP,4;      /* and increment pointer        */
   43#define START_IV \
   44        subi            rIP,rIP,16;     /* must reset pointer           */
   45#define CBC_DEC 32                      /* 2 blocks because of incs     */
   46#define CTR_DEC 17                      /* 1 block because of incs      */
   47
   48#endif
  49
   50#define SAVE_0_REGS                     /* nr32bitregs == 0: no extra   */
   51#define LOAD_0_REGS                     /* registers to save or restore */
  52
   /*
    * Store rI0-rI3 as 32 bit words at 96..108(r1). Expanded by
    * INITIALIZE_CRYPT(tab, 4) and, through SAVE_8_REGS, by
    * INITIALIZE_CRYPT(tab, 8).
    */
   53#define SAVE_4_REGS \
   54        stw             rI0,96(r1);     /* save 32 bit registers        */ \
   55        stw             rI1,100(r1);                                       \
   56        stw             rI2,104(r1);                                       \
   57        stw             rI3,108(r1);
  58
   /*
    * Reload rI0-rI3 from the 32 bit slots at 96..108(r1), i.e. the slots
    * written by SAVE_4_REGS. Expanded by FINALIZE_CRYPT(4) and, through
    * LOAD_8_REGS, by FINALIZE_CRYPT(8).
    */
   59#define LOAD_4_REGS \
   60        lwz             rI0,96(r1);     /* restore 32 bit registers     */ \
   61        lwz             rI1,100(r1);                                       \
   62        lwz             rI2,104(r1);                                       \
   63        lwz             rI3,108(r1);
  64
   /*
    * Store rI0-rI3 (via SAVE_4_REGS) and additionally rG0-rG3 as 32 bit
    * words at 112..124(r1). Expanded by INITIALIZE_CRYPT(tab, 8).
    */
   65#define SAVE_8_REGS \
   66        SAVE_4_REGS                                                        \
   67        stw             rG0,112(r1);    /* save 32 bit registers        */ \
   68        stw             rG1,116(r1);                                       \
   69        stw             rG2,120(r1);                                       \
   70        stw             rG3,124(r1);
  71
   /*
    * Reload rI0-rI3 (via LOAD_4_REGS) and rG0-rG3 from 112..124(r1).
    * Expanded by FINALIZE_CRYPT(8).
    */
   72#define LOAD_8_REGS \
   73        LOAD_4_REGS                                                        \
   74        lwz             rG0,112(r1);    /* restore 32 bit registers     */ \
   75        lwz             rG1,116(r1);                                       \
   76        lwz             rG2,120(r1);                                       \
   77        lwz             rG3,124(r1);
  78
   /*
    * Common function prologue.
    *   tab         - symbol of the lookup table whose address is put in rT0
    *   nr32bitregs - 0, 4 or 8; pasted into SAVE_<n>_REGS
    *
    * Frame layout (160 bytes, created with stwu):
    *     8(r1)       link register
    *    16..95(r1)   r14-r23, 64 bit each (evstdw)
    *    96..127(r1)  rI0-rI3 / rG0-rG3, 32 bit each (SAVE_4/8_REGS only)
    *
    * rKP is also copied to rKS, which lets the multi block loops restore the
    * key pointer with "mr rKP,rKS" before every block. r0 is clobbered.
    */
   79#define INITIALIZE_CRYPT(tab,nr32bitregs) \
   80        mflr            r0;                                                \
   81        stwu            r1,-160(r1);    /* create stack frame           */ \
   82        lis             rT0,tab@h;      /* en-/decryption table pointer */ \
   83        stw             r0,8(r1);       /* save link register           */ \
   84        ori             rT0,rT0,tab@l;                                     \
   85        evstdw          r14,16(r1);                                        \
   86        mr              rKS,rKP;                                           \
   87        evstdw          r15,24(r1);     /* We must save non volatile    */ \
   88        evstdw          r16,32(r1);     /* registers. Take the chance   */ \
   89        evstdw          r17,40(r1);     /* and save the SPE part too    */ \
   90        evstdw          r18,48(r1);                                        \
   91        evstdw          r19,56(r1);                                        \
   92        evstdw          r20,64(r1);                                        \
   93        evstdw          r21,72(r1);                                        \
   94        evstdw          r22,80(r1);                                        \
   95        evstdw          r23,88(r1);                                        \
   96        SAVE_##nr32bitregs##_REGS
  97
   /*
    * Common function epilogue, counterpart of INITIALIZE_CRYPT.
    *   nr32bitregs - 0, 4 or 8; pasted into LOAD_<n>_REGS
    *
    * Reloads the saved link register value from 8(r1), r14-r23 (64 bit,
    * evldw) and the optional 32 bit registers, then overwrites one word of
    * each of the ten 8 byte save slots at 16..88(r1) with zero before the
    * 160 byte frame is released. The slots at 96..127(r1) are not
    * overwritten. r0 is left as zero. The macro does not return by itself;
    * every user follows it with blr.
    */
   98#define FINALIZE_CRYPT(nr32bitregs) \
   99        lwz             r0,8(r1);                                          \
  100        evldw           r14,16(r1);     /* restore SPE registers        */ \
  101        evldw           r15,24(r1);                                        \
  102        evldw           r16,32(r1);                                        \
  103        evldw           r17,40(r1);                                        \
  104        evldw           r18,48(r1);                                        \
  105        evldw           r19,56(r1);                                        \
  106        evldw           r20,64(r1);                                        \
  107        evldw           r21,72(r1);                                        \
  108        evldw           r22,80(r1);                                        \
  109        evldw           r23,88(r1);                                        \
  110        LOAD_##nr32bitregs##_REGS                                          \
  111        mtlr            r0;             /* restore link register        */ \
  112        xor             r0,r0,r0;                                          \
  113        stw             r0,16(r1);      /* delete sensitive data        */ \
  114        stw             r0,24(r1);      /* that we might have pushed    */ \
  115        stw             r0,32(r1);      /* from other context that runs */ \
  116        stw             r0,40(r1);      /* the same code                */ \
  117        stw             r0,48(r1);                                         \
  118        stw             r0,56(r1);                                         \
  119        stw             r0,64(r1);                                         \
  120        stw             r0,72(r1);                                         \
  121        stw             r0,80(r1);                                         \
  122        stw             r0,88(r1);                                         \
  123        addi            r1,r1,160;      /* cleanup stack frame          */
 124
  /*
   * Byte-reverse two 32 bit registers: t0 = bswap(s0), t1 = bswap(s1).
   * With s = [A B C D] (A being the most significant byte):
   *   rotrwi t,s,8        -> t = [D A B C]
   *   rlwimi t,s,8,8,15   -> t = [D C B C]  (byte 1 taken from s rotl 8)
   *   rlwimi t,s,8,24,31  -> t = [D C B A]  (byte 3 taken from s rotl 8)
   * The two swaps are interleaved. t must not be the same register as s,
   * because s is read again after t has been written.
   */
  125#define ENDIAN_SWAP(t0, t1, s0, s1) \
  126        rotrwi          t0,s0,8;        /* swap endianness for 2 GPRs   */ \
  127        rotrwi          t1,s1,8;                                           \
  128        rlwimi          t0,s0,8,8,15;                                      \
  129        rlwimi          t1,s1,8,8,15;                                      \
  130        rlwimi          t0,s0,8,24,31;                                     \
  131        rlwimi          t1,s1,8,24,31;
 132
  /*
   * Multiply the 128 bit value d3:d2:d1:d0 (d3 being the most significant
   * word) by 2 in GF(2^128): shift left by one bit and, when the bit shifted
   * out of d3 was set, xor 0x87 into the lowest word d0.
   * t0 is scratch; CR0 is clobbered by the cmpwi.
   *
   * cmpwi/iselgt leave t0 = 0 when d3 > -1 (sign bit clear) and keep 0x87
   * otherwise. Each "rlwimi dN,dM,0,0,0; rotlwi dN,dN,1" pair first replaces
   * the top bit of dN with the top bit of the next lower word dM and then
   * rotates that bit round into the least significant position, giving
   * dN = (dN << 1) | (dM >> 31). d0 must be read by the d1 step before it is
   * shifted itself, which is why slwi d0 comes after rlwimi d1,d0.
   */
  133#define GF128_MUL(d0, d1, d2, d3, t0) \
  134        li              t0,0x87;        /* multiplication in GF128      */ \
  135        cmpwi           d3,-1;                                             \
  136        iselgt          t0,0,t0;                                           \
  137        rlwimi          d3,d2,0,0,0;    /* propagate "carry" bits       */ \
  138        rotlwi          d3,d3,1;                                           \
  139        rlwimi          d2,d1,0,0,0;                                       \
  140        rotlwi          d2,d2,1;                                           \
  141        rlwimi          d1,d0,0,0,0;                                       \
  142        slwi            d0,d0,1;        /* shift left 128 bit           */ \
  143        rotlwi          d1,d1,1;                                           \
  144        xor             d0,d0,t0;
 145
  /*
   * Prepare one block for the round function: load the first four key words
   * from 0..12(rKP) into rW0-rW3, xor them onto the input words d0-d3 with
   * the result placed in rD0-rD3, and copy rRR into the count register.
   * Presumably CTR then serves as the round counter of ppc_encrypt_block /
   * ppc_decrypt_block - those routines are not visible here, confirm there.
   */
  146#define START_KEY(d0, d1, d2, d3) \
  147        lwz             rW0,0(rKP);                                        \
  148        mtctr           rRR;                                               \
  149        lwz             rW1,4(rKP);                                        \
  150        lwz             rW2,8(rKP);                                        \
  151        lwz             rW3,12(rKP);                                       \
  152        xor             rD0,d0,rW0;                                        \
  153        xor             rD1,d1,rW1;                                        \
  154        xor             rD2,d2,rW2;                                        \
  155        xor             rD3,d3,rW3;
 156
  157/*
  158 * ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc,
  159 *                 u32 rounds)
  160 *
  161 * called from glue layer to encrypt a single 16 byte block
  162 * round values are AES128 = 4, AES192 = 5, AES256 = 6
  163 *
      * Flow: load the four input words through rSP, apply the first key
      * words (START_KEY), run ppc_encrypt_block, xor the result in rD0-rD3
      * with rW0-rW3 and store it through rDP. No extra 32 bit registers are
      * saved (INITIALIZE_CRYPT(..., 0)).
      * The mapping of the C arguments onto rDP/rSP/rKP/rRR comes from
      * aes-spe-regs.h and is not visible in this file.
  164 */
  165_GLOBAL(ppc_encrypt_aes)
  166        INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
  167        LOAD_DATA(rD0, 0)
  168        LOAD_DATA(rD1, 4)
  169        LOAD_DATA(rD2, 8)
  170        LOAD_DATA(rD3, 12)
  171        START_KEY(rD0, rD1, rD2, rD3)
  172        bl              ppc_encrypt_block       /* defined elsewhere    */
  173        xor             rD0,rD0,rW0     /* final xor with the words the */
  174        SAVE_DATA(rD0, 0)
  175        xor             rD1,rD1,rW1     /* block routine left in rW0-3; */
  176        SAVE_DATA(rD1, 4)
  177        xor             rD2,rD2,rW2     /* presumably the last round    */
  178        SAVE_DATA(rD2, 8)
  179        xor             rD3,rD3,rW3     /* key - confirm in that routine */
  180        SAVE_DATA(rD3, 12)
  181        FINALIZE_CRYPT(0)
  182        blr
 183
  184/*
  185 * ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec,
  186 *                 u32 rounds)
  187 *
  188 * called from glue layer to decrypt a single 16 byte block
  189 * round values are AES128 = 4, AES192 = 5, AES256 = 6
  190 *
      * Same flow as ppc_encrypt_aes but with the decryption table in rT0 and
      * ppc_decrypt_block as round function. In addition rT1 is set to
      * rT0 + 4096; what is stored at that offset of the table is not visible
      * here (presumably a second lookup table used by ppc_decrypt_block).
  191 */
  192_GLOBAL(ppc_decrypt_aes)
  193        INITIALIZE_CRYPT(PPC_AES_4K_DECTAB,0)
  194        LOAD_DATA(rD0, 0)
  195        addi            rT1,rT0,4096    /* second table pointer         */
  196        LOAD_DATA(rD1, 4)
  197        LOAD_DATA(rD2, 8)
  198        LOAD_DATA(rD3, 12)
  199        START_KEY(rD0, rD1, rD2, rD3)
  200        bl              ppc_decrypt_block       /* defined elsewhere    */
  201        xor             rD0,rD0,rW0     /* final xor with rW0-rW3 as    */
  202        SAVE_DATA(rD0, 0)
  203        xor             rD1,rD1,rW1     /* left by the block routine    */
  204        SAVE_DATA(rD1, 4)
  205        xor             rD2,rD2,rW2
  206        SAVE_DATA(rD2, 8)
  207        xor             rD3,rD3,rW3
  208        SAVE_DATA(rD3, 12)
  209        FINALIZE_CRYPT(0)
  210        blr
 211
  212/*
  213 * ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc,
  214 *                 u32 rounds, u32 bytes);
  215 *
  216 * called from glue layer to encrypt multiple blocks via ECB
  217 * Bytes must be larger or equal 16 and only whole blocks are
  218 * processed. round values are AES128 = 4, AES192 = 5 and
  219 * AES256 = 6
  220 *
      * The loop always handles one block before testing the remaining
      * length in rLN, and repeats while more than 15 bytes are left.
      * CR0 is set by the cmpwi near the top of the loop and only consumed
      * by the bt at the bottom; nothing in this file changes CR0 in between,
      * so this relies on ppc_encrypt_block (not visible here) preserving it.
  221 */
  222_GLOBAL(ppc_encrypt_ecb)
  223        INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
  224ppc_encrypt_ecb_loop:
  225        LOAD_DATA(rD0, 0)
  226        mr              rKP,rKS         /* rewind key pointer           */
  227        LOAD_DATA(rD1, 4)
  228        subi            rLN,rLN,16      /* one block consumed           */
  229        LOAD_DATA(rD2, 8)
  230        cmpwi           rLN,15          /* CR0 used by bt at loop end   */
  231        LOAD_DATA(rD3, 12)
  232        START_KEY(rD0, rD1, rD2, rD3)
  233        bl              ppc_encrypt_block
  234        xor             rD0,rD0,rW0     /* final xor with rW0-rW3       */
  235        SAVE_DATA(rD0, 0)
  236        xor             rD1,rD1,rW1
  237        SAVE_DATA(rD1, 4)
  238        xor             rD2,rD2,rW2
  239        SAVE_DATA(rD2, 8)
  240        xor             rD3,rD3,rW3
  241        SAVE_DATA(rD3, 12)
  242        NEXT_BLOCK
  243        bt              gt,ppc_encrypt_ecb_loop /* rLN > 15: next block */
  244        FINALIZE_CRYPT(0)
  245        blr
 246
  247/*
  248 * ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec,
  249 *                 u32 rounds, u32 bytes);
  250 *
  251 * called from glue layer to decrypt multiple blocks via ECB
  252 * Bytes must be larger or equal 16 and only whole blocks are
  253 * processed. round values are AES128 = 4, AES192 = 5 and
  254 * AES256 = 6
  255 *
      * Mirror image of ppc_encrypt_ecb using the decryption table, rT1 =
      * rT0 + 4096 and ppc_decrypt_block. The loop handles one block, then
      * repeats while more than 15 bytes are left in rLN. CR0 from the cmpwi
      * has to survive the call to ppc_decrypt_block (not visible here).
  256 */
  257_GLOBAL(ppc_decrypt_ecb)
  258        INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 0)
  259        addi            rT1,rT0,4096    /* second table pointer         */
  260ppc_decrypt_ecb_loop:
  261        LOAD_DATA(rD0, 0)
  262        mr              rKP,rKS         /* rewind key pointer           */
  263        LOAD_DATA(rD1, 4)
  264        subi            rLN,rLN,16      /* one block consumed           */
  265        LOAD_DATA(rD2, 8)
  266        cmpwi           rLN,15          /* CR0 used by bt at loop end   */
  267        LOAD_DATA(rD3, 12)
  268        START_KEY(rD0, rD1, rD2, rD3)
  269        bl              ppc_decrypt_block
  270        xor             rD0,rD0,rW0     /* final xor with rW0-rW3       */
  271        SAVE_DATA(rD0, 0)
  272        xor             rD1,rD1,rW1
  273        SAVE_DATA(rD1, 4)
  274        xor             rD2,rD2,rW2
  275        SAVE_DATA(rD2, 8)
  276        xor             rD3,rD3,rW3
  277        SAVE_DATA(rD3, 12)
  278        NEXT_BLOCK
  279        bt              gt,ppc_decrypt_ecb_loop /* rLN > 15: next block */
  280        FINALIZE_CRYPT(0)
  281        blr
 282
  283/*
  284 * ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc,
  285 *                 u32 rounds, u32 bytes, u8 *iv);
  286 *
  287 * called from glue layer to encrypt multiple blocks via CBC
  288 * Bytes must be larger or equal 16 and only whole blocks are
  289 * processed. round values are AES128 = 4, AES192 = 5 and
  290 * AES256 = 6
  291 *
      * rI0-rI3 hold the chaining value: they are loaded from the IV buffer,
      * xored onto each plaintext block before encryption and then replaced
      * by the resulting ciphertext block. After the last block they are
      * written back to the IV buffer. rI0-rI3 are saved/restored on the
      * stack (INITIALIZE_CRYPT/FINALIZE_CRYPT with 4).
      * As in the ECB loops, CR0 from the cmpwi must survive the call to
      * ppc_encrypt_block (not visible here).
  292 */
  293_GLOBAL(ppc_encrypt_cbc)
  294        INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
  295        LOAD_IV(rI0, 0)
  296        LOAD_IV(rI1, 4)
  297        LOAD_IV(rI2, 8)
  298        LOAD_IV(rI3, 12)
  299ppc_encrypt_cbc_loop:
  300        LOAD_DATA(rD0, 0)
  301        mr              rKP,rKS         /* rewind key pointer           */
  302        LOAD_DATA(rD1, 4)
  303        subi            rLN,rLN,16      /* one block consumed           */
  304        LOAD_DATA(rD2, 8)
  305        cmpwi           rLN,15          /* CR0 used by bt at loop end   */
  306        LOAD_DATA(rD3, 12)
  307        xor             rD0,rD0,rI0     /* plaintext ^ chaining value   */
  308        xor             rD1,rD1,rI1
  309        xor             rD2,rD2,rI2
  310        xor             rD3,rD3,rI3
  311        START_KEY(rD0, rD1, rD2, rD3)
  312        bl              ppc_encrypt_block
  313        xor             rI0,rD0,rW0     /* ciphertext = next chaining   */
  314        SAVE_DATA(rI0, 0)
  315        xor             rI1,rD1,rW1     /* value, kept in rI0-rI3       */
  316        SAVE_DATA(rI1, 4)
  317        xor             rI2,rD2,rW2
  318        SAVE_DATA(rI2, 8)
  319        xor             rI3,rD3,rW3
  320        SAVE_DATA(rI3, 12)
  321        NEXT_BLOCK
  322        bt              gt,ppc_encrypt_cbc_loop /* rLN > 15: next block */
  323        START_IV                        /* rewind rIP where needed      */
  324        SAVE_IV(rI0, 0)                 /* last ciphertext block is the */
  325        SAVE_IV(rI1, 4)                 /* IV handed back to the caller */
  326        SAVE_IV(rI2, 8)
  327        SAVE_IV(rI3, 12)
  328        FINALIZE_CRYPT(4)
  329        blr
 330
  331/*
  332 * ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec,
  333 *                 u32 rounds, u32 bytes, u8 *iv);
  334 *
  335 * called from glue layer to decrypt multiple blocks via CBC
  336 * round values are AES128 = 4, AES192 = 5, AES256 = 6
  337 *
      * The length is rounded down to a multiple of 16 and the blocks are
      * processed from the last one to the first one:
      *  - the incoming IV is kept in rI0-rI3,
      *  - the last ciphertext block is loaded and immediately written to
      *    the IV buffer as the IV handed back to the caller,
      *  - each loop iteration decrypts the block held in rD0-rD3, loads
      *    the preceding ciphertext block, xors it onto the result and
      *    stores the plaintext; pointers then step back by CBC_DEC,
      *  - the first block is finished after the loop with the incoming IV.
      * NOTE(review): rLN becomes (bytes & ~15) - 16 and is added to both
      * pointers, so the code appears to expect bytes >= 16 - confirm that
      * the caller guarantees this.
  338 */
  339_GLOBAL(ppc_decrypt_cbc)
  340        INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 4)
  341        li              rT1,15          /* mask; rT1 is reused below    */
  342        LOAD_IV(rI0, 0)
  343        andc            rLN,rLN,rT1     /* round down to whole blocks   */
  344        LOAD_IV(rI1, 4)
  345        subi            rLN,rLN,16      /* offset of the last block     */
  346        LOAD_IV(rI2, 8)
  347        add             rSP,rSP,rLN     /* reverse processing           */
  348        LOAD_IV(rI3, 12)
  349        add             rDP,rDP,rLN
  350        LOAD_DATA(rD0, 0)               /* last ciphertext block        */
  351        addi            rT1,rT0,4096    /* second table pointer         */
  352        LOAD_DATA(rD1, 4)
  353        LOAD_DATA(rD2, 8)
  354        LOAD_DATA(rD3, 12)
  355        START_IV
  356        SAVE_IV(rD0, 0)                 /* becomes the returned IV      */
  357        SAVE_IV(rD1, 4)
  358        SAVE_IV(rD2, 8)
  359        cmpwi           rLN,16
  360        SAVE_IV(rD3, 12)
  361        bt              lt,ppc_decrypt_cbc_end  /* only a single block  */
  362ppc_decrypt_cbc_loop:
  363        mr              rKP,rKS         /* rewind key pointer           */
  364        START_KEY(rD0, rD1, rD2, rD3)
  365        bl              ppc_decrypt_block
  366        subi            rLN,rLN,16
  367        subi            rSP,rSP,CBC_DEC /* step to the preceding block  */
  368        xor             rW0,rD0,rW0     /* finish block decryption      */
  369        LOAD_DATA(rD0, 0)               /* preceding ciphertext block   */
  370        xor             rW1,rD1,rW1
  371        LOAD_DATA(rD1, 4)
  372        xor             rW2,rD2,rW2
  373        LOAD_DATA(rD2, 8)
  374        xor             rW3,rD3,rW3
  375        LOAD_DATA(rD3, 12)
  376        xor             rW0,rW0,rD0     /* xor with preceding block     */
  377        SAVE_DATA(rW0, 0)
  378        xor             rW1,rW1,rD1
  379        SAVE_DATA(rW1, 4)
  380        xor             rW2,rW2,rD2
  381        SAVE_DATA(rW2, 8)
  382        xor             rW3,rW3,rD3
  383        SAVE_DATA(rW3, 12)
  384        cmpwi           rLN,15
  385        subi            rDP,rDP,CBC_DEC /* subi leaves CR0 untouched    */
  386        bt              gt,ppc_decrypt_cbc_loop
  387ppc_decrypt_cbc_end:
  388        mr              rKP,rKS
  389        START_KEY(rD0, rD1, rD2, rD3)
  390        bl              ppc_decrypt_block
  391        xor             rW0,rW0,rD0     /* finish block decryption      */
  392        xor             rW1,rW1,rD1
  393        xor             rW2,rW2,rD2
  394        xor             rW3,rW3,rD3
  395        xor             rW0,rW0,rI0     /* decrypt with initial IV      */
  396        SAVE_DATA(rW0, 0)
  397        xor             rW1,rW1,rI1
  398        SAVE_DATA(rW1, 4)
  399        xor             rW2,rW2,rI2
  400        SAVE_DATA(rW2, 8)
  401        xor             rW3,rW3,rI3
  402        SAVE_DATA(rW3, 12)
  403        FINALIZE_CRYPT(4)
  404        blr
 405
  406/*
  407 * ppc_crypt_ctr(u8 *out, const u8 *in, u32 *key_enc,
  408 *               u32 rounds, u32 bytes, u8 *iv);
  409 *
  410 * called from glue layer to encrypt/decrypt multiple blocks
  411 * via CTR. Number of bytes does not need to be a multiple of
  412 * 16. Round values are AES128 = 4, AES192 = 5, AES256 = 6
  413 *
      * rI0-rI3 hold the 128 bit counter loaded from the IV buffer, with rI3
      * as the least significant word (addic on rI3, carry rippled through
      * rI2, rI1, rI0 with addze).
      *
      * Full blocks: the counter is encrypted, the keystream ends up in
      * rW0-rW3, is xored onto the data and the counter is incremented.
      *
      * Trailing partial block (0 < rLN < 16): the keystream block is
      * stored temporarily into the IV buffer, then rLN bytes are xored
      * byte by byte from there onto the input. Afterwards rIP is moved
      * back to the start of the IV buffer and the counter is incremented
      * once more.
      *
      * In all cases the current counter is written to the IV buffer at
      * the end, which also overwrites the temporary keystream.
  414 */
  415_GLOBAL(ppc_crypt_ctr)
  416        INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
  417        LOAD_IV(rI0, 0)
  418        LOAD_IV(rI1, 4)
  419        LOAD_IV(rI2, 8)
  420        cmpwi           rLN,16
  421        LOAD_IV(rI3, 12)
  422        START_IV                        /* rewind rIP where needed      */
  423        bt              lt,ppc_crypt_ctr_partial /* less than one block */
  424ppc_crypt_ctr_loop:
  425        mr              rKP,rKS         /* rewind key pointer           */
  426        START_KEY(rI0, rI1, rI2, rI3)   /* the counter is the input     */
  427        bl              ppc_encrypt_block
  428        xor             rW0,rD0,rW0     /* keystream block in rW0-rW3   */
  429        xor             rW1,rD1,rW1
  430        xor             rW2,rD2,rW2
  431        xor             rW3,rD3,rW3
  432        LOAD_DATA(rD0, 0)
  433        subi            rLN,rLN,16      /* one block consumed           */
  434        LOAD_DATA(rD1, 4)
  435        LOAD_DATA(rD2, 8)
  436        LOAD_DATA(rD3, 12)
  437        xor             rD0,rD0,rW0     /* data ^ keystream             */
  438        SAVE_DATA(rD0, 0)
  439        xor             rD1,rD1,rW1
  440        SAVE_DATA(rD1, 4)
  441        xor             rD2,rD2,rW2
  442        SAVE_DATA(rD2, 8)
  443        xor             rD3,rD3,rW3
  444        SAVE_DATA(rD3, 12)
  445        addic           rI3,rI3,1       /* increase counter                     */
  446        addze           rI2,rI2         /* ripple the carry upwards     */
  447        addze           rI1,rI1
  448        addze           rI0,rI0
  449        NEXT_BLOCK
  450        cmpwi           rLN,15
  451        bt              gt,ppc_crypt_ctr_loop   /* rLN > 15: next block */
  452ppc_crypt_ctr_partial:
  453        cmpwi           rLN,0
  454        bt              eq,ppc_crypt_ctr_end    /* no trailing bytes    */
  455        mr              rKP,rKS
  456        START_KEY(rI0, rI1, rI2, rI3)
  457        bl              ppc_encrypt_block
  458        xor             rW0,rD0,rW0     /* keystream block, parked in   */
  459        SAVE_IV(rW0, 0)
  460        xor             rW1,rD1,rW1     /* the IV buffer for bytewise   */
  461        SAVE_IV(rW1, 4)
  462        xor             rW2,rD2,rW2     /* access                       */
  463        SAVE_IV(rW2, 8)
  464        xor             rW3,rD3,rW3
  465        SAVE_IV(rW3, 12)
  466        mtctr           rLN             /* loop count = remaining bytes */
  467        subi            rIP,rIP,CTR_DEC /* one byte before IV buffer    */
  468        subi            rSP,rSP,1       /* lbzu/stbu pre-increment, so  */
  469        subi            rDP,rDP,1       /* start one byte early         */
  470ppc_crypt_ctr_xorbyte:
  471        lbzu            rW4,1(rIP)      /* bytewise xor for partial block       */
  472        lbzu            rW5,1(rSP)
  473        xor             rW4,rW4,rW5
  474        stbu            rW4,1(rDP)
  475        bdnz            ppc_crypt_ctr_xorbyte
  476        subf            rIP,rLN,rIP     /* rIP -= rLN ...               */
  477        addi            rIP,rIP,1       /* ... +1: start of IV buffer   */
  478        addic           rI3,rI3,1       /* increase counter             */
  479        addze           rI2,rI2
  480        addze           rI1,rI1
  481        addze           rI0,rI0
  482ppc_crypt_ctr_end:
  483        SAVE_IV(rI0, 0)                 /* hand the counter back        */
  484        SAVE_IV(rI1, 4)
  485        SAVE_IV(rI2, 8)
  486        SAVE_IV(rI3, 12)
  487        FINALIZE_CRYPT(4)
  488        blr
 489
  490/*
  491 * ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc,
  492 *                 u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
  493 *
  494 * called from glue layer to encrypt multiple blocks via XTS
  495 * If key_twk is given, the initial IV encryption will be
  496 * processed too. Round values are AES128 = 4, AES192 = 5,
  497 * AES256 = 6
  498 *
      * rI0-rI3 hold the tweak in the form it is xored onto the data;
      * rG0-rG3 hold a byte swapped copy on which GF128_MUL operates. After
      * every block the tweak is multiplied in rG0-rG3 and swapped back into
      * rI0-rI3. When rKT is non zero the IV is first encrypted with the key
      * schedule at rKT (the same rRR round value is used). The final tweak
      * is written back to the IV buffer. rI0-rI3 and rG0-rG3 are
      * saved/restored on the stack (INITIALIZE_CRYPT/FINALIZE_CRYPT with 8).
      *
      * NOTE(review): the loop repeats while rLN > 0, whereas the ECB and
      * CBC loops repeat while rLN > 15. A byte count that is not a multiple
      * of 16 would therefore make this loop process one more full block.
      * Presumably the caller only passes whole blocks - confirm.
  499 */
  500_GLOBAL(ppc_encrypt_xts)
  501        INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 8)
  502        LOAD_IV(rI0, 0)
  503        LOAD_IV(rI1, 4)
  504        LOAD_IV(rI2, 8)
  505        cmpwi           rKT,0           /* tweak key given?             */
  506        LOAD_IV(rI3, 12)
  507        bt              eq,ppc_encrypt_xts_notweak
  508        mr              rKP,rKT         /* encrypt IV with tweak key    */
  509        START_KEY(rI0, rI1, rI2, rI3)
  510        bl              ppc_encrypt_block
  511        xor             rI0,rD0,rW0     /* encrypted IV = initial tweak */
  512        xor             rI1,rD1,rW1
  513        xor             rI2,rD2,rW2
  514        xor             rI3,rD3,rW3
  515ppc_encrypt_xts_notweak:
  516        ENDIAN_SWAP(rG0, rG1, rI0, rI1) /* byte swapped tweak copy for  */
  517        ENDIAN_SWAP(rG2, rG3, rI2, rI3) /* GF128_MUL                    */
  518ppc_encrypt_xts_loop:
  519        LOAD_DATA(rD0, 0)
  520        mr              rKP,rKS         /* rewind key pointer           */
  521        LOAD_DATA(rD1, 4)
  522        subi            rLN,rLN,16      /* one block consumed           */
  523        LOAD_DATA(rD2, 8)
  524        LOAD_DATA(rD3, 12)
  525        xor             rD0,rD0,rI0     /* plaintext ^ tweak            */
  526        xor             rD1,rD1,rI1
  527        xor             rD2,rD2,rI2
  528        xor             rD3,rD3,rI3
  529        START_KEY(rD0, rD1, rD2, rD3)
  530        bl              ppc_encrypt_block
  531        xor             rD0,rD0,rW0     /* finish block encryption      */
  532        xor             rD1,rD1,rW1
  533        xor             rD2,rD2,rW2
  534        xor             rD3,rD3,rW3
  535        xor             rD0,rD0,rI0     /* result ^ tweak               */
  536        SAVE_DATA(rD0, 0)
  537        xor             rD1,rD1,rI1
  538        SAVE_DATA(rD1, 4)
  539        xor             rD2,rD2,rI2
  540        SAVE_DATA(rD2, 8)
  541        xor             rD3,rD3,rI3
  542        SAVE_DATA(rD3, 12)
  543        GF128_MUL(rG0, rG1, rG2, rG3, rW0)      /* next tweak, rW0 scratch */
  544        ENDIAN_SWAP(rI0, rI1, rG0, rG1)
  545        ENDIAN_SWAP(rI2, rI3, rG2, rG3)
  546        cmpwi           rLN,0           /* after GF128_MUL, which       */
  547        NEXT_BLOCK                      /* clobbers CR0 itself          */
  548        bt              gt,ppc_encrypt_xts_loop /* rLN > 0: next block  */
  549        START_IV                        /* rewind rIP where needed      */
  550        SAVE_IV(rI0, 0)                 /* hand the next tweak back     */
  551        SAVE_IV(rI1, 4)
  552        SAVE_IV(rI2, 8)
  553        SAVE_IV(rI3, 12)
  554        FINALIZE_CRYPT(8)
  555        blr
 556
  557/*
  558 * ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec,
  559 *                 u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
  560 *
  561 * called from glue layer to decrypt multiple blocks via XTS
  562 * If key_twk is given, the initial IV encryption will be
  563 * processed too. Round values are AES128 = 4, AES192 = 5,
  564 * AES256 = 6
  565 *
      * The length is a byte count: rLN is decremented by 16 per block.
      *
      * Same structure as ppc_encrypt_xts with ppc_decrypt_block as the
      * data round function. The initial tweak is still produced with
      * ppc_encrypt_block; for that call rT0 is temporarily moved 4096
      * bytes below the decryption table and restored afterwards.
      * Presumably the encryption table is located directly in front of
      * PPC_AES_4K_DECTAB - the table layout is not visible here, confirm.
      *
      * NOTE(review): the loop repeats while rLN > 0, whereas the ECB and
      * CBC loops repeat while rLN > 15. A byte count that is not a multiple
      * of 16 would therefore make this loop process one more full block.
      * Presumably the caller only passes whole blocks - confirm.
  566 */
  567_GLOBAL(ppc_decrypt_xts)
  568        INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 8)
  569        LOAD_IV(rI0, 0)
  570        addi            rT1,rT0,4096    /* second table pointer         */
  571        LOAD_IV(rI1, 4)
  572        LOAD_IV(rI2, 8)
  573        cmpwi           rKT,0           /* tweak key given?             */
  574        LOAD_IV(rI3, 12)
  575        bt              eq,ppc_decrypt_xts_notweak
  576        subi            rT0,rT0,4096    /* table for the encrypt call   */
  577        mr              rKP,rKT         /* encrypt IV with tweak key    */
  578        START_KEY(rI0, rI1, rI2, rI3)
  579        bl              ppc_encrypt_block
  580        xor             rI0,rD0,rW0     /* encrypted IV = initial tweak */
  581        xor             rI1,rD1,rW1
  582        xor             rI2,rD2,rW2
  583        xor             rI3,rD3,rW3
  584        addi            rT0,rT0,4096    /* back to the decryption table */
  585ppc_decrypt_xts_notweak:
  586        ENDIAN_SWAP(rG0, rG1, rI0, rI1) /* byte swapped tweak copy for  */
  587        ENDIAN_SWAP(rG2, rG3, rI2, rI3) /* GF128_MUL                    */
  588ppc_decrypt_xts_loop:
  589        LOAD_DATA(rD0, 0)
  590        mr              rKP,rKS         /* rewind key pointer           */
  591        LOAD_DATA(rD1, 4)
  592        subi            rLN,rLN,16      /* one block consumed           */
  593        LOAD_DATA(rD2, 8)
  594        LOAD_DATA(rD3, 12)
  595        xor             rD0,rD0,rI0     /* ciphertext ^ tweak           */
  596        xor             rD1,rD1,rI1
  597        xor             rD2,rD2,rI2
  598        xor             rD3,rD3,rI3
  599        START_KEY(rD0, rD1, rD2, rD3)
  600        bl              ppc_decrypt_block
  601        xor             rD0,rD0,rW0     /* finish block decryption      */
  602        xor             rD1,rD1,rW1
  603        xor             rD2,rD2,rW2
  604        xor             rD3,rD3,rW3
  605        xor             rD0,rD0,rI0     /* result ^ tweak               */
  606        SAVE_DATA(rD0, 0)
  607        xor             rD1,rD1,rI1
  608        SAVE_DATA(rD1, 4)
  609        xor             rD2,rD2,rI2
  610        SAVE_DATA(rD2, 8)
  611        xor             rD3,rD3,rI3
  612        SAVE_DATA(rD3, 12)
  613        GF128_MUL(rG0, rG1, rG2, rG3, rW0)      /* next tweak, rW0 scratch */
  614        ENDIAN_SWAP(rI0, rI1, rG0, rG1)
  615        ENDIAN_SWAP(rI2, rI3, rG2, rG3)
  616        cmpwi           rLN,0           /* after GF128_MUL, which       */
  617        NEXT_BLOCK                      /* clobbers CR0 itself          */
  618        bt              gt,ppc_decrypt_xts_loop /* rLN > 0: next block  */
  619        START_IV                        /* rewind rIP where needed      */
  620        SAVE_IV(rI0, 0)                 /* hand the next tweak back     */
  621        SAVE_IV(rI1, 4)
  622        SAVE_IV(rI2, 8)
  623        SAVE_IV(rI3, 12)
  624        FINALIZE_CRYPT(8)
  625        blr
 626