linux/arch/s390/crypto/chacha-s390.S
<<
>>
Prefs
   1/* SPDX-License-Identifier: GPL-2.0 */
   2/*
   3 * Original implementation written by Andy Polyakov, @dot-asm.
   4 * This is an adaptation of the original code for kernel use.
   5 *
   6 * Copyright (C) 2006-2019 CRYPTOGAMS by <appro@openssl.org>. All Rights Reserved.
   7 */
   8
   9#include <linux/linkage.h>
  10#include <asm/nospec-insn.h>
  11#include <asm/vx-insn.h>
  12
  13#define SP      %r15
    # SP aliases %r15, which the routines below use as the stack pointer.
  14#define FRAME   (16 * 8 + 4 * 8)
    # FRAME (160 bytes) is the amount chacha20_vx subtracts from SP to
    # create its own stack frame; chacha20_vx_4x does not move SP.
  15
  16.data
    # Constant table used by both routines. The code addresses it by fixed
    # byte offsets from .Lsigma, so the order and size of the rows below
    # must not change.
  17.align  32
  18
  19.Lsigma:
    # +0x00: the four ChaCha constant words (the ASCII text
    #        expand 32-byte k when the words are stored little-endian)
  20.long   0x61707865,0x3320646e,0x79622d32,0x6b206574     # endian-neutral
    # +0x10, +0x20, +0x30: increments 1, 2 and 3 for word 0 of a vector;
    #        chacha20_vx adds them to the counter row
  21.long   1,0,0,0
  22.long   2,0,0,0
  23.long   3,0,0,0
    # +0x40: VPERM selector that reverses the byte order inside each
    #        32-bit word; both routines load it into BEPERM
  24.long   0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c     # byte swap
  25
    # +0x50: per-lane offsets 0..3, loaded into CTR by chacha20_vx_4x
  26.long   0,1,2,3
    # +0x60..+0x9f: each constant word replicated into all four lanes,
    #        loaded into XA0..XA3 by chacha20_vx_4x
  27.long   0x61707865,0x61707865,0x61707865,0x61707865     # smashed sigma
  28.long   0x3320646e,0x3320646e,0x3320646e,0x3320646e
  29.long   0x79622d32,0x79622d32,0x79622d32,0x79622d32
  30.long   0x6b206574,0x6b206574,0x6b206574,0x6b206574
  31
  32.previous
  33
  34        GEN_BR_THUNK %r14
            # NOTE(review): GEN_BR_THUNK and BR_EX are not defined in this
            # file; presumably both come from asm/nospec-insn.h. Every return
            # in the routines below is written as BR_EX %r14.
  35
  36.text
  37
  38#############################################################################
  39# void chacha20_vx_4x(u8 *out, const u8 *inp, size_t len,
  40#                     const u32 *key, const u32 *counter)
  41
  42#define OUT             %r2
  43#define INP             %r3
  44#define LEN             %r4
  45#define KEY             %r5
  46#define COUNTER         %r6
    # OUT, INP, LEN, KEY and COUNTER name %r2..%r6 in the order of the
    # prototype arguments (out, inp, len, key, counter).
  47
  48#define BEPERM          %v31
  49#define CTR             %v26
    # BEPERM holds the byte-swap selector and CTR the lane offsets
    # (0,1,2,3); both are loaded from the .Lsigma table.
  50
  51#define K0              %v16
  52#define K1              %v17
  53#define K2              %v18
  54#define K3              %v19
    # K0..K3 hold the four 16-byte rows of the input state: the constants,
    # the first and second 16 bytes at KEY, and the 16 bytes at COUNTER.
  55
  56#define XA0             %v0
  57#define XA1             %v1
  58#define XA2             %v2
  59#define XA3             %v3
  60
  61#define XB0             %v4
  62#define XB1             %v5
  63#define XB2             %v6
  64#define XB3             %v7
  65
  66#define XC0             %v8
  67#define XC1             %v9
  68#define XC2             %v10
  69#define XC3             %v11
  70
  71#define XD0             %v12
  72#define XD1             %v13
  73#define XD2             %v14
  74#define XD3             %v15
    # During the rounds there is one register per state word: XAn is word n,
    # XBn word 4+n, XCn word 8+n and XDn word 12+n. Each of the four 32-bit
    # lanes of a register belongs to a different block.
  75
  76#define XT0             %v27
  77#define XT1             %v28
  78#define XT2             %v29
  79#define XT3             %v30
    # XT0..XT3 are scratch: transpose temporaries, input data and the
    # staging registers used by .Ltail_4x.
  80
    # chacha20_vx_4x: XOR the input with ChaCha20 keystream, computing four
    # 64-byte blocks at once in vector registers.
    #
    #   OUT     (%r2)  destination buffer
    #   INP     (%r3)  source buffer
    #   LEN     (%r4)  number of bytes to process
    #   KEY     (%r5)  32 bytes, loaded as two 16-byte vectors
    #   COUNTER (%r6)  16 bytes, loaded as the last row of the state;
    #                  word 0 gets the per-block offset 0..3 added
    #
    # Only four blocks are generated and LEN is not looked at again after
    # the fourth one, so at most 0x100 bytes are handled (chacha20_vx only
    # branches here when LEN <= 256).
    # NOTE(review): the first block is loaded, XORed and stored without any
    # test of LEN, and there is no zero test after it, so LEN == 0x40 would
    # reach .Ltail_4x with LEN == 0. Callers presumably guarantee
    # LEN > 0x40 - confirm against the C glue code.
    #
    # Register use: %r0 round counter, %r1 byte index in the tail loop,
    # %r7 table pointer; %r5 and %r6 are overwritten in the tail loop.
    # %r6/%r7 are saved at 6*8(SP) and restored before each return.
  81ENTRY(chacha20_vx_4x)
            # %r7 becomes the table pointer and %r6 is reused as scratch in
            # the tail loop, so both are saved first.
  82        stmg    %r6,%r7,6*8(SP)
  83
            # %r7 = address of the constant table, %r0 = 10 passes of the
            # round loop (one column round plus one diagonal round each).
            # NOTE(review): %r1 is zeroed here but does not appear to be read
            # before .Ltail_4x zeroes it again - looks redundant, confirm.
  84        larl    %r7,.Lsigma
  85        lhi     %r0,10
  86        lhi     %r1,0
  87
  88        VL      K0,0,,%r7               # load sigma
  89        VL      K1,0,,KEY               # load key
  90        VL      K2,16,,KEY
  91        VL      K3,0,,COUNTER           # load counter
  92
            # Byte-swap selector (table +0x40) and lane offsets 0,1,2,3
            # (table +0x50).
  93        VL      BEPERM,0x40,,%r7
  94        VL      CTR,0x50,,%r7
  95
  96        VLM     XA0,XA3,0x60,%r7,4      # load [smashed] sigma
  97
  98        VREPF   XB0,K1,0                # smash the key
  99        VREPF   XB1,K1,1
 100        VREPF   XB2,K1,2
 101        VREPF   XB3,K1,3
 102
            # Broadcast the four words of K3. Lane n of word 0 then gets +n,
            # so every lane works with its own block counter.
 103        VREPF   XD0,K3,0
 104        VREPF   XD1,K3,1
 105        VREPF   XD2,K3,2
 106        VREPF   XD3,K3,3
 107        VAF     XD0,XD0,CTR
 108
            # Broadcast the four words of K2 (second half of the key).
 109        VREPF   XC0,K2,0
 110        VREPF   XC1,K2,1
 111        VREPF   XC2,K2,2
 112        VREPF   XC3,K2,3
 113
 114.Loop_4x:
            # Column round. The four quarter-rounds
            #   (XA0,XB0,XC0,XD0) (XA1,XB1,XC1,XD1)
            #   (XA2,XB2,XC2,XD2) (XA3,XB3,XC3,XD3)
            # are advanced one step at a time.
            # Step 1: a += b; d ^= a; d <<<= 16
 115        VAF     XA0,XA0,XB0
 116        VX      XD0,XD0,XA0
 117        VERLLF  XD0,XD0,16
 118
 119        VAF     XA1,XA1,XB1
 120        VX      XD1,XD1,XA1
 121        VERLLF  XD1,XD1,16
 122
 123        VAF     XA2,XA2,XB2
 124        VX      XD2,XD2,XA2
 125        VERLLF  XD2,XD2,16
 126
 127        VAF     XA3,XA3,XB3
 128        VX      XD3,XD3,XA3
 129        VERLLF  XD3,XD3,16
 130
            # Step 2: c += d; b ^= c; b <<<= 12
 131        VAF     XC0,XC0,XD0
 132        VX      XB0,XB0,XC0
 133        VERLLF  XB0,XB0,12
 134
 135        VAF     XC1,XC1,XD1
 136        VX      XB1,XB1,XC1
 137        VERLLF  XB1,XB1,12
 138
 139        VAF     XC2,XC2,XD2
 140        VX      XB2,XB2,XC2
 141        VERLLF  XB2,XB2,12
 142
 143        VAF     XC3,XC3,XD3
 144        VX      XB3,XB3,XC3
 145        VERLLF  XB3,XB3,12
 146
            # Step 3: a += b; d ^= a; d <<<= 8
 147        VAF     XA0,XA0,XB0
 148        VX      XD0,XD0,XA0
 149        VERLLF  XD0,XD0,8
 150
 151        VAF     XA1,XA1,XB1
 152        VX      XD1,XD1,XA1
 153        VERLLF  XD1,XD1,8
 154
 155        VAF     XA2,XA2,XB2
 156        VX      XD2,XD2,XA2
 157        VERLLF  XD2,XD2,8
 158
 159        VAF     XA3,XA3,XB3
 160        VX      XD3,XD3,XA3
 161        VERLLF  XD3,XD3,8
 162
            # Step 4: c += d; b ^= c; b <<<= 7
 163        VAF     XC0,XC0,XD0
 164        VX      XB0,XB0,XC0
 165        VERLLF  XB0,XB0,7
 166
 167        VAF     XC1,XC1,XD1
 168        VX      XB1,XB1,XC1
 169        VERLLF  XB1,XB1,7
 170
 171        VAF     XC2,XC2,XD2
 172        VX      XB2,XB2,XC2
 173        VERLLF  XB2,XB2,7
 174
 175        VAF     XC3,XC3,XD3
 176        VX      XB3,XB3,XC3
 177        VERLLF  XB3,XB3,7
 178
            # Diagonal round. Same four steps on the quarter-rounds
            #   (XA0,XB1,XC2,XD3) (XA1,XB2,XC3,XD0)
            #   (XA2,XB3,XC0,XD1) (XA3,XB0,XC1,XD2)
            # Step 1: a += b; d ^= a; d <<<= 16
 179        VAF     XA0,XA0,XB1
 180        VX      XD3,XD3,XA0
 181        VERLLF  XD3,XD3,16
 182
 183        VAF     XA1,XA1,XB2
 184        VX      XD0,XD0,XA1
 185        VERLLF  XD0,XD0,16
 186
 187        VAF     XA2,XA2,XB3
 188        VX      XD1,XD1,XA2
 189        VERLLF  XD1,XD1,16
 190
 191        VAF     XA3,XA3,XB0
 192        VX      XD2,XD2,XA3
 193        VERLLF  XD2,XD2,16
 194
            # Step 2: c += d; b ^= c; b <<<= 12
 195        VAF     XC2,XC2,XD3
 196        VX      XB1,XB1,XC2
 197        VERLLF  XB1,XB1,12
 198
 199        VAF     XC3,XC3,XD0
 200        VX      XB2,XB2,XC3
 201        VERLLF  XB2,XB2,12
 202
 203        VAF     XC0,XC0,XD1
 204        VX      XB3,XB3,XC0
 205        VERLLF  XB3,XB3,12
 206
 207        VAF     XC1,XC1,XD2
 208        VX      XB0,XB0,XC1
 209        VERLLF  XB0,XB0,12
 210
            # Step 3: a += b; d ^= a; d <<<= 8
 211        VAF     XA0,XA0,XB1
 212        VX      XD3,XD3,XA0
 213        VERLLF  XD3,XD3,8
 214
 215        VAF     XA1,XA1,XB2
 216        VX      XD0,XD0,XA1
 217        VERLLF  XD0,XD0,8
 218
 219        VAF     XA2,XA2,XB3
 220        VX      XD1,XD1,XA2
 221        VERLLF  XD1,XD1,8
 222
 223        VAF     XA3,XA3,XB0
 224        VX      XD2,XD2,XA3
 225        VERLLF  XD2,XD2,8
 226
            # Step 4: c += d; b ^= c; b <<<= 7
 227        VAF     XC2,XC2,XD3
 228        VX      XB1,XB1,XC2
 229        VERLLF  XB1,XB1,7
 230
 231        VAF     XC3,XC3,XD0
 232        VX      XB2,XB2,XC3
 233        VERLLF  XB2,XB2,7
 234
 235        VAF     XC0,XC0,XD1
 236        VX      XB3,XB3,XC0
 237        VERLLF  XB3,XB3,7
 238
 239        VAF     XC1,XC1,XD2
 240        VX      XB0,XB0,XC1
 241        VERLLF  XB0,XB0,7
            # Decrement %r0 and repeat until it reaches zero (10 passes).
 242        brct    %r0,.Loop_4x
 243
            # Lane offsets again: after the transpose only K3 is added back,
            # so the +n part of each initial counter word is supplied here.
 244        VAF     XD0,XD0,CTR
 245
            # Four 4x4 word transposes. Afterwards XAn, XBn, XCn and XDn are
            # the four 16-byte rows of block n instead of one state word each.
 246        VMRHF   XT0,XA0,XA1             # transpose data
 247        VMRHF   XT1,XA2,XA3
 248        VMRLF   XT2,XA0,XA1
 249        VMRLF   XT3,XA2,XA3
 250        VPDI    XA0,XT0,XT1,0b0000
 251        VPDI    XA1,XT0,XT1,0b0101
 252        VPDI    XA2,XT2,XT3,0b0000
 253        VPDI    XA3,XT2,XT3,0b0101
 254
 255        VMRHF   XT0,XB0,XB1
 256        VMRHF   XT1,XB2,XB3
 257        VMRLF   XT2,XB0,XB1
 258        VMRLF   XT3,XB2,XB3
 259        VPDI    XB0,XT0,XT1,0b0000
 260        VPDI    XB1,XT0,XT1,0b0101
 261        VPDI    XB2,XT2,XT3,0b0000
 262        VPDI    XB3,XT2,XT3,0b0101
 263
 264        VMRHF   XT0,XC0,XC1
 265        VMRHF   XT1,XC2,XC3
 266        VMRLF   XT2,XC0,XC1
 267        VMRLF   XT3,XC2,XC3
 268        VPDI    XC0,XT0,XT1,0b0000
 269        VPDI    XC1,XT0,XT1,0b0101
 270        VPDI    XC2,XT2,XT3,0b0000
 271        VPDI    XC3,XT2,XT3,0b0101
 272
 273        VMRHF   XT0,XD0,XD1
 274        VMRHF   XT1,XD2,XD3
 275        VMRLF   XT2,XD0,XD1
 276        VMRLF   XT3,XD2,XD3
 277        VPDI    XD0,XT0,XT1,0b0000
 278        VPDI    XD1,XT0,XT1,0b0101
 279        VPDI    XD2,XT2,XT3,0b0000
 280        VPDI    XD3,XT2,XT3,0b0101
 281
            # Block 0: add the input state rows, reverse the bytes of every
            # word, XOR with 0x40 bytes of input and store the result.
            # No test of LEN precedes this first block.
 282        VAF     XA0,XA0,K0
 283        VAF     XB0,XB0,K1
 284        VAF     XC0,XC0,K2
 285        VAF     XD0,XD0,K3
 286
 287        VPERM   XA0,XA0,XA0,BEPERM
 288        VPERM   XB0,XB0,XB0,BEPERM
 289        VPERM   XC0,XC0,XC0,BEPERM
 290        VPERM   XD0,XD0,XD0,BEPERM
 291
 292        VLM     XT0,XT3,0,INP,0
 293
 294        VX      XT0,XT0,XA0
 295        VX      XT1,XT1,XB0
 296        VX      XT2,XT2,XC0
 297        VX      XT3,XT3,XD0
 298
 299        VSTM    XT0,XT3,0,OUT,0
 300
            # Advance both pointers and reduce LEN by one block.
 301        la      INP,0x40(INP)
 302        la      OUT,0x40(OUT)
 303        aghi    LEN,-0x40
 304
            # Block 1: the keystream is built in XA0..XD0 before the length
            # test, because .Ltail_4x takes it from those registers.
 305        VAF     XA0,XA1,K0
 306        VAF     XB0,XB1,K1
 307        VAF     XC0,XC1,K2
 308        VAF     XD0,XD1,K3
 309
 310        VPERM   XA0,XA0,XA0,BEPERM
 311        VPERM   XB0,XB0,XB0,BEPERM
 312        VPERM   XC0,XC0,XC0,BEPERM
 313        VPERM   XD0,XD0,XD0,BEPERM
 314
            # Fewer than 0x40 bytes left (unsigned compare): byte-wise tail.
 315        clgfi   LEN,0x40
 316        jl      .Ltail_4x
 317
 318        VLM     XT0,XT3,0,INP,0
 319
 320        VX      XT0,XT0,XA0
 321        VX      XT1,XT1,XB0
 322        VX      XT2,XT2,XC0
 323        VX      XT3,XT3,XD0
 324
 325        VSTM    XT0,XT3,0,OUT,0
 326
            # Advance; leave when LEN has dropped to exactly zero.
 327        la      INP,0x40(INP)
 328        la      OUT,0x40(OUT)
 329        aghi    LEN,-0x40
 330        je      .Ldone_4x
 331
            # Block 2, same pattern.
 332        VAF     XA0,XA2,K0
 333        VAF     XB0,XB2,K1
 334        VAF     XC0,XC2,K2
 335        VAF     XD0,XD2,K3
 336
 337        VPERM   XA0,XA0,XA0,BEPERM
 338        VPERM   XB0,XB0,XB0,BEPERM
 339        VPERM   XC0,XC0,XC0,BEPERM
 340        VPERM   XD0,XD0,XD0,BEPERM
 341
 342        clgfi   LEN,0x40
 343        jl      .Ltail_4x
 344
 345        VLM     XT0,XT3,0,INP,0
 346
 347        VX      XT0,XT0,XA0
 348        VX      XT1,XT1,XB0
 349        VX      XT2,XT2,XC0
 350        VX      XT3,XT3,XD0
 351
 352        VSTM    XT0,XT3,0,OUT,0
 353
 354        la      INP,0x40(INP)
 355        la      OUT,0x40(OUT)
 356        aghi    LEN,-0x40
 357        je      .Ldone_4x
 358
            # Block 3, the last one. After its store control falls into
            # .Ldone_4x without updating INP, OUT or LEN.
 359        VAF     XA0,XA3,K0
 360        VAF     XB0,XB3,K1
 361        VAF     XC0,XC3,K2
 362        VAF     XD0,XD3,K3
 363
 364        VPERM   XA0,XA0,XA0,BEPERM
 365        VPERM   XB0,XB0,XB0,BEPERM
 366        VPERM   XC0,XC0,XC0,BEPERM
 367        VPERM   XD0,XD0,XD0,BEPERM
 368
 369        clgfi   LEN,0x40
 370        jl      .Ltail_4x
 371
 372        VLM     XT0,XT3,0,INP,0
 373
 374        VX      XT0,XT0,XA0
 375        VX      XT1,XT1,XB0
 376        VX      XT2,XT2,XC0
 377        VX      XT3,XT3,XD0
 378
 379        VSTM    XT0,XT3,0,OUT,0
 380
 381.Ldone_4x:
            # Restore the two saved registers and return.
 382        lmg     %r6,%r7,6*8(SP)
 383        BR_EX   %r14
 384
 385.Ltail_4x:
            # Partial last block: spill the current keystream block (XA0, XB0,
            # and XC0/XD0 copied through XT0/XT1) to the 0x40 bytes starting
            # at 8*8(SP), then XOR one byte at a time.
            # NOTE(review): SP has not been moved in this routine, so this
            # area is presumably part of the register save area provided by
            # the caller - confirm against the s390x ABI.
 386        VLR     XT0,XC0
 387        VLR     XT1,XD0
 388
 389        VST     XA0,8*8+0x00,,SP
 390        VST     XB0,8*8+0x10,,SP
 391        VST     XT0,8*8+0x20,,SP
 392        VST     XT1,8*8+0x30,,SP
 393
            # %r1 = byte index, starting at zero.
 394        lghi    %r1,0
 395
 396.Loop_tail_4x:
            # %r5 = input byte, %r6 = keystream byte (KEY and COUNTER are no
            # longer needed); store the XOR and step to the next byte.
            # brct decrements LEN and loops until it reaches zero.
 397        llgc    %r5,0(%r1,INP)
 398        llgc    %r6,8*8(%r1,SP)
 399        xr      %r6,%r5
 400        stc     %r6,0(%r1,OUT)
 401        la      %r1,1(%r1)
 402        brct    LEN,.Loop_tail_4x
 403
 404        lmg     %r6,%r7,6*8(SP)
 405        BR_EX   %r14
 406ENDPROC(chacha20_vx_4x)
 407
    # Remove the aliases that chacha20_vx defines again with its own
    # register assignment.
 408#undef  OUT
 409#undef  INP
 410#undef  LEN
 411#undef  KEY
 412#undef  COUNTER
 413
 414#undef BEPERM
 415
 416#undef K0
 417#undef K1
 418#undef K2
 419#undef K3
 420
 421
 422#############################################################################
 423# void chacha20_vx(u8 *out, const u8 *inp, size_t len,
 424#                  const u32 *key, const u32 *counter)
 425
 426#define OUT             %r2
 427#define INP             %r3
 428#define LEN             %r4
 429#define KEY             %r5
 430#define COUNTER         %r6
    # OUT, INP, LEN, KEY and COUNTER name %r2..%r6 in the order of the
    # prototype arguments (out, inp, len, key, counter).
 431
 432#define BEPERM          %v31
 433
 434#define K0              %v27
 435#define K1              %v24
 436#define K2              %v25
 437#define K3              %v26
    # K0..K3 are the four 16-byte rows of the input state. K0 shares %v27
    # with T0 (see further down), so it is lost whenever T0 is written.
 438
 439#define A0              %v0
 440#define B0              %v1
 441#define C0              %v2
 442#define D0              %v3
 443
 444#define A1              %v4
 445#define B1              %v5
 446#define C1              %v6
 447#define D1              %v7
 448
 449#define A2              %v8
 450#define B2              %v9
 451#define C2              %v10
 452#define D2              %v11
 453
 454#define A3              %v12
 455#define B3              %v13
 456#define C3              %v14
 457#define D3              %v15
 458
 459#define A4              %v16
 460#define B4              %v17
 461#define C4              %v18
 462#define D4              %v19
 463
 464#define A5              %v20
 465#define B5              %v21
 466#define C5              %v22
 467#define D5              %v23
    # An, Bn, Cn, Dn are rows 0..3 of block n; six blocks (n = 0..5) are
    # processed side by side. A0..D0 also serve as the output staging
    # registers and A1..D1 as the input buffer once block 1 is consumed.
 468
 469#define T0              %v27
 470#define T1              %v28
 471#define T2              %v29
 472#define T3              %v30
    # T1..T3 start out as the counter increments 1, 2, 3 from the table,
    # then keep the initial row 3 of blocks 1..3 during the rounds.
    # T0..T3 together receive the input data of block 0.
 473
    # chacha20_vx: XOR the input with ChaCha20 keystream, six 64-byte blocks
    # per outer pass, each block held as four row vectors.
    #
    #   OUT     (%r2)  destination buffer
    #   INP     (%r3)  source buffer
    #   LEN     (%r4)  number of bytes to process
    #   KEY     (%r5)  32 bytes, loaded into K1 and K2
    #   COUNTER (%r6)  16 bytes, loaded into K3; word 0 is advanced by one
    #                  per block and by six per outer pass
    #
    # LEN <= 256 is handed to chacha20_vx_4x by a plain jump, before any
    # register has been saved or SP has been changed. Otherwise a frame of
    # FRAME bytes is allocated; 0x40 bytes at 8*8(SP) inside it hold the
    # keystream for a partial last block.
    # Register use: %r0 round counter, %r1 byte index in the tail loop,
    # %r7 table pointer; %r5 and %r6 are overwritten in the tail loop.
 474ENTRY(chacha20_vx)
            # Short requests (unsigned LEN <= 256) go to the 4-block routine.
 475        clgfi   LEN,256
 476        jle     chacha20_vx_4x
            # Save %r6/%r7 relative to the incoming SP; they are restored from
            # FRAME+6*8(SP) once the frame below exists.
 477        stmg    %r6,%r7,6*8(SP)
 478
            # Allocate FRAME bytes and store the previous SP at 0(SP).
 479        lghi    %r1,-FRAME
 480        lgr     %r0,SP
 481        la      SP,0(%r1,SP)
 482        stg     %r0,0(SP)               # back-chain
 483
            # %r7 = address of the constant table, %r0 = 10 passes of the
            # round loop (one column round plus one diagonal round each).
 484        larl    %r7,.Lsigma
 485        lhi     %r0,10
 486
 487        VLM     K1,K2,0,KEY,0           # load key
 488        VL      K3,0,,COUNTER           # load counter
 489
            # Five consecutive registers %v27..%v31: K0 = constants,
            # T1..T3 = increments 1, 2, 3 and BEPERM = byte-swap selector.
 490        VLM     K0,BEPERM,0,%r7,4       # load sigma, increments, ...
 491
 492.Loop_outer_vx:
            # Six working copies of the state. Rows 0 and 1 are the same for
            # every block.
 493        VLR     A0,K0
 494        VLR     B0,K1
 495        VLR     A1,K0
 496        VLR     B1,K1
 497        VLR     A2,K0
 498        VLR     B2,K1
 499        VLR     A3,K0
 500        VLR     B3,K1
 501        VLR     A4,K0
 502        VLR     B4,K1
 503        VLR     A5,K0
 504        VLR     B5,K1
 505
            # Row 3 of block n is K3 with n added to word 0. Blocks 4 and 5
            # are derived from D2 (K3+2) plus the increments 2 and 3.
 506        VLR     D0,K3
 507        VAF     D1,K3,T1                # K[3]+1
 508        VAF     D2,K3,T2                # K[3]+2
 509        VAF     D3,K3,T3                # K[3]+3
 510        VAF     D4,D2,T2                # K[3]+4
 511        VAF     D5,D2,T3                # K[3]+5
 512
 513        VLR     C0,K2
 514        VLR     C1,K2
 515        VLR     C2,K2
 516        VLR     C3,K2
 517        VLR     C4,K2
 518        VLR     C5,K2
 519
            # Keep the initial row 3 of blocks 1..3 in T1..T3 for the
            # additions after the rounds. This overwrites the increments,
            # which are loaded from the table again further down.
 520        VLR     T1,D1
 521        VLR     T2,D2
 522        VLR     T3,D3
 523
 524.Loop_vx:
            # Column round on all six blocks, interleaved instruction by
            # instruction (a = An, b = Bn, c = Cn, d = Dn).
            # Step 1: a += b; d ^= a; d <<<= 16
 525        VAF     A0,A0,B0
 526        VAF     A1,A1,B1
 527        VAF     A2,A2,B2
 528        VAF     A3,A3,B3
 529        VAF     A4,A4,B4
 530        VAF     A5,A5,B5
 531        VX      D0,D0,A0
 532        VX      D1,D1,A1
 533        VX      D2,D2,A2
 534        VX      D3,D3,A3
 535        VX      D4,D4,A4
 536        VX      D5,D5,A5
 537        VERLLF  D0,D0,16
 538        VERLLF  D1,D1,16
 539        VERLLF  D2,D2,16
 540        VERLLF  D3,D3,16
 541        VERLLF  D4,D4,16
 542        VERLLF  D5,D5,16
 543
            # Step 2: c += d; b ^= c; b <<<= 12
 544        VAF     C0,C0,D0
 545        VAF     C1,C1,D1
 546        VAF     C2,C2,D2
 547        VAF     C3,C3,D3
 548        VAF     C4,C4,D4
 549        VAF     C5,C5,D5
 550        VX      B0,B0,C0
 551        VX      B1,B1,C1
 552        VX      B2,B2,C2
 553        VX      B3,B3,C3
 554        VX      B4,B4,C4
 555        VX      B5,B5,C5
 556        VERLLF  B0,B0,12
 557        VERLLF  B1,B1,12
 558        VERLLF  B2,B2,12
 559        VERLLF  B3,B3,12
 560        VERLLF  B4,B4,12
 561        VERLLF  B5,B5,12
 562
            # Step 3: a += b; d ^= a; d <<<= 8
 563        VAF     A0,A0,B0
 564        VAF     A1,A1,B1
 565        VAF     A2,A2,B2
 566        VAF     A3,A3,B3
 567        VAF     A4,A4,B4
 568        VAF     A5,A5,B5
 569        VX      D0,D0,A0
 570        VX      D1,D1,A1
 571        VX      D2,D2,A2
 572        VX      D3,D3,A3
 573        VX      D4,D4,A4
 574        VX      D5,D5,A5
 575        VERLLF  D0,D0,8
 576        VERLLF  D1,D1,8
 577        VERLLF  D2,D2,8
 578        VERLLF  D3,D3,8
 579        VERLLF  D4,D4,8
 580        VERLLF  D5,D5,8
 581
            # Step 4: c += d; b ^= c; b <<<= 7
 582        VAF     C0,C0,D0
 583        VAF     C1,C1,D1
 584        VAF     C2,C2,D2
 585        VAF     C3,C3,D3
 586        VAF     C4,C4,D4
 587        VAF     C5,C5,D5
 588        VX      B0,B0,C0
 589        VX      B1,B1,C1
 590        VX      B2,B2,C2
 591        VX      B3,B3,C3
 592        VX      B4,B4,C4
 593        VX      B5,B5,C5
 594        VERLLF  B0,B0,7
 595        VERLLF  B1,B1,7
 596        VERLLF  B2,B2,7
 597        VERLLF  B3,B3,7
 598        VERLLF  B4,B4,7
 599        VERLLF  B5,B5,7
 600
            # Rotate row 2 by 8 bytes, row 1 by 4 and row 3 by 12 (VSLDB with
            # the same register twice acts as a byte rotate), so that the
            # diagonals of the state line up as columns.
 601        VSLDB   C0,C0,C0,8
 602        VSLDB   C1,C1,C1,8
 603        VSLDB   C2,C2,C2,8
 604        VSLDB   C3,C3,C3,8
 605        VSLDB   C4,C4,C4,8
 606        VSLDB   C5,C5,C5,8
 607        VSLDB   B0,B0,B0,4
 608        VSLDB   B1,B1,B1,4
 609        VSLDB   B2,B2,B2,4
 610        VSLDB   B3,B3,B3,4
 611        VSLDB   B4,B4,B4,4
 612        VSLDB   B5,B5,B5,4
 613        VSLDB   D0,D0,D0,12
 614        VSLDB   D1,D1,D1,12
 615        VSLDB   D2,D2,D2,12
 616        VSLDB   D3,D3,D3,12
 617        VSLDB   D4,D4,D4,12
 618        VSLDB   D5,D5,D5,12
 619
            # Diagonal round: the same four steps on the rotated rows.
            # Step 1: a += b; d ^= a; d <<<= 16
 620        VAF     A0,A0,B0
 621        VAF     A1,A1,B1
 622        VAF     A2,A2,B2
 623        VAF     A3,A3,B3
 624        VAF     A4,A4,B4
 625        VAF     A5,A5,B5
 626        VX      D0,D0,A0
 627        VX      D1,D1,A1
 628        VX      D2,D2,A2
 629        VX      D3,D3,A3
 630        VX      D4,D4,A4
 631        VX      D5,D5,A5
 632        VERLLF  D0,D0,16
 633        VERLLF  D1,D1,16
 634        VERLLF  D2,D2,16
 635        VERLLF  D3,D3,16
 636        VERLLF  D4,D4,16
 637        VERLLF  D5,D5,16
 638
            # Step 2: c += d; b ^= c; b <<<= 12
 639        VAF     C0,C0,D0
 640        VAF     C1,C1,D1
 641        VAF     C2,C2,D2
 642        VAF     C3,C3,D3
 643        VAF     C4,C4,D4
 644        VAF     C5,C5,D5
 645        VX      B0,B0,C0
 646        VX      B1,B1,C1
 647        VX      B2,B2,C2
 648        VX      B3,B3,C3
 649        VX      B4,B4,C4
 650        VX      B5,B5,C5
 651        VERLLF  B0,B0,12
 652        VERLLF  B1,B1,12
 653        VERLLF  B2,B2,12
 654        VERLLF  B3,B3,12
 655        VERLLF  B4,B4,12
 656        VERLLF  B5,B5,12
 657
            # Step 3: a += b; d ^= a; d <<<= 8
 658        VAF     A0,A0,B0
 659        VAF     A1,A1,B1
 660        VAF     A2,A2,B2
 661        VAF     A3,A3,B3
 662        VAF     A4,A4,B4
 663        VAF     A5,A5,B5
 664        VX      D0,D0,A0
 665        VX      D1,D1,A1
 666        VX      D2,D2,A2
 667        VX      D3,D3,A3
 668        VX      D4,D4,A4
 669        VX      D5,D5,A5
 670        VERLLF  D0,D0,8
 671        VERLLF  D1,D1,8
 672        VERLLF  D2,D2,8
 673        VERLLF  D3,D3,8
 674        VERLLF  D4,D4,8
 675        VERLLF  D5,D5,8
 676
            # Step 4: c += d; b ^= c; b <<<= 7
 677        VAF     C0,C0,D0
 678        VAF     C1,C1,D1
 679        VAF     C2,C2,D2
 680        VAF     C3,C3,D3
 681        VAF     C4,C4,D4
 682        VAF     C5,C5,D5
 683        VX      B0,B0,C0
 684        VX      B1,B1,C1
 685        VX      B2,B2,C2
 686        VX      B3,B3,C3
 687        VX      B4,B4,C4
 688        VX      B5,B5,C5
 689        VERLLF  B0,B0,7
 690        VERLLF  B1,B1,7
 691        VERLLF  B2,B2,7
 692        VERLLF  B3,B3,7
 693        VERLLF  B4,B4,7
 694        VERLLF  B5,B5,7
 695
            # Undo the row rotation: row 2 by 8 bytes again, row 1 by 12 and
            # row 3 by 4.
 696        VSLDB   C0,C0,C0,8
 697        VSLDB   C1,C1,C1,8
 698        VSLDB   C2,C2,C2,8
 699        VSLDB   C3,C3,C3,8
 700        VSLDB   C4,C4,C4,8
 701        VSLDB   C5,C5,C5,8
 702        VSLDB   B0,B0,B0,12
 703        VSLDB   B1,B1,B1,12
 704        VSLDB   B2,B2,B2,12
 705        VSLDB   B3,B3,B3,12
 706        VSLDB   B4,B4,B4,12
 707        VSLDB   B5,B5,B5,12
 708        VSLDB   D0,D0,D0,4
 709        VSLDB   D1,D1,D1,4
 710        VSLDB   D2,D2,D2,4
 711        VSLDB   D3,D3,D3,4
 712        VSLDB   D4,D4,D4,4
 713        VSLDB   D5,D5,D5,4
            # Decrement %r0 and repeat until it reaches zero (10 passes).
 714        brct    %r0,.Loop_vx
 715
            # Block 0: add the input state rows. A1 and D1 are completed here
            # as well, while K0 (same register as T0) and T1 still hold their
            # values; the input load below overwrites T0..T3.
 716        VAF     A0,A0,K0
 717        VAF     B0,B0,K1
 718        VAF     C0,C0,K2
 719        VAF     D0,D0,K3
 720        VAF     A1,A1,K0
 721        VAF     D1,D1,T1                # +K[3]+1
 722
            # Reverse the bytes of every word of the block 0 keystream.
 723        VPERM   A0,A0,A0,BEPERM
 724        VPERM   B0,B0,B0,BEPERM
 725        VPERM   C0,C0,C0,BEPERM
 726        VPERM   D0,D0,D0,BEPERM
 727
            # Fewer than 0x40 bytes left (unsigned compare): byte-wise tail
            # with the keystream that is now in A0..D0.
 728        clgfi   LEN,0x40
 729        jl      .Ltail_vx
 730
            # Complete row 3 of blocks 2 and 3 before T2/T3 are overwritten
            # by the input data.
 731        VAF     D2,D2,T2                # +K[3]+2
 732        VAF     D3,D3,T3                # +K[3]+3
 733        VLM     T0,T3,0,INP,0
 734
 735        VX      A0,A0,T0
 736        VX      B0,B0,T1
 737        VX      C0,C0,T2
 738        VX      D0,D0,T3
 739
            # From here on K0 holds the constants again and T1..T3 the
            # increments 1, 2, 3.
 740        VLM     K0,T3,0,%r7,4           # re-load sigma and increments
 741
 742        VSTM    A0,D0,0,OUT,0
 743
            # Advance; leave when LEN has dropped to exactly zero.
 744        la      INP,0x40(INP)
 745        la      OUT,0x40(OUT)
 746        aghi    LEN,-0x40
 747        je      .Ldone_vx
 748
            # Block 1: rows 0 and 3 were completed above, add rows 1 and 2,
            # then byte-swap into A0..D0 (the registers .Ltail_vx stores).
 749        VAF     B1,B1,K1
 750        VAF     C1,C1,K2
 751
 752        VPERM   A0,A1,A1,BEPERM
 753        VPERM   B0,B1,B1,BEPERM
 754        VPERM   C0,C1,C1,BEPERM
 755        VPERM   D0,D1,D1,BEPERM
 756
 757        clgfi   LEN,0x40
 758        jl      .Ltail_vx
 759
            # A1..D1 are free now and receive the input data.
 760        VLM     A1,D1,0,INP,0
 761
 762        VX      A0,A0,A1
 763        VX      B0,B0,B1
 764        VX      C0,C0,C1
 765        VX      D0,D0,D1
 766
 767        VSTM    A0,D0,0,OUT,0
 768
 769        la      INP,0x40(INP)
 770        la      OUT,0x40(OUT)
 771        aghi    LEN,-0x40
 772        je      .Ldone_vx
 773
            # Block 2: row 3 was completed above.
 774        VAF     A2,A2,K0
 775        VAF     B2,B2,K1
 776        VAF     C2,C2,K2
 777
 778        VPERM   A0,A2,A2,BEPERM
 779        VPERM   B0,B2,B2,BEPERM
 780        VPERM   C0,C2,C2,BEPERM
 781        VPERM   D0,D2,D2,BEPERM
 782
 783        clgfi   LEN,0x40
 784        jl      .Ltail_vx
 785
 786        VLM     A1,D1,0,INP,0
 787
 788        VX      A0,A0,A1
 789        VX      B0,B0,B1
 790        VX      C0,C0,C1
 791        VX      D0,D0,D1
 792
 793        VSTM    A0,D0,0,OUT,0
 794
 795        la      INP,0x40(INP)
 796        la      OUT,0x40(OUT)
 797        aghi    LEN,-0x40
 798        je      .Ldone_vx
 799
            # Block 3: row 3 was completed above. D2 has been written out
            # with block 2 and is reused as scratch for counter values.
 800        VAF     A3,A3,K0
 801        VAF     B3,B3,K1
 802        VAF     C3,C3,K2
 803        VAF     D2,K3,T3                # K[3]+3
 804
 805        VPERM   A0,A3,A3,BEPERM
 806        VPERM   B0,B3,B3,BEPERM
 807        VPERM   C0,C3,C3,BEPERM
 808        VPERM   D0,D3,D3,BEPERM
 809
 810        clgfi   LEN,0x40
 811        jl      .Ltail_vx
 812
            # D3 has been copied into D0 above, so it is free as well and
            # becomes the initial row 3 of block 4.
 813        VAF     D3,D2,T1                # K[3]+4
 814        VLM     A1,D1,0,INP,0
 815
 816        VX      A0,A0,A1
 817        VX      B0,B0,B1
 818        VX      C0,C0,C1
 819        VX      D0,D0,D1
 820
 821        VSTM    A0,D0,0,OUT,0
 822
 823        la      INP,0x40(INP)
 824        la      OUT,0x40(OUT)
 825        aghi    LEN,-0x40
 826        je      .Ldone_vx
 827
            # Block 4. Also prepares the initial row 3 of block 5 in D3 and
            # advances K3 by six blocks for the next outer pass.
 828        VAF     A4,A4,K0
 829        VAF     B4,B4,K1
 830        VAF     C4,C4,K2
 831        VAF     D4,D4,D3                # +K[3]+4
 832        VAF     D3,D3,T1                # K[3]+5
 833        VAF     K3,D2,T3                # K[3]+=6
 834
 835        VPERM   A0,A4,A4,BEPERM
 836        VPERM   B0,B4,B4,BEPERM
 837        VPERM   C0,C4,C4,BEPERM
 838        VPERM   D0,D4,D4,BEPERM
 839
 840        clgfi   LEN,0x40
 841        jl      .Ltail_vx
 842
 843        VLM     A1,D1,0,INP,0
 844
 845        VX      A0,A0,A1
 846        VX      B0,B0,B1
 847        VX      C0,C0,C1
 848        VX      D0,D0,D1
 849
 850        VSTM    A0,D0,0,OUT,0
 851
 852        la      INP,0x40(INP)
 853        la      OUT,0x40(OUT)
 854        aghi    LEN,-0x40
 855        je      .Ldone_vx
 856
            # Block 5, the last one of this outer pass.
 857        VAF     A5,A5,K0
 858        VAF     B5,B5,K1
 859        VAF     C5,C5,K2
 860        VAF     D5,D5,D3                # +K[3]+5
 861
 862        VPERM   A0,A5,A5,BEPERM
 863        VPERM   B0,B5,B5,BEPERM
 864        VPERM   C0,C5,C5,BEPERM
 865        VPERM   D0,D5,D5,BEPERM
 866
 867        clgfi   LEN,0x40
 868        jl      .Ltail_vx
 869
 870        VLM     A1,D1,0,INP,0
 871
 872        VX      A0,A0,A1
 873        VX      B0,B0,B1
 874        VX      C0,C0,C1
 875        VX      D0,D0,D1
 876
 877        VSTM    A0,D0,0,OUT,0
 878
            # Advance, reset the round counter and start another pass of six
            # blocks unless LEN has dropped to exactly zero.
 879        la      INP,0x40(INP)
 880        la      OUT,0x40(OUT)
 881        lhi     %r0,10
 882        aghi    LEN,-0x40
 883        jne     .Loop_outer_vx
 884
 885.Ldone_vx:
            # Restore %r6/%r7 from where they were saved before the frame was
            # allocated, release the frame and return.
 886        lmg     %r6,%r7,FRAME+6*8(SP)
 887        la      SP,FRAME(SP)
 888        BR_EX   %r14
 889
 890.Ltail_vx:
            # Partial last block: spill the keystream in A0..D0 to the 0x40
            # bytes at 8*8(SP) inside the local frame, then XOR one byte at a
            # time. Every path into here has a non-zero LEN below 0x40.
 891        VSTM    A0,D0,8*8,SP,3
 892        lghi    %r1,0
 893
 894.Loop_tail_vx:
            # %r5 = input byte, %r6 = keystream byte (KEY and COUNTER are no
            # longer needed); store the XOR and step to the next byte.
            # brct decrements LEN and loops until it reaches zero.
 895        llgc    %r5,0(%r1,INP)
 896        llgc    %r6,8*8(%r1,SP)
 897        xr      %r6,%r5
 898        stc     %r6,0(%r1,OUT)
 899        la      %r1,1(%r1)
 900        brct    LEN,.Loop_tail_vx
 901
 902        lmg     %r6,%r7,FRAME+6*8(SP)
 903        la      SP,FRAME(SP)
 904        BR_EX   %r14
 905ENDPROC(chacha20_vx)
 906
 907.previous
 908