/*
 * ChaCha20 for s390 using vector (VX) instructions.
 *
 * Entry points in this file:
 *   chacha20_vx_4x - computes four 64-byte blocks in one pass
 *   chacha20_vx    - computes six 64-byte blocks per pass; requests of at
 *                    most 256 bytes are forwarded to chacha20_vx_4x
 *
 * NOTE(review): the original file header (license and attribution) was
 * lost in extraction and has to be restored from the upstream source.
 */

#include <linux/linkage.h>
#include <asm/nospec-insn.h>
#include <asm/vx-insn.h>

#define SP      %r15                    /* stack pointer */
#define FRAME   (16 * 8 + 4 * 8)        /* 160-byte frame allocated by chacha20_vx */

/*
 * Constant table used by both routines.  Every row is one 16-byte vector;
 * the offsets below are the displacements used by the loads in the code.
 *
 *   0x00  the four ChaCha constant words ("expand 32-byte k")
 *   0x10  counter increment 1 (word 0 only)
 *   0x20  counter increment 2 (word 0 only)
 *   0x30  counter increment 3 (word 0 only)
 *   0x40  VPERM control that reverses the bytes inside each 32-bit word
 *   0x50  per-lane block counter offsets 0,1,2,3 (chacha20_vx_4x)
 *   0x60  each constant word replicated across all four lanes (4 rows)
 *
 * NOTE(review): the table is only read by the visible code but lives in
 * .data; consider .rodata if nothing else writes it.
 */
.data
.align  32

.Lsigma:
.long   0x61707865,0x3320646e,0x79622d32,0x6b206574
.long   1,0,0,0
.long   2,0,0,0
.long   3,0,0,0
.long   0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c

.long   0,1,2,3
.long   0x61707865,0x61707865,0x61707865,0x61707865
.long   0x3320646e,0x3320646e,0x3320646e,0x3320646e
.long   0x79622d32,0x79622d32,0x79622d32,0x79622d32
.long   0x6b206574,0x6b206574,0x6b206574,0x6b206574

.previous

        /* Macro from asm/nospec-insn.h; presumably emits the thunk that
         * the BR_EX %r14 returns below branch through - confirm there. */
        GEN_BR_THUNK %r14

.text
/*
 * chacha20_vx_4x - XOR up to 256 bytes with ChaCha20 key stream, computing
 * four 64-byte blocks in parallel (one block per 32-bit vector lane).
 *
 * Arguments (registers as aliased below):
 *   OUT     %r2  destination buffer
 *   INP     %r3  source buffer
 *   LEN     %r4  byte count; the first 64-byte block is always processed,
 *                so the caller must pass LEN >= 64, and LEN must not
 *                exceed 256 (only four blocks are generated)
 *   KEY     %r5  32 bytes of key, loaded as two 16-byte vectors
 *   COUNTER %r6  16 bytes, loaded as state row 3 (counter/nonce words)
 *
 * Presumably matches a C prototype of the form
 *   void chacha20_vx_4x(u8 *out, const u8 *inp, size_t len,
 *                       const u32 *key, const u32 *counter)
 * - TODO confirm against the C caller.
 *
 * Clobbers: %r0, %r1, %r5 (tail path), OUT/INP/LEN are advanced, and the
 * vector registers %v0-%v19 and %v26-%v31.  %r6/%r7 are saved to and
 * restored from the save area at 6*8(SP).  The byte tail uses 64 bytes at
 * 8*8(SP) as scratch.
 * NOTE(review): vector registers are not saved here; presumably the caller
 * brackets the call with a vector-state save/restore - confirm.
 */

#define OUT             %r2
#define INP             %r3
#define LEN             %r4
#define KEY             %r5
#define COUNTER         %r6

#define BEPERM          %v31            /* byte-reversal VPERM control */
#define CTR             %v26            /* lane counter offsets 0,1,2,3 */

/* Original input state rows: constants, key low, key high, counter row. */
#define K0              %v16
#define K1              %v17
#define K2              %v18
#define K3              %v19

/*
 * Working state in "smashed" form: Xrn holds word n of row r (A..D) for
 * all four blocks, one block per lane.
 */
#define XA0             %v0
#define XA1             %v1
#define XA2             %v2
#define XA3             %v3

#define XB0             %v4
#define XB1             %v5
#define XB2             %v6
#define XB3             %v7

#define XC0             %v8
#define XC1             %v9
#define XC2             %v10
#define XC3             %v11

#define XD0             %v12
#define XD1             %v13
#define XD2             %v14
#define XD3             %v15

/* Temporaries: transpose scratch, then input data. */
#define XT0             %v27
#define XT1             %v28
#define XT2             %v29
#define XT3             %v30

ENTRY(chacha20_vx_4x)
        stmg    %r6,%r7,6*8(SP)         # %r7 is used as table base below

        larl    %r7,.Lsigma
        lhi     %r0,10                  # 10 double rounds = 20 rounds

        VL      K0,0,,%r7               # constant row
        VL      K1,0,,KEY               # key bytes 0..15
        VL      K2,16,,KEY              # key bytes 16..31
        VL      K3,0,,COUNTER           # counter row

        VL      BEPERM,0x40,,%r7
        VL      CTR,0x50,,%r7

        VLM     XA0,XA3,0x60,%r7,4      # row A: pre-replicated constants

        VREPF   XB0,K1,0                # replicate each state word into
        VREPF   XB1,K1,1                # all four lanes
        VREPF   XB2,K1,2
        VREPF   XB3,K1,3

        VREPF   XD0,K3,0
        VREPF   XD1,K3,1
        VREPF   XD2,K3,2
        VREPF   XD3,K3,3
        VAF     XD0,XD0,CTR             # lane n works on block counter + n

        VREPF   XC0,K2,0
        VREPF   XC1,K2,1
        VREPF   XC2,K2,2
        VREPF   XC3,K2,3

.Loop_4x:
        /* Column round: a += b; d ^= a; d <<<= 16 */
        VAF     XA0,XA0,XB0
        VX      XD0,XD0,XA0
        VERLLF  XD0,XD0,16

        VAF     XA1,XA1,XB1
        VX      XD1,XD1,XA1
        VERLLF  XD1,XD1,16

        VAF     XA2,XA2,XB2
        VX      XD2,XD2,XA2
        VERLLF  XD2,XD2,16

        VAF     XA3,XA3,XB3
        VX      XD3,XD3,XA3
        VERLLF  XD3,XD3,16

        /* c += d; b ^= c; b <<<= 12 */
        VAF     XC0,XC0,XD0
        VX      XB0,XB0,XC0
        VERLLF  XB0,XB0,12

        VAF     XC1,XC1,XD1
        VX      XB1,XB1,XC1
        VERLLF  XB1,XB1,12

        VAF     XC2,XC2,XD2
        VX      XB2,XB2,XC2
        VERLLF  XB2,XB2,12

        VAF     XC3,XC3,XD3
        VX      XB3,XB3,XC3
        VERLLF  XB3,XB3,12

        /* a += b; d ^= a; d <<<= 8 */
        VAF     XA0,XA0,XB0
        VX      XD0,XD0,XA0
        VERLLF  XD0,XD0,8

        VAF     XA1,XA1,XB1
        VX      XD1,XD1,XA1
        VERLLF  XD1,XD1,8

        VAF     XA2,XA2,XB2
        VX      XD2,XD2,XA2
        VERLLF  XD2,XD2,8

        VAF     XA3,XA3,XB3
        VX      XD3,XD3,XA3
        VERLLF  XD3,XD3,8

        /* c += d; b ^= c; b <<<= 7 */
        VAF     XC0,XC0,XD0
        VX      XB0,XB0,XC0
        VERLLF  XB0,XB0,7

        VAF     XC1,XC1,XD1
        VX      XB1,XB1,XC1
        VERLLF  XB1,XB1,7

        VAF     XC2,XC2,XD2
        VX      XB2,XB2,XC2
        VERLLF  XB2,XB2,7

        VAF     XC3,XC3,XD3
        VX      XB3,XB3,XC3
        VERLLF  XB3,XB3,7

        /*
         * Diagonal round: same four steps, but the operands are picked
         * along the diagonals (A0,B1,C2,D3), (A1,B2,C3,D0), ...
         */
        VAF     XA0,XA0,XB1
        VX      XD3,XD3,XA0
        VERLLF  XD3,XD3,16

        VAF     XA1,XA1,XB2
        VX      XD0,XD0,XA1
        VERLLF  XD0,XD0,16

        VAF     XA2,XA2,XB3
        VX      XD1,XD1,XA2
        VERLLF  XD1,XD1,16

        VAF     XA3,XA3,XB0
        VX      XD2,XD2,XA3
        VERLLF  XD2,XD2,16

        VAF     XC2,XC2,XD3
        VX      XB1,XB1,XC2
        VERLLF  XB1,XB1,12

        VAF     XC3,XC3,XD0
        VX      XB2,XB2,XC3
        VERLLF  XB2,XB2,12

        VAF     XC0,XC0,XD1
        VX      XB3,XB3,XC0
        VERLLF  XB3,XB3,12

        VAF     XC1,XC1,XD2
        VX      XB0,XB0,XC1
        VERLLF  XB0,XB0,12

        VAF     XA0,XA0,XB1
        VX      XD3,XD3,XA0
        VERLLF  XD3,XD3,8

        VAF     XA1,XA1,XB2
        VX      XD0,XD0,XA1
        VERLLF  XD0,XD0,8

        VAF     XA2,XA2,XB3
        VX      XD1,XD1,XA2
        VERLLF  XD1,XD1,8

        VAF     XA3,XA3,XB0
        VX      XD2,XD2,XA3
        VERLLF  XD2,XD2,8

        VAF     XC2,XC2,XD3
        VX      XB1,XB1,XC2
        VERLLF  XB1,XB1,7

        VAF     XC3,XC3,XD0
        VX      XB2,XB2,XC3
        VERLLF  XB2,XB2,7

        VAF     XC0,XC0,XD1
        VX      XB3,XB3,XC0
        VERLLF  XB3,XB3,7

        VAF     XC1,XC1,XD2
        VX      XB0,XB0,XC1
        VERLLF  XB0,XB0,7
        brct    %r0,.Loop_4x

        /*
         * Re-add the lane offsets: K3 (added after the transpose) only
         * carries the base counter, not the per-block +0..+3.
         */
        VAF     XD0,XD0,CTR

        /*
         * 4x4 word transpose of each row group, so that afterwards
         * Xr0..Xr3 hold row r of block 0..3 respectively.
         */
        VMRHF   XT0,XA0,XA1
        VMRHF   XT1,XA2,XA3
        VMRLF   XT2,XA0,XA1
        VMRLF   XT3,XA2,XA3
        VPDI    XA0,XT0,XT1,0b0000
        VPDI    XA1,XT0,XT1,0b0101
        VPDI    XA2,XT2,XT3,0b0000
        VPDI    XA3,XT2,XT3,0b0101

        VMRHF   XT0,XB0,XB1
        VMRHF   XT1,XB2,XB3
        VMRLF   XT2,XB0,XB1
        VMRLF   XT3,XB2,XB3
        VPDI    XB0,XT0,XT1,0b0000
        VPDI    XB1,XT0,XT1,0b0101
        VPDI    XB2,XT2,XT3,0b0000
        VPDI    XB3,XT2,XT3,0b0101

        VMRHF   XT0,XC0,XC1
        VMRHF   XT1,XC2,XC3
        VMRLF   XT2,XC0,XC1
        VMRLF   XT3,XC2,XC3
        VPDI    XC0,XT0,XT1,0b0000
        VPDI    XC1,XT0,XT1,0b0101
        VPDI    XC2,XT2,XT3,0b0000
        VPDI    XC3,XT2,XT3,0b0101

        VMRHF   XT0,XD0,XD1
        VMRHF   XT1,XD2,XD3
        VMRLF   XT2,XD0,XD1
        VMRLF   XT3,XD2,XD3
        VPDI    XD0,XT0,XT1,0b0000
        VPDI    XD1,XT0,XT1,0b0101
        VPDI    XD2,XT2,XT3,0b0000
        VPDI    XD3,XT2,XT3,0b0101

        /* Block 0: add the input state, byte-swap each word, XOR 64 bytes. */
        VAF     XA0,XA0,K0
        VAF     XB0,XB0,K1
        VAF     XC0,XC0,K2
        VAF     XD0,XD0,K3

        VPERM   XA0,XA0,XA0,BEPERM
        VPERM   XB0,XB0,XB0,BEPERM
        VPERM   XC0,XC0,XC0,BEPERM
        VPERM   XD0,XD0,XD0,BEPERM

        VLM     XT0,XT3,0,INP,0

        VX      XT0,XT0,XA0
        VX      XT1,XT1,XB0
        VX      XT2,XT2,XC0
        VX      XT3,XT3,XD0

        VSTM    XT0,XT3,0,OUT,0

        la      INP,0x40(INP)
        la      OUT,0x40(OUT)
        aghi    LEN,-0x40
        /*
         * Stop when exactly one block was requested.  Without this check
         * LEN == 0 would reach the byte tail, where brct would wrap and
         * run far past the buffers.  Matches the checks after the later
         * blocks.
         */
        je      .Ldone_4x

        /* Block 1: key stream is always staged in XA0..XD0 so that the
         * tail code can pick it up from there. */
        VAF     XA0,XA1,K0
        VAF     XB0,XB1,K1
        VAF     XC0,XC1,K2
        VAF     XD0,XD1,K3

        VPERM   XA0,XA0,XA0,BEPERM
        VPERM   XB0,XB0,XB0,BEPERM
        VPERM   XC0,XC0,XC0,BEPERM
        VPERM   XD0,XD0,XD0,BEPERM

        clgfi   LEN,0x40
        jl      .Ltail_4x               # fewer than 64 bytes left

        VLM     XT0,XT3,0,INP,0

        VX      XT0,XT0,XA0
        VX      XT1,XT1,XB0
        VX      XT2,XT2,XC0
        VX      XT3,XT3,XD0

        VSTM    XT0,XT3,0,OUT,0

        la      INP,0x40(INP)
        la      OUT,0x40(OUT)
        aghi    LEN,-0x40
        je      .Ldone_4x

        /* Block 2 */
        VAF     XA0,XA2,K0
        VAF     XB0,XB2,K1
        VAF     XC0,XC2,K2
        VAF     XD0,XD2,K3

        VPERM   XA0,XA0,XA0,BEPERM
        VPERM   XB0,XB0,XB0,BEPERM
        VPERM   XC0,XC0,XC0,BEPERM
        VPERM   XD0,XD0,XD0,BEPERM

        clgfi   LEN,0x40
        jl      .Ltail_4x

        VLM     XT0,XT3,0,INP,0

        VX      XT0,XT0,XA0
        VX      XT1,XT1,XB0
        VX      XT2,XT2,XC0
        VX      XT3,XT3,XD0

        VSTM    XT0,XT3,0,OUT,0

        la      INP,0x40(INP)
        la      OUT,0x40(OUT)
        aghi    LEN,-0x40
        je      .Ldone_4x

        /* Block 3 (last one available) */
        VAF     XA0,XA3,K0
        VAF     XB0,XB3,K1
        VAF     XC0,XC3,K2
        VAF     XD0,XD3,K3

        VPERM   XA0,XA0,XA0,BEPERM
        VPERM   XB0,XB0,XB0,BEPERM
        VPERM   XC0,XC0,XC0,BEPERM
        VPERM   XD0,XD0,XD0,BEPERM

        clgfi   LEN,0x40
        jl      .Ltail_4x

        VLM     XT0,XT3,0,INP,0

        VX      XT0,XT0,XA0
        VX      XT1,XT1,XB0
        VX      XT2,XT2,XC0
        VX      XT3,XT3,XD0

        VSTM    XT0,XT3,0,OUT,0

.Ldone_4x:
        lmg     %r6,%r7,6*8(SP)
        BR_EX   %r14

        /*
         * Partial final block: spill the 64 key stream bytes to the stack
         * scratch area and XOR the remaining LEN (1..63) bytes one by one.
         */
.Ltail_4x:
        VLR     XT0,XC0
        VLR     XT1,XD0

        VST     XA0,8*8+0x00,,SP
        VST     XB0,8*8+0x10,,SP
        VST     XT0,8*8+0x20,,SP
        VST     XT1,8*8+0x30,,SP

        lghi    %r1,0                   # byte index

.Loop_tail_4x:
        llgc    %r5,0(%r1,INP)          # input byte (KEY is dead by now)
        llgc    %r6,8*8(%r1,SP)         # key stream byte
        xr      %r6,%r5
        stc     %r6,0(%r1,OUT)
        la      %r1,1(%r1)
        brct    LEN,.Loop_tail_4x

        lmg     %r6,%r7,6*8(SP)
        BR_EX   %r14
ENDPROC(chacha20_vx_4x)

#undef OUT
#undef INP
#undef LEN
#undef KEY
#undef COUNTER

#undef BEPERM

#undef K0
#undef K1
#undef K2
#undef K3

/*
 * chacha20_vx - XOR a buffer with ChaCha20 key stream, computing six
 * 64-byte blocks per outer iteration (one block per register group
 * A0..D0 to A5..D5, each vector holding one 4-word state row).
 *
 * Arguments (registers as aliased below):
 *   OUT     %r2  destination buffer
 *   INP     %r3  source buffer
 *   LEN     %r4  byte count; values <= 256 are handed to chacha20_vx_4x
 *                before anything else is touched
 *   KEY     %r5  32 bytes of key
 *   COUNTER %r6  16 bytes, loaded as state row 3 (counter/nonce words)
 *
 * Presumably matches a C prototype of the form
 *   void chacha20_vx(u8 *out, const u8 *inp, size_t len,
 *                    const u32 *key, const u32 *counter)
 * - TODO confirm against the C caller.
 *
 * Stack: allocates FRAME bytes, stores the back chain at 0(SP) and uses
 * 64 bytes at 8*8(SP) as scratch for a partial final block.  %r6/%r7 are
 * saved in the caller-provided save area and restored on exit.
 * Clobbers: %r0, %r1, %r5 (tail path), OUT/INP/LEN are advanced, and the
 * vector registers %v0-%v31.
 * NOTE(review): vector registers are not saved here; presumably the caller
 * brackets the call with a vector-state save/restore - confirm.
 */

#define OUT             %r2
#define INP             %r3
#define LEN             %r4
#define KEY             %r5
#define COUNTER         %r6

#define BEPERM          %v31            /* byte-reversal VPERM control */

/* Input state rows.  K0 shares %v27 with T0, see the reload below. */
#define K0              %v27
#define K1              %v24
#define K2              %v25
#define K3              %v26

/* Six independent block states, rows A..D each. */
#define A0              %v0
#define B0              %v1
#define C0              %v2
#define D0              %v3

#define A1              %v4
#define B1              %v5
#define C1              %v6
#define D1              %v7

#define A2              %v8
#define B2              %v9
#define C2              %v10
#define D2              %v11

#define A3              %v12
#define B3              %v13
#define C3              %v14
#define D3              %v15

#define A4              %v16
#define B4              %v17
#define C4              %v18
#define D4              %v19

#define A5              %v20
#define B5              %v21
#define C5              %v22
#define D5              %v23

/* T1..T3 hold the counter increments 1,2,3 on entry to each outer pass. */
#define T0              %v27
#define T1              %v28
#define T2              %v29
#define T3              %v30

ENTRY(chacha20_vx)
        clgfi   LEN,256
        jle     chacha20_vx_4x          # short request: no frame set up yet
        stmg    %r6,%r7,6*8(SP)

        lghi    %r1,-FRAME
        lgr     %r0,SP
        la      SP,0(%r1,SP)            # allocate the frame
        stg     %r0,0(SP)               # back chain to the previous frame

        larl    %r7,.Lsigma
        lhi     %r0,10                  # 10 double rounds = 20 rounds

        VLM     K1,K2,0,KEY,0           # key bytes 0..31
        VL      K3,0,,COUNTER           # counter row

        VLM     K0,BEPERM,0,%r7,4       # constants, increments 1..3, BEPERM

.Loop_outer_vx:
        VLR     A0,K0
        VLR     B0,K1
        VLR     A1,K0
        VLR     B1,K1
        VLR     A2,K0
        VLR     B2,K1
        VLR     A3,K0
        VLR     B3,K1
        VLR     A4,K0
        VLR     B4,K1
        VLR     A5,K0
        VLR     B5,K1

        VLR     D0,K3
        VAF     D1,K3,T1                # counter + 1
        VAF     D2,K3,T2                # counter + 2
        VAF     D3,K3,T3                # counter + 3
        VAF     D4,D2,T2                # counter + 4
        VAF     D5,D2,T3                # counter + 5

        VLR     C0,K2
        VLR     C1,K2
        VLR     C2,K2
        VLR     C3,K2
        VLR     C4,K2
        VLR     C5,K2

        /* Keep the initial D rows of blocks 1..3 for the final addition;
         * the plain increments are reloaded from the table later. */
        VLR     T1,D1
        VLR     T2,D2
        VLR     T3,D3

.Loop_vx:
        /* Column round: a += b; d ^= a; d <<<= 16 */
        VAF     A0,A0,B0
        VAF     A1,A1,B1
        VAF     A2,A2,B2
        VAF     A3,A3,B3
        VAF     A4,A4,B4
        VAF     A5,A5,B5
        VX      D0,D0,A0
        VX      D1,D1,A1
        VX      D2,D2,A2
        VX      D3,D3,A3
        VX      D4,D4,A4
        VX      D5,D5,A5
        VERLLF  D0,D0,16
        VERLLF  D1,D1,16
        VERLLF  D2,D2,16
        VERLLF  D3,D3,16
        VERLLF  D4,D4,16
        VERLLF  D5,D5,16

        /* c += d; b ^= c; b <<<= 12 */
        VAF     C0,C0,D0
        VAF     C1,C1,D1
        VAF     C2,C2,D2
        VAF     C3,C3,D3
        VAF     C4,C4,D4
        VAF     C5,C5,D5
        VX      B0,B0,C0
        VX      B1,B1,C1
        VX      B2,B2,C2
        VX      B3,B3,C3
        VX      B4,B4,C4
        VX      B5,B5,C5
        VERLLF  B0,B0,12
        VERLLF  B1,B1,12
        VERLLF  B2,B2,12
        VERLLF  B3,B3,12
        VERLLF  B4,B4,12
        VERLLF  B5,B5,12

        /* a += b; d ^= a; d <<<= 8 */
        VAF     A0,A0,B0
        VAF     A1,A1,B1
        VAF     A2,A2,B2
        VAF     A3,A3,B3
        VAF     A4,A4,B4
        VAF     A5,A5,B5
        VX      D0,D0,A0
        VX      D1,D1,A1
        VX      D2,D2,A2
        VX      D3,D3,A3
        VX      D4,D4,A4
        VX      D5,D5,A5
        VERLLF  D0,D0,8
        VERLLF  D1,D1,8
        VERLLF  D2,D2,8
        VERLLF  D3,D3,8
        VERLLF  D4,D4,8
        VERLLF  D5,D5,8

        /* c += d; b ^= c; b <<<= 7 */
        VAF     C0,C0,D0
        VAF     C1,C1,D1
        VAF     C2,C2,D2
        VAF     C3,C3,D3
        VAF     C4,C4,D4
        VAF     C5,C5,D5
        VX      B0,B0,C0
        VX      B1,B1,C1
        VX      B2,B2,C2
        VX      B3,B3,C3
        VX      B4,B4,C4
        VX      B5,B5,C5
        VERLLF  B0,B0,7
        VERLLF  B1,B1,7
        VERLLF  B2,B2,7
        VERLLF  B3,B3,7
        VERLLF  B4,B4,7
        VERLLF  B5,B5,7

        /* Rotate rows C, B, D by 8, 4, 12 bytes so that the diagonals
         * line up as columns for the next four steps. */
        VSLDB   C0,C0,C0,8
        VSLDB   C1,C1,C1,8
        VSLDB   C2,C2,C2,8
        VSLDB   C3,C3,C3,8
        VSLDB   C4,C4,C4,8
        VSLDB   C5,C5,C5,8
        VSLDB   B0,B0,B0,4
        VSLDB   B1,B1,B1,4
        VSLDB   B2,B2,B2,4
        VSLDB   B3,B3,B3,4
        VSLDB   B4,B4,B4,4
        VSLDB   B5,B5,B5,4
        VSLDB   D0,D0,D0,12
        VSLDB   D1,D1,D1,12
        VSLDB   D2,D2,D2,12
        VSLDB   D3,D3,D3,12
        VSLDB   D4,D4,D4,12
        VSLDB   D5,D5,D5,12

        /* Diagonal round: a += b; d ^= a; d <<<= 16 */
        VAF     A0,A0,B0
        VAF     A1,A1,B1
        VAF     A2,A2,B2
        VAF     A3,A3,B3
        VAF     A4,A4,B4
        VAF     A5,A5,B5
        VX      D0,D0,A0
        VX      D1,D1,A1
        VX      D2,D2,A2
        VX      D3,D3,A3
        VX      D4,D4,A4
        VX      D5,D5,A5
        VERLLF  D0,D0,16
        VERLLF  D1,D1,16
        VERLLF  D2,D2,16
        VERLLF  D3,D3,16
        VERLLF  D4,D4,16
        VERLLF  D5,D5,16

        /* c += d; b ^= c; b <<<= 12 */
        VAF     C0,C0,D0
        VAF     C1,C1,D1
        VAF     C2,C2,D2
        VAF     C3,C3,D3
        VAF     C4,C4,D4
        VAF     C5,C5,D5
        VX      B0,B0,C0
        VX      B1,B1,C1
        VX      B2,B2,C2
        VX      B3,B3,C3
        VX      B4,B4,C4
        VX      B5,B5,C5
        VERLLF  B0,B0,12
        VERLLF  B1,B1,12
        VERLLF  B2,B2,12
        VERLLF  B3,B3,12
        VERLLF  B4,B4,12
        VERLLF  B5,B5,12

        /* a += b; d ^= a; d <<<= 8 */
        VAF     A0,A0,B0
        VAF     A1,A1,B1
        VAF     A2,A2,B2
        VAF     A3,A3,B3
        VAF     A4,A4,B4
        VAF     A5,A5,B5
        VX      D0,D0,A0
        VX      D1,D1,A1
        VX      D2,D2,A2
        VX      D3,D3,A3
        VX      D4,D4,A4
        VX      D5,D5,A5
        VERLLF  D0,D0,8
        VERLLF  D1,D1,8
        VERLLF  D2,D2,8
        VERLLF  D3,D3,8
        VERLLF  D4,D4,8
        VERLLF  D5,D5,8

        /* c += d; b ^= c; b <<<= 7 */
        VAF     C0,C0,D0
        VAF     C1,C1,D1
        VAF     C2,C2,D2
        VAF     C3,C3,D3
        VAF     C4,C4,D4
        VAF     C5,C5,D5
        VX      B0,B0,C0
        VX      B1,B1,C1
        VX      B2,B2,C2
        VX      B3,B3,C3
        VX      B4,B4,C4
        VX      B5,B5,C5
        VERLLF  B0,B0,7
        VERLLF  B1,B1,7
        VERLLF  B2,B2,7
        VERLLF  B3,B3,7
        VERLLF  B4,B4,7
        VERLLF  B5,B5,7

        /* Undo the row rotation (C by 8, B by 12, D by 4 bytes). */
        VSLDB   C0,C0,C0,8
        VSLDB   C1,C1,C1,8
        VSLDB   C2,C2,C2,8
        VSLDB   C3,C3,C3,8
        VSLDB   C4,C4,C4,8
        VSLDB   C5,C5,C5,8
        VSLDB   B0,B0,B0,12
        VSLDB   B1,B1,B1,12
        VSLDB   B2,B2,B2,12
        VSLDB   B3,B3,B3,12
        VSLDB   B4,B4,B4,12
        VSLDB   B5,B5,B5,12
        VSLDB   D0,D0,D0,4
        VSLDB   D1,D1,D1,4
        VSLDB   D2,D2,D2,4
        VSLDB   D3,D3,D3,4
        VSLDB   D4,D4,D4,4
        VSLDB   D5,D5,D5,4
        brct    %r0,.Loop_vx

        /*
         * Block 0: add the input state.  A1 and D1 are finished here as
         * well because K0 and T1 are about to be overwritten by input.
         */
        VAF     A0,A0,K0
        VAF     B0,B0,K1
        VAF     C0,C0,K2
        VAF     D0,D0,K3
        VAF     A1,A1,K0
        VAF     D1,D1,T1                # + (counter + 1)

        VPERM   A0,A0,A0,BEPERM         # byte-swap each word
        VPERM   B0,B0,B0,BEPERM
        VPERM   C0,C0,C0,BEPERM
        VPERM   D0,D0,D0,BEPERM

        clgfi   LEN,0x40
        jl      .Ltail_vx               # fewer than 64 bytes left

        VAF     D2,D2,T2                # + (counter + 2), before T2/T3 are
        VAF     D3,D3,T3                # + (counter + 3)  reused for input
        VLM     T0,T3,0,INP,0           # overwrites K0 (same register as T0)

        VX      A0,A0,T0
        VX      B0,B0,T1
        VX      C0,C0,T2
        VX      D0,D0,T3

        VLM     K0,T3,0,%r7,4           # restore constants and increments 1..3

        VSTM    A0,D0,0,OUT,0

        la      INP,0x40(INP)
        la      OUT,0x40(OUT)
        aghi    LEN,-0x40
        je      .Ldone_vx

        /* Block 1: key stream is always staged in A0..D0 for the tail. */
        VAF     B1,B1,K1
        VAF     C1,C1,K2

        VPERM   A0,A1,A1,BEPERM
        VPERM   B0,B1,B1,BEPERM
        VPERM   C0,C1,C1,BEPERM
        VPERM   D0,D1,D1,BEPERM

        clgfi   LEN,0x40
        jl      .Ltail_vx

        VLM     A1,D1,0,INP,0           # A1..D1 are free: reuse for input

        VX      A0,A0,A1
        VX      B0,B0,B1
        VX      C0,C0,C1
        VX      D0,D0,D1

        VSTM    A0,D0,0,OUT,0

        la      INP,0x40(INP)
        la      OUT,0x40(OUT)
        aghi    LEN,-0x40
        je      .Ldone_vx

        /* Block 2 */
        VAF     A2,A2,K0
        VAF     B2,B2,K1
        VAF     C2,C2,K2

        VPERM   A0,A2,A2,BEPERM
        VPERM   B0,B2,B2,BEPERM
        VPERM   C0,C2,C2,BEPERM
        VPERM   D0,D2,D2,BEPERM

        clgfi   LEN,0x40
        jl      .Ltail_vx

        VLM     A1,D1,0,INP,0

        VX      A0,A0,A1
        VX      B0,B0,B1
        VX      C0,C0,C1
        VX      D0,D0,D1

        VSTM    A0,D0,0,OUT,0

        la      INP,0x40(INP)
        la      OUT,0x40(OUT)
        aghi    LEN,-0x40
        je      .Ldone_vx

        /* Block 3 */
        VAF     A3,A3,K0
        VAF     B3,B3,K1
        VAF     C3,C3,K2
        VAF     D2,K3,T3                # D2 is free: counter + 3

        VPERM   A0,A3,A3,BEPERM
        VPERM   B0,B3,B3,BEPERM
        VPERM   C0,C3,C3,BEPERM
        VPERM   D0,D3,D3,BEPERM

        clgfi   LEN,0x40
        jl      .Ltail_vx

        VAF     D3,D2,T1                # D3 is free: counter + 4
        VLM     A1,D1,0,INP,0

        VX      A0,A0,A1
        VX      B0,B0,B1
        VX      C0,C0,C1
        VX      D0,D0,D1

        VSTM    A0,D0,0,OUT,0

        la      INP,0x40(INP)
        la      OUT,0x40(OUT)
        aghi    LEN,-0x40
        je      .Ldone_vx

        /* Block 4 */
        VAF     A4,A4,K0
        VAF     B4,B4,K1
        VAF     C4,C4,K2
        VAF     D4,D4,D3                # + (counter + 4)
        VAF     D3,D3,T1                # counter + 5
        VAF     K3,D2,T3                # advance base counter by 6 for next pass

        VPERM   A0,A4,A4,BEPERM
        VPERM   B0,B4,B4,BEPERM
        VPERM   C0,C4,C4,BEPERM
        VPERM   D0,D4,D4,BEPERM

        clgfi   LEN,0x40
        jl      .Ltail_vx

        VLM     A1,D1,0,INP,0

        VX      A0,A0,A1
        VX      B0,B0,B1
        VX      C0,C0,C1
        VX      D0,D0,D1

        VSTM    A0,D0,0,OUT,0

        la      INP,0x40(INP)
        la      OUT,0x40(OUT)
        aghi    LEN,-0x40
        je      .Ldone_vx

        /* Block 5 */
        VAF     A5,A5,K0
        VAF     B5,B5,K1
        VAF     C5,C5,K2
        VAF     D5,D5,D3                # + (counter + 5)

        VPERM   A0,A5,A5,BEPERM
        VPERM   B0,B5,B5,BEPERM
        VPERM   C0,C5,C5,BEPERM
        VPERM   D0,D5,D5,BEPERM

        clgfi   LEN,0x40
        jl      .Ltail_vx

        VLM     A1,D1,0,INP,0

        VX      A0,A0,A1
        VX      B0,B0,B1
        VX      C0,C0,C1
        VX      D0,D0,D1

        VSTM    A0,D0,0,OUT,0

        la      INP,0x40(INP)
        la      OUT,0x40(OUT)
        lhi     %r0,10                  # re-arm the round counter
        aghi    LEN,-0x40
        jne     .Loop_outer_vx          # more data: next six blocks

.Ldone_vx:
        lmg     %r6,%r7,FRAME+6*8(SP)
        la      SP,FRAME(SP)
        BR_EX   %r14

        /*
         * Partial final block: spill the 64 key stream bytes in A0..D0 to
         * the frame scratch area and XOR the remaining LEN (1..63) bytes
         * one by one.
         */
.Ltail_vx:
        VSTM    A0,D0,8*8,SP,3
        lghi    %r1,0                   # byte index

.Loop_tail_vx:
        llgc    %r5,0(%r1,INP)          # input byte (KEY is dead by now)
        llgc    %r6,8*8(%r1,SP)         # key stream byte
        xr      %r6,%r5
        stc     %r6,0(%r1,OUT)
        la      %r1,1(%r1)
        brct    LEN,.Loop_tail_vx

        lmg     %r6,%r7,FRAME+6*8(SP)
        la      SP,FRAME(SP)
        BR_EX   %r14
ENDPROC(chacha20_vx)

.previous
