linux/arch/ia64/lib/xor.S
<<
>>
Prefs
   1/* SPDX-License-Identifier: GPL-2.0-or-later */
   2/*
   3 * arch/ia64/lib/xor.S
   4 *
   5 * Optimized RAID-5 checksumming functions for IA-64.
   6 */
   7
   8#include <asm/asmmacro.h>
   9#include <asm/export.h>
  10
  /*
   * xor_ia64_2(in0, in1, in2)
   *
   * XORs 8-byte words read through in1 and in2 and stores each result back
   * through in1, one word per loop iteration.
   *
   *   in0  size argument.  It is shifted right by 3 and decremented to form
   *        the loop count, so it is presumably a byte count that is a
   *        non-zero multiple of 8 -- TODO confirm against the callers.
   *   in1  first source, also the destination (read via r16, written via r8)
   *   in2  second source (read via r17)
   *
   * Static registers written: r8, r16, r17, r29, r30, r31.  ar.lc and pr are
   * saved on entry and restored before the return; ar.ec is overwritten.
   *
   * The loop is software-pipelined over 6+2 stages using rotating registers:
   * p[0] gates the loads, p[6] the xor and p[6+1] the store.
   */
  11GLOBAL_ENTRY(xor_ia64_2)
  12        .prologue
  13        .fframe 0
            /* 3 inputs, 0 locals, 13 outputs, 16 rotating; previous ar.pfs -> r31 */
  14        .save ar.pfs, r31
  15        alloc r31 = ar.pfs, 3, 0, 13, 16
            /* keep the caller's loop count and predicates; the loop below overwrites both */
  16        .save ar.lc, r30
  17        mov r30 = ar.lc
  18        .save pr, r29
  19        mov r29 = pr
  20        ;;
  21        .body
            /* r8 = store pointer; epilog count = 6+2 stages; in0 = size / 8 */
  22        mov r8 = in1
  23        mov ar.ec = 6 + 2
  24        shr in0 = in0, 3
  25        ;;
            /* in0 = (size / 8) - 1 for ar.lc; r16/r17 = load pointers for the two sources */
  26        adds in0 = -1, in0
  27        mov r16 = in1
  28        mov r17 = in2
  29        ;;
  30        mov ar.lc = in0
            /* 1 << 16 sets p16 and clears the other rotating predicates, so only the first stage starts enabled */
  31        mov pr.rot = 1 << 16
  32        ;;
            /* name the rotating registers: 7 + 7 + 2 = 16 general registers, 8 stage predicates */
  33        .rotr s1[6+1], s2[6+1], d[2]
  34        .rotp p[6+2]
            /* loop top, target of the 0b branch below */
  350:
            /* stage 0: load the next word from each source (.nta hint), post-incrementing the pointer by 8 */
  36(p[0])  ld8.nta s1[0] = [r16], 8
  37(p[0])  ld8.nta s2[0] = [r17], 8
            /* stage 6: xor the two words that were loaded six rotations earlier */
  38(p[6])  xor d[0] = s1[6], s2[6]
            /* stage 7: store the result from the previous rotation (then d[0], now d[1]); r8 += 8 */
  39(p[6+1])st8.nta [r8] = d[1], 8
            /* NOTE(review): F-unit no-op, presumably only there to fill the bundle -- confirm */
  40        nop.f 0
            /* counted-loop branch: rotates the registers/predicates and steps ar.lc, then ar.ec */
  41        br.ctop.dptk.few 0b
  42        ;;
            /* restore the caller's ar.lc and all predicates (mask -1), then return */
  43        mov ar.lc = r30
  44        mov pr = r29, -1
  45        br.ret.sptk.few rp
  46END(xor_ia64_2)
  47EXPORT_SYMBOL(xor_ia64_2)
  48
  /*
   * xor_ia64_3(in0, in1, in2, in3)
   *
   * XORs 8-byte words read through in1, in2 and in3 and stores each result
   * back through in1, one word per loop iteration.
   *
   *   in0  size argument.  It is shifted right by 3 and decremented to form
   *        the loop count, so it is presumably a byte count that is a
   *        non-zero multiple of 8 -- TODO confirm against the callers.
   *   in1  first source, also the destination (read via r16, written via r8)
   *   in2  second source (read via r17)
   *   in3  third source (read via r18)
   *
   * Static registers written: r8, r16-r18, r29, r30, r31.  ar.lc and pr are
   * saved on entry and restored before the return; ar.ec is overwritten.
   *
   * The loop is software-pipelined over 6+2 stages using rotating registers:
   * p[0] gates the loads, p[6] the xors and p[6+1] the store.  Each iteration
   * is split into two instruction groups by a stop (;;).
   */
  49GLOBAL_ENTRY(xor_ia64_3)
  50        .prologue
  51        .fframe 0
            /* 4 inputs, 0 locals, 20 outputs, 24 rotating; previous ar.pfs -> r31 */
  52        .save ar.pfs, r31
  53        alloc r31 = ar.pfs, 4, 0, 20, 24
            /* keep the caller's loop count and predicates; the loop below overwrites both */
  54        .save ar.lc, r30
  55        mov r30 = ar.lc
  56        .save pr, r29
  57        mov r29 = pr
  58        ;;
  59        .body
            /* r8 = store pointer; epilog count = 6+2 stages; in0 = size / 8 */
  60        mov r8 = in1
  61        mov ar.ec = 6 + 2
  62        shr in0 = in0, 3
  63        ;;
            /* in0 = (size / 8) - 1 for ar.lc; r16/r17/r18 = load pointers for the three sources */
  64        adds in0 = -1, in0
  65        mov r16 = in1
  66        mov r17 = in2
  67        ;;
  68        mov r18 = in3
  69        mov ar.lc = in0
            /* 1 << 16 sets p16 and clears the other rotating predicates, so only the first stage starts enabled */
  70        mov pr.rot = 1 << 16
  71        ;;
            /* name the rotating registers: 3 * 7 + 2 = 23 of the 24 general registers, 8 stage predicates */
  72        .rotr s1[6+1], s2[6+1], s3[6+1], d[2]
  73        .rotp p[6+2]
            /* loop top, target of the 0b branch below */
  740:
            /* group 1: load s1 and s2 (stage 0); start the result as s1 ^ s2 from six rotations earlier (stage 6) */
  75(p[0])  ld8.nta s1[0] = [r16], 8
  76(p[0])  ld8.nta s2[0] = [r17], 8
  77(p[6])  xor d[0] = s1[6], s2[6]
  78        ;;
            /* group 2: load s3 (stage 0); store the previous rotation's result d[1] (stage 7); */
            /* fold s3 into the d[0] written in group 1 (stage 6) */
  79(p[0])  ld8.nta s3[0] = [r18], 8
  80(p[6+1])st8.nta [r8] = d[1], 8
  81(p[6])  xor d[0] = d[0], s3[6]
            /* counted-loop branch: rotates the registers/predicates and steps ar.lc, then ar.ec */
  82        br.ctop.dptk.few 0b
  83        ;;
            /* restore the caller's ar.lc and all predicates (mask -1), then return */
  84        mov ar.lc = r30
  85        mov pr = r29, -1
  86        br.ret.sptk.few rp
  87END(xor_ia64_3)
  88EXPORT_SYMBOL(xor_ia64_3)
  89
  /*
   * xor_ia64_4(in0, in1, in2, in3, in4)
   *
   * XORs 8-byte words read through in1, in2, in3 and in4 and stores each
   * result back through in1, one word per loop iteration.
   *
   *   in0  size argument.  It is shifted right by 3 and decremented to form
   *        the loop count, so it is presumably a byte count that is a
   *        non-zero multiple of 8 -- TODO confirm against the callers.
   *   in1  first source, also the destination (read via r16, written via r8)
   *   in2  second source (read via r17)
   *   in3  third source (read via r18)
   *   in4  fourth source (read via r19)
   *
   * Static registers written: r8, r16-r20, r29, r30, r31.  ar.lc and pr are
   * saved on entry and restored before the return; ar.ec is overwritten.
   *
   * The loop is software-pipelined over 6+2 stages using rotating registers:
   * p[0] gates the loads, p[6] the xors and p[6+1] the store.  r20 is a
   * non-rotating temporary carrying s3 ^ s4 across the stop inside the loop.
   */
  90GLOBAL_ENTRY(xor_ia64_4)
  91        .prologue
  92        .fframe 0
            /* 5 inputs, 0 locals, 27 outputs, 32 rotating; previous ar.pfs -> r31 */
  93        .save ar.pfs, r31
  94        alloc r31 = ar.pfs, 5, 0, 27, 32
            /* keep the caller's loop count and predicates; the loop below overwrites both */
  95        .save ar.lc, r30
  96        mov r30 = ar.lc
  97        .save pr, r29
  98        mov r29 = pr
  99        ;;
 100        .body
            /* r8 = store pointer; epilog count = 6+2 stages; in0 = size / 8 */
 101        mov r8 = in1
 102        mov ar.ec = 6 + 2
 103        shr in0 = in0, 3
 104        ;;
            /* in0 = (size / 8) - 1 for ar.lc; r16..r19 = load pointers for the four sources */
 105        adds in0 = -1, in0
 106        mov r16 = in1
 107        mov r17 = in2
 108        ;;
 109        mov r18 = in3
 110        mov ar.lc = in0
            /* 1 << 16 sets p16 and clears the other rotating predicates, so only the first stage starts enabled */
 111        mov pr.rot = 1 << 16
 112        mov r19 = in4
 113        ;;
            /* name the rotating registers: 4 * 7 + 2 = 30 of the 32 general registers, 8 stage predicates */
 114        .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2]
 115        .rotp p[6+2]
            /* loop top, target of the 0b branch below */
 1160:
            /* group 1: load all four sources (stage 0); for the words loaded six rotations earlier */
            /* compute d[0] = s1 ^ s2 and r20 = s3 ^ s4 (stage 6) */
 117(p[0])  ld8.nta s1[0] = [r16], 8
 118(p[0])  ld8.nta s2[0] = [r17], 8
 119(p[6])  xor d[0] = s1[6], s2[6]
 120(p[0])  ld8.nta s3[0] = [r18], 8
 121(p[0])  ld8.nta s4[0] = [r19], 8
 122(p[6])  xor r20 = s3[6], s4[6]
 123        ;;
            /* group 2: store the previous rotation's result d[1] (stage 7); merge the two partial xors into d[0] */
 124(p[6+1])st8.nta [r8] = d[1], 8
 125(p[6])  xor d[0] = d[0], r20
            /* counted-loop branch: rotates the registers/predicates and steps ar.lc, then ar.ec */
 126        br.ctop.dptk.few 0b
 127        ;;
            /* restore the caller's ar.lc and all predicates (mask -1), then return */
 128        mov ar.lc = r30
 129        mov pr = r29, -1
 130        br.ret.sptk.few rp
 131END(xor_ia64_4)
 132EXPORT_SYMBOL(xor_ia64_4)
 133
 /*
  * xor_ia64_5(in0, in1, in2, in3, in4, in5)
  *
  * XORs 8-byte words read through in1, in2, in3, in4 and in5 and stores each
  * result back through in1, one word per loop iteration.
  *
  *   in0  size argument.  It is shifted right by 3 and decremented to form
  *        the loop count, so it is presumably a byte count that is a
  *        non-zero multiple of 8 -- TODO confirm against the callers.
  *   in1  first source, also the destination (read via r16, written via r8)
  *   in2  second source (read via r17)
  *   in3  third source (read via r18)
  *   in4  fourth source (read via r19)
  *   in5  fifth source (read via r20)
  *
  * Static registers written: r8, r16-r21, r29, r30, r31.  ar.lc and pr are
  * saved on entry and restored before the return; ar.ec is overwritten.
  *
  * The loop is software-pipelined over 6+2 stages using rotating registers:
  * p[0] gates the loads, p[6] the xors and p[6+1] the store.  r21 is a
  * non-rotating temporary carrying s3 ^ s4 across the first stop in the loop.
  */
 134GLOBAL_ENTRY(xor_ia64_5)
 135        .prologue
 136        .fframe 0
            /* 6 inputs, 0 locals, 34 outputs, 40 rotating; previous ar.pfs -> r31 */
 137        .save ar.pfs, r31
 138        alloc r31 = ar.pfs, 6, 0, 34, 40
            /* keep the caller's loop count and predicates; the loop below overwrites both */
 139        .save ar.lc, r30
 140        mov r30 = ar.lc
 141        .save pr, r29
 142        mov r29 = pr
 143        ;;
 144        .body
            /* r8 = store pointer; epilog count = 6+2 stages; in0 = size / 8 */
 145        mov r8 = in1
 146        mov ar.ec = 6 + 2
 147        shr in0 = in0, 3
 148        ;;
            /* in0 = (size / 8) - 1 for ar.lc; r16..r20 = load pointers for the five sources */
 149        adds in0 = -1, in0
 150        mov r16 = in1
 151        mov r17 = in2
 152        ;;
 153        mov r18 = in3
 154        mov ar.lc = in0
            /* 1 << 16 sets p16 and clears the other rotating predicates, so only the first stage starts enabled */
 155        mov pr.rot = 1 << 16
 156        mov r19 = in4
 157        mov r20 = in5
 158        ;;
            /* name the rotating registers: 5 * 7 + 2 = 37 of the 40 general registers, 8 stage predicates */
 159        .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2]
 160        .rotp p[6+2]
            /* loop top, target of the 0b branch below */
 1610:
            /* group 1: load s1..s4 (stage 0); for the words loaded six rotations earlier */
            /* compute d[0] = s1 ^ s2 and r21 = s3 ^ s4 (stage 6) */
 162(p[0])  ld8.nta s1[0] = [r16], 8
 163(p[0])  ld8.nta s2[0] = [r17], 8
 164(p[6])  xor d[0] = s1[6], s2[6]
 165(p[0])  ld8.nta s3[0] = [r18], 8
 166(p[0])  ld8.nta s4[0] = [r19], 8
 167(p[6])  xor r21 = s3[6], s4[6]
 168        ;;
            /* group 2: load s5 (stage 0); store the previous rotation's result d[1] (stage 7); */
            /* merge the two partial xors into d[0] (stage 6) */
 169(p[0])  ld8.nta s5[0] = [r20], 8
 170(p[6+1])st8.nta [r8] = d[1], 8
 171(p[6])  xor d[0] = d[0], r21
 172        ;;
            /* group 3: fold the fifth source word into d[0] (stage 6) */
 173(p[6])    xor d[0] = d[0], s5[6]
            /* NOTE(review): F-unit no-op, presumably only there to fill the bundle -- confirm */
 174        nop.f 0
            /* counted-loop branch: rotates the registers/predicates and steps ar.lc, then ar.ec */
 175        br.ctop.dptk.few 0b
 176        ;;
            /* restore the caller's ar.lc and all predicates (mask -1), then return */
 177        mov ar.lc = r30
 178        mov pr = r29, -1
 179        br.ret.sptk.few rp
 180END(xor_ia64_5)
 181EXPORT_SYMBOL(xor_ia64_5)
 182