linux/arch/ia64/lib/xor.S
<<
>>
Prefs
   1/*
   2 * arch/ia64/lib/xor.S
   3 *
   4 * Optimized RAID-5 checksumming functions for IA-64.
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License as published by
   8 * the Free Software Foundation; either version 2, or (at your option)
   9 * any later version.
  10 *
  11 * You should have received a copy of the GNU General Public License
  12 * (for example /usr/src/linux/COPYING); if not, write to the Free
  13 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  14 */
  15
  16#include <asm/asmmacro.h>
  17
   /*
    * xor_ia64_2: XOR two buffers together, 8 bytes per loop iteration, and
    * store the result over the first buffer.
    *
    *   in0 - length; it is shifted right by 3 to obtain the number of 8-byte
    *         words, so any remainder below 8 is dropped (presumably a byte
    *         count - confirm against the C prototype)
    *   in1 - first source and also the destination (copied to r16 and r8)
    *   in2 - second source (copied to r17)
    *
    * The loop is software-pipelined with rotating registers and br.ctop:
    * an element is loaded when p[0] is set, XORed when p[6] is set and
    * stored when p[7] is set.
    *
    * NOTE(review): there is no guard for in0 < 8; in that case ar.lc is
    * loaded with -1. Presumably callers never pass such a length - verify.
    */
   18GLOBAL_ENTRY(xor_ia64_2)
   19        .prologue
   20        .fframe 0
   21        .save ar.pfs, r31
             /* 3 inputs, 0 locals, 13 outputs, 16 rotating; old ar.pfs goes to r31 */
   22        alloc r31 = ar.pfs, 3, 0, 13, 16
   23        .save ar.lc, r30
             /* ar.lc and pr are overwritten for the loop; keep copies in r30/r29 */
   24        mov r30 = ar.lc
   25        .save pr, r29
   26        mov r29 = pr
   27        ;;
   28        .body
             /*
              * The input registers lie inside the 16-register rotating region,
              * so the pointers are copied to static registers before the loop:
              * r8 = store pointer, r16/r17 = load pointers.
              */
   29        mov r8 = in1
             /* epilog count = 8 pipeline stages (p[0]..p[7]) */
   30        mov ar.ec = 6 + 2
             /* in0 = number of 8-byte words */
   31        shr in0 = in0, 3
   32        ;;
             /* ar.lc takes the iteration count minus one */
   33        adds in0 = -1, in0
   34        mov r16 = in1
   35        mov r17 = in2
   36        ;;
   37        mov ar.lc = in0
             /* set p16 (= p[0]) only, so just the load stage is active at first */
   38        mov pr.rot = 1 << 16
   39        ;;
             /* s1/s2: 7 rotating copies each (load -> use 6 rotations later); d: result */
   40        .rotr s1[6+1], s2[6+1], d[2]
   41        .rotp p[6+2]
  420:
             /* stage 0: fetch one word from each source, post-incrementing by 8 */
   43(p[0])  ld8.nta s1[0] = [r16], 8
   44(p[0])  ld8.nta s2[0] = [r17], 8
             /* stage 6: combine the words loaded six rotations ago */
   45(p[6])  xor d[0] = s1[6], s2[6]
             /* stage 7: d[1] is the d[0] written on the previous pass */
   46(p[6+1])st8.nta [r8] = d[1], 8
   47        nop.f 0
             /* counted loop branch; rotates the registers and predicates */
   48        br.ctop.dptk.few 0b
   49        ;;
             /* restore the caller's ar.lc and all predicates, then return */
   50        mov ar.lc = r30
   51        mov pr = r29, -1
   52        br.ret.sptk.few rp
   53END(xor_ia64_2)
  54
   /*
    * xor_ia64_3: XOR three buffers together, 8 bytes per loop iteration, and
    * store the result over the first buffer.
    *
    *   in0 - length; it is shifted right by 3 to obtain the number of 8-byte
    *         words, so any remainder below 8 is dropped (presumably a byte
    *         count - confirm against the C prototype)
    *   in1 - first source and also the destination (copied to r16 and r8)
    *   in2 - second source (copied to r17)
    *   in3 - third source (copied to r18)
    *
    * The loop is software-pipelined with rotating registers and br.ctop:
    * an element is loaded when p[0] is set, XORed when p[6] is set and
    * stored when p[7] is set. The loop body has two instruction groups.
    *
    * NOTE(review): there is no guard for in0 < 8; in that case ar.lc is
    * loaded with -1. Presumably callers never pass such a length - verify.
    */
   55GLOBAL_ENTRY(xor_ia64_3)
   56        .prologue
   57        .fframe 0
   58        .save ar.pfs, r31
             /* 4 inputs, 0 locals, 20 outputs, 24 rotating; old ar.pfs goes to r31 */
   59        alloc r31 = ar.pfs, 4, 0, 20, 24
   60        .save ar.lc, r30
             /* ar.lc and pr are overwritten for the loop; keep copies in r30/r29 */
   61        mov r30 = ar.lc
   62        .save pr, r29
   63        mov r29 = pr
   64        ;;
   65        .body
             /*
              * The input registers lie inside the rotating region, so the
              * pointers are copied to static registers before the loop:
              * r8 = store pointer, r16/r17/r18 = load pointers.
              */
   66        mov r8 = in1
             /* epilog count = 8 pipeline stages (p[0]..p[7]) */
   67        mov ar.ec = 6 + 2
             /* in0 = number of 8-byte words */
   68        shr in0 = in0, 3
   69        ;;
             /* ar.lc takes the iteration count minus one */
   70        adds in0 = -1, in0
   71        mov r16 = in1
   72        mov r17 = in2
   73        ;;
   74        mov r18 = in3
   75        mov ar.lc = in0
             /* set p16 (= p[0]) only, so just the load stage is active at first */
   76        mov pr.rot = 1 << 16
   77        ;;
             /* s1..s3: 7 rotating copies each (load -> use 6 rotations later); d: result */
   78        .rotr s1[6+1], s2[6+1], s3[6+1], d[2]
   79        .rotp p[6+2]
  800:
             /* group 1: load the first two sources; start the XOR for stage 6 */
   81(p[0])  ld8.nta s1[0] = [r16], 8
   82(p[0])  ld8.nta s2[0] = [r17], 8
   83(p[6])  xor d[0] = s1[6], s2[6]
   84        ;;
             /* group 2: load the third source */
   85(p[0])  ld8.nta s3[0] = [r18], 8
             /* stage 7: d[1] is the finished d[0] from the previous pass */
   86(p[6+1])st8.nta [r8] = d[1], 8
             /* fold the third source into the partial result from group 1 */
   87(p[6])  xor d[0] = d[0], s3[6]
             /* counted loop branch; rotates the registers and predicates */
   88        br.ctop.dptk.few 0b
   89        ;;
             /* restore the caller's ar.lc and all predicates, then return */
   90        mov ar.lc = r30
   91        mov pr = r29, -1
   92        br.ret.sptk.few rp
   93END(xor_ia64_3)
  94
   /*
    * xor_ia64_4: XOR four buffers together, 8 bytes per loop iteration, and
    * store the result over the first buffer.
    *
    *   in0 - length; it is shifted right by 3 to obtain the number of 8-byte
    *         words, so any remainder below 8 is dropped (presumably a byte
    *         count - confirm against the C prototype)
    *   in1 - first source and also the destination (copied to r16 and r8)
    *   in2 - second source (copied to r17)
    *   in3 - third source (copied to r18)
    *   in4 - fourth source (copied to r19)
    *
    * The loop is software-pipelined with rotating registers and br.ctop:
    * an element is loaded when p[0] is set, XORed when p[6] is set and
    * stored when p[7] is set. r20 is a static scratch register holding the
    * s3 ^ s4 partial result within one pass.
    *
    * NOTE(review): there is no guard for in0 < 8; in that case ar.lc is
    * loaded with -1. Presumably callers never pass such a length - verify.
    */
   95GLOBAL_ENTRY(xor_ia64_4)
   96        .prologue
   97        .fframe 0
   98        .save ar.pfs, r31
             /* 5 inputs, 0 locals, 27 outputs, 32 rotating; old ar.pfs goes to r31 */
   99        alloc r31 = ar.pfs, 5, 0, 27, 32
  100        .save ar.lc, r30
             /* ar.lc and pr are overwritten for the loop; keep copies in r30/r29 */
  101        mov r30 = ar.lc
  102        .save pr, r29
  103        mov r29 = pr
  104        ;;
  105        .body
             /*
              * The input registers lie inside the rotating region, so the
              * pointers are copied to static registers before the loop:
              * r8 = store pointer, r16..r19 = load pointers.
              */
  106        mov r8 = in1
             /* epilog count = 8 pipeline stages (p[0]..p[7]) */
  107        mov ar.ec = 6 + 2
             /* in0 = number of 8-byte words */
  108        shr in0 = in0, 3
  109        ;;
             /* ar.lc takes the iteration count minus one */
  110        adds in0 = -1, in0
  111        mov r16 = in1
  112        mov r17 = in2
  113        ;;
  114        mov r18 = in3
  115        mov ar.lc = in0
             /* set p16 (= p[0]) only, so just the load stage is active at first */
  116        mov pr.rot = 1 << 16
  117        mov r19 = in4
  118        ;;
             /* s1..s4: 7 rotating copies each (load -> use 6 rotations later); d: result */
  119        .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2]
  120        .rotp p[6+2]
 1210:
             /* group 1: load all four sources; form two independent partial XORs */
  122(p[0])  ld8.nta s1[0] = [r16], 8
  123(p[0])  ld8.nta s2[0] = [r17], 8
  124(p[6])  xor d[0] = s1[6], s2[6]
  125(p[0])  ld8.nta s3[0] = [r18], 8
  126(p[0])  ld8.nta s4[0] = [r19], 8
  127(p[6])  xor r20 = s3[6], s4[6]
  128        ;;
             /* group 2, stage 7: d[1] is the finished d[0] from the previous pass */
  129(p[6+1])st8.nta [r8] = d[1], 8
             /* merge the two partial results from group 1 */
  130(p[6])  xor d[0] = d[0], r20
             /* counted loop branch; rotates the registers and predicates */
  131        br.ctop.dptk.few 0b
  132        ;;
             /* restore the caller's ar.lc and all predicates, then return */
  133        mov ar.lc = r30
  134        mov pr = r29, -1
  135        br.ret.sptk.few rp
  136END(xor_ia64_4)
 137
  /*
   * xor_ia64_5: XOR five buffers together, 8 bytes per loop iteration, and
   * store the result over the first buffer.
   *
   *   in0 - length; it is shifted right by 3 to obtain the number of 8-byte
   *         words, so any remainder below 8 is dropped (presumably a byte
   *         count - confirm against the C prototype)
   *   in1 - first source and also the destination (copied to r16 and r8)
   *   in2 - second source (copied to r17)
   *   in3 - third source (copied to r18)
   *   in4 - fourth source (copied to r19)
   *   in5 - fifth source (copied to r20)
   *
   * The loop is software-pipelined with rotating registers and br.ctop:
   * an element is loaded when p[0] is set, XORed when p[6] is set and
   * stored when p[7] is set. The loop body has three instruction groups;
   * r21 is a static scratch register holding the s3 ^ s4 partial result.
   *
   * NOTE(review): there is no guard for in0 < 8; in that case ar.lc is
   * loaded with -1. Presumably callers never pass such a length - verify.
   */
  138GLOBAL_ENTRY(xor_ia64_5)
  139        .prologue
  140        .fframe 0
  141        .save ar.pfs, r31
             /* 6 inputs, 0 locals, 34 outputs, 40 rotating; old ar.pfs goes to r31 */
  142        alloc r31 = ar.pfs, 6, 0, 34, 40
  143        .save ar.lc, r30
             /* ar.lc and pr are overwritten for the loop; keep copies in r30/r29 */
  144        mov r30 = ar.lc
  145        .save pr, r29
  146        mov r29 = pr
  147        ;;
  148        .body
             /*
              * The input registers lie inside the rotating region, so the
              * pointers are copied to static registers before the loop:
              * r8 = store pointer, r16..r20 = load pointers.
              */
  149        mov r8 = in1
             /* epilog count = 8 pipeline stages (p[0]..p[7]) */
  150        mov ar.ec = 6 + 2
             /* in0 = number of 8-byte words */
  151        shr in0 = in0, 3
  152        ;;
             /* ar.lc takes the iteration count minus one */
  153        adds in0 = -1, in0
  154        mov r16 = in1
  155        mov r17 = in2
  156        ;;
  157        mov r18 = in3
  158        mov ar.lc = in0
             /* set p16 (= p[0]) only, so just the load stage is active at first */
  159        mov pr.rot = 1 << 16
  160        mov r19 = in4
  161        mov r20 = in5
  162        ;;
             /* s1..s5: 7 rotating copies each (load -> use 6 rotations later); d: result */
  163        .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2]
  164        .rotp p[6+2]
 1650:
             /* group 1: load four sources; form two independent partial XORs */
  166(p[0])  ld8.nta s1[0] = [r16], 8
  167(p[0])  ld8.nta s2[0] = [r17], 8
  168(p[6])  xor d[0] = s1[6], s2[6]
  169(p[0])  ld8.nta s3[0] = [r18], 8
  170(p[0])  ld8.nta s4[0] = [r19], 8
  171(p[6])  xor r21 = s3[6], s4[6]
  172        ;;
             /* group 2: load the fifth source */
  173(p[0])  ld8.nta s5[0] = [r20], 8
             /* stage 7: d[1] is the finished d[0] from the previous pass */
  174(p[6+1])st8.nta [r8] = d[1], 8
             /* merge the two partial results from group 1 */
  175(p[6])  xor d[0] = d[0], r21
  176        ;;
             /* group 3: fold in the fifth source to finish this element */
  177(p[6])    xor d[0] = d[0], s5[6]
  178        nop.f 0
             /* counted loop branch; rotates the registers and predicates */
  179        br.ctop.dptk.few 0b
  180        ;;
             /* restore the caller's ar.lc and all predicates, then return */
  181        mov ar.lc = r30
  182        mov pr = r29, -1
  183        br.ret.sptk.few rp
  184END(xor_ia64_5)
 185