linux/arch/arm/include/asm/xor.h
<<
>>
Prefs
   1/*
   2 *  arch/arm/include/asm/xor.h
   3 *
   4 *  Copyright (C) 2001 Russell King
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License version 2 as
   8 * published by the Free Software Foundation.
   9 */
  10#include <linux/hardirq.h>
  11#include <asm-generic/xor.h>
  12#include <asm/hwcap.h>
  13#include <asm/neon.h>
  14
  15#define __XOR(a1, a2) a1 ^= a2
  16
  17#define GET_BLOCK_2(dst) \
  18        __asm__("ldmia  %0, {%1, %2}" \
  19                : "=r" (dst), "=r" (a1), "=r" (a2) \
  20                : "0" (dst))
  21
  22#define GET_BLOCK_4(dst) \
  23        __asm__("ldmia  %0, {%1, %2, %3, %4}" \
  24                : "=r" (dst), "=r" (a1), "=r" (a2), "=r" (a3), "=r" (a4) \
  25                : "0" (dst))
  26
  27#define XOR_BLOCK_2(src) \
  28        __asm__("ldmia  %0!, {%1, %2}" \
  29                : "=r" (src), "=r" (b1), "=r" (b2) \
  30                : "0" (src)); \
  31        __XOR(a1, b1); __XOR(a2, b2);
  32
  33#define XOR_BLOCK_4(src) \
  34        __asm__("ldmia  %0!, {%1, %2, %3, %4}" \
  35                : "=r" (src), "=r" (b1), "=r" (b2), "=r" (b3), "=r" (b4) \
  36                : "0" (src)); \
  37        __XOR(a1, b1); __XOR(a2, b2); __XOR(a3, b3); __XOR(a4, b4)
  38
  39#define PUT_BLOCK_2(dst) \
  40        __asm__ __volatile__("stmia     %0!, {%2, %3}" \
  41                : "=r" (dst) \
  42                : "0" (dst), "r" (a1), "r" (a2))
  43
  44#define PUT_BLOCK_4(dst) \
  45        __asm__ __volatile__("stmia     %0!, {%2, %3, %4, %5}" \
  46                : "=r" (dst) \
  47                : "0" (dst), "r" (a1), "r" (a2), "r" (a3), "r" (a4))
  48
  49static void
  50xor_arm4regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
  51{
  52        unsigned int lines = bytes / sizeof(unsigned long) / 4;
  53        register unsigned int a1 __asm__("r4");
  54        register unsigned int a2 __asm__("r5");
  55        register unsigned int a3 __asm__("r6");
  56        register unsigned int a4 __asm__("r7");
  57        register unsigned int b1 __asm__("r8");
  58        register unsigned int b2 __asm__("r9");
  59        register unsigned int b3 __asm__("ip");
  60        register unsigned int b4 __asm__("lr");
  61
  62        do {
  63                GET_BLOCK_4(p1);
  64                XOR_BLOCK_4(p2);
  65                PUT_BLOCK_4(p1);
  66        } while (--lines);
  67}
  68
  69static void
  70xor_arm4regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
  71                unsigned long *p3)
  72{
  73        unsigned int lines = bytes / sizeof(unsigned long) / 4;
  74        register unsigned int a1 __asm__("r4");
  75        register unsigned int a2 __asm__("r5");
  76        register unsigned int a3 __asm__("r6");
  77        register unsigned int a4 __asm__("r7");
  78        register unsigned int b1 __asm__("r8");
  79        register unsigned int b2 __asm__("r9");
  80        register unsigned int b3 __asm__("ip");
  81        register unsigned int b4 __asm__("lr");
  82
  83        do {
  84                GET_BLOCK_4(p1);
  85                XOR_BLOCK_4(p2);
  86                XOR_BLOCK_4(p3);
  87                PUT_BLOCK_4(p1);
  88        } while (--lines);
  89}
  90
  91static void
  92xor_arm4regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
  93                unsigned long *p3, unsigned long *p4)
  94{
  95        unsigned int lines = bytes / sizeof(unsigned long) / 2;
  96        register unsigned int a1 __asm__("r8");
  97        register unsigned int a2 __asm__("r9");
  98        register unsigned int b1 __asm__("ip");
  99        register unsigned int b2 __asm__("lr");
 100
 101        do {
 102                GET_BLOCK_2(p1);
 103                XOR_BLOCK_2(p2);
 104                XOR_BLOCK_2(p3);
 105                XOR_BLOCK_2(p4);
 106                PUT_BLOCK_2(p1);
 107        } while (--lines);
 108}
 109
 110static void
 111xor_arm4regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 112                unsigned long *p3, unsigned long *p4, unsigned long *p5)
 113{
 114        unsigned int lines = bytes / sizeof(unsigned long) / 2;
 115        register unsigned int a1 __asm__("r8");
 116        register unsigned int a2 __asm__("r9");
 117        register unsigned int b1 __asm__("ip");
 118        register unsigned int b2 __asm__("lr");
 119
 120        do {
 121                GET_BLOCK_2(p1);
 122                XOR_BLOCK_2(p2);
 123                XOR_BLOCK_2(p3);
 124                XOR_BLOCK_2(p4);
 125                XOR_BLOCK_2(p5);
 126                PUT_BLOCK_2(p1);
 127        } while (--lines);
 128}
 129
 130static struct xor_block_template xor_block_arm4regs = {
 131        .name   = "arm4regs",
 132        .do_2   = xor_arm4regs_2,
 133        .do_3   = xor_arm4regs_3,
 134        .do_4   = xor_arm4regs_4,
 135        .do_5   = xor_arm4regs_5,
 136};
 137
 138#undef XOR_TRY_TEMPLATES
 139#define XOR_TRY_TEMPLATES                       \
 140        do {                                    \
 141                xor_speed(&xor_block_arm4regs); \
 142                xor_speed(&xor_block_8regs);    \
 143                xor_speed(&xor_block_32regs);   \
 144                NEON_TEMPLATES;                 \
 145        } while (0)
 146
 147#ifdef CONFIG_KERNEL_MODE_NEON
 148
 149extern struct xor_block_template const xor_block_neon_inner;
 150
 151static void
 152xor_neon_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 153{
 154        if (in_interrupt()) {
 155                xor_arm4regs_2(bytes, p1, p2);
 156        } else {
 157                kernel_neon_begin();
 158                xor_block_neon_inner.do_2(bytes, p1, p2);
 159                kernel_neon_end();
 160        }
 161}
 162
 163static void
 164xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 165                unsigned long *p3)
 166{
 167        if (in_interrupt()) {
 168                xor_arm4regs_3(bytes, p1, p2, p3);
 169        } else {
 170                kernel_neon_begin();
 171                xor_block_neon_inner.do_3(bytes, p1, p2, p3);
 172                kernel_neon_end();
 173        }
 174}
 175
 176static void
 177xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 178                unsigned long *p3, unsigned long *p4)
 179{
 180        if (in_interrupt()) {
 181                xor_arm4regs_4(bytes, p1, p2, p3, p4);
 182        } else {
 183                kernel_neon_begin();
 184                xor_block_neon_inner.do_4(bytes, p1, p2, p3, p4);
 185                kernel_neon_end();
 186        }
 187}
 188
 189static void
 190xor_neon_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 191                unsigned long *p3, unsigned long *p4, unsigned long *p5)
 192{
 193        if (in_interrupt()) {
 194                xor_arm4regs_5(bytes, p1, p2, p3, p4, p5);
 195        } else {
 196                kernel_neon_begin();
 197                xor_block_neon_inner.do_5(bytes, p1, p2, p3, p4, p5);
 198                kernel_neon_end();
 199        }
 200}
 201
 202static struct xor_block_template xor_block_neon = {
 203        .name   = "neon",
 204        .do_2   = xor_neon_2,
 205        .do_3   = xor_neon_3,
 206        .do_4   = xor_neon_4,
 207        .do_5   = xor_neon_5
 208};
 209
 210#define NEON_TEMPLATES  \
 211        do { if (cpu_has_neon()) xor_speed(&xor_block_neon); } while (0)
 212#else
 213#define NEON_TEMPLATES
 214#endif
 215