linux/arch/arm/include/asm/xor.h
<<
>>
Prefs
   /* SPDX-License-Identifier: GPL-2.0-only */
   /*
    *  arch/arm/include/asm/xor.h
    *
    *  Copyright (C) 2001 Russell King
    */
   7#include <linux/hardirq.h>
   8#include <asm-generic/xor.h>
   9#include <asm/hwcap.h>
  10#include <asm/neon.h>
  11
  /*
   * Building blocks for the arm4regs XOR routines.
   *
   * Each macro refers by name to locals of the function that expands it:
   * a1..a4 hold the running result and b1..b4 receive the words loaded from
   * a source buffer.  Only the names a given macro mentions need to exist.
   *
   * Every asm statement carries a "memory" clobber because it reads or writes
   * memory through a pointer register, and that memory is not named in the
   * operand lists.
   */

  /* XOR a2 into a1 in place; parenthesised so it is safe inside expressions. */
  #define __XOR(a1, a2) ((a1) ^= (a2))

  /*
   * Load two words from dst into a1 and a2.  The instruction has no
   * write-back ("!"), so dst still addresses the start of the block afterwards.
   */
  #define GET_BLOCK_2(dst) \
          __asm__("ldmia  %0, {%1, %2}" \
                  : "=r" (dst), "=r" (a1), "=r" (a2) \
                  : "0" (dst) \
                  : "memory")

  /* Four-word form of GET_BLOCK_2: loads a1..a4 and leaves dst unchanged. */
  #define GET_BLOCK_4(dst) \
          __asm__("ldmia  %0, {%1, %2, %3, %4}" \
                  : "=r" (dst), "=r" (a1), "=r" (a2), "=r" (a3), "=r" (a4) \
                  : "0" (dst) \
                  : "memory")

  /*
   * Load two words from src into b1 and b2, advancing src past them
   * (write-back), then XOR them into a1 and a2.  Wrapped in do/while (0) so
   * the macro behaves as a single statement and needs a trailing semicolon.
   */
  #define XOR_BLOCK_2(src) \
          do { \
                  __asm__("ldmia  %0!, {%1, %2}" \
                          : "=r" (src), "=r" (b1), "=r" (b2) \
                          : "0" (src) \
                          : "memory"); \
                  __XOR(a1, b1); \
                  __XOR(a2, b2); \
          } while (0)

  /* Four-word form of XOR_BLOCK_2: loads b1..b4 and folds them into a1..a4. */
  #define XOR_BLOCK_4(src) \
          do { \
                  __asm__("ldmia  %0!, {%1, %2, %3, %4}" \
                          : "=r" (src), "=r" (b1), "=r" (b2), "=r" (b3), "=r" (b4) \
                          : "0" (src) \
                          : "memory"); \
                  __XOR(a1, b1); \
                  __XOR(a2, b2); \
                  __XOR(a3, b3); \
                  __XOR(a4, b4); \
          } while (0)

  /* Store a1 and a2 to dst and advance dst past them (write-back). */
  #define PUT_BLOCK_2(dst) \
          __asm__ __volatile__("stmia     %0!, {%2, %3}" \
                  : "=r" (dst) \
                  : "0" (dst), "r" (a1), "r" (a2) \
                  : "memory")

  /* Four-word form of PUT_BLOCK_2: stores a1..a4 and advances dst. */
  #define PUT_BLOCK_4(dst) \
          __asm__ __volatile__("stmia     %0!, {%2, %3, %4, %5}" \
                  : "=r" (dst) \
                  : "0" (dst), "r" (a1), "r" (a2), "r" (a3), "r" (a4) \
                  : "memory")
  45
  /*
   * XOR the buffer at p2 into the buffer at p1, four words per pass.
   *
   * bytes is turned into a count of four-word groups; a remainder smaller
   * than one group is ignored.  The loop body runs before the count is
   * tested, so bytes must cover at least one whole group: a count that starts
   * at zero would wrap on the first decrement instead of stopping.
   *
   * The a and b locals are the names the GET/XOR/PUT_BLOCK_4 macros expect.
   * NOTE(review): they are pinned to specific registers, presumably so the
   * ldmia/stmia register lists come out in ascending order - confirm.
   */
  static void
  xor_arm4regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
  {
          /* running result: loaded from p1 and stored back to p1 */
          register unsigned int a1 __asm__("r4");
          register unsigned int a2 __asm__("r5");
          register unsigned int a3 __asm__("r6");
          register unsigned int a4 __asm__("r7");
          /* scratch: words fetched from the source buffer */
          register unsigned int b1 __asm__("r8");
          register unsigned int b2 __asm__("r9");
          register unsigned int b3 __asm__("ip");
          register unsigned int b4 __asm__("lr");
          unsigned int lines = bytes / (4 * sizeof(unsigned long));

          for (;;) {
                  GET_BLOCK_4(p1);
                  XOR_BLOCK_4(p2);
                  PUT_BLOCK_4(p1);        /* advances p1 to the next group */
                  if (--lines == 0)
                          break;
          }
  }
  65
  /*
   * XOR the buffers at p2 and p3 into the buffer at p1, four words per pass.
   *
   * bytes is turned into a count of four-word groups; a remainder smaller
   * than one group is ignored.  The loop body runs before the count is
   * tested, so bytes must cover at least one whole group: a count that starts
   * at zero would wrap on the first decrement instead of stopping.
   *
   * The a and b locals are the names the GET/XOR/PUT_BLOCK_4 macros expect.
   */
  static void
  xor_arm4regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
                 unsigned long *p3)
  {
          /* running result: loaded from p1 and stored back to p1 */
          register unsigned int a1 __asm__("r4");
          register unsigned int a2 __asm__("r5");
          register unsigned int a3 __asm__("r6");
          register unsigned int a4 __asm__("r7");
          /* scratch: reused for each source buffer in turn */
          register unsigned int b1 __asm__("r8");
          register unsigned int b2 __asm__("r9");
          register unsigned int b3 __asm__("ip");
          register unsigned int b4 __asm__("lr");
          unsigned int lines = bytes / (4 * sizeof(unsigned long));

          for (;;) {
                  GET_BLOCK_4(p1);
                  XOR_BLOCK_4(p2);
                  XOR_BLOCK_4(p3);
                  PUT_BLOCK_4(p1);        /* advances p1 to the next group */
                  if (--lines == 0)
                          break;
          }
  }
  87
  /*
   * XOR the buffers at p2, p3 and p4 into the buffer at p1, two words per
   * pass (this variant uses the *_BLOCK_2 macros and only four registers).
   *
   * bytes is turned into a count of two-word groups; a remainder smaller
   * than one group is ignored.  The loop body runs before the count is
   * tested, so bytes must cover at least one whole group: a count that starts
   * at zero would wrap on the first decrement instead of stopping.
   */
  static void
  xor_arm4regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
                 unsigned long *p3, unsigned long *p4)
  {
          /* running result: loaded from p1 and stored back to p1 */
          register unsigned int a1 __asm__("r8");
          register unsigned int a2 __asm__("r9");
          /* scratch: reused for each source buffer in turn */
          register unsigned int b1 __asm__("ip");
          register unsigned int b2 __asm__("lr");
          unsigned int lines = bytes / (2 * sizeof(unsigned long));

          for (;;) {
                  GET_BLOCK_2(p1);
                  XOR_BLOCK_2(p2);
                  XOR_BLOCK_2(p3);
                  XOR_BLOCK_2(p4);
                  PUT_BLOCK_2(p1);        /* advances p1 to the next group */
                  if (--lines == 0)
                          break;
          }
  }
 106
 /*
  * XOR the buffers at p2, p3, p4 and p5 into the buffer at p1, two words per
  * pass (this variant uses the *_BLOCK_2 macros and only four registers).
  *
  * bytes is turned into a count of two-word groups; a remainder smaller
  * than one group is ignored.  The loop body runs before the count is
  * tested, so bytes must cover at least one whole group: a count that starts
  * at zero would wrap on the first decrement instead of stopping.
  */
 static void
 xor_arm4regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
                unsigned long *p3, unsigned long *p4, unsigned long *p5)
 {
         /* running result: loaded from p1 and stored back to p1 */
         register unsigned int a1 __asm__("r8");
         register unsigned int a2 __asm__("r9");
         /* scratch: reused for each source buffer in turn */
         register unsigned int b1 __asm__("ip");
         register unsigned int b2 __asm__("lr");
         unsigned int lines = bytes / (2 * sizeof(unsigned long));

         for (;;) {
                 GET_BLOCK_2(p1);
                 XOR_BLOCK_2(p2);
                 XOR_BLOCK_2(p3);
                 XOR_BLOCK_2(p4);
                 XOR_BLOCK_2(p5);
                 PUT_BLOCK_2(p1);        /* advances p1 to the next group */
                 if (--lines == 0)
                         break;
         }
 }
 126
 127static struct xor_block_template xor_block_arm4regs = {
 128        .name   = "arm4regs",
 129        .do_2   = xor_arm4regs_2,
 130        .do_3   = xor_arm4regs_3,
 131        .do_4   = xor_arm4regs_4,
 132        .do_5   = xor_arm4regs_5,
 133};
 134
 /*
  * Replace any earlier XOR_TRY_TEMPLATES definition.  This one passes the
  * arm4regs, 8regs and 32regs templates to xor_speed(), in that order, and
  * then expands NEON_TEMPLATES.
  */
 #undef XOR_TRY_TEMPLATES
 #define XOR_TRY_TEMPLATES \
         do { \
                 xor_speed(&xor_block_arm4regs); \
                 xor_speed(&xor_block_8regs); \
                 xor_speed(&xor_block_32regs); \
                 NEON_TEMPLATES; \
         } while (0)
 143
 144#ifdef CONFIG_KERNEL_MODE_NEON
 145
 146extern struct xor_block_template const xor_block_neon_inner;
 147
 148static void
 149xor_neon_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 150{
 151        if (in_interrupt()) {
 152                xor_arm4regs_2(bytes, p1, p2);
 153        } else {
 154                kernel_neon_begin();
 155                xor_block_neon_inner.do_2(bytes, p1, p2);
 156                kernel_neon_end();
 157        }
 158}
 159
 160static void
 161xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 162                unsigned long *p3)
 163{
 164        if (in_interrupt()) {
 165                xor_arm4regs_3(bytes, p1, p2, p3);
 166        } else {
 167                kernel_neon_begin();
 168                xor_block_neon_inner.do_3(bytes, p1, p2, p3);
 169                kernel_neon_end();
 170        }
 171}
 172
 173static void
 174xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 175                unsigned long *p3, unsigned long *p4)
 176{
 177        if (in_interrupt()) {
 178                xor_arm4regs_4(bytes, p1, p2, p3, p4);
 179        } else {
 180                kernel_neon_begin();
 181                xor_block_neon_inner.do_4(bytes, p1, p2, p3, p4);
 182                kernel_neon_end();
 183        }
 184}
 185
 186static void
 187xor_neon_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 188                unsigned long *p3, unsigned long *p4, unsigned long *p5)
 189{
 190        if (in_interrupt()) {
 191                xor_arm4regs_5(bytes, p1, p2, p3, p4, p5);
 192        } else {
 193                kernel_neon_begin();
 194                xor_block_neon_inner.do_5(bytes, p1, p2, p3, p4, p5);
 195                kernel_neon_end();
 196        }
 197}
 198
 199static struct xor_block_template xor_block_neon = {
 200        .name   = "neon",
 201        .do_2   = xor_neon_2,
 202        .do_3   = xor_neon_3,
 203        .do_4   = xor_neon_4,
 204        .do_5   = xor_neon_5
 205};
 206
 207#define NEON_TEMPLATES  \
 208        do { if (cpu_has_neon()) xor_speed(&xor_block_neon); } while (0)
 209#else
 210#define NEON_TEMPLATES
 211#endif
 212