linux/arch/x86/include/asm/xor_avx.h
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef _ASM_X86_XOR_AVX_H
#define _ASM_X86_XOR_AVX_H

/*
 * Optimized RAID-5 checksumming functions for AVX
 *
 * Copyright (C) 2012 Intel Corporation
 * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
 *
 * Based on Ingo Molnar and Zach Brown's respective MMX and SSE routines
 */

#include <linux/compiler.h>
#include <asm/fpu/api.h>

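/*
 * BLOCK(i, reg) is redefined inside each xor_avx_N() routine below; it
 * emits the AVX load/XOR/store sequence for one 32-byte chunk at byte
 * offset i using register %ymm<reg>.  BLOCK4() unrolls four consecutive
 * chunks across %ymm0-%ymm3, and BLOCK16() unrolls sixteen chunks, i.e.
 * 512 bytes per expansion.
 */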
#define BLOCK4(i) \
                BLOCK(32 * i, 0) \
                BLOCK(32 * (i + 1), 1) \
                BLOCK(32 * (i + 2), 2) \
                BLOCK(32 * (i + 3), 3)

#define BLOCK16() \
                BLOCK4(0) \
                BLOCK4(4) \
                BLOCK4(8) \
                BLOCK4(12)

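/*
 * xor_avx_2(): p0 ^= p1.  Each BLOCK16() expansion handles 512 bytes,
 * hence lines = bytes >> 9.  kernel_fpu_begin()/kernel_fpu_end() bracket
 * the use of the YMM registers, and vmovdqa requires the buffers to be
 * 32-byte aligned.  vxorps is used for the XOR because the 256-bit
 * integer vpxor is only available with AVX2.
 */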
static void xor_avx_2(unsigned long bytes, unsigned long *p0, unsigned long *p1)
{
        unsigned long lines = bytes >> 9;

        kernel_fpu_begin();

        while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
        asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p1[i / sizeof(*p1)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm"  #reg : : \
                "m" (p0[i / sizeof(*p0)])); \
        asm volatile("vmovdqa %%ymm" #reg ", %0" : \
                "=m" (p0[i / sizeof(*p0)])); \
} while (0);

                BLOCK16()

                p0 = (unsigned long *)((uintptr_t)p0 + 512);
                p1 = (unsigned long *)((uintptr_t)p1 + 512);
        }

        kernel_fpu_end();
}

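/* xor_avx_3(): p0 ^= p1 ^ p2, same 512-bytes-per-iteration structure. */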
static void xor_avx_3(unsigned long bytes, unsigned long *p0, unsigned long *p1,
        unsigned long *p2)
{
        unsigned long lines = bytes >> 9;

        kernel_fpu_begin();

        while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
        asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p2[i / sizeof(*p2)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p1[i / sizeof(*p1)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p0[i / sizeof(*p0)])); \
        asm volatile("vmovdqa %%ymm" #reg ", %0" : \
                "=m" (p0[i / sizeof(*p0)])); \
} while (0);

                BLOCK16()

                p0 = (unsigned long *)((uintptr_t)p0 + 512);
                p1 = (unsigned long *)((uintptr_t)p1 + 512);
                p2 = (unsigned long *)((uintptr_t)p2 + 512);
        }

        kernel_fpu_end();
}

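/* xor_avx_4(): p0 ^= p1 ^ p2 ^ p3. */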
static void xor_avx_4(unsigned long bytes, unsigned long *p0, unsigned long *p1,
        unsigned long *p2, unsigned long *p3)
{
        unsigned long lines = bytes >> 9;

        kernel_fpu_begin();

        while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
        asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p3[i / sizeof(*p3)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p2[i / sizeof(*p2)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p1[i / sizeof(*p1)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p0[i / sizeof(*p0)])); \
        asm volatile("vmovdqa %%ymm" #reg ", %0" : \
                "=m" (p0[i / sizeof(*p0)])); \
} while (0);

                BLOCK16();

                p0 = (unsigned long *)((uintptr_t)p0 + 512);
                p1 = (unsigned long *)((uintptr_t)p1 + 512);
                p2 = (unsigned long *)((uintptr_t)p2 + 512);
                p3 = (unsigned long *)((uintptr_t)p3 + 512);
        }

        kernel_fpu_end();
}

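/* xor_avx_5(): p0 ^= p1 ^ p2 ^ p3 ^ p4. */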
static void xor_avx_5(unsigned long bytes, unsigned long *p0, unsigned long *p1,
        unsigned long *p2, unsigned long *p3, unsigned long *p4)
{
        unsigned long lines = bytes >> 9;

        kernel_fpu_begin();

        while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
        asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p4[i / sizeof(*p4)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p3[i / sizeof(*p3)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p2[i / sizeof(*p2)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p1[i / sizeof(*p1)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p0[i / sizeof(*p0)])); \
        asm volatile("vmovdqa %%ymm" #reg ", %0" : \
                "=m" (p0[i / sizeof(*p0)])); \
} while (0);

                BLOCK16()

                p0 = (unsigned long *)((uintptr_t)p0 + 512);
                p1 = (unsigned long *)((uintptr_t)p1 + 512);
                p2 = (unsigned long *)((uintptr_t)p2 + 512);
                p3 = (unsigned long *)((uintptr_t)p3 + 512);
                p4 = (unsigned long *)((uintptr_t)p4 + 512);
        }

        kernel_fpu_end();
}

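/*
 * Describe the AVX routines to the generic xor framework, which
 * benchmarks the registered templates and selects the fastest one.
 */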
static struct xor_block_template xor_block_avx = {
        .name = "avx",
        .do_2 = xor_avx_2,
        .do_3 = xor_avx_3,
        .do_4 = xor_avx_4,
        .do_5 = xor_avx_5,
};

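/*
 * AVX may only be used when the CPU advertises it and the OS has enabled
 * extended state saving (OSXSAVE), so both feature bits gate benchmarking
 * (AVX_XOR_SPEED) and selection (AVX_SELECT) of the AVX template.
 */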
#define AVX_XOR_SPEED \
do { \
        if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE)) \
                xor_speed(&xor_block_avx); \
} while (0)

#define AVX_SELECT(FASTEST) \
        (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE) ? &xor_block_avx : FASTEST)

#endif