linux/arch/x86/include/asm/xor_avx.h
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef _ASM_X86_XOR_AVX_H
#define _ASM_X86_XOR_AVX_H

/*
 * Optimized RAID-5 checksumming functions for AVX
 *
 * Copyright (C) 2012 Intel Corporation
 * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
 *
 * Based on Ingo Molnar and Zach Brown's respective MMX and SSE routines
 */

#ifdef CONFIG_AS_AVX

#include <linux/compiler.h>
#include <asm/fpu/api.h>

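/*
 * BLOCK(off, reg) is redefined inside each xor_avx_*() routine below to
 * handle one 32-byte chunk at byte offset 'off' through YMM register 'reg'.
 * BLOCK4() unrolls four consecutive chunks and BLOCK16() covers sixteen,
 * i.e. the 512 bytes consumed per loop iteration.
 */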
#define BLOCK4(i) \
                BLOCK(32 * i, 0) \
                BLOCK(32 * (i + 1), 1) \
                BLOCK(32 * (i + 2), 2) \
                BLOCK(32 * (i + 3), 3)

#define BLOCK16() \
                BLOCK4(0) \
                BLOCK4(4) \
                BLOCK4(8) \
                BLOCK4(12)

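/* p0 ^= p1; 512 bytes (one BLOCK16 pass) per loop iteration, inside kernel_fpu_begin()/kernel_fpu_end(). */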
static void xor_avx_2(unsigned long bytes, unsigned long *p0, unsigned long *p1)
{
        unsigned long lines = bytes >> 9;

        kernel_fpu_begin();

        while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
        asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p1[i / sizeof(*p1)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm"  #reg : : \
                "m" (p0[i / sizeof(*p0)])); \
        asm volatile("vmovdqa %%ymm" #reg ", %0" : \
                "=m" (p0[i / sizeof(*p0)])); \
} while (0);

                BLOCK16()

                p0 = (unsigned long *)((uintptr_t)p0 + 512);
                p1 = (unsigned long *)((uintptr_t)p1 + 512);
        }

        kernel_fpu_end();
}

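/* p0 ^= p1 ^ p2 */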
static void xor_avx_3(unsigned long bytes, unsigned long *p0, unsigned long *p1,
        unsigned long *p2)
{
        unsigned long lines = bytes >> 9;

        kernel_fpu_begin();

        while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
        asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p2[i / sizeof(*p2)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p1[i / sizeof(*p1)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p0[i / sizeof(*p0)])); \
        asm volatile("vmovdqa %%ymm" #reg ", %0" : \
                "=m" (p0[i / sizeof(*p0)])); \
} while (0);

                BLOCK16()

                p0 = (unsigned long *)((uintptr_t)p0 + 512);
                p1 = (unsigned long *)((uintptr_t)p1 + 512);
                p2 = (unsigned long *)((uintptr_t)p2 + 512);
        }

        kernel_fpu_end();
}

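/* p0 ^= p1 ^ p2 ^ p3 */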
static void xor_avx_4(unsigned long bytes, unsigned long *p0, unsigned long *p1,
        unsigned long *p2, unsigned long *p3)
{
        unsigned long lines = bytes >> 9;

        kernel_fpu_begin();

        while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
        asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p3[i / sizeof(*p3)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p2[i / sizeof(*p2)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p1[i / sizeof(*p1)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p0[i / sizeof(*p0)])); \
        asm volatile("vmovdqa %%ymm" #reg ", %0" : \
                "=m" (p0[i / sizeof(*p0)])); \
} while (0);

                BLOCK16()

                p0 = (unsigned long *)((uintptr_t)p0 + 512);
                p1 = (unsigned long *)((uintptr_t)p1 + 512);
                p2 = (unsigned long *)((uintptr_t)p2 + 512);
                p3 = (unsigned long *)((uintptr_t)p3 + 512);
        }

        kernel_fpu_end();
}

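/* p0 ^= p1 ^ p2 ^ p3 ^ p4 */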
static void xor_avx_5(unsigned long bytes, unsigned long *p0, unsigned long *p1,
        unsigned long *p2, unsigned long *p3, unsigned long *p4)
{
        unsigned long lines = bytes >> 9;

        kernel_fpu_begin();

        while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
        asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p4[i / sizeof(*p4)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p3[i / sizeof(*p3)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p2[i / sizeof(*p2)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p1[i / sizeof(*p1)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p0[i / sizeof(*p0)])); \
        asm volatile("vmovdqa %%ymm" #reg ", %0" : \
                "=m" (p0[i / sizeof(*p0)])); \
} while (0);

                BLOCK16()

                p0 = (unsigned long *)((uintptr_t)p0 + 512);
                p1 = (unsigned long *)((uintptr_t)p1 + 512);
                p2 = (unsigned long *)((uintptr_t)p2 + 512);
                p3 = (unsigned long *)((uintptr_t)p3 + 512);
                p4 = (unsigned long *)((uintptr_t)p4 + 512);
        }

        kernel_fpu_end();
}

static struct xor_block_template xor_block_avx = {
        .name = "avx",
        .do_2 = xor_avx_2,
        .do_3 = xor_avx_3,
        .do_4 = xor_avx_4,
        .do_5 = xor_avx_5,
};

#define AVX_XOR_SPEED \
do { \
        if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE)) \
                xor_speed(&xor_block_avx); \
} while (0)

#define AVX_SELECT(FASTEST) \
        (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE) ? &xor_block_avx : FASTEST)
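
/*
 * Illustrative sketch only (not part of this header): an arch xor.h
 * consumer is expected to hook these macros into the generic template
 * selection, roughly along the lines of
 *
 *	#define XOR_TRY_TEMPLATES		\
 *	do {					\
 *		AVX_XOR_SPEED;			\
 *		xor_speed(&xor_block_8regs);	\
 *	} while (0)
 *
 *	#define XOR_SELECT_TEMPLATE(FASTEST)	AVX_SELECT(FASTEST)
 *
 * The non-AVX template named above is just a placeholder for whichever
 * other candidates the architecture registers.
 */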

#else

#define AVX_XOR_SPEED {}

#define AVX_SELECT(FASTEST) (FASTEST)

#endif /* CONFIG_AS_AVX */
#endif /* _ASM_X86_XOR_AVX_H */