linux/lib/raid6/avx2.c
/* -*- linux-c -*- ------------------------------------------------------- *
 *
 *   Copyright (C) 2012 Intel Corporation
 *   Author: Yuanhan Liu <yuanhan.liu@linux.intel.com>
 *
 *   Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved
 *
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, Inc., 59 Temple Place Ste 330,
 *   Boston MA 02111-1307, USA; either version 2 of the License, or
 *   (at your option) any later version; incorporated herein by reference.
 *
 * ----------------------------------------------------------------------- */

/*
 * AVX2 implementation of RAID-6 syndrome functions
 */
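
/*
 * For data blocks D_0 .. D_z0 the routines below compute the two syndromes
 *
 *	P = D_z0 + D_{z0-1} + ... + D_0			(plain XOR parity)
 *	Q = g^z0 * D_z0 + g^{z0-1} * D_{z0-1} + ... + g^0 * D_0
 *
 * with g = 2 and all arithmetic in GF(2^8), where addition is XOR.  Q is
 * evaluated Horner-style: starting from the highest data disk, each step
 * multiplies the running value by 2 in the field and XORs in the next
 * lower data block.
 */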

#ifdef CONFIG_AS_AVX2

#include <linux/raid/pq.h>
#include "x86.h"

static const struct raid6_avx2_constants {
	u64 x1d[4];
} raid6_avx2_constants __aligned(32) = {
	{ 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
	  0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,},
};
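
/*
 * Illustrative reference only (hypothetical helper, not used by the
 * assembler routines): 0x1d is the low byte of the GF(2^8) reduction
 * polynomial 0x11d.  The vpcmpgtb/vpaddb/vpand/vpxor sequence in the loops
 * below multiplies 32 field elements by 2 at once; a scalar per-byte
 * equivalent looks like this:
 */
static inline u8 raid6_gf256_mul2(u8 v)
{
	u8 mask = (s8)v < 0 ? 0xff : 0x00;	/* vpcmpgtb: 0 > v, bytewise  */

	v += v;					/* vpaddb: v << 1 (mod 256)   */
	return v ^ (mask & 0x1d);		/* vpand + vpxor: conditional
						 * reduction by the polynomial */
}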

static int raid6_have_avx2(void)
{
	return boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX);
}

/*
 * Plain AVX2 implementation
 */
static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
	asm volatile("vpxor %ymm3,%ymm3,%ymm3");	/* Zero temp */

	for (d = 0; d < bytes; d += 32) {
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */
		asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
		asm volatile("vmovdqa %ymm2,%ymm4");/* Q[0] */
		asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z0-1][d]));
		for (z = z0-2; z >= 0; z--) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm6,%ymm2,%ymm2");
			asm volatile("vpxor %ymm6,%ymm4,%ymm4");
			asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z][d]));
		}
		asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5");
		asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
		asm volatile("vpand %ymm0,%ymm5,%ymm5");
		asm volatile("vpxor %ymm5,%ymm4,%ymm4");
		asm volatile("vpxor %ymm6,%ymm2,%ymm2");
		asm volatile("vpxor %ymm6,%ymm4,%ymm4");

		asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
		asm volatile("vpxor %ymm2,%ymm2,%ymm2");
		asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
		asm volatile("vpxor %ymm4,%ymm4,%ymm4");
	}

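	/*
	 * The non-temporal vmovntdq stores above are weakly ordered; the
	 * sfence makes them globally visible before the FPU context is
	 * released.
	 */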
	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

const struct raid6_calls raid6_avx2x1 = {
	raid6_avx21_gen_syndrome,
	raid6_have_avx2,
	"avx2x1",
	1			/* Has cache hints */
};
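
/*
 * Reference sketch (illustrative and hypothetical; not wired into the
 * raid6_calls table): a scalar equivalent of the loop above, built on the
 * raid6_gf256_mul2() helper sketched earlier.  ptrs[] holds the data disks
 * first, then P, then Q, exactly as unpacked at the top of each routine.
 */
static void __maybe_unused raid6_scalar_gen_syndrome(int disks, size_t bytes,
						     void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	int z0 = disks - 3;		/* Highest data disk */
	u8 *p = dptr[z0+1];		/* XOR parity */
	u8 *q = dptr[z0+2];		/* RS syndrome */
	size_t d;
	int z;

	for (d = 0; d < bytes; d++) {
		u8 wp = dptr[z0][d];	/* running P */
		u8 wq = wp;		/* running Q (Horner evaluation) */

		for (z = z0-1; z >= 0; z--) {
			wq = raid6_gf256_mul2(wq) ^ dptr[z][d];
			wp ^= dptr[z][d];
		}
		p[d] = wp;
		q[d] = wq;
	}
}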

/*
 * Unrolled-by-2 AVX2 implementation
 */
static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
	asm volatile("vpxor %ymm1,%ymm1,%ymm1"); /* Zero temp */

	/* We uniformly assume a single prefetch covers at least 32 bytes */
	for (d = 0; d < bytes; d += 64) {
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d+32]));
		asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */
		asm volatile("vmovdqa %0,%%ymm3" : : "m" (dptr[z0][d+32]));/* P[1] */
		asm volatile("vmovdqa %ymm2,%ymm4"); /* Q[0] */
		asm volatile("vmovdqa %ymm3,%ymm6"); /* Q[1] */
		for (z = z0-1; z >= 0; z--) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32]));
			asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5");
			asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpand %ymm0,%ymm7,%ymm7");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
			asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d]));
			asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32]));
			asm volatile("vpxor %ymm5,%ymm2,%ymm2");
			asm volatile("vpxor %ymm7,%ymm3,%ymm3");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
		}
		asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
		asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
		asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
		asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

const struct raid6_calls raid6_avx2x2 = {
	raid6_avx22_gen_syndrome,
	raid6_have_avx2,
	"avx2x2",
	1			/* Has cache hints */
};
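
/*
 * Usage sketch (illustrative, assuming the gen_syndrome/valid members
 * declared for struct raid6_calls in <linux/raid/pq.h>): callers normally
 * reach these routines through the raid6_calls table rather than calling
 * them directly.  With ptrs[] holding data[0..disks-3], then P, then Q,
 * each buffer 'bytes' long (a multiple of this routine's 64-byte step) and
 * 32-byte aligned (vmovdqa/vmovntdq require it):
 *
 *	if (raid6_avx2x2.valid())
 *		raid6_avx2x2.gen_syndrome(disks, bytes, ptrs);
 */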

#ifdef CONFIG_X86_64
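/*
 * The 4-way unrolled variant below keeps four P and four Q accumulators
 * live at once and therefore needs ymm8-ymm15, which exist only in 64-bit
 * mode; hence the CONFIG_X86_64 guard.
 */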

/*
 * Unrolled-by-4 AVX2 implementation
 */
static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
	asm volatile("vpxor %ymm1,%ymm1,%ymm1");	/* Zero temp */
	asm volatile("vpxor %ymm2,%ymm2,%ymm2");	/* P[0] */
	asm volatile("vpxor %ymm3,%ymm3,%ymm3");	/* P[1] */
	asm volatile("vpxor %ymm4,%ymm4,%ymm4");	/* Q[0] */
	asm volatile("vpxor %ymm6,%ymm6,%ymm6");	/* Q[1] */
	asm volatile("vpxor %ymm10,%ymm10,%ymm10");	/* P[2] */
	asm volatile("vpxor %ymm11,%ymm11,%ymm11");	/* P[3] */
	asm volatile("vpxor %ymm12,%ymm12,%ymm12");	/* Q[2] */
	asm volatile("vpxor %ymm14,%ymm14,%ymm14");	/* Q[3] */

	for (d = 0; d < bytes; d += 128) {
		for (z = z0; z >= 0; z--) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32]));
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d+64]));
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d+96]));
			asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5");
			asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7");
			asm volatile("vpcmpgtb %ymm12,%ymm1,%ymm13");
			asm volatile("vpcmpgtb %ymm14,%ymm1,%ymm15");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
			asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
			asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpand %ymm0,%ymm7,%ymm7");
			asm volatile("vpand %ymm0,%ymm13,%ymm13");
			asm volatile("vpand %ymm0,%ymm15,%ymm15");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
			asm volatile("vpxor %ymm13,%ymm12,%ymm12");
			asm volatile("vpxor %ymm15,%ymm14,%ymm14");
			asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d]));
			asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32]));
			asm volatile("vmovdqa %0,%%ymm13" : : "m" (dptr[z][d+64]));
			asm volatile("vmovdqa %0,%%ymm15" : : "m" (dptr[z][d+96]));
			asm volatile("vpxor %ymm5,%ymm2,%ymm2");
			asm volatile("vpxor %ymm7,%ymm3,%ymm3");
			asm volatile("vpxor %ymm13,%ymm10,%ymm10");
			asm volatile("vpxor %ymm15,%ymm11,%ymm11");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
			asm volatile("vpxor %ymm13,%ymm12,%ymm12");
			asm volatile("vpxor %ymm15,%ymm14,%ymm14");
		}
		asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
		asm volatile("vpxor %ymm2,%ymm2,%ymm2");
		asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
		asm volatile("vpxor %ymm3,%ymm3,%ymm3");
		asm volatile("vmovntdq %%ymm10,%0" : "=m" (p[d+64]));
		asm volatile("vpxor %ymm10,%ymm10,%ymm10");
		asm volatile("vmovntdq %%ymm11,%0" : "=m" (p[d+96]));
		asm volatile("vpxor %ymm11,%ymm11,%ymm11");
		asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
		asm volatile("vpxor %ymm4,%ymm4,%ymm4");
		asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
		asm volatile("vpxor %ymm6,%ymm6,%ymm6");
		asm volatile("vmovntdq %%ymm12,%0" : "=m" (q[d+64]));
		asm volatile("vpxor %ymm12,%ymm12,%ymm12");
		asm volatile("vmovntdq %%ymm14,%0" : "=m" (q[d+96]));
		asm volatile("vpxor %ymm14,%ymm14,%ymm14");
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

const struct raid6_calls raid6_avx2x4 = {
	raid6_avx24_gen_syndrome,
	raid6_have_avx2,
	"avx2x4",
	1			/* Has cache hints */
};
#endif /* CONFIG_X86_64 */

#endif /* CONFIG_AS_AVX2 */