linux/lib/raid6/avx2.c
// SPDX-License-Identifier: GPL-2.0-or-later
/* -*- linux-c -*- ------------------------------------------------------- *
 *
 *   Copyright (C) 2012 Intel Corporation
 *   Author: Yuanhan Liu <yuanhan.liu@linux.intel.com>
 *
 *   Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved
 *
 * ----------------------------------------------------------------------- */

/*
 * AVX2 implementation of RAID-6 syndrome functions
 *
 */
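
/*
 * For data blocks D_0 .. D_{n-1} the two syndromes are
 *
 *        P = D_0 ^ D_1 ^ ... ^ D_{n-1}
 *        Q = g^0*D_0 ^ g^1*D_1 ^ ... ^ g^{n-1}*D_{n-1}
 *
 * where ^ is XOR and the multiplications are done byte-wise in GF(2^8)
 * with generator g = {02}.  The routines below evaluate Q with Horner's
 * rule, so each step only needs a multiply-by-{02} and an XOR.
 */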

#ifdef CONFIG_AS_AVX2

#include <linux/raid/pq.h>
#include "x86.h"

static const struct raid6_avx2_constants {
        u64 x1d[4];
} raid6_avx2_constants __aligned(32) = {
        { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
          0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,},
};
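
/*
 * The x1d constant implements the conditional reduction step of a
 * GF(2^8) multiply-by-{02}: after shifting a byte left by one, bytes
 * whose top bit was set must additionally be XORed with 0x1d.  A scalar
 * sketch of what each vpcmpgtb/vpaddb/vpand/vpxor group below computes
 * per byte (illustrative only, not part of the build):
 *
 *        static inline u8 gf_mul2(u8 x)
 *        {
 *                return (u8)(x << 1) ^ ((x & 0x80) ? 0x1d : 0x00);
 *        }
 *
 * vpcmpgtb against an all-zero register yields a 0xff mask exactly for
 * the bytes with the top bit set (signed compare), vpaddb doubles each
 * byte, and vpand/vpxor apply the conditional 0x1d.
 */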

static int raid6_have_avx2(void)
{
        return boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX);
}

/*
 * Plain AVX2 implementation
 */
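/*
 * 32 bytes (one ymm register) are handled per pass.  Per byte, the loop
 * below corresponds to this scalar sketch (using the hypothetical
 * gf_mul2() from the comment above; illustrative only):
 *
 *        p = q = dptr[z0][d];
 *        for (z = z0 - 1; z >= 0; z--) {
 *                p ^= dptr[z][d];
 *                q  = gf_mul2(q) ^ dptr[z][d];
 *        }
 *
 * P and Q are written with non-temporal stores (vmovntdq) since the
 * freshly generated parity is not expected to be read back immediately;
 * the trailing sfence orders those stores.
 */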
static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
        u8 **dptr = (u8 **)ptrs;
        u8 *p, *q;
        int d, z, z0;

        z0 = disks - 3;         /* Highest data disk */
        p = dptr[z0+1];         /* XOR parity */
        q = dptr[z0+2];         /* RS syndrome */

        kernel_fpu_begin();

        asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
        asm volatile("vpxor %ymm3,%ymm3,%ymm3");        /* Zero temp */

        for (d = 0; d < bytes; d += 32) {
                asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
                asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */
                asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
                asm volatile("vmovdqa %ymm2,%ymm4");/* Q[0] */
                asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z0-1][d]));
                for (z = z0-2; z >= 0; z--) {
                        asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
                        asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5");
                        asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
                        asm volatile("vpand %ymm0,%ymm5,%ymm5");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                        asm volatile("vpxor %ymm6,%ymm2,%ymm2");
                        asm volatile("vpxor %ymm6,%ymm4,%ymm4");
                        asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z][d]));
                }
                asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5");
                asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
                asm volatile("vpand %ymm0,%ymm5,%ymm5");
                asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                asm volatile("vpxor %ymm6,%ymm2,%ymm2");
                asm volatile("vpxor %ymm6,%ymm4,%ymm4");

                asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
                asm volatile("vpxor %ymm2,%ymm2,%ymm2");
                asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
                asm volatile("vpxor %ymm4,%ymm4,%ymm4");
        }

        asm volatile("sfence" : : : "memory");
        kernel_fpu_end();
}

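/*
 * The xor_syndrome() variants XOR the P/Q contribution of data disks
 * start..stop into the existing P and Q blocks:
 *
 *        P ^= D_start ^ ... ^ D_stop
 *        Q ^= g^start*D_start ^ ... ^ g^stop*D_stop
 *
 * The "right side" loop accumulates the named disks with Horner's rule;
 * the "left side" loop then only keeps multiplying the partial Q by {02}
 * to shift it into place for the skipped lower-numbered disks, which
 * contribute nothing of their own.  Scalar sketch per byte (using the
 * hypothetical gf_mul2() above; illustrative only):
 *
 *        pd = qd = dptr[stop][d];
 *        for (z = stop - 1; z >= start; z--) {
 *                qd = gf_mul2(qd) ^ dptr[z][d];
 *                pd ^= dptr[z][d];
 *        }
 *        for (z = start - 1; z >= 0; z--)
 *                qd = gf_mul2(qd);
 *        p[d] ^= pd;
 *        q[d] ^= qd;
 */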
static void raid6_avx21_xor_syndrome(int disks, int start, int stop,
                                     size_t bytes, void **ptrs)
{
        u8 **dptr = (u8 **)ptrs;
        u8 *p, *q;
        int d, z, z0;

        z0 = stop;              /* P/Q right side optimization */
        p = dptr[disks-2];      /* XOR parity */
        q = dptr[disks-1];      /* RS syndrome */

        kernel_fpu_begin();

        asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));

        for (d = 0 ; d < bytes ; d += 32) {
                asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d]));
                asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d]));
                asm volatile("vpxor %ymm4,%ymm2,%ymm2");
                /* P/Q data pages */
                for (z = z0-1 ; z >= start ; z--) {
                        asm volatile("vpxor %ymm5,%ymm5,%ymm5");
                        asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
                        asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
                        asm volatile("vpand %ymm0,%ymm5,%ymm5");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                        asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d]));
                        asm volatile("vpxor %ymm5,%ymm2,%ymm2");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                }
                /* P/Q left side optimization */
                for (z = start-1 ; z >= 0 ; z--) {
                        asm volatile("vpxor %ymm5,%ymm5,%ymm5");
                        asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
                        asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
                        asm volatile("vpand %ymm0,%ymm5,%ymm5");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                }
                asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d]));
                /* Don't use movntdq for r/w memory area < cache line */
                asm volatile("vmovdqa %%ymm4,%0" : "=m" (q[d]));
                asm volatile("vmovdqa %%ymm2,%0" : "=m" (p[d]));
        }

        asm volatile("sfence" : : : "memory");
        kernel_fpu_end();
}

const struct raid6_calls raid6_avx2x1 = {
        raid6_avx21_gen_syndrome,
        raid6_avx21_xor_syndrome,
        raid6_have_avx2,
        "avx2x1",
        1                       /* Has cache hints */
};
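
/*
 * The raid6_calls descriptors in this file (raid6_avx2x1/x2/x4) plug
 * these routines into the generic RAID-6 core, which probes them via
 * their valid() hook and selects one of them (typically by benchmarking)
 * at initialization time; the trailing 1 flags that the routines use
 * cache hints (prefetch and non-temporal stores).  A hypothetical caller
 * sketch, for illustration only:
 *
 *        if (raid6_avx2x1.valid())
 *                raid6_avx2x1.gen_syndrome(disks, PAGE_SIZE, ptrs);
 */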

/*
 * Unrolled-by-2 AVX2 implementation
 */
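/*
 * Same algorithm as avx2x1, but 64 bytes per pass using two independent
 * P/Q accumulator pairs (ymm2/ymm4 and ymm3/ymm6), which gives the CPU
 * more independent work to overlap.
 */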
static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
        u8 **dptr = (u8 **)ptrs;
        u8 *p, *q;
        int d, z, z0;

        z0 = disks - 3;         /* Highest data disk */
        p = dptr[z0+1];         /* XOR parity */
        q = dptr[z0+2];         /* RS syndrome */

        kernel_fpu_begin();

        asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
        asm volatile("vpxor %ymm1,%ymm1,%ymm1"); /* Zero temp */

        /* We uniformly assume a single prefetch covers at least 32 bytes */
        for (d = 0; d < bytes; d += 64) {
                asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
                asm volatile("prefetchnta %0" : : "m" (dptr[z0][d+32]));
                asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */
                asm volatile("vmovdqa %0,%%ymm3" : : "m" (dptr[z0][d+32]));/* P[1] */
                asm volatile("vmovdqa %ymm2,%ymm4"); /* Q[0] */
                asm volatile("vmovdqa %ymm3,%ymm6"); /* Q[1] */
                for (z = z0-1; z >= 0; z--) {
                        asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
                        asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32]));
                        asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5");
                        asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7");
                        asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
                        asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
                        asm volatile("vpand %ymm0,%ymm5,%ymm5");
                        asm volatile("vpand %ymm0,%ymm7,%ymm7");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                        asm volatile("vpxor %ymm7,%ymm6,%ymm6");
                        asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d]));
                        asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32]));
                        asm volatile("vpxor %ymm5,%ymm2,%ymm2");
                        asm volatile("vpxor %ymm7,%ymm3,%ymm3");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                        asm volatile("vpxor %ymm7,%ymm6,%ymm6");
                }
                asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
                asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
                asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
                asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
        }

        asm volatile("sfence" : : : "memory");
        kernel_fpu_end();
}

static void raid6_avx22_xor_syndrome(int disks, int start, int stop,
                                     size_t bytes, void **ptrs)
{
        u8 **dptr = (u8 **)ptrs;
        u8 *p, *q;
        int d, z, z0;

        z0 = stop;              /* P/Q right side optimization */
        p = dptr[disks-2];      /* XOR parity */
        q = dptr[disks-1];      /* RS syndrome */

        kernel_fpu_begin();

        asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));

        for (d = 0 ; d < bytes ; d += 64) {
                asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d]));
                asm volatile("vmovdqa %0,%%ymm6" :: "m" (dptr[z0][d+32]));
                asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d]));
                asm volatile("vmovdqa %0,%%ymm3" : : "m" (p[d+32]));
                asm volatile("vpxor %ymm4,%ymm2,%ymm2");
                asm volatile("vpxor %ymm6,%ymm3,%ymm3");
                /* P/Q data pages */
                for (z = z0-1 ; z >= start ; z--) {
                        asm volatile("vpxor %ymm5,%ymm5,%ymm5");
                        asm volatile("vpxor %ymm7,%ymm7,%ymm7");
                        asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
                        asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
                        asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
                        asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
                        asm volatile("vpand %ymm0,%ymm5,%ymm5");
                        asm volatile("vpand %ymm0,%ymm7,%ymm7");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                        asm volatile("vpxor %ymm7,%ymm6,%ymm6");
                        asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d]));
                        asm volatile("vmovdqa %0,%%ymm7"
                                     :: "m" (dptr[z][d+32]));
                        asm volatile("vpxor %ymm5,%ymm2,%ymm2");
                        asm volatile("vpxor %ymm7,%ymm3,%ymm3");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                        asm volatile("vpxor %ymm7,%ymm6,%ymm6");
                }
                /* P/Q left side optimization */
                for (z = start-1 ; z >= 0 ; z--) {
                        asm volatile("vpxor %ymm5,%ymm5,%ymm5");
                        asm volatile("vpxor %ymm7,%ymm7,%ymm7");
                        asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
                        asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
                        asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
                        asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
                        asm volatile("vpand %ymm0,%ymm5,%ymm5");
                        asm volatile("vpand %ymm0,%ymm7,%ymm7");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                        asm volatile("vpxor %ymm7,%ymm6,%ymm6");
                }
                asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d]));
                asm volatile("vpxor %0,%%ymm6,%%ymm6" : : "m" (q[d+32]));
                /* Don't use movntdq for r/w memory area < cache line */
                asm volatile("vmovdqa %%ymm4,%0" : "=m" (q[d]));
                asm volatile("vmovdqa %%ymm6,%0" : "=m" (q[d+32]));
                asm volatile("vmovdqa %%ymm2,%0" : "=m" (p[d]));
                asm volatile("vmovdqa %%ymm3,%0" : "=m" (p[d+32]));
        }

        asm volatile("sfence" : : : "memory");
        kernel_fpu_end();
}

const struct raid6_calls raid6_avx2x2 = {
        raid6_avx22_gen_syndrome,
        raid6_avx22_xor_syndrome,
        raid6_have_avx2,
        "avx2x2",
        1                       /* Has cache hints */
};

#ifdef CONFIG_X86_64

/*
 * Unrolled-by-4 AVX2 implementation
 */
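/*
 * Four-way unrolled: 128 bytes per pass with four P/Q accumulator pairs.
 * This variant needs registers ymm10-ymm15, which only exist in 64-bit
 * mode, hence the CONFIG_X86_64 guard.  Unlike the x1/x2 variants, the
 * accumulators start out zeroed and the inner loop runs over every data
 * disk (z = z0 .. 0), so the first disk takes the same path as the rest.
 */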
static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
        u8 **dptr = (u8 **)ptrs;
        u8 *p, *q;
        int d, z, z0;

        z0 = disks - 3;         /* Highest data disk */
        p = dptr[z0+1];         /* XOR parity */
        q = dptr[z0+2];         /* RS syndrome */

        kernel_fpu_begin();

        asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
        asm volatile("vpxor %ymm1,%ymm1,%ymm1");        /* Zero temp */
        asm volatile("vpxor %ymm2,%ymm2,%ymm2");        /* P[0] */
        asm volatile("vpxor %ymm3,%ymm3,%ymm3");        /* P[1] */
        asm volatile("vpxor %ymm4,%ymm4,%ymm4");        /* Q[0] */
        asm volatile("vpxor %ymm6,%ymm6,%ymm6");        /* Q[1] */
        asm volatile("vpxor %ymm10,%ymm10,%ymm10");     /* P[2] */
        asm volatile("vpxor %ymm11,%ymm11,%ymm11");     /* P[3] */
        asm volatile("vpxor %ymm12,%ymm12,%ymm12");     /* Q[2] */
        asm volatile("vpxor %ymm14,%ymm14,%ymm14");     /* Q[3] */

        for (d = 0; d < bytes; d += 128) {
                for (z = z0; z >= 0; z--) {
                        asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
                        asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32]));
                        asm volatile("prefetchnta %0" : : "m" (dptr[z][d+64]));
                        asm volatile("prefetchnta %0" : : "m" (dptr[z][d+96]));
                        asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5");
                        asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7");
                        asm volatile("vpcmpgtb %ymm12,%ymm1,%ymm13");
                        asm volatile("vpcmpgtb %ymm14,%ymm1,%ymm15");
                        asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
                        asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
                        asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
                        asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
                        asm volatile("vpand %ymm0,%ymm5,%ymm5");
                        asm volatile("vpand %ymm0,%ymm7,%ymm7");
                        asm volatile("vpand %ymm0,%ymm13,%ymm13");
                        asm volatile("vpand %ymm0,%ymm15,%ymm15");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                        asm volatile("vpxor %ymm7,%ymm6,%ymm6");
                        asm volatile("vpxor %ymm13,%ymm12,%ymm12");
                        asm volatile("vpxor %ymm15,%ymm14,%ymm14");
                        asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d]));
                        asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32]));
                        asm volatile("vmovdqa %0,%%ymm13" : : "m" (dptr[z][d+64]));
                        asm volatile("vmovdqa %0,%%ymm15" : : "m" (dptr[z][d+96]));
                        asm volatile("vpxor %ymm5,%ymm2,%ymm2");
                        asm volatile("vpxor %ymm7,%ymm3,%ymm3");
                        asm volatile("vpxor %ymm13,%ymm10,%ymm10");
                        asm volatile("vpxor %ymm15,%ymm11,%ymm11");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                        asm volatile("vpxor %ymm7,%ymm6,%ymm6");
                        asm volatile("vpxor %ymm13,%ymm12,%ymm12");
                        asm volatile("vpxor %ymm15,%ymm14,%ymm14");
                }
                asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
                asm volatile("vpxor %ymm2,%ymm2,%ymm2");
                asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
                asm volatile("vpxor %ymm3,%ymm3,%ymm3");
                asm volatile("vmovntdq %%ymm10,%0" : "=m" (p[d+64]));
                asm volatile("vpxor %ymm10,%ymm10,%ymm10");
                asm volatile("vmovntdq %%ymm11,%0" : "=m" (p[d+96]));
                asm volatile("vpxor %ymm11,%ymm11,%ymm11");
                asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
                asm volatile("vpxor %ymm4,%ymm4,%ymm4");
                asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
                asm volatile("vpxor %ymm6,%ymm6,%ymm6");
                asm volatile("vmovntdq %%ymm12,%0" : "=m" (q[d+64]));
                asm volatile("vpxor %ymm12,%ymm12,%ymm12");
                asm volatile("vmovntdq %%ymm14,%0" : "=m" (q[d+96]));
                asm volatile("vpxor %ymm14,%ymm14,%ymm14");
        }

        asm volatile("sfence" : : : "memory");
        kernel_fpu_end();
}

static void raid6_avx24_xor_syndrome(int disks, int start, int stop,
                                     size_t bytes, void **ptrs)
{
        u8 **dptr = (u8 **)ptrs;
        u8 *p, *q;
        int d, z, z0;

        z0 = stop;              /* P/Q right side optimization */
        p = dptr[disks-2];      /* XOR parity */
        q = dptr[disks-1];      /* RS syndrome */

        kernel_fpu_begin();

        asm volatile("vmovdqa %0,%%ymm0" :: "m" (raid6_avx2_constants.x1d[0]));

        for (d = 0 ; d < bytes ; d += 128) {
                asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d]));
                asm volatile("vmovdqa %0,%%ymm6" :: "m" (dptr[z0][d+32]));
                asm volatile("vmovdqa %0,%%ymm12" :: "m" (dptr[z0][d+64]));
                asm volatile("vmovdqa %0,%%ymm14" :: "m" (dptr[z0][d+96]));
                asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d]));
                asm volatile("vmovdqa %0,%%ymm3" : : "m" (p[d+32]));
                asm volatile("vmovdqa %0,%%ymm10" : : "m" (p[d+64]));
                asm volatile("vmovdqa %0,%%ymm11" : : "m" (p[d+96]));
                asm volatile("vpxor %ymm4,%ymm2,%ymm2");
                asm volatile("vpxor %ymm6,%ymm3,%ymm3");
                asm volatile("vpxor %ymm12,%ymm10,%ymm10");
                asm volatile("vpxor %ymm14,%ymm11,%ymm11");
                /* P/Q data pages */
                for (z = z0-1 ; z >= start ; z--) {
                        asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
                        asm volatile("prefetchnta %0" :: "m" (dptr[z][d+64]));
                        asm volatile("vpxor %ymm5,%ymm5,%ymm5");
                        asm volatile("vpxor %ymm7,%ymm7,%ymm7");
                        asm volatile("vpxor %ymm13,%ymm13,%ymm13");
                        asm volatile("vpxor %ymm15,%ymm15,%ymm15");
                        asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
                        asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
                        asm volatile("vpcmpgtb %ymm12,%ymm13,%ymm13");
                        asm volatile("vpcmpgtb %ymm14,%ymm15,%ymm15");
                        asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
                        asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
                        asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
                        asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
                        asm volatile("vpand %ymm0,%ymm5,%ymm5");
                        asm volatile("vpand %ymm0,%ymm7,%ymm7");
                        asm volatile("vpand %ymm0,%ymm13,%ymm13");
                        asm volatile("vpand %ymm0,%ymm15,%ymm15");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                        asm volatile("vpxor %ymm7,%ymm6,%ymm6");
                        asm volatile("vpxor %ymm13,%ymm12,%ymm12");
                        asm volatile("vpxor %ymm15,%ymm14,%ymm14");
                        asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d]));
                        asm volatile("vmovdqa %0,%%ymm7"
                                     :: "m" (dptr[z][d+32]));
                        asm volatile("vmovdqa %0,%%ymm13"
                                     :: "m" (dptr[z][d+64]));
                        asm volatile("vmovdqa %0,%%ymm15"
                                     :: "m" (dptr[z][d+96]));
                        asm volatile("vpxor %ymm5,%ymm2,%ymm2");
                        asm volatile("vpxor %ymm7,%ymm3,%ymm3");
                        asm volatile("vpxor %ymm13,%ymm10,%ymm10");
                        asm volatile("vpxor %ymm15,%ymm11,%ymm11");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                        asm volatile("vpxor %ymm7,%ymm6,%ymm6");
                        asm volatile("vpxor %ymm13,%ymm12,%ymm12");
                        asm volatile("vpxor %ymm15,%ymm14,%ymm14");
                }
                asm volatile("prefetchnta %0" :: "m" (q[d]));
                asm volatile("prefetchnta %0" :: "m" (q[d+64]));
                /* P/Q left side optimization */
                for (z = start-1 ; z >= 0 ; z--) {
                        asm volatile("vpxor %ymm5,%ymm5,%ymm5");
                        asm volatile("vpxor %ymm7,%ymm7,%ymm7");
                        asm volatile("vpxor %ymm13,%ymm13,%ymm13");
                        asm volatile("vpxor %ymm15,%ymm15,%ymm15");
                        asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
                        asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
                        asm volatile("vpcmpgtb %ymm12,%ymm13,%ymm13");
                        asm volatile("vpcmpgtb %ymm14,%ymm15,%ymm15");
                        asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
                        asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
                        asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
                        asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
                        asm volatile("vpand %ymm0,%ymm5,%ymm5");
                        asm volatile("vpand %ymm0,%ymm7,%ymm7");
                        asm volatile("vpand %ymm0,%ymm13,%ymm13");
                        asm volatile("vpand %ymm0,%ymm15,%ymm15");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                        asm volatile("vpxor %ymm7,%ymm6,%ymm6");
                        asm volatile("vpxor %ymm13,%ymm12,%ymm12");
                        asm volatile("vpxor %ymm15,%ymm14,%ymm14");
                }
                asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
                asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
                asm volatile("vmovntdq %%ymm10,%0" : "=m" (p[d+64]));
                asm volatile("vmovntdq %%ymm11,%0" : "=m" (p[d+96]));
                asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d]));
                asm volatile("vpxor %0,%%ymm6,%%ymm6" : : "m" (q[d+32]));
                asm volatile("vpxor %0,%%ymm12,%%ymm12" : : "m" (q[d+64]));
                asm volatile("vpxor %0,%%ymm14,%%ymm14" : : "m" (q[d+96]));
                asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
                asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
                asm volatile("vmovntdq %%ymm12,%0" : "=m" (q[d+64]));
                asm volatile("vmovntdq %%ymm14,%0" : "=m" (q[d+96]));
        }
        asm volatile("sfence" : : : "memory");
        kernel_fpu_end();
}

const struct raid6_calls raid6_avx2x4 = {
        raid6_avx24_gen_syndrome,
        raid6_avx24_xor_syndrome,
        raid6_have_avx2,
        "avx2x4",
        1                       /* Has cache hints */
};
#endif

#endif /* CONFIG_AS_AVX2 */