linux/lib/raid6/recov_avx2.c
/*
 * Copyright (C) 2012 Intel Corporation
 * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */

#ifdef CONFIG_AS_AVX2

#include <linux/raid/pq.h>
#include "x86.h"

static int raid6_has_avx2(void)
{
        return boot_cpu_has(X86_FEATURE_AVX2) &&
                boot_cpu_has(X86_FEATURE_AVX);
}

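/*
 * Two lost data blocks Da (faila) and Db (failb).  gen_syndrome() is run
 * with zeros substituted for the failed blocks, leaving the partial
 * syndromes P' and Q' in dp/dq; the lost data then follows from the
 * two-disk recovery equations (cf. H. Peter Anvin, "The mathematics of
 * RAID-6"):
 *
 *	Db = (P ^ P') / (g^(failb-faila) ^ 1)  ^  (Q ^ Q') / (g^faila ^ g^failb)
 *	Da = Db ^ (P ^ P')
 *
 * where g generates GF(256) and / denotes multiplication by the GF
 * inverse.  A rough per-byte sketch of what the vector loop computes,
 * modeled on the scalar version in lib/raid6/recov.c:
 *
 *	px    = p[i] ^ dp[i];		(P ^ P')
 *	qx    = qmul[q[i] ^ dq[i]];	(second term above)
 *	dq[i] = db = pbmul[px] ^ qx;	(reconstructed Db)
 *	dp[i] = db ^ px;		(reconstructed Da)
 */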
static void raid6_2data_recov_avx2(int disks, size_t bytes, int faila,
                int failb, void **ptrs)
{
        u8 *p, *q, *dp, *dq;
        const u8 *pbmul;        /* P multiplier table for B data */
        const u8 *qmul;         /* Q multiplier table (for both) */
        const u8 x0f = 0x0f;

        p = (u8 *)ptrs[disks-2];
        q = (u8 *)ptrs[disks-1];

        /*
         * Compute the syndrome with zero for the missing data pages.
         * Use the dead data pages as temporary storage for
         * delta p and delta q.
         */
        dp = (u8 *)ptrs[faila];
        ptrs[faila] = (void *)raid6_empty_zero_page;
        ptrs[disks-2] = dp;
        dq = (u8 *)ptrs[failb];
        ptrs[failb] = (void *)raid6_empty_zero_page;
        ptrs[disks-1] = dq;

        raid6_call.gen_syndrome(disks, bytes, ptrs);

        /* Restore pointer table */
        ptrs[faila]   = dp;
        ptrs[failb]   = dq;
        ptrs[disks-2] = p;
        ptrs[disks-1] = q;

        /* Now, pick the proper data tables */
        pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
        qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
                raid6_gfexp[failb]]];
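
        /*
         * pbmul scales by 1/(g^(failb-faila) ^ 1) and qmul by
         * 1/(g^faila ^ g^failb): the two constant factors in the
         * recovery equations in the comment above this function.
         */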

        kernel_fpu_begin();

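        /*
         * Constant GF(256) multiplication is done 32 bytes at a time with
         * VPSHUFB table lookups: raid6_vgfmul[c] holds two 16-byte tables,
         * the products of c with every low-nibble and every high-nibble
         * value, so that per byte x (a sketch of the per-byte equivalent):
         *
         *	c * x = raid6_vgfmul[c][x & 0x0f] ^ raid6_vgfmul[c][16 + (x >> 4)]
         *
         * AVX2 has no byte-wide shift, hence the VPSRAW word shift plus
         * the 0x0f mask below to extract the high nibbles.
         */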
        /* ymm7 = 32 copies of the nibble mask 0x0f */
        asm volatile("vpbroadcastb %0, %%ymm7" : : "m" (x0f));

        while (bytes) {
#ifdef CONFIG_X86_64
                asm volatile("vmovdqa %0, %%ymm1" : : "m" (q[0]));
                asm volatile("vmovdqa %0, %%ymm9" : : "m" (q[32]));
                asm volatile("vmovdqa %0, %%ymm0" : : "m" (p[0]));
                asm volatile("vmovdqa %0, %%ymm8" : : "m" (p[32]));
                asm volatile("vpxor %0, %%ymm1, %%ymm1" : : "m" (dq[0]));
                asm volatile("vpxor %0, %%ymm9, %%ymm9" : : "m" (dq[32]));
                asm volatile("vpxor %0, %%ymm0, %%ymm0" : : "m" (dp[0]));
                asm volatile("vpxor %0, %%ymm8, %%ymm8" : : "m" (dp[32]));

                /*
                 * 1 = dq[0]  ^ q[0]
                 * 9 = dq[32] ^ q[32]
                 * 0 = dp[0]  ^ p[0]
                 * 8 = dp[32] ^ p[32]
                 */

                asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (qmul[0]));
                asm volatile("vbroadcasti128 %0, %%ymm5" : : "m" (qmul[16]));

                asm volatile("vpsraw $4, %ymm1, %ymm3");
                asm volatile("vpsraw $4, %ymm9, %ymm12");
                asm volatile("vpand %ymm7, %ymm1, %ymm1");
                asm volatile("vpand %ymm7, %ymm9, %ymm9");
                asm volatile("vpand %ymm7, %ymm3, %ymm3");
                asm volatile("vpand %ymm7, %ymm12, %ymm12");
                asm volatile("vpshufb %ymm9, %ymm4, %ymm14");
                asm volatile("vpshufb %ymm1, %ymm4, %ymm4");
                asm volatile("vpshufb %ymm12, %ymm5, %ymm15");
                asm volatile("vpshufb %ymm3, %ymm5, %ymm5");
                asm volatile("vpxor %ymm14, %ymm15, %ymm15");
                asm volatile("vpxor %ymm4, %ymm5, %ymm5");

                /*
                 * 5  = qx[0]
                 * 15 = qx[32]
                 */

                asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (pbmul[0]));
                asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (pbmul[16]));
                asm volatile("vpsraw $4, %ymm0, %ymm2");
                asm volatile("vpsraw $4, %ymm8, %ymm6");
                asm volatile("vpand %ymm7, %ymm0, %ymm3");
                asm volatile("vpand %ymm7, %ymm8, %ymm14");
                asm volatile("vpand %ymm7, %ymm2, %ymm2");
                asm volatile("vpand %ymm7, %ymm6, %ymm6");
                asm volatile("vpshufb %ymm14, %ymm4, %ymm12");
                asm volatile("vpshufb %ymm3, %ymm4, %ymm4");
                asm volatile("vpshufb %ymm6, %ymm1, %ymm13");
                asm volatile("vpshufb %ymm2, %ymm1, %ymm1");
                asm volatile("vpxor %ymm4, %ymm1, %ymm1");
                asm volatile("vpxor %ymm12, %ymm13, %ymm13");

                /*
                 * 1  = pbmul[px[0]]
                 * 13 = pbmul[px[32]]
                 */
                asm volatile("vpxor %ymm5, %ymm1, %ymm1");
                asm volatile("vpxor %ymm15, %ymm13, %ymm13");

                /*
                 * 1  = db = DQ
                 * 13 = db[32] = DQ[32]
                 */
                asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
                asm volatile("vmovdqa %%ymm13, %0" : "=m" (dq[32]));
                asm volatile("vpxor %ymm1, %ymm0, %ymm0");
                asm volatile("vpxor %ymm13, %ymm8, %ymm8");

                asm volatile("vmovdqa %%ymm0, %0" : "=m" (dp[0]));
                asm volatile("vmovdqa %%ymm8, %0" : "=m" (dp[32]));

                bytes -= 64;
                p += 64;
                q += 64;
                dp += 64;
                dq += 64;
#else
                asm volatile("vmovdqa %0, %%ymm1" : : "m" (*q));
                asm volatile("vmovdqa %0, %%ymm0" : : "m" (*p));
                asm volatile("vpxor %0, %%ymm1, %%ymm1" : : "m" (*dq));
                asm volatile("vpxor %0, %%ymm0, %%ymm0" : : "m" (*dp));

                /* 1 = dq ^ q;  0 = dp ^ p */

                asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (qmul[0]));
                asm volatile("vbroadcasti128 %0, %%ymm5" : : "m" (qmul[16]));

                /*
                 * 1 = dq ^ q
                 * 3 = (dq ^ q) >> 4
                 */
                asm volatile("vpsraw $4, %ymm1, %ymm3");
                asm volatile("vpand %ymm7, %ymm1, %ymm1");
                asm volatile("vpand %ymm7, %ymm3, %ymm3");
                asm volatile("vpshufb %ymm1, %ymm4, %ymm4");
                asm volatile("vpshufb %ymm3, %ymm5, %ymm5");
                asm volatile("vpxor %ymm4, %ymm5, %ymm5");

                /* 5 = qx */

                asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (pbmul[0]));
                asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (pbmul[16]));

                asm volatile("vpsraw $4, %ymm0, %ymm2");
                asm volatile("vpand %ymm7, %ymm0, %ymm3");
                asm volatile("vpand %ymm7, %ymm2, %ymm2");
                asm volatile("vpshufb %ymm3, %ymm4, %ymm4");
                asm volatile("vpshufb %ymm2, %ymm1, %ymm1");
                asm volatile("vpxor %ymm4, %ymm1, %ymm1");

                /* 1 = pbmul[px] */
                asm volatile("vpxor %ymm5, %ymm1, %ymm1");
                /* 1 = db = DQ */
                asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));

                asm volatile("vpxor %ymm1, %ymm0, %ymm0");
                asm volatile("vmovdqa %%ymm0, %0" : "=m" (dp[0]));

                bytes -= 32;
                p += 32;
                q += 32;
                dp += 32;
                dq += 32;
#endif
        }

        kernel_fpu_end();
}

static void raid6_datap_recov_avx2(int disks, size_t bytes, int faila,
                void **ptrs)
{
        u8 *p, *q, *dq;
        const u8 *qmul;         /* Q multiplier table */
        const u8 x0f = 0x0f;

        p = (u8 *)ptrs[disks-2];
        q = (u8 *)ptrs[disks-1];

        /*
         * Compute the syndrome with zero for the missing data page.
         * Use the dead data page as temporary storage for delta q.
         */
        dq = (u8 *)ptrs[faila];
        ptrs[faila] = (void *)raid6_empty_zero_page;
        ptrs[disks-1] = dq;

        raid6_call.gen_syndrome(disks, bytes, ptrs);

        /* Restore pointer table */
        ptrs[faila]   = dq;
        ptrs[disks-1] = q;

        /* Now, pick the proper data tables */
        qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];

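        /*
         * With only Da missing, Q ^ Q' = g^faila * Da, so qmul (scaling
         * by the inverse of g^faila) recovers Da directly, and P is then
         * repaired by adding Da back in.  A rough per-byte sketch,
         * modeled on the scalar version in lib/raid6/recov.c:
         *
         *	dq[i] = qmul[q[i] ^ dq[i]];	(reconstructed Da)
         *	p[i] ^= dq[i];			(repaired P)
         */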
        kernel_fpu_begin();

        /* ymm7 = 32 copies of the nibble mask 0x0f */
        asm volatile("vpbroadcastb %0, %%ymm7" : : "m" (x0f));

        while (bytes) {
#ifdef CONFIG_X86_64
                asm volatile("vmovdqa %0, %%ymm3" : : "m" (dq[0]));
                asm volatile("vmovdqa %0, %%ymm8" : : "m" (dq[32]));
                asm volatile("vpxor %0, %%ymm3, %%ymm3" : : "m" (q[0]));
                asm volatile("vpxor %0, %%ymm8, %%ymm8" : : "m" (q[32]));

                /*
                 * 3 = q[0]  ^ dq[0]
                 * 8 = q[32] ^ dq[32]
                 */
                asm volatile("vbroadcasti128 %0, %%ymm0" : : "m" (qmul[0]));
                /* vpshufb overwrites its table, so keep copies for lane 2 */
                asm volatile("vmovapd %ymm0, %ymm13");
                asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (qmul[16]));
                asm volatile("vmovapd %ymm1, %ymm14");

                asm volatile("vpsraw $4, %ymm3, %ymm6");
                asm volatile("vpsraw $4, %ymm8, %ymm12");
                asm volatile("vpand %ymm7, %ymm3, %ymm3");
                asm volatile("vpand %ymm7, %ymm8, %ymm8");
                asm volatile("vpand %ymm7, %ymm6, %ymm6");
                asm volatile("vpand %ymm7, %ymm12, %ymm12");
                asm volatile("vpshufb %ymm3, %ymm0, %ymm0");
                asm volatile("vpshufb %ymm8, %ymm13, %ymm13");
                asm volatile("vpshufb %ymm6, %ymm1, %ymm1");
                asm volatile("vpshufb %ymm12, %ymm14, %ymm14");
                asm volatile("vpxor %ymm0, %ymm1, %ymm1");
                asm volatile("vpxor %ymm13, %ymm14, %ymm14");

                /*
                 * 1  = qmul[q[0]  ^ dq[0]]
                 * 14 = qmul[q[32] ^ dq[32]]
                 */
                asm volatile("vmovdqa %0, %%ymm2" : : "m" (p[0]));
                asm volatile("vmovdqa %0, %%ymm12" : : "m" (p[32]));
                asm volatile("vpxor %ymm1, %ymm2, %ymm2");
                asm volatile("vpxor %ymm14, %ymm12, %ymm12");

                /*
                 * 2  = p[0]  ^ qmul[q[0]  ^ dq[0]]
                 * 12 = p[32] ^ qmul[q[32] ^ dq[32]]
                 */

                asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
                asm volatile("vmovdqa %%ymm14, %0" : "=m" (dq[32]));
                asm volatile("vmovdqa %%ymm2, %0" : "=m" (p[0]));
                asm volatile("vmovdqa %%ymm12, %0" : "=m" (p[32]));

                bytes -= 64;
                p += 64;
                q += 64;
                dq += 64;
#else
                asm volatile("vmovdqa %0, %%ymm3" : : "m" (dq[0]));
                asm volatile("vpxor %0, %%ymm3, %%ymm3" : : "m" (q[0]));

                /* 3 = q ^ dq */

                asm volatile("vbroadcasti128 %0, %%ymm0" : : "m" (qmul[0]));
                asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (qmul[16]));

                asm volatile("vpsraw $4, %ymm3, %ymm6");
                asm volatile("vpand %ymm7, %ymm3, %ymm3");
                asm volatile("vpand %ymm7, %ymm6, %ymm6");
                asm volatile("vpshufb %ymm3, %ymm0, %ymm0");
                asm volatile("vpshufb %ymm6, %ymm1, %ymm1");
                asm volatile("vpxor %ymm0, %ymm1, %ymm1");

                /* 1 = qmul[q ^ dq] */

                asm volatile("vmovdqa %0, %%ymm2" : : "m" (p[0]));
                asm volatile("vpxor %ymm1, %ymm2, %ymm2");

                /* 2 = p ^ qmul[q ^ dq] */

                asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
                asm volatile("vmovdqa %%ymm2, %0" : "=m" (p[0]));

                bytes -= 32;
                p += 32;
                q += 32;
                dq += 32;
#endif
        }

        kernel_fpu_end();
}

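/*
 * The "x2"/"x1" suffix is the unroll factor: the 64-bit build processes
 * two 32-byte ymm lanes per loop iteration (ymm8-15 exist only in 64-bit
 * mode), the 32-bit build one.
 */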
const struct raid6_recov_calls raid6_recov_avx2 = {
        .data2 = raid6_2data_recov_avx2,
        .datap = raid6_datap_recov_avx2,
        .valid = raid6_has_avx2,
#ifdef CONFIG_X86_64
        .name = "avx2x2",
#else
        .name = "avx2x1",
#endif
        .priority = 2,
};

#else
#warning "your version of binutils lacks AVX2 support"
#endif
