linux/lib/raid6/recov_avx512.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2016 Intel Corporation
 *
 * Author: Gayatri Kammela <gayatri.kammela@intel.com>
 * Author: Megha Dey <megha.dey@linux.intel.com>
 */

#ifdef CONFIG_AS_AVX512

#include <linux/raid/pq.h>
#include "x86.h"

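/*
 * Beyond the AVX512F baseline, the recovery loops below rely on
 * AVX512BW (vpshufb/vpbroadcastb on zmm registers) and AVX512DQ
 * (vbroadcasti64x2); the remaining feature bits are checked as well.
 */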
static int raid6_has_avx512(void)
{
        return boot_cpu_has(X86_FEATURE_AVX2) &&
                boot_cpu_has(X86_FEATURE_AVX) &&
                boot_cpu_has(X86_FEATURE_AVX512F) &&
                boot_cpu_has(X86_FEATURE_AVX512BW) &&
                boot_cpu_has(X86_FEATURE_AVX512VL) &&
                boot_cpu_has(X86_FEATURE_AVX512DQ);
}

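/*
 * Two-failure recovery math, mirroring the generic C version in
 * lib/raid6/recov.c: with data disks A (faila) and B (failb) lost,
 * recompute the syndromes P' and Q' over the surviving disks.  In
 * GF(2^8),
 *
 *	Pxy = P ^ P' = A ^ B
 *	Qxy = Q ^ Q' = g^faila * A  ^  g^failb * B
 *
 * which solves to
 *
 *	B = pbmul(Pxy) ^ qmul(Qxy)	pbmul = g^faila / (g^faila ^ g^failb)
 *	A = B ^ Pxy			qmul  = 1 / (g^faila ^ g^failb)
 *
 * The vector loop below computes exactly these two multiplications and
 * XORs, 128 bytes (64-bit) or 64 bytes (32-bit) per iteration.
 */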
static void raid6_2data_recov_avx512(int disks, size_t bytes, int faila,
                                     int failb, void **ptrs)
{
        u8 *p, *q, *dp, *dq;
        const u8 *pbmul;        /* P multiplier table for B data */
        const u8 *qmul;         /* Q multiplier table (for both) */
        const u8 x0f = 0x0f;

        p = (u8 *)ptrs[disks-2];
        q = (u8 *)ptrs[disks-1];

        /*
         * Compute syndrome with zero for the missing data pages
         * Use the dead data pages as temporary storage for
         * delta p and delta q
         */

        dp = (u8 *)ptrs[faila];
        ptrs[faila] = (void *)raid6_empty_zero_page;
        ptrs[disks-2] = dp;
        dq = (u8 *)ptrs[failb];
        ptrs[failb] = (void *)raid6_empty_zero_page;
        ptrs[disks-1] = dq;

        raid6_call.gen_syndrome(disks, bytes, ptrs);

        /* Restore pointer table */
        ptrs[faila]   = dp;
        ptrs[failb]   = dq;
        ptrs[disks-2] = p;
        ptrs[disks-1] = q;

        /* Now, pick the proper data tables */
        pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
        qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
                raid6_gfexp[failb]]];
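
        /*
         * Per-byte equivalent (the generic lib/raid6/recov.c loop; note
         * that the scalar code indexes full 256-byte raid6_gfmul tables,
         * while the SIMD code uses the split-nibble raid6_vgfmul tables):
         *
         *	while (bytes--) {
         *		px    = *p ^ *dp;
         *		qx    = qmul[*q ^ *dq];
         *		*dq++ = db = pbmul[px] ^ qx;	reconstructed B
         *		*dp++ = db ^ px;		reconstructed A
         *		p++; q++;
         *	}
         */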

        kernel_fpu_begin();

        /* zmm7 = 64 copies of the nibble mask 0x0f */
        asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));

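        /*
         * GF(2^8) constant multiplication via vpshufb: each raid6_vgfmul[c]
         * entry is a pair of 16-byte tables holding c * x for x = the low
         * nibble and x = the high nibble of a byte.  A product is then
         * lo_tbl[b & 0x0f] ^ hi_tbl[b >> 4], evaluated 64 bytes at a time.
         */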
        while (bytes) {
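                /*
                 * 64-bit mode can encode zmm8-zmm15, so that path is
                 * unrolled twofold and consumes 128 bytes per iteration;
                 * 32-bit mode only has zmm0-zmm7 and does 64 bytes.
                 */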
#ifdef CONFIG_X86_64
                asm volatile("vmovdqa64 %0, %%zmm1\n\t"
                             "vmovdqa64 %1, %%zmm9\n\t"
                             "vmovdqa64 %2, %%zmm0\n\t"
                             "vmovdqa64 %3, %%zmm8\n\t"
                             "vpxorq %4, %%zmm1, %%zmm1\n\t"
                             "vpxorq %5, %%zmm9, %%zmm9\n\t"
                             "vpxorq %6, %%zmm0, %%zmm0\n\t"
                             "vpxorq %7, %%zmm8, %%zmm8"
                             :
                             : "m" (q[0]), "m" (q[64]), "m" (p[0]),
                               "m" (p[64]), "m" (dq[0]), "m" (dq[64]),
                               "m" (dp[0]), "m" (dp[64]));

                /*
                 * 1 = dq[0]  ^ q[0]
                 * 9 = dq[64] ^ q[64]
                 * 0 = dp[0]  ^ p[0]
                 * 8 = dp[64] ^ p[64]
                 */

                asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
                             "vbroadcasti64x2 %1, %%zmm5"
                             :
                             : "m" (qmul[0]), "m" (qmul[16]));

                asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
                             "vpsraw $4, %%zmm9, %%zmm12\n\t"
                             "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
                             "vpandq %%zmm7, %%zmm9, %%zmm9\n\t"
                             "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
                             "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
                             "vpshufb %%zmm9, %%zmm4, %%zmm14\n\t"
                             "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
                             "vpshufb %%zmm12, %%zmm5, %%zmm15\n\t"
                             "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
                             "vpxorq %%zmm14, %%zmm15, %%zmm15\n\t"
                             "vpxorq %%zmm4, %%zmm5, %%zmm5"
                             :
                             : );

                /*
                 * 5 = qx[0]
                 * 15 = qx[64]
                 */

                asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
                             "vbroadcasti64x2 %1, %%zmm1\n\t"
                             "vpsraw $4, %%zmm0, %%zmm2\n\t"
                             "vpsraw $4, %%zmm8, %%zmm6\n\t"
                             "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
                             "vpandq %%zmm7, %%zmm8, %%zmm14\n\t"
                             "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
                             "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
                             "vpshufb %%zmm14, %%zmm4, %%zmm12\n\t"
                             "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
                             "vpshufb %%zmm6, %%zmm1, %%zmm13\n\t"
                             "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
                             "vpxorq %%zmm4, %%zmm1, %%zmm1\n\t"
                             "vpxorq %%zmm12, %%zmm13, %%zmm13"
                             :
                             : "m" (pbmul[0]), "m" (pbmul[16]));

                /*
                 * 1  = pbmul[px[0]]
                 * 13 = pbmul[px[64]]
                 */
                asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
                             "vpxorq %%zmm15, %%zmm13, %%zmm13"
                             :
                             : );

                /*
                 * 1 = db = DQ
                 * 13 = db[64] = DQ[64]
                 */
                asm volatile("vmovdqa64 %%zmm1, %0\n\t"
                             "vmovdqa64 %%zmm13, %1\n\t"
                             "vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
                             "vpxorq %%zmm13, %%zmm8, %%zmm8"
                             :
                             : "m" (dq[0]), "m" (dq[64]));

                asm volatile("vmovdqa64 %%zmm0, %0\n\t"
                             "vmovdqa64 %%zmm8, %1"
                             :
                             : "m" (dp[0]), "m" (dp[64]));

                bytes -= 128;
                p += 128;
                q += 128;
                dp += 128;
                dq += 128;
#else
                asm volatile("vmovdqa64 %0, %%zmm1\n\t"
                             "vmovdqa64 %1, %%zmm0\n\t"
                             "vpxorq %2, %%zmm1, %%zmm1\n\t"
                             "vpxorq %3, %%zmm0, %%zmm0"
                             :
                             : "m" (*q), "m" (*p), "m" (*dq), "m" (*dp));

                /* 1 = dq ^ q;  0 = dp ^ p */

                asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
                             "vbroadcasti64x2 %1, %%zmm5"
                             :
                             : "m" (qmul[0]), "m" (qmul[16]));

                /*
                 * 1 = dq ^ q
                 * 3 = (dq ^ q) >> 4
                 */
                asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
                             "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
                             "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
                             "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
                             "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
                             "vpxorq %%zmm4, %%zmm5, %%zmm5"
                             :
                             : );

                /* 5 = qx */

                asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
                             "vbroadcasti64x2 %1, %%zmm1"
                             :
                             : "m" (pbmul[0]), "m" (pbmul[16]));

                asm volatile("vpsraw $4, %%zmm0, %%zmm2\n\t"
                             "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
                             "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
                             "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
                             "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
                             "vpxorq %%zmm4, %%zmm1, %%zmm1"
                             :
                             : );

                /* 1 = pbmul[px] */
                asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
                             /* 1 = db = DQ */
                             "vmovdqa64 %%zmm1, %0\n\t"
                             :
                             : "m" (dq[0]));

                asm volatile("vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
                             "vmovdqa64 %%zmm0, %0"
                             :
                             : "m" (dp[0]));

                bytes -= 64;
                p += 64;
                q += 64;
                dp += 64;
                dq += 64;
#endif
        }

        kernel_fpu_end();
}

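/*
 * Data+P recovery, mirroring the generic version in lib/raid6/recov.c:
 * only Q is intact.  Recompute Q' with a zero page substituted for the
 * lost data disk; then, per byte,
 *
 *	D = qmul(Q ^ Q'),	qmul = 1 / g^faila
 *
 * P is rebuilt for free: gen_syndrome() leaves P' (the parity of the
 * surviving data) in the P page, and P = P' ^ D.
 */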
static void raid6_datap_recov_avx512(int disks, size_t bytes, int faila,
                                     void **ptrs)
{
        u8 *p, *q, *dq;
        const u8 *qmul;         /* Q multiplier table */
        const u8 x0f = 0x0f;

        p = (u8 *)ptrs[disks-2];
        q = (u8 *)ptrs[disks-1];

        /*
         * Compute syndrome with zero for the missing data page
         * Use the dead data page as temporary storage for delta q
         */

        dq = (u8 *)ptrs[faila];
        ptrs[faila] = (void *)raid6_empty_zero_page;
        ptrs[disks-1] = dq;

        raid6_call.gen_syndrome(disks, bytes, ptrs);

        /* Restore pointer table */
        ptrs[faila]   = dq;
        ptrs[disks-1] = q;
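
        /*
         * Note that ptrs[disks-2] was left in place, so the P page now
         * holds P' rather than P; the stores at the bottom of the loop
         * turn it back into the true parity.
         */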

        /* Now, pick the proper data tables */
        qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];

        kernel_fpu_begin();

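        /* zmm7 = 64 copies of the nibble mask 0x0f */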
        asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));

        while (bytes) {
#ifdef CONFIG_X86_64
                asm volatile("vmovdqa64 %0, %%zmm3\n\t"
                             "vmovdqa64 %1, %%zmm8\n\t"
                             "vpxorq %2, %%zmm3, %%zmm3\n\t"
                             "vpxorq %3, %%zmm8, %%zmm8"
                             :
                             : "m" (dq[0]), "m" (dq[64]), "m" (q[0]),
                               "m" (q[64]));

                /*
                 * 3 = q[0] ^ dq[0]
                 * 8 = q[64] ^ dq[64]
                 */
                asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
                             "vmovapd %%zmm0, %%zmm13\n\t"
                             "vbroadcasti64x2 %1, %%zmm1\n\t"
                             "vmovapd %%zmm1, %%zmm14"
                             :
                             : "m" (qmul[0]), "m" (qmul[16]));

                asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
                             "vpsraw $4, %%zmm8, %%zmm12\n\t"
                             "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
                             "vpandq %%zmm7, %%zmm8, %%zmm8\n\t"
                             "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
                             "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
                             "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
                             "vpshufb %%zmm8, %%zmm13, %%zmm13\n\t"
                             "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
                             "vpshufb %%zmm12, %%zmm14, %%zmm14\n\t"
                             "vpxorq %%zmm0, %%zmm1, %%zmm1\n\t"
                             "vpxorq %%zmm13, %%zmm14, %%zmm14"
                             :
                             : );

                /*
                 * 1  = qmul[q[0]  ^ dq[0]]
                 * 14 = qmul[q[64] ^ dq[64]]
                 */
                asm volatile("vmovdqa64 %0, %%zmm2\n\t"
                             "vmovdqa64 %1, %%zmm12\n\t"
                             "vpxorq %%zmm1, %%zmm2, %%zmm2\n\t"
                             "vpxorq %%zmm14, %%zmm12, %%zmm12"
                             :
                             : "m" (p[0]), "m" (p[64]));

                /*
                 * 2  = p[0]  ^ qmul[q[0]  ^ dq[0]]
                 * 12 = p[64] ^ qmul[q[64] ^ dq[64]]
                 */

                asm volatile("vmovdqa64 %%zmm1, %0\n\t"
                             "vmovdqa64 %%zmm14, %1\n\t"
                             "vmovdqa64 %%zmm2, %2\n\t"
                             "vmovdqa64 %%zmm12, %3"
                             :
                             : "m" (dq[0]), "m" (dq[64]), "m" (p[0]),
                               "m" (p[64]));

                bytes -= 128;
                p += 128;
                q += 128;
                dq += 128;
#else
                asm volatile("vmovdqa64 %0, %%zmm3\n\t"
                             "vpxorq %1, %%zmm3, %%zmm3"
                             :
                             : "m" (dq[0]), "m" (q[0]));

                /* 3 = q ^ dq */

                asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
                             "vbroadcasti64x2 %1, %%zmm1"
                             :
                             : "m" (qmul[0]), "m" (qmul[16]));

                asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
                             "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
                             "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
                             "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
                             "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
                             "vpxorq %%zmm0, %%zmm1, %%zmm1"
                             :
                             : );

                /* 1 = qmul[q ^ dq] */

                asm volatile("vmovdqa64 %0, %%zmm2\n\t"
                             "vpxorq %%zmm1, %%zmm2, %%zmm2"
                             :
                             : "m" (p[0]));

                /* 2 = p ^ qmul[q ^ dq] */

                asm volatile("vmovdqa64 %%zmm1, %0\n\t"
                             "vmovdqa64 %%zmm2, %1"
                             :
                             : "m" (dq[0]), "m" (p[0]));

                bytes -= 64;
                p += 64;
                q += 64;
                dq += 64;
#endif
        }

        kernel_fpu_end();
}

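/*
 * Registered in lib/raid6/algos.c, where raid6_choose_recov() selects
 * the valid recovery implementation with the highest priority; the
 * "x2"/"x1" suffix reflects the two- vs. one-wide inner loop above.
 */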
const struct raid6_recov_calls raid6_recov_avx512 = {
        .data2 = raid6_2data_recov_avx512,
        .datap = raid6_datap_recov_avx512,
        .valid = raid6_has_avx512,
#ifdef CONFIG_X86_64
        .name = "avx512x2",
#else
        .name = "avx512x1",
#endif
        .priority = 3,
};

#else
#warning "your version of binutils lacks AVX512 support"
#endif