linux/lib/raid6/recov_ssse3.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 Intel Corporation
 */

#include <linux/raid/pq.h>
#include "x86.h"

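/*
 * The recovery loops below use SSE2 integer ops (movdqa, pxor, psraw, pand)
 * plus the SSSE3 pshufb instruction for the nibble table lookups, hence the
 * check for SSE (XMM), SSE2 (XMM2) and SSSE3.
 */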
static int raid6_has_ssse3(void)
{
        return boot_cpu_has(X86_FEATURE_XMM) &&
                boot_cpu_has(X86_FEATURE_XMM2) &&
                boot_cpu_has(X86_FEATURE_SSSE3);
}

static void raid6_2data_recov_ssse3(int disks, size_t bytes, int faila,
                int failb, void **ptrs)
{
        u8 *p, *q, *dp, *dq;
        const u8 *pbmul;        /* P multiplier table for B data */
        const u8 *qmul;         /* Q multiplier table (for both) */
        static const u8 __aligned(16) x0f[16] = {
                 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
                 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f};

        p = (u8 *)ptrs[disks-2];
        q = (u8 *)ptrs[disks-1];

        /*
         * Compute syndrome with zero for the missing data pages.
         * Use the dead data pages as temporary storage for
         * delta p and delta q.
         */
        dp = (u8 *)ptrs[faila];
        ptrs[faila] = (void *)raid6_empty_zero_page;
        ptrs[disks-2] = dp;
        dq = (u8 *)ptrs[failb];
        ptrs[failb] = (void *)raid6_empty_zero_page;
        ptrs[disks-1] = dq;

        raid6_call.gen_syndrome(disks, bytes, ptrs);

        /* Restore pointer table */
        ptrs[faila]   = dp;
        ptrs[failb]   = dq;
        ptrs[disks-2] = p;
        ptrs[disks-1] = q;

        /* Now, pick the proper data tables */
        pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
        qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
                raid6_gfexp[failb]]];
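        /*
         * Per-byte recovery math, as in the generic lib/raid6/recov.c and
         * H. Peter Anvin's "The mathematics of RAID-6":
         *
         *   px = P ^ Pxy                 (= Da ^ Db)
         *   qx = qmul[Q ^ Qxy]           qmul  = 1/(g^faila + g^failb)
         *   Db = pbmul[px] ^ qx          pbmul = 1/(g^(failb-faila) + 1)
         *   Da = Db ^ px
         *
         * The loop below evaluates these formulas 16 bytes (2 x 16 on
         * x86-64) at a time using pshufb table lookups.
         */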

        kernel_fpu_begin();

        asm volatile("movdqa %0,%%xmm7" : : "m" (x0f[0]));
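        /*
         * xmm7 holds the 0x0f byte mask. Each raid6_vgfmul[c] entry is a
         * pair of 16-byte tables: products of c with every low nibble and
         * with every high nibble. A GF(256) multiply of byte b by c is then
         * lo_tbl[b & 0x0f] ^ hi_tbl[b >> 4], computed 16 bytes at a time
         * with pshufb after masking/shifting out each nibble.
         */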

#ifdef CONFIG_X86_64
        asm volatile("movdqa %0,%%xmm6" : : "m" (qmul[0]));
        asm volatile("movdqa %0,%%xmm14" : : "m" (pbmul[0]));
        asm volatile("movdqa %0,%%xmm15" : : "m" (pbmul[16]));
#endif

        /* Now do it... */
        while (bytes) {
#ifdef CONFIG_X86_64
                /* xmm6 = qmul[0], xmm14 = pbmul[0], xmm15 = pbmul[16] (preloaded above) */

                asm volatile("movdqa %0,%%xmm1" : : "m" (q[0]));
                asm volatile("movdqa %0,%%xmm9" : : "m" (q[16]));
                asm volatile("movdqa %0,%%xmm0" : : "m" (p[0]));
                asm volatile("movdqa %0,%%xmm8" : : "m" (p[16]));
                asm volatile("pxor   %0,%%xmm1" : : "m" (dq[0]));
                asm volatile("pxor   %0,%%xmm9" : : "m" (dq[16]));
                asm volatile("pxor   %0,%%xmm0" : : "m" (dp[0]));
                asm volatile("pxor   %0,%%xmm8" : : "m" (dp[16]));

                /* xmm0/8 = px */

                asm volatile("movdqa %xmm6,%xmm4");
                asm volatile("movdqa %0,%%xmm5" : : "m" (qmul[16]));
                asm volatile("movdqa %xmm6,%xmm12");
                asm volatile("movdqa %xmm5,%xmm13");
                asm volatile("movdqa %xmm1,%xmm3");
                asm volatile("movdqa %xmm9,%xmm11");
                asm volatile("movdqa %xmm0,%xmm2"); /* xmm2/10 = px */
                asm volatile("movdqa %xmm8,%xmm10");
                asm volatile("psraw  $4,%xmm1");
                asm volatile("psraw  $4,%xmm9");
                asm volatile("pand   %xmm7,%xmm3");
                asm volatile("pand   %xmm7,%xmm11");
                asm volatile("pand   %xmm7,%xmm1");
                asm volatile("pand   %xmm7,%xmm9");
                asm volatile("pshufb %xmm3,%xmm4");
                asm volatile("pshufb %xmm11,%xmm12");
                asm volatile("pshufb %xmm1,%xmm5");
                asm volatile("pshufb %xmm9,%xmm13");
                asm volatile("pxor   %xmm4,%xmm5");
                asm volatile("pxor   %xmm12,%xmm13");

                /* xmm5/13 = qx */

                asm volatile("movdqa %xmm14,%xmm4");
                asm volatile("movdqa %xmm15,%xmm1");
                asm volatile("movdqa %xmm14,%xmm12");
                asm volatile("movdqa %xmm15,%xmm9");
                asm volatile("movdqa %xmm2,%xmm3");
                asm volatile("movdqa %xmm10,%xmm11");
                asm volatile("psraw  $4,%xmm2");
                asm volatile("psraw  $4,%xmm10");
                asm volatile("pand   %xmm7,%xmm3");
                asm volatile("pand   %xmm7,%xmm11");
                asm volatile("pand   %xmm7,%xmm2");
                asm volatile("pand   %xmm7,%xmm10");
                asm volatile("pshufb %xmm3,%xmm4");
                asm volatile("pshufb %xmm11,%xmm12");
                asm volatile("pshufb %xmm2,%xmm1");
                asm volatile("pshufb %xmm10,%xmm9");
                asm volatile("pxor   %xmm4,%xmm1");
                asm volatile("pxor   %xmm12,%xmm9");

                /* xmm1/9 = pbmul[px] */
                asm volatile("pxor   %xmm5,%xmm1");
                asm volatile("pxor   %xmm13,%xmm9");
                /* xmm1/9 = db = DQ */
                asm volatile("movdqa %%xmm1,%0" : "=m" (dq[0]));
                asm volatile("movdqa %%xmm9,%0" : "=m" (dq[16]));

                asm volatile("pxor   %xmm1,%xmm0");
                asm volatile("pxor   %xmm9,%xmm8");
                asm volatile("movdqa %%xmm0,%0" : "=m" (dp[0]));
                asm volatile("movdqa %%xmm8,%0" : "=m" (dp[16]));

                bytes -= 32;
                p += 32;
                q += 32;
                dp += 32;
                dq += 32;
#else
                asm volatile("movdqa %0,%%xmm1" : : "m" (*q));
                asm volatile("movdqa %0,%%xmm0" : : "m" (*p));
                asm volatile("pxor   %0,%%xmm1" : : "m" (*dq));
                asm volatile("pxor   %0,%%xmm0" : : "m" (*dp));

                /* xmm1 = dq ^ q
                 * xmm0 = dp ^ p
                 */
                asm volatile("movdqa %0,%%xmm4" : : "m" (qmul[0]));
                asm volatile("movdqa %0,%%xmm5" : : "m" (qmul[16]));

                asm volatile("movdqa %xmm1,%xmm3");
                asm volatile("psraw  $4,%xmm1");
                asm volatile("pand   %xmm7,%xmm3");
                asm volatile("pand   %xmm7,%xmm1");
                asm volatile("pshufb %xmm3,%xmm4");
                asm volatile("pshufb %xmm1,%xmm5");
                asm volatile("pxor   %xmm4,%xmm5");

                asm volatile("movdqa %xmm0,%xmm2"); /* xmm2 = px */

                /* xmm5 = qx */

                asm volatile("movdqa %0,%%xmm4" : : "m" (pbmul[0]));
                asm volatile("movdqa %0,%%xmm1" : : "m" (pbmul[16]));
                asm volatile("movdqa %xmm2,%xmm3");
                asm volatile("psraw  $4,%xmm2");
                asm volatile("pand   %xmm7,%xmm3");
                asm volatile("pand   %xmm7,%xmm2");
                asm volatile("pshufb %xmm3,%xmm4");
                asm volatile("pshufb %xmm2,%xmm1");
                asm volatile("pxor   %xmm4,%xmm1");

                /* xmm1 = pbmul[px] */
                asm volatile("pxor   %xmm5,%xmm1");
                /* xmm1 = db = DQ */
                asm volatile("movdqa %%xmm1,%0" : "=m" (*dq));

                asm volatile("pxor   %xmm1,%xmm0");
                asm volatile("movdqa %%xmm0,%0" : "=m" (*dp));

                bytes -= 16;
                p += 16;
                q += 16;
                dp += 16;
                dq += 16;
#endif
        }

        kernel_fpu_end();
}


static void raid6_datap_recov_ssse3(int disks, size_t bytes, int faila,
                void **ptrs)
{
        u8 *p, *q, *dq;
        const u8 *qmul;         /* Q multiplier table */
        static const u8 __aligned(16) x0f[16] = {
                 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
                 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f};

        p = (u8 *)ptrs[disks-2];
        q = (u8 *)ptrs[disks-1];

        /*
         * Compute syndrome with zero for the missing data page.
         * Use the dead data page as temporary storage for delta q.
         */
        dq = (u8 *)ptrs[faila];
        ptrs[faila] = (void *)raid6_empty_zero_page;
        ptrs[disks-1] = dq;

        raid6_call.gen_syndrome(disks, bytes, ptrs);

        /* Restore pointer table */
        ptrs[faila]   = dq;
        ptrs[disks-1] = q;

        /* Now, pick the proper data tables */
        qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
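        /*
         * Data disk faila and P have failed; Q is intact. With Pxy and Qx
         * the partial syndromes generated above over the surviving data,
         * per byte (as in the generic lib/raid6/recov.c):
         *
         *   Dx = qmul[Q ^ Qx]         qmul = 1/g^faila
         *   P  = Pxy ^ Dx
         */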

        kernel_fpu_begin();

        asm volatile("movdqa %0, %%xmm7" : : "m" (x0f[0]));

        while (bytes) {
#ifdef CONFIG_X86_64
                asm volatile("movdqa %0, %%xmm3" : : "m" (dq[0]));
                asm volatile("movdqa %0, %%xmm4" : : "m" (dq[16]));
                asm volatile("pxor %0, %%xmm3" : : "m" (q[0]));
                asm volatile("movdqa %0, %%xmm0" : : "m" (qmul[0]));

                /* xmm3 = q[0] ^ dq[0] */

                asm volatile("pxor %0, %%xmm4" : : "m" (q[16]));
                asm volatile("movdqa %0, %%xmm1" : : "m" (qmul[16]));

                /* xmm4 = q[16] ^ dq[16] */

                asm volatile("movdqa %xmm3, %xmm6");
                asm volatile("movdqa %xmm4, %xmm8");

                /* xmm4 = xmm8 = q[16] ^ dq[16] */

                asm volatile("psraw $4, %xmm3");
                asm volatile("pand %xmm7, %xmm6");
                asm volatile("pand %xmm7, %xmm3");
                asm volatile("pshufb %xmm6, %xmm0");
                asm volatile("pshufb %xmm3, %xmm1");
                asm volatile("movdqa %0, %%xmm10" : : "m" (qmul[0]));
                asm volatile("pxor %xmm0, %xmm1");
                asm volatile("movdqa %0, %%xmm11" : : "m" (qmul[16]));

                /* xmm1 = qmul[q[0] ^ dq[0]] */

                asm volatile("psraw $4, %xmm4");
                asm volatile("pand %xmm7, %xmm8");
                asm volatile("pand %xmm7, %xmm4");
                asm volatile("pshufb %xmm8, %xmm10");
                asm volatile("pshufb %xmm4, %xmm11");
                asm volatile("movdqa %0, %%xmm2" : : "m" (p[0]));
                asm volatile("pxor %xmm10, %xmm11");
                asm volatile("movdqa %0, %%xmm12" : : "m" (p[16]));

                /* xmm11 = qmul[q[16] ^ dq[16]] */

                asm volatile("pxor %xmm1, %xmm2");

                /* xmm2 = p[0] ^ qmul[q[0] ^ dq[0]] */

                asm volatile("pxor %xmm11, %xmm12");

                /* xmm12 = p[16] ^ qmul[q[16] ^ dq[16]] */

                asm volatile("movdqa %%xmm1, %0" : "=m" (dq[0]));
                asm volatile("movdqa %%xmm11, %0" : "=m" (dq[16]));

                asm volatile("movdqa %%xmm2, %0" : "=m" (p[0]));
                asm volatile("movdqa %%xmm12, %0" : "=m" (p[16]));

                bytes -= 32;
                p += 32;
                q += 32;
                dq += 32;

#else
                asm volatile("movdqa %0, %%xmm3" : : "m" (dq[0]));
                asm volatile("movdqa %0, %%xmm0" : : "m" (qmul[0]));
                asm volatile("pxor %0, %%xmm3" : : "m" (q[0]));
                asm volatile("movdqa %0, %%xmm1" : : "m" (qmul[16]));

                /* xmm3 = *q ^ *dq */

                asm volatile("movdqa %xmm3, %xmm6");
                asm volatile("movdqa %0, %%xmm2" : : "m" (p[0]));
                asm volatile("psraw $4, %xmm3");
                asm volatile("pand %xmm7, %xmm6");
                asm volatile("pand %xmm7, %xmm3");
                asm volatile("pshufb %xmm6, %xmm0");
                asm volatile("pshufb %xmm3, %xmm1");
                asm volatile("pxor %xmm0, %xmm1");

                /* xmm1 = qmul[*q ^ *dq] */

                asm volatile("pxor %xmm1, %xmm2");

                /* xmm2 = *p ^ qmul[*q ^ *dq] */

                asm volatile("movdqa %%xmm1, %0" : "=m" (dq[0]));
                asm volatile("movdqa %%xmm2, %0" : "=m" (p[0]));

                bytes -= 16;
                p += 16;
                q += 16;
                dq += 16;
#endif
        }

        kernel_fpu_end();
}

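/*
 * Exported descriptor for this recovery implementation. The selection code
 * in lib/raid6/algos.c walks the registered raid6_recov_calls entries and,
 * among those whose ->valid() returns true, uses the one with the highest
 * ->priority.
 */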
const struct raid6_recov_calls raid6_recov_ssse3 = {
        .data2 = raid6_2data_recov_ssse3,
        .datap = raid6_datap_recov_ssse3,
        .valid = raid6_has_ssse3,
#ifdef CONFIG_X86_64
        .name = "ssse3x2",
#else
        .name = "ssse3x1",
#endif
        .priority = 1,
};