linux/lib/raid6/sse1.c
<<
>>
Prefs
   1/* -*- linux-c -*- ------------------------------------------------------- *
   2 *
   3 *   Copyright 2002 H. Peter Anvin - All Rights Reserved
   4 *
   5 *   This program is free software; you can redistribute it and/or modify
   6 *   it under the terms of the GNU General Public License as published by
   7 *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
   8 *   Boston MA 02111-1307, USA; either version 2 of the License, or
   9 *   (at your option) any later version; incorporated herein by reference.
  10 *
  11 * ----------------------------------------------------------------------- */
  12
  13/*
  14 * raid6/sse1.c
  15 *
  16 * SSE-1/MMXEXT implementation of RAID-6 syndrome functions
  17 *
  18 * This is really an MMX implementation, but it requires SSE-1 or
  19 * AMD MMXEXT for prefetch support and a few other features.  The
  20 * support for nontemporal memory accesses is enough to make this
  21 * worthwhile as a separate implementation.
  22 */
  23
  24#ifdef CONFIG_X86_32
  25
  26#include <linux/raid/pq.h>
  27#include "x86.h"
  28
  29/* Defined in raid6/mmx.c */
  30extern const struct raid6_mmx_constants {
  31        u64 x1d;
  32} raid6_mmx_constants;
  33
  34static int raid6_have_sse1_or_mmxext(void)
  35{
  36        /* Not really boot_cpu but "all_cpus" */
  37        return boot_cpu_has(X86_FEATURE_MMX) &&
  38                (boot_cpu_has(X86_FEATURE_XMM) ||
  39                 boot_cpu_has(X86_FEATURE_MMXEXT));
  40}
  41
  42/*
  43 * Plain SSE1 implementation
  44 */
  45static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs)
  46{
  47        u8 **dptr = (u8 **)ptrs;
  48        u8 *p, *q;
  49        int d, z, z0;
  50
  51        z0 = disks - 3;         /* Highest data disk */
  52        p = dptr[z0+1];         /* XOR parity */
  53        q = dptr[z0+2];         /* RS syndrome */
  54
  55        kernel_fpu_begin();
  56
  57        asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
  58        asm volatile("pxor %mm5,%mm5"); /* Zero temp */
  59
  60        for ( d = 0 ; d < bytes ; d += 8 ) {
  61                asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
  62                asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
  63                asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
  64                asm volatile("movq %mm2,%mm4"); /* Q[0] */
  65                asm volatile("movq %0,%%mm6" : : "m" (dptr[z0-1][d]));
  66                for ( z = z0-2 ; z >= 0 ; z-- ) {
  67                        asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
  68                        asm volatile("pcmpgtb %mm4,%mm5");
  69                        asm volatile("paddb %mm4,%mm4");
  70                        asm volatile("pand %mm0,%mm5");
  71                        asm volatile("pxor %mm5,%mm4");
  72                        asm volatile("pxor %mm5,%mm5");
  73                        asm volatile("pxor %mm6,%mm2");
  74                        asm volatile("pxor %mm6,%mm4");
  75                        asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d]));
  76                }
  77                asm volatile("pcmpgtb %mm4,%mm5");
  78                asm volatile("paddb %mm4,%mm4");
  79                asm volatile("pand %mm0,%mm5");
  80                asm volatile("pxor %mm5,%mm4");
  81                asm volatile("pxor %mm5,%mm5");
  82                asm volatile("pxor %mm6,%mm2");
  83                asm volatile("pxor %mm6,%mm4");
  84
  85                asm volatile("movntq %%mm2,%0" : "=m" (p[d]));
  86                asm volatile("movntq %%mm4,%0" : "=m" (q[d]));
  87        }
  88
  89        asm volatile("sfence" : : : "memory");
  90        kernel_fpu_end();
  91}
  92
  93const struct raid6_calls raid6_sse1x1 = {
  94        raid6_sse11_gen_syndrome,
  95        NULL,                   /* XOR not yet implemented */
  96        raid6_have_sse1_or_mmxext,
  97        "sse1x1",
  98        1                       /* Has cache hints */
  99};
 100
 101/*
 102 * Unrolled-by-2 SSE1 implementation
 103 */
 104static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs)
 105{
 106        u8 **dptr = (u8 **)ptrs;
 107        u8 *p, *q;
 108        int d, z, z0;
 109
 110        z0 = disks - 3;         /* Highest data disk */
 111        p = dptr[z0+1];         /* XOR parity */
 112        q = dptr[z0+2];         /* RS syndrome */
 113
 114        kernel_fpu_begin();
 115
 116        asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
 117        asm volatile("pxor %mm5,%mm5"); /* Zero temp */
 118        asm volatile("pxor %mm7,%mm7"); /* Zero temp */
 119
 120        /* We uniformly assume a single prefetch covers at least 16 bytes */
 121        for ( d = 0 ; d < bytes ; d += 16 ) {
 122                asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
 123                asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
 124                asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8])); /* P[1] */
 125                asm volatile("movq %mm2,%mm4"); /* Q[0] */
 126                asm volatile("movq %mm3,%mm6"); /* Q[1] */
 127                for ( z = z0-1 ; z >= 0 ; z-- ) {
 128                        asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
 129                        asm volatile("pcmpgtb %mm4,%mm5");
 130                        asm volatile("pcmpgtb %mm6,%mm7");
 131                        asm volatile("paddb %mm4,%mm4");
 132                        asm volatile("paddb %mm6,%mm6");
 133                        asm volatile("pand %mm0,%mm5");
 134                        asm volatile("pand %mm0,%mm7");
 135                        asm volatile("pxor %mm5,%mm4");
 136                        asm volatile("pxor %mm7,%mm6");
 137                        asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d]));
 138                        asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8]));
 139                        asm volatile("pxor %mm5,%mm2");
 140                        asm volatile("pxor %mm7,%mm3");
 141                        asm volatile("pxor %mm5,%mm4");
 142                        asm volatile("pxor %mm7,%mm6");
 143                        asm volatile("pxor %mm5,%mm5");
 144                        asm volatile("pxor %mm7,%mm7");
 145                }
 146                asm volatile("movntq %%mm2,%0" : "=m" (p[d]));
 147                asm volatile("movntq %%mm3,%0" : "=m" (p[d+8]));
 148                asm volatile("movntq %%mm4,%0" : "=m" (q[d]));
 149                asm volatile("movntq %%mm6,%0" : "=m" (q[d+8]));
 150        }
 151
 152        asm volatile("sfence" : :: "memory");
 153        kernel_fpu_end();
 154}
 155
 156const struct raid6_calls raid6_sse1x2 = {
 157        raid6_sse12_gen_syndrome,
 158        NULL,                   /* XOR not yet implemented */
 159        raid6_have_sse1_or_mmxext,
 160        "sse1x2",
 161        1                       /* Has cache hints */
 162};
 163
 164#endif
 165