linux/lib/bch.c
<<
>>
Prefs
   1/*
   2 * Generic binary BCH encoding/decoding library
   3 *
   4 * This program is free software; you can redistribute it and/or modify it
   5 * under the terms of the GNU General Public License version 2 as published by
   6 * the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope that it will be useful, but WITHOUT
   9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  11 * more details.
  12 *
  13 * You should have received a copy of the GNU General Public License along with
  14 * this program; if not, write to the Free Software Foundation, Inc., 51
  15 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  16 *
  17 * Copyright © 2011 Parrot S.A.
  18 *
  19 * Author: Ivan Djelic <ivan.djelic@parrot.com>
  20 *
  21 * Description:
  22 *
  23 * This library provides runtime configurable encoding/decoding of binary
  24 * Bose-Chaudhuri-Hocquenghem (BCH) codes.
  25 *
  26 * Call bch_init to get a pointer to a newly allocated bch_control structure for
  27 * the given m (Galois field order), t (error correction capability) and
  28 * (optional) primitive polynomial parameters.
  29 *
  30 * Call bch_encode to compute and store ecc parity bytes to a given buffer.
  31 * Call bch_decode to detect and locate errors in received data.
  32 *
  33 * On systems supporting hw BCH features, intermediate results may be provided
  34 * to bch_decode in order to skip certain steps. See bch_decode() documentation
  35 * for details.
  36 *
  37 * Option CONFIG_BCH_CONST_PARAMS can be used to force fixed values of
  38 * parameters m and t; thus allowing extra compiler optimizations and providing
  39 * better (up to 2x) encoding performance. Using this option makes sense when
  40 * (m,t) are fixed and known in advance, e.g. when using BCH error correction
  41 * on a particular NAND flash device.
  42 *
  43 * Algorithmic details:
  44 *
  45 * Encoding is performed by processing 32 input bits in parallel, using 4
  46 * remainder lookup tables.
  47 *
  48 * The final stage of decoding involves the following internal steps:
  49 * a. Syndrome computation
  50 * b. Error locator polynomial computation using Berlekamp-Massey algorithm
  51 * c. Error locator root finding (by far the most expensive step)
  52 *
  53 * In this implementation, step c is not performed using the usual Chien search.
  54 * Instead, an alternative approach described in [1] is used. It consists in
  55 * factoring the error locator polynomial using the Berlekamp Trace algorithm
  56 * (BTA) down to a certain degree (4), after which ad hoc low-degree polynomial
  57 * solving techniques [2] are used. The resulting algorithm, called BTZ, yields
  58 * much better performance than Chien search for usual (m,t) values (typically
  59 * m >= 13, t < 32, see [1]).
  60 *
  61 * [1] B. Biswas, V. Herbert. Efficient root finding of polynomials over fields
  62 * of characteristic 2, in: Western European Workshop on Research in Cryptology
  63 * - WEWoRC 2009, Graz, Austria, LNCS, Springer, July 2009, to appear.
  64 * [2] [Zin96] V.A. Zinoviev. On the solution of equations of degree 10 over
  65 * finite fields GF(2^q). In Rapport de recherche INRIA no 2829, 1996.
  66 */
  67
  68#include <linux/kernel.h>
  69#include <linux/errno.h>
  70#include <linux/init.h>
  71#include <linux/module.h>
  72#include <linux/slab.h>
  73#include <linux/bitops.h>
  74#include <asm/byteorder.h>
  75#include <linux/bch.h>
  76
  77#if defined(CONFIG_BCH_CONST_PARAMS)
  78#define GF_M(_p)               (CONFIG_BCH_CONST_M)
  79#define GF_T(_p)               (CONFIG_BCH_CONST_T)
  80#define GF_N(_p)               ((1 << (CONFIG_BCH_CONST_M))-1)
  81#define BCH_MAX_M              (CONFIG_BCH_CONST_M)
  82#define BCH_MAX_T              (CONFIG_BCH_CONST_T)
  83#else
  84#define GF_M(_p)               ((_p)->m)
  85#define GF_T(_p)               ((_p)->t)
  86#define GF_N(_p)               ((_p)->n)
  87#define BCH_MAX_M              15 /* 2KB */
  88#define BCH_MAX_T              64 /* 64 bit correction */
  89#endif
  90
  91#define BCH_ECC_WORDS(_p)      DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 32)
  92#define BCH_ECC_BYTES(_p)      DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 8)
  93
  94#define BCH_ECC_MAX_WORDS      DIV_ROUND_UP(BCH_MAX_M * BCH_MAX_T, 32)
  95
  96#ifndef dbg
  97#define dbg(_fmt, args...)     do {} while (0)
  98#endif
  99
 100/*
 101 * represent a polynomial over GF(2^m)
 102 */
 103struct gf_poly {
 104        unsigned int deg;    /* polynomial degree */
 105        unsigned int c[];   /* polynomial terms */
 106};
 107
 108/* given its degree, compute a polynomial size in bytes */
 109#define GF_POLY_SZ(_d) (sizeof(struct gf_poly)+((_d)+1)*sizeof(unsigned int))
 110
 111/* polynomial of degree 1 */
 112struct gf_poly_deg1 {
 113        struct gf_poly poly;
 114        unsigned int   c[2];
 115};
 116
 117static u8 swap_bits_table[] = {
 118        0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
 119        0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
 120        0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
 121        0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
 122        0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
 123        0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
 124        0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
 125        0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
 126        0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
 127        0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
 128        0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
 129        0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
 130        0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
 131        0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
 132        0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
 133        0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
 134        0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1,
 135        0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
 136        0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9,
 137        0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
 138        0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5,
 139        0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
 140        0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed,
 141        0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
 142        0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3,
 143        0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
 144        0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb,
 145        0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
 146        0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7,
 147        0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
 148        0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef,
 149        0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff,
 150};
 151
 152static u8 swap_bits(struct bch_control *bch, u8 in)
 153{
 154        if (!bch->swap_bits)
 155                return in;
 156
 157        return swap_bits_table[in];
 158}
 159
 160/*
 161 * same as bch_encode(), but process input data one byte at a time
 162 */
 163static void bch_encode_unaligned(struct bch_control *bch,
 164                                 const unsigned char *data, unsigned int len,
 165                                 uint32_t *ecc)
 166{
 167        int i;
 168        const uint32_t *p;
 169        const int l = BCH_ECC_WORDS(bch)-1;
 170
 171        while (len--) {
 172                u8 tmp = swap_bits(bch, *data++);
 173
 174                p = bch->mod8_tab + (l+1)*(((ecc[0] >> 24)^(tmp)) & 0xff);
 175
 176                for (i = 0; i < l; i++)
 177                        ecc[i] = ((ecc[i] << 8)|(ecc[i+1] >> 24))^(*p++);
 178
 179                ecc[l] = (ecc[l] << 8)^(*p);
 180        }
 181}
 182
 183/*
 184 * convert ecc bytes to aligned, zero-padded 32-bit ecc words
 185 */
 186static void load_ecc8(struct bch_control *bch, uint32_t *dst,
 187                      const uint8_t *src)
 188{
 189        uint8_t pad[4] = {0, 0, 0, 0};
 190        unsigned int i, nwords = BCH_ECC_WORDS(bch)-1;
 191
 192        for (i = 0; i < nwords; i++, src += 4)
 193                dst[i] = ((u32)swap_bits(bch, src[0]) << 24) |
 194                        ((u32)swap_bits(bch, src[1]) << 16) |
 195                        ((u32)swap_bits(bch, src[2]) << 8) |
 196                        swap_bits(bch, src[3]);
 197
 198        memcpy(pad, src, BCH_ECC_BYTES(bch)-4*nwords);
 199        dst[nwords] = ((u32)swap_bits(bch, pad[0]) << 24) |
 200                ((u32)swap_bits(bch, pad[1]) << 16) |
 201                ((u32)swap_bits(bch, pad[2]) << 8) |
 202                swap_bits(bch, pad[3]);
 203}
 204
 205/*
 206 * convert 32-bit ecc words to ecc bytes
 207 */
 208static void store_ecc8(struct bch_control *bch, uint8_t *dst,
 209                       const uint32_t *src)
 210{
 211        uint8_t pad[4];
 212        unsigned int i, nwords = BCH_ECC_WORDS(bch)-1;
 213
 214        for (i = 0; i < nwords; i++) {
 215                *dst++ = swap_bits(bch, src[i] >> 24);
 216                *dst++ = swap_bits(bch, src[i] >> 16);
 217                *dst++ = swap_bits(bch, src[i] >> 8);
 218                *dst++ = swap_bits(bch, src[i]);
 219        }
 220        pad[0] = swap_bits(bch, src[nwords] >> 24);
 221        pad[1] = swap_bits(bch, src[nwords] >> 16);
 222        pad[2] = swap_bits(bch, src[nwords] >> 8);
 223        pad[3] = swap_bits(bch, src[nwords]);
 224        memcpy(dst, pad, BCH_ECC_BYTES(bch)-4*nwords);
 225}
 226
 227/**
 228 * bch_encode - calculate BCH ecc parity of data
 229 * @bch:   BCH control structure
 230 * @data:  data to encode
 231 * @len:   data length in bytes
 232 * @ecc:   ecc parity data, must be initialized by caller
 233 *
 234 * The @ecc parity array is used both as input and output parameter, in order to
 235 * allow incremental computations. It should be of the size indicated by member
 236 * @ecc_bytes of @bch, and should be initialized to 0 before the first call.
 237 *
 238 * The exact number of computed ecc parity bits is given by member @ecc_bits of
 239 * @bch; it may be less than m*t for large values of t.
 240 */
 241void bch_encode(struct bch_control *bch, const uint8_t *data,
 242                unsigned int len, uint8_t *ecc)
 243{
 244        const unsigned int l = BCH_ECC_WORDS(bch)-1;
 245        unsigned int i, mlen;
 246        unsigned long m;
 247        uint32_t w, r[BCH_ECC_MAX_WORDS];
 248        const size_t r_bytes = BCH_ECC_WORDS(bch) * sizeof(*r);
 249        const uint32_t * const tab0 = bch->mod8_tab;
 250        const uint32_t * const tab1 = tab0 + 256*(l+1);
 251        const uint32_t * const tab2 = tab1 + 256*(l+1);
 252        const uint32_t * const tab3 = tab2 + 256*(l+1);
 253        const uint32_t *pdata, *p0, *p1, *p2, *p3;
 254
 255        if (WARN_ON(r_bytes > sizeof(r)))
 256                return;
 257
 258        if (ecc) {
 259                /* load ecc parity bytes into internal 32-bit buffer */
 260                load_ecc8(bch, bch->ecc_buf, ecc);
 261        } else {
 262                memset(bch->ecc_buf, 0, r_bytes);
 263        }
 264
 265        /* process first unaligned data bytes */
 266        m = ((unsigned long)data) & 3;
 267        if (m) {
 268                mlen = (len < (4-m)) ? len : 4-m;
 269                bch_encode_unaligned(bch, data, mlen, bch->ecc_buf);
 270                data += mlen;
 271                len  -= mlen;
 272        }
 273
 274        /* process 32-bit aligned data words */
 275        pdata = (uint32_t *)data;
 276        mlen  = len/4;
 277        data += 4*mlen;
 278        len  -= 4*mlen;
 279        memcpy(r, bch->ecc_buf, r_bytes);
 280
 281        /*
 282         * split each 32-bit word into 4 polynomials of weight 8 as follows:
 283         *
 284         * 31 ...24  23 ...16  15 ... 8  7 ... 0
 285         * xxxxxxxx  yyyyyyyy  zzzzzzzz  tttttttt
 286         *                               tttttttt  mod g = r0 (precomputed)
 287         *                     zzzzzzzz  00000000  mod g = r1 (precomputed)
 288         *           yyyyyyyy  00000000  00000000  mod g = r2 (precomputed)
 289         * xxxxxxxx  00000000  00000000  00000000  mod g = r3 (precomputed)
 290         * xxxxxxxx  yyyyyyyy  zzzzzzzz  tttttttt  mod g = r0^r1^r2^r3
 291         */
 292        while (mlen--) {
 293                /* input data is read in big-endian format */
 294                w = cpu_to_be32(*pdata++);
 295                if (bch->swap_bits)
 296                        w = (u32)swap_bits(bch, w) |
 297                            ((u32)swap_bits(bch, w >> 8) << 8) |
 298                            ((u32)swap_bits(bch, w >> 16) << 16) |
 299                            ((u32)swap_bits(bch, w >> 24) << 24);
 300                w ^= r[0];
 301                p0 = tab0 + (l+1)*((w >>  0) & 0xff);
 302                p1 = tab1 + (l+1)*((w >>  8) & 0xff);
 303                p2 = tab2 + (l+1)*((w >> 16) & 0xff);
 304                p3 = tab3 + (l+1)*((w >> 24) & 0xff);
 305
 306                for (i = 0; i < l; i++)
 307                        r[i] = r[i+1]^p0[i]^p1[i]^p2[i]^p3[i];
 308
 309                r[l] = p0[l]^p1[l]^p2[l]^p3[l];
 310        }
 311        memcpy(bch->ecc_buf, r, r_bytes);
 312
 313        /* process last unaligned bytes */
 314        if (len)
 315                bch_encode_unaligned(bch, data, len, bch->ecc_buf);
 316
 317        /* store ecc parity bytes into original parity buffer */
 318        if (ecc)
 319                store_ecc8(bch, ecc, bch->ecc_buf);
 320}
 321EXPORT_SYMBOL_GPL(bch_encode);
 322
 323static inline int modulo(struct bch_control *bch, unsigned int v)
 324{
 325        const unsigned int n = GF_N(bch);
 326        while (v >= n) {
 327                v -= n;
 328                v = (v & n) + (v >> GF_M(bch));
 329        }
 330        return v;
 331}
 332
 333/*
 334 * shorter and faster modulo function, only works when v < 2N.
 335 */
 336static inline int mod_s(struct bch_control *bch, unsigned int v)
 337{
 338        const unsigned int n = GF_N(bch);
 339        return (v < n) ? v : v-n;
 340}
 341
 342static inline int deg(unsigned int poly)
 343{
 344        /* polynomial degree is the most-significant bit index */
 345        return fls(poly)-1;
 346}
 347
 348static inline int parity(unsigned int x)
 349{
 350        /*
 351         * public domain code snippet, lifted from
 352         * http://www-graphics.stanford.edu/~seander/bithacks.html
 353         */
 354        x ^= x >> 1;
 355        x ^= x >> 2;
 356        x = (x & 0x11111111U) * 0x11111111U;
 357        return (x >> 28) & 1;
 358}
 359
 360/* Galois field basic operations: multiply, divide, inverse, etc. */
 361
 362static inline unsigned int gf_mul(struct bch_control *bch, unsigned int a,
 363                                  unsigned int b)
 364{
 365        return (a && b) ? bch->a_pow_tab[mod_s(bch, bch->a_log_tab[a]+
 366                                               bch->a_log_tab[b])] : 0;
 367}
 368
 369static inline unsigned int gf_sqr(struct bch_control *bch, unsigned int a)
 370{
 371        return a ? bch->a_pow_tab[mod_s(bch, 2*bch->a_log_tab[a])] : 0;
 372}
 373
 374static inline unsigned int gf_div(struct bch_control *bch, unsigned int a,
 375                                  unsigned int b)
 376{
 377        return a ? bch->a_pow_tab[mod_s(bch, bch->a_log_tab[a]+
 378                                        GF_N(bch)-bch->a_log_tab[b])] : 0;
 379}
 380
 381static inline unsigned int gf_inv(struct bch_control *bch, unsigned int a)
 382{
 383        return bch->a_pow_tab[GF_N(bch)-bch->a_log_tab[a]];
 384}
 385
 386static inline unsigned int a_pow(struct bch_control *bch, int i)
 387{
 388        return bch->a_pow_tab[modulo(bch, i)];
 389}
 390
 391static inline int a_log(struct bch_control *bch, unsigned int x)
 392{
 393        return bch->a_log_tab[x];
 394}
 395
 396static inline int a_ilog(struct bch_control *bch, unsigned int x)
 397{
 398        return mod_s(bch, GF_N(bch)-bch->a_log_tab[x]);
 399}
 400
 401/*
 402 * compute 2t syndromes of ecc polynomial, i.e. ecc(a^j) for j=1..2t
 403 */
 404static void compute_syndromes(struct bch_control *bch, uint32_t *ecc,
 405                              unsigned int *syn)
 406{
 407        int i, j, s;
 408        unsigned int m;
 409        uint32_t poly;
 410        const int t = GF_T(bch);
 411
 412        s = bch->ecc_bits;
 413
 414        /* make sure extra bits in last ecc word are cleared */
 415        m = ((unsigned int)s) & 31;
 416        if (m)
 417                ecc[s/32] &= ~((1u << (32-m))-1);
 418        memset(syn, 0, 2*t*sizeof(*syn));
 419
 420        /* compute v(a^j) for j=1 .. 2t-1 */
 421        do {
 422                poly = *ecc++;
 423                s -= 32;
 424                while (poly) {
 425                        i = deg(poly);
 426                        for (j = 0; j < 2*t; j += 2)
 427                                syn[j] ^= a_pow(bch, (j+1)*(i+s));
 428
 429                        poly ^= (1 << i);
 430                }
 431        } while (s > 0);
 432
 433        /* v(a^(2j)) = v(a^j)^2 */
 434        for (j = 0; j < t; j++)
 435                syn[2*j+1] = gf_sqr(bch, syn[j]);
 436}
 437
 438static void gf_poly_copy(struct gf_poly *dst, struct gf_poly *src)
 439{
 440        memcpy(dst, src, GF_POLY_SZ(src->deg));
 441}
 442
 443static int compute_error_locator_polynomial(struct bch_control *bch,
 444                                            const unsigned int *syn)
 445{
 446        const unsigned int t = GF_T(bch);
 447        const unsigned int n = GF_N(bch);
 448        unsigned int i, j, tmp, l, pd = 1, d = syn[0];
 449        struct gf_poly *elp = bch->elp;
 450        struct gf_poly *pelp = bch->poly_2t[0];
 451        struct gf_poly *elp_copy = bch->poly_2t[1];
 452        int k, pp = -1;
 453
 454        memset(pelp, 0, GF_POLY_SZ(2*t));
 455        memset(elp, 0, GF_POLY_SZ(2*t));
 456
 457        pelp->deg = 0;
 458        pelp->c[0] = 1;
 459        elp->deg = 0;
 460        elp->c[0] = 1;
 461
 462        /* use simplified binary Berlekamp-Massey algorithm */
 463        for (i = 0; (i < t) && (elp->deg <= t); i++) {
 464                if (d) {
 465                        k = 2*i-pp;
 466                        gf_poly_copy(elp_copy, elp);
 467                        /* e[i+1](X) = e[i](X)+di*dp^-1*X^2(i-p)*e[p](X) */
 468                        tmp = a_log(bch, d)+n-a_log(bch, pd);
 469                        for (j = 0; j <= pelp->deg; j++) {
 470                                if (pelp->c[j]) {
 471                                        l = a_log(bch, pelp->c[j]);
 472                                        elp->c[j+k] ^= a_pow(bch, tmp+l);
 473                                }
 474                        }
 475                        /* compute l[i+1] = max(l[i]->c[l[p]+2*(i-p]) */
 476                        tmp = pelp->deg+k;
 477                        if (tmp > elp->deg) {
 478                                elp->deg = tmp;
 479                                gf_poly_copy(pelp, elp_copy);
 480                                pd = d;
 481                                pp = 2*i;
 482                        }
 483                }
 484                /* di+1 = S(2i+3)+elp[i+1].1*S(2i+2)+...+elp[i+1].lS(2i+3-l) */
 485                if (i < t-1) {
 486                        d = syn[2*i+2];
 487                        for (j = 1; j <= elp->deg; j++)
 488                                d ^= gf_mul(bch, elp->c[j], syn[2*i+2-j]);
 489                }
 490        }
 491        dbg("elp=%s\n", gf_poly_str(elp));
 492        return (elp->deg > t) ? -1 : (int)elp->deg;
 493}
 494
 495/*
 496 * solve a m x m linear system in GF(2) with an expected number of solutions,
 497 * and return the number of found solutions
 498 */
 499static int solve_linear_system(struct bch_control *bch, unsigned int *rows,
 500                               unsigned int *sol, int nsol)
 501{
 502        const int m = GF_M(bch);
 503        unsigned int tmp, mask;
 504        int rem, c, r, p, k, param[BCH_MAX_M];
 505
 506        k = 0;
 507        mask = 1 << m;
 508
 509        /* Gaussian elimination */
 510        for (c = 0; c < m; c++) {
 511                rem = 0;
 512                p = c-k;
 513                /* find suitable row for elimination */
 514                for (r = p; r < m; r++) {
 515                        if (rows[r] & mask) {
 516                                if (r != p) {
 517                                        tmp = rows[r];
 518                                        rows[r] = rows[p];
 519                                        rows[p] = tmp;
 520                                }
 521                                rem = r+1;
 522                                break;
 523                        }
 524                }
 525                if (rem) {
 526                        /* perform elimination on remaining rows */
 527                        tmp = rows[p];
 528                        for (r = rem; r < m; r++) {
 529                                if (rows[r] & mask)
 530                                        rows[r] ^= tmp;
 531                        }
 532                } else {
 533                        /* elimination not needed, store defective row index */
 534                        param[k++] = c;
 535                }
 536                mask >>= 1;
 537        }
 538        /* rewrite system, inserting fake parameter rows */
 539        if (k > 0) {
 540                p = k;
 541                for (r = m-1; r >= 0; r--) {
 542                        if ((r > m-1-k) && rows[r])
 543                                /* system has no solution */
 544                                return 0;
 545
 546                        rows[r] = (p && (r == param[p-1])) ?
 547                                p--, 1u << (m-r) : rows[r-p];
 548                }
 549        }
 550
 551        if (nsol != (1 << k))
 552                /* unexpected number of solutions */
 553                return 0;
 554
 555        for (p = 0; p < nsol; p++) {
 556                /* set parameters for p-th solution */
 557                for (c = 0; c < k; c++)
 558                        rows[param[c]] = (rows[param[c]] & ~1)|((p >> c) & 1);
 559
 560                /* compute unique solution */
 561                tmp = 0;
 562                for (r = m-1; r >= 0; r--) {
 563                        mask = rows[r] & (tmp|1);
 564                        tmp |= parity(mask) << (m-r);
 565                }
 566                sol[p] = tmp >> 1;
 567        }
 568        return nsol;
 569}
 570
 571/*
 572 * this function builds and solves a linear system for finding roots of a degree
 573 * 4 affine monic polynomial X^4+aX^2+bX+c over GF(2^m).
 574 */
 575static int find_affine4_roots(struct bch_control *bch, unsigned int a,
 576                              unsigned int b, unsigned int c,
 577                              unsigned int *roots)
 578{
 579        int i, j, k;
 580        const int m = GF_M(bch);
 581        unsigned int mask = 0xff, t, rows[16] = {0,};
 582
 583        j = a_log(bch, b);
 584        k = a_log(bch, a);
 585        rows[0] = c;
 586
 587        /* build linear system to solve X^4+aX^2+bX+c = 0 */
 588        for (i = 0; i < m; i++) {
 589                rows[i+1] = bch->a_pow_tab[4*i]^
 590                        (a ? bch->a_pow_tab[mod_s(bch, k)] : 0)^
 591                        (b ? bch->a_pow_tab[mod_s(bch, j)] : 0);
 592                j++;
 593                k += 2;
 594        }
 595        /*
 596         * transpose 16x16 matrix before passing it to linear solver
 597         * warning: this code assumes m < 16
 598         */
 599        for (j = 8; j != 0; j >>= 1, mask ^= (mask << j)) {
 600                for (k = 0; k < 16; k = (k+j+1) & ~j) {
 601                        t = ((rows[k] >> j)^rows[k+j]) & mask;
 602                        rows[k] ^= (t << j);
 603                        rows[k+j] ^= t;
 604                }
 605        }
 606        return solve_linear_system(bch, rows, roots, 4);
 607}
 608
 609/*
 610 * compute root r of a degree 1 polynomial over GF(2^m) (returned as log(1/r))
 611 */
 612static int find_poly_deg1_roots(struct bch_control *bch, struct gf_poly *poly,
 613                                unsigned int *roots)
 614{
 615        int n = 0;
 616
 617        if (poly->c[0])
 618                /* poly[X] = bX+c with c!=0, root=c/b */
 619                roots[n++] = mod_s(bch, GF_N(bch)-bch->a_log_tab[poly->c[0]]+
 620                                   bch->a_log_tab[poly->c[1]]);
 621        return n;
 622}
 623
 624/*
 625 * compute roots of a degree 2 polynomial over GF(2^m)
 626 */
 627static int find_poly_deg2_roots(struct bch_control *bch, struct gf_poly *poly,
 628                                unsigned int *roots)
 629{
 630        int n = 0, i, l0, l1, l2;
 631        unsigned int u, v, r;
 632
 633        if (poly->c[0] && poly->c[1]) {
 634
 635                l0 = bch->a_log_tab[poly->c[0]];
 636                l1 = bch->a_log_tab[poly->c[1]];
 637                l2 = bch->a_log_tab[poly->c[2]];
 638
 639                /* using z=a/bX, transform aX^2+bX+c into z^2+z+u (u=ac/b^2) */
 640                u = a_pow(bch, l0+l2+2*(GF_N(bch)-l1));
 641                /*
 642                 * let u = sum(li.a^i) i=0..m-1; then compute r = sum(li.xi):
 643                 * r^2+r = sum(li.(xi^2+xi)) = sum(li.(a^i+Tr(a^i).a^k)) =
 644                 * u + sum(li.Tr(a^i).a^k) = u+a^k.Tr(sum(li.a^i)) = u+a^k.Tr(u)
 645                 * i.e. r and r+1 are roots iff Tr(u)=0
 646                 */
 647                r = 0;
 648                v = u;
 649                while (v) {
 650                        i = deg(v);
 651                        r ^= bch->xi_tab[i];
 652                        v ^= (1 << i);
 653                }
 654                /* verify root */
 655                if ((gf_sqr(bch, r)^r) == u) {
 656                        /* reverse z=a/bX transformation and compute log(1/r) */
 657                        roots[n++] = modulo(bch, 2*GF_N(bch)-l1-
 658                                            bch->a_log_tab[r]+l2);
 659                        roots[n++] = modulo(bch, 2*GF_N(bch)-l1-
 660                                            bch->a_log_tab[r^1]+l2);
 661                }
 662        }
 663        return n;
 664}
 665
 666/*
 667 * compute roots of a degree 3 polynomial over GF(2^m)
 668 */
 669static int find_poly_deg3_roots(struct bch_control *bch, struct gf_poly *poly,
 670                                unsigned int *roots)
 671{
 672        int i, n = 0;
 673        unsigned int a, b, c, a2, b2, c2, e3, tmp[4];
 674
 675        if (poly->c[0]) {
 676                /* transform polynomial into monic X^3 + a2X^2 + b2X + c2 */
 677                e3 = poly->c[3];
 678                c2 = gf_div(bch, poly->c[0], e3);
 679                b2 = gf_div(bch, poly->c[1], e3);
 680                a2 = gf_div(bch, poly->c[2], e3);
 681
 682                /* (X+a2)(X^3+a2X^2+b2X+c2) = X^4+aX^2+bX+c (affine) */
 683                c = gf_mul(bch, a2, c2);           /* c = a2c2      */
 684                b = gf_mul(bch, a2, b2)^c2;        /* b = a2b2 + c2 */
 685                a = gf_sqr(bch, a2)^b2;            /* a = a2^2 + b2 */
 686
 687                /* find the 4 roots of this affine polynomial */
 688                if (find_affine4_roots(bch, a, b, c, tmp) == 4) {
 689                        /* remove a2 from final list of roots */
 690                        for (i = 0; i < 4; i++) {
 691                                if (tmp[i] != a2)
 692                                        roots[n++] = a_ilog(bch, tmp[i]);
 693                        }
 694                }
 695        }
 696        return n;
 697}
 698
 699/*
 700 * compute roots of a degree 4 polynomial over GF(2^m)
 701 */
 702static int find_poly_deg4_roots(struct bch_control *bch, struct gf_poly *poly,
 703                                unsigned int *roots)
 704{
 705        int i, l, n = 0;
 706        unsigned int a, b, c, d, e = 0, f, a2, b2, c2, e4;
 707
 708        if (poly->c[0] == 0)
 709                return 0;
 710
 711        /* transform polynomial into monic X^4 + aX^3 + bX^2 + cX + d */
 712        e4 = poly->c[4];
 713        d = gf_div(bch, poly->c[0], e4);
 714        c = gf_div(bch, poly->c[1], e4);
 715        b = gf_div(bch, poly->c[2], e4);
 716        a = gf_div(bch, poly->c[3], e4);
 717
 718        /* use Y=1/X transformation to get an affine polynomial */
 719        if (a) {
 720                /* first, eliminate cX by using z=X+e with ae^2+c=0 */
 721                if (c) {
 722                        /* compute e such that e^2 = c/a */
 723                        f = gf_div(bch, c, a);
 724                        l = a_log(bch, f);
 725                        l += (l & 1) ? GF_N(bch) : 0;
 726                        e = a_pow(bch, l/2);
 727                        /*
 728                         * use transformation z=X+e:
 729                         * z^4+e^4 + a(z^3+ez^2+e^2z+e^3) + b(z^2+e^2) +cz+ce+d
 730                         * z^4 + az^3 + (ae+b)z^2 + (ae^2+c)z+e^4+be^2+ae^3+ce+d
 731                         * z^4 + az^3 + (ae+b)z^2 + e^4+be^2+d
 732                         * z^4 + az^3 +     b'z^2 + d'
 733                         */
 734                        d = a_pow(bch, 2*l)^gf_mul(bch, b, f)^d;
 735                        b = gf_mul(bch, a, e)^b;
 736                }
 737                /* now, use Y=1/X to get Y^4 + b/dY^2 + a/dY + 1/d */
 738                if (d == 0)
 739                        /* assume all roots have multiplicity 1 */
 740                        return 0;
 741
 742                c2 = gf_inv(bch, d);
 743                b2 = gf_div(bch, a, d);
 744                a2 = gf_div(bch, b, d);
 745        } else {
 746                /* polynomial is already affine */
 747                c2 = d;
 748                b2 = c;
 749                a2 = b;
 750        }
 751        /* find the 4 roots of this affine polynomial */
 752        if (find_affine4_roots(bch, a2, b2, c2, roots) == 4) {
 753                for (i = 0; i < 4; i++) {
 754                        /* post-process roots (reverse transformations) */
 755                        f = a ? gf_inv(bch, roots[i]) : roots[i];
 756                        roots[i] = a_ilog(bch, f^e);
 757                }
 758                n = 4;
 759        }
 760        return n;
 761}
 762
 763/*
 764 * build monic, log-based representation of a polynomial
 765 */
 766static void gf_poly_logrep(struct bch_control *bch,
 767                           const struct gf_poly *a, int *rep)
 768{
 769        int i, d = a->deg, l = GF_N(bch)-a_log(bch, a->c[a->deg]);
 770
 771        /* represent 0 values with -1; warning, rep[d] is not set to 1 */
 772        for (i = 0; i < d; i++)
 773                rep[i] = a->c[i] ? mod_s(bch, a_log(bch, a->c[i])+l) : -1;
 774}
 775
 776/*
 777 * compute polynomial Euclidean division remainder in GF(2^m)[X]
 778 */
 779static void gf_poly_mod(struct bch_control *bch, struct gf_poly *a,
 780                        const struct gf_poly *b, int *rep)
 781{
 782        int la, p, m;
 783        unsigned int i, j, *c = a->c;
 784        const unsigned int d = b->deg;
 785
 786        if (a->deg < d)
 787                return;
 788
 789        /* reuse or compute log representation of denominator */
 790        if (!rep) {
 791                rep = bch->cache;
 792                gf_poly_logrep(bch, b, rep);
 793        }
 794
 795        for (j = a->deg; j >= d; j--) {
 796                if (c[j]) {
 797                        la = a_log(bch, c[j]);
 798                        p = j-d;
 799                        for (i = 0; i < d; i++, p++) {
 800                                m = rep[i];
 801                                if (m >= 0)
 802                                        c[p] ^= bch->a_pow_tab[mod_s(bch,
 803                                                                     m+la)];
 804                        }
 805                }
 806        }
 807        a->deg = d-1;
 808        while (!c[a->deg] && a->deg)
 809                a->deg--;
 810}
 811
 812/*
 813 * compute polynomial Euclidean division quotient in GF(2^m)[X]
 814 */
 815static void gf_poly_div(struct bch_control *bch, struct gf_poly *a,
 816                        const struct gf_poly *b, struct gf_poly *q)
 817{
 818        if (a->deg >= b->deg) {
 819                q->deg = a->deg-b->deg;
 820                /* compute a mod b (modifies a) */
 821                gf_poly_mod(bch, a, b, NULL);
 822                /* quotient is stored in upper part of polynomial a */
 823                memcpy(q->c, &a->c[b->deg], (1+q->deg)*sizeof(unsigned int));
 824        } else {
 825                q->deg = 0;
 826                q->c[0] = 0;
 827        }
 828}
 829
 830/*
 831 * compute polynomial GCD (Greatest Common Divisor) in GF(2^m)[X]
 832 */
 833static struct gf_poly *gf_poly_gcd(struct bch_control *bch, struct gf_poly *a,
 834                                   struct gf_poly *b)
 835{
 836        struct gf_poly *tmp;
 837
 838        dbg("gcd(%s,%s)=", gf_poly_str(a), gf_poly_str(b));
 839
 840        if (a->deg < b->deg) {
 841                tmp = b;
 842                b = a;
 843                a = tmp;
 844        }
 845
 846        while (b->deg > 0) {
 847                gf_poly_mod(bch, a, b, NULL);
 848                tmp = b;
 849                b = a;
 850                a = tmp;
 851        }
 852
 853        dbg("%s\n", gf_poly_str(a));
 854
 855        return a;
 856}
 857
 858/*
 859 * Given a polynomial f and an integer k, compute Tr(a^kX) mod f
 860 * This is used in Berlekamp Trace algorithm for splitting polynomials
 861 */
 862static void compute_trace_bk_mod(struct bch_control *bch, int k,
 863                                 const struct gf_poly *f, struct gf_poly *z,
 864                                 struct gf_poly *out)
 865{
 866        const int m = GF_M(bch);
 867        int i, j;
 868
 869        /* z contains z^2j mod f */
 870        z->deg = 1;
 871        z->c[0] = 0;
 872        z->c[1] = bch->a_pow_tab[k];
 873
 874        out->deg = 0;
 875        memset(out, 0, GF_POLY_SZ(f->deg));
 876
 877        /* compute f log representation only once */
 878        gf_poly_logrep(bch, f, bch->cache);
 879
 880        for (i = 0; i < m; i++) {
 881                /* add a^(k*2^i)(z^(2^i) mod f) and compute (z^(2^i) mod f)^2 */
 882                for (j = z->deg; j >= 0; j--) {
 883                        out->c[j] ^= z->c[j];
 884                        z->c[2*j] = gf_sqr(bch, z->c[j]);
 885                        z->c[2*j+1] = 0;
 886                }
 887                if (z->deg > out->deg)
 888                        out->deg = z->deg;
 889
 890                if (i < m-1) {
 891                        z->deg *= 2;
 892                        /* z^(2(i+1)) mod f = (z^(2^i) mod f)^2 mod f */
 893                        gf_poly_mod(bch, z, f, bch->cache);
 894                }
 895        }
 896        while (!out->c[out->deg] && out->deg)
 897                out->deg--;
 898
 899        dbg("Tr(a^%d.X) mod f = %s\n", k, gf_poly_str(out));
 900}
 901
 902/*
 903 * factor a polynomial using Berlekamp Trace algorithm (BTA)
 904 */
 905static void factor_polynomial(struct bch_control *bch, int k, struct gf_poly *f,
 906                              struct gf_poly **g, struct gf_poly **h)
 907{
 908        struct gf_poly *f2 = bch->poly_2t[0];
 909        struct gf_poly *q  = bch->poly_2t[1];
 910        struct gf_poly *tk = bch->poly_2t[2];
 911        struct gf_poly *z  = bch->poly_2t[3];
 912        struct gf_poly *gcd;
 913
 914        dbg("factoring %s...\n", gf_poly_str(f));
 915
 916        *g = f;
 917        *h = NULL;
 918
 919        /* tk = Tr(a^k.X) mod f */
 920        compute_trace_bk_mod(bch, k, f, z, tk);
 921
 922        if (tk->deg > 0) {
 923                /* compute g = gcd(f, tk) (destructive operation) */
 924                gf_poly_copy(f2, f);
 925                gcd = gf_poly_gcd(bch, f2, tk);
 926                if (gcd->deg < f->deg) {
 927                        /* compute h=f/gcd(f,tk); this will modify f and q */
 928                        gf_poly_div(bch, f, gcd, q);
 929                        /* store g and h in-place (clobbering f) */
 930                        *h = &((struct gf_poly_deg1 *)f)[gcd->deg].poly;
 931                        gf_poly_copy(*g, gcd);
 932                        gf_poly_copy(*h, q);
 933                }
 934        }
 935}
 936
 937/*
 938 * find roots of a polynomial, using BTZ algorithm; see the beginning of this
 939 * file for details
 940 */
 941static int find_poly_roots(struct bch_control *bch, unsigned int k,
 942                           struct gf_poly *poly, unsigned int *roots)
 943{
 944        int cnt;
 945        struct gf_poly *f1, *f2;
 946
 947        switch (poly->deg) {
 948                /* handle low degree polynomials with ad hoc techniques */
 949        case 1:
 950                cnt = find_poly_deg1_roots(bch, poly, roots);
 951                break;
 952        case 2:
 953                cnt = find_poly_deg2_roots(bch, poly, roots);
 954                break;
 955        case 3:
 956                cnt = find_poly_deg3_roots(bch, poly, roots);
 957                break;
 958        case 4:
 959                cnt = find_poly_deg4_roots(bch, poly, roots);
 960                break;
 961        default:
 962                /* factor polynomial using Berlekamp Trace Algorithm (BTA) */
 963                cnt = 0;
 964                if (poly->deg && (k <= GF_M(bch))) {
 965                        factor_polynomial(bch, k, poly, &f1, &f2);
 966                        if (f1)
 967                                cnt += find_poly_roots(bch, k+1, f1, roots);
 968                        if (f2)
 969                                cnt += find_poly_roots(bch, k+1, f2, roots+cnt);
 970                }
 971                break;
 972        }
 973        return cnt;
 974}
 975
 976#if defined(USE_CHIEN_SEARCH)
 977/*
 978 * exhaustive root search (Chien) implementation - not used, included only for
 979 * reference/comparison tests
 980 */
 981static int chien_search(struct bch_control *bch, unsigned int len,
 982                        struct gf_poly *p, unsigned int *roots)
 983{
 984        int m;
 985        unsigned int i, j, syn, syn0, count = 0;
 986        const unsigned int k = 8*len+bch->ecc_bits;
 987
 988        /* use a log-based representation of polynomial */
 989        gf_poly_logrep(bch, p, bch->cache);
 990        bch->cache[p->deg] = 0;
 991        syn0 = gf_div(bch, p->c[0], p->c[p->deg]);
 992
 993        for (i = GF_N(bch)-k+1; i <= GF_N(bch); i++) {
 994                /* compute elp(a^i) */
 995                for (j = 1, syn = syn0; j <= p->deg; j++) {
 996                        m = bch->cache[j];
 997                        if (m >= 0)
 998                                syn ^= a_pow(bch, m+j*i);
 999                }
1000                if (syn == 0) {
1001                        roots[count++] = GF_N(bch)-i;
1002                        if (count == p->deg)
1003                                break;
1004                }
1005        }
1006        return (count == p->deg) ? count : 0;
1007}
1008#define find_poly_roots(_p, _k, _elp, _loc) chien_search(_p, len, _elp, _loc)
1009#endif /* USE_CHIEN_SEARCH */
1010
1011/**
1012 * bch_decode - decode received codeword and find bit error locations
1013 * @bch:      BCH control structure
1014 * @data:     received data, ignored if @calc_ecc is provided
1015 * @len:      data length in bytes, must always be provided
1016 * @recv_ecc: received ecc, if NULL then assume it was XORed in @calc_ecc
1017 * @calc_ecc: calculated ecc, if NULL then calc_ecc is computed from @data
1018 * @syn:      hw computed syndrome data (if NULL, syndrome is calculated)
1019 * @errloc:   output array of error locations
1020 *
1021 * Returns:
1022 *  The number of errors found, or -EBADMSG if decoding failed, or -EINVAL if
1023 *  invalid parameters were provided
1024 *
1025 * Depending on the available hw BCH support and the need to compute @calc_ecc
1026 * separately (using bch_encode()), this function should be called with one of
1027 * the following parameter configurations -
1028 *
1029 * by providing @data and @recv_ecc only:
1030 *   bch_decode(@bch, @data, @len, @recv_ecc, NULL, NULL, @errloc)
1031 *
1032 * by providing @recv_ecc and @calc_ecc:
1033 *   bch_decode(@bch, NULL, @len, @recv_ecc, @calc_ecc, NULL, @errloc)
1034 *
1035 * by providing ecc = recv_ecc XOR calc_ecc:
1036 *   bch_decode(@bch, NULL, @len, NULL, ecc, NULL, @errloc)
1037 *
1038 * by providing syndrome results @syn:
1039 *   bch_decode(@bch, NULL, @len, NULL, NULL, @syn, @errloc)
1040 *
1041 * Once bch_decode() has successfully returned with a positive value, error
1042 * locations returned in array @errloc should be interpreted as follows -
1043 *
1044 * if (errloc[n] >= 8*len), then n-th error is located in ecc (no need for
1045 * data correction)
1046 *
1047 * if (errloc[n] < 8*len), then n-th error is located in data and can be
1048 * corrected with statement data[errloc[n]/8] ^= 1 << (errloc[n] % 8);
1049 *
1050 * Note that this function does not perform any data correction by itself, it
1051 * merely indicates error locations.
1052 */
1053int bch_decode(struct bch_control *bch, const uint8_t *data, unsigned int len,
1054               const uint8_t *recv_ecc, const uint8_t *calc_ecc,
1055               const unsigned int *syn, unsigned int *errloc)
1056{
1057        const unsigned int ecc_words = BCH_ECC_WORDS(bch);
1058        unsigned int nbits;
1059        int i, err, nroots;
1060        uint32_t sum;
1061
1062        /* sanity check: make sure data length can be handled */
1063        if (8*len > (bch->n-bch->ecc_bits))
1064                return -EINVAL;
1065
1066        /* if caller does not provide syndromes, compute them */
1067        if (!syn) {
1068                if (!calc_ecc) {
1069                        /* compute received data ecc into an internal buffer */
1070                        if (!data || !recv_ecc)
1071                                return -EINVAL;
1072                        bch_encode(bch, data, len, NULL);
1073                } else {
1074                        /* load provided calculated ecc */
1075                        load_ecc8(bch, bch->ecc_buf, calc_ecc);
1076                }
1077                /* load received ecc or assume it was XORed in calc_ecc */
1078                if (recv_ecc) {
1079                        load_ecc8(bch, bch->ecc_buf2, recv_ecc);
1080                        /* XOR received and calculated ecc */
1081                        for (i = 0, sum = 0; i < (int)ecc_words; i++) {
1082                                bch->ecc_buf[i] ^= bch->ecc_buf2[i];
1083                                sum |= bch->ecc_buf[i];
1084                        }
1085                        if (!sum)
1086                                /* no error found */
1087                                return 0;
1088                }
1089                compute_syndromes(bch, bch->ecc_buf, bch->syn);
1090                syn = bch->syn;
1091        }
1092
1093        err = compute_error_locator_polynomial(bch, syn);
1094        if (err > 0) {
1095                nroots = find_poly_roots(bch, 1, bch->elp, errloc);
1096                if (err != nroots)
1097                        err = -1;
1098        }
1099        if (err > 0) {
1100                /* post-process raw error locations for easier correction */
1101                nbits = (len*8)+bch->ecc_bits;
1102                for (i = 0; i < err; i++) {
1103                        if (errloc[i] >= nbits) {
1104                                err = -1;
1105                                break;
1106                        }
1107                        errloc[i] = nbits-1-errloc[i];
1108                        if (!bch->swap_bits)
1109                                errloc[i] = (errloc[i] & ~7) |
1110                                            (7-(errloc[i] & 7));
1111                }
1112        }
1113        return (err >= 0) ? err : -EBADMSG;
1114}
1115EXPORT_SYMBOL_GPL(bch_decode);
1116
1117/*
1118 * generate Galois field lookup tables
1119 */
1120static int build_gf_tables(struct bch_control *bch, unsigned int poly)
1121{
1122        unsigned int i, x = 1;
1123        const unsigned int k = 1 << deg(poly);
1124
1125        /* primitive polynomial must be of degree m */
1126        if (k != (1u << GF_M(bch)))
1127                return -1;
1128
1129        for (i = 0; i < GF_N(bch); i++) {
1130                bch->a_pow_tab[i] = x;
1131                bch->a_log_tab[x] = i;
1132                if (i && (x == 1))
1133                        /* polynomial is not primitive (a^i=1 with 0<i<2^m-1) */
1134                        return -1;
1135                x <<= 1;
1136                if (x & k)
1137                        x ^= poly;
1138        }
1139        bch->a_pow_tab[GF_N(bch)] = 1;
1140        bch->a_log_tab[0] = 0;
1141
1142        return 0;
1143}
1144
1145/*
1146 * compute generator polynomial remainder tables for fast encoding
1147 */
1148static void build_mod8_tables(struct bch_control *bch, const uint32_t *g)
1149{
1150        int i, j, b, d;
1151        uint32_t data, hi, lo, *tab;
1152        const int l = BCH_ECC_WORDS(bch);
1153        const int plen = DIV_ROUND_UP(bch->ecc_bits+1, 32);
1154        const int ecclen = DIV_ROUND_UP(bch->ecc_bits, 32);
1155
1156        memset(bch->mod8_tab, 0, 4*256*l*sizeof(*bch->mod8_tab));
1157
1158        for (i = 0; i < 256; i++) {
1159                /* p(X)=i is a small polynomial of weight <= 8 */
1160                for (b = 0; b < 4; b++) {
1161                        /* we want to compute (p(X).X^(8*b+deg(g))) mod g(X) */
1162                        tab = bch->mod8_tab + (b*256+i)*l;
1163                        data = i << (8*b);
1164                        while (data) {
1165                                d = deg(data);
1166                                /* subtract X^d.g(X) from p(X).X^(8*b+deg(g)) */
1167                                data ^= g[0] >> (31-d);
1168                                for (j = 0; j < ecclen; j++) {
1169                                        hi = (d < 31) ? g[j] << (d+1) : 0;
1170                                        lo = (j+1 < plen) ?
1171                                                g[j+1] >> (31-d) : 0;
1172                                        tab[j] ^= hi|lo;
1173                                }
1174                        }
1175                }
1176        }
1177}
1178
1179/*
1180 * build a base for factoring degree 2 polynomials
1181 */
1182static int build_deg2_base(struct bch_control *bch)
1183{
1184        const int m = GF_M(bch);
1185        int i, j, r;
1186        unsigned int sum, x, y, remaining, ak = 0, xi[BCH_MAX_M];
1187
1188        /* find k s.t. Tr(a^k) = 1 and 0 <= k < m */
1189        for (i = 0; i < m; i++) {
1190                for (j = 0, sum = 0; j < m; j++)
1191                        sum ^= a_pow(bch, i*(1 << j));
1192
1193                if (sum) {
1194                        ak = bch->a_pow_tab[i];
1195                        break;
1196                }
1197        }
1198        /* find xi, i=0..m-1 such that xi^2+xi = a^i+Tr(a^i).a^k */
1199        remaining = m;
1200        memset(xi, 0, sizeof(xi));
1201
1202        for (x = 0; (x <= GF_N(bch)) && remaining; x++) {
1203                y = gf_sqr(bch, x)^x;
1204                for (i = 0; i < 2; i++) {
1205                        r = a_log(bch, y);
1206                        if (y && (r < m) && !xi[r]) {
1207                                bch->xi_tab[r] = x;
1208                                xi[r] = 1;
1209                                remaining--;
1210                                dbg("x%d = %x\n", r, x);
1211                                break;
1212                        }
1213                        y ^= ak;
1214                }
1215        }
1216        /* should not happen but check anyway */
1217        return remaining ? -1 : 0;
1218}
1219
1220static void *bch_alloc(size_t size, int *err)
1221{
1222        void *ptr;
1223
1224        ptr = kmalloc(size, GFP_KERNEL);
1225        if (ptr == NULL)
1226                *err = 1;
1227        return ptr;
1228}
1229
1230/*
1231 * compute generator polynomial for given (m,t) parameters.
1232 */
1233static uint32_t *compute_generator_polynomial(struct bch_control *bch)
1234{
1235        const unsigned int m = GF_M(bch);
1236        const unsigned int t = GF_T(bch);
1237        int n, err = 0;
1238        unsigned int i, j, nbits, r, word, *roots;
1239        struct gf_poly *g;
1240        uint32_t *genpoly;
1241
1242        g = bch_alloc(GF_POLY_SZ(m*t), &err);
1243        roots = bch_alloc((bch->n+1)*sizeof(*roots), &err);
1244        genpoly = bch_alloc(DIV_ROUND_UP(m*t+1, 32)*sizeof(*genpoly), &err);
1245
1246        if (err) {
1247                kfree(genpoly);
1248                genpoly = NULL;
1249                goto finish;
1250        }
1251
1252        /* enumerate all roots of g(X) */
1253        memset(roots , 0, (bch->n+1)*sizeof(*roots));
1254        for (i = 0; i < t; i++) {
1255                for (j = 0, r = 2*i+1; j < m; j++) {
1256                        roots[r] = 1;
1257                        r = mod_s(bch, 2*r);
1258                }
1259        }
1260        /* build generator polynomial g(X) */
1261        g->deg = 0;
1262        g->c[0] = 1;
1263        for (i = 0; i < GF_N(bch); i++) {
1264                if (roots[i]) {
1265                        /* multiply g(X) by (X+root) */
1266                        r = bch->a_pow_tab[i];
1267                        g->c[g->deg+1] = 1;
1268                        for (j = g->deg; j > 0; j--)
1269                                g->c[j] = gf_mul(bch, g->c[j], r)^g->c[j-1];
1270
1271                        g->c[0] = gf_mul(bch, g->c[0], r);
1272                        g->deg++;
1273                }
1274        }
1275        /* store left-justified binary representation of g(X) */
1276        n = g->deg+1;
1277        i = 0;
1278
1279        while (n > 0) {
1280                nbits = (n > 32) ? 32 : n;
1281                for (j = 0, word = 0; j < nbits; j++) {
1282                        if (g->c[n-1-j])
1283                                word |= 1u << (31-j);
1284                }
1285                genpoly[i++] = word;
1286                n -= nbits;
1287        }
1288        bch->ecc_bits = g->deg;
1289
1290finish:
1291        kfree(g);
1292        kfree(roots);
1293
1294        return genpoly;
1295}
1296
1297/**
1298 * bch_init - initialize a BCH encoder/decoder
1299 * @m:          Galois field order, should be in the range 5-15
1300 * @t:          maximum error correction capability, in bits
1301 * @prim_poly:  user-provided primitive polynomial (or 0 to use default)
1302 * @swap_bits:  swap bits within data and syndrome bytes
1303 *
1304 * Returns:
1305 *  a newly allocated BCH control structure if successful, NULL otherwise
1306 *
1307 * This initialization can take some time, as lookup tables are built for fast
1308 * encoding/decoding; make sure not to call this function from a time critical
1309 * path. Usually, bch_init() should be called on module/driver init and
1310 * bch_free() should be called to release memory on exit.
1311 *
1312 * You may provide your own primitive polynomial of degree @m in argument
1313 * @prim_poly, or let bch_init() use its default polynomial.
1314 *
1315 * Once bch_init() has successfully returned a pointer to a newly allocated
1316 * BCH control structure, ecc length in bytes is given by member @ecc_bytes of
1317 * the structure.
1318 */
1319struct bch_control *bch_init(int m, int t, unsigned int prim_poly,
1320                             bool swap_bits)
1321{
1322        int err = 0;
1323        unsigned int i, words;
1324        uint32_t *genpoly;
1325        struct bch_control *bch = NULL;
1326
1327        const int min_m = 5;
1328
1329        /* default primitive polynomials */
1330        static const unsigned int prim_poly_tab[] = {
1331                0x25, 0x43, 0x83, 0x11d, 0x211, 0x409, 0x805, 0x1053, 0x201b,
1332                0x402b, 0x8003,
1333        };
1334
1335#if defined(CONFIG_BCH_CONST_PARAMS)
1336        if ((m != (CONFIG_BCH_CONST_M)) || (t != (CONFIG_BCH_CONST_T))) {
1337                printk(KERN_ERR "bch encoder/decoder was configured to support "
1338                       "parameters m=%d, t=%d only!\n",
1339                       CONFIG_BCH_CONST_M, CONFIG_BCH_CONST_T);
1340                goto fail;
1341        }
1342#endif
1343        if ((m < min_m) || (m > BCH_MAX_M))
1344                /*
1345                 * values of m greater than 15 are not currently supported;
1346                 * supporting m > 15 would require changing table base type
1347                 * (uint16_t) and a small patch in matrix transposition
1348                 */
1349                goto fail;
1350
1351        if (t > BCH_MAX_T)
1352                /*
1353                 * we can support larger than 64 bits if necessary, at the
1354                 * cost of higher stack usage.
1355                 */
1356                goto fail;
1357
1358        /* sanity checks */
1359        if ((t < 1) || (m*t >= ((1 << m)-1)))
1360                /* invalid t value */
1361                goto fail;
1362
1363        /* select a primitive polynomial for generating GF(2^m) */
1364        if (prim_poly == 0)
1365                prim_poly = prim_poly_tab[m-min_m];
1366
1367        bch = kzalloc(sizeof(*bch), GFP_KERNEL);
1368        if (bch == NULL)
1369                goto fail;
1370
1371        bch->m = m;
1372        bch->t = t;
1373        bch->n = (1 << m)-1;
1374        words  = DIV_ROUND_UP(m*t, 32);
1375        bch->ecc_bytes = DIV_ROUND_UP(m*t, 8);
1376        bch->a_pow_tab = bch_alloc((1+bch->n)*sizeof(*bch->a_pow_tab), &err);
1377        bch->a_log_tab = bch_alloc((1+bch->n)*sizeof(*bch->a_log_tab), &err);
1378        bch->mod8_tab  = bch_alloc(words*1024*sizeof(*bch->mod8_tab), &err);
1379        bch->ecc_buf   = bch_alloc(words*sizeof(*bch->ecc_buf), &err);
1380        bch->ecc_buf2  = bch_alloc(words*sizeof(*bch->ecc_buf2), &err);
1381        bch->xi_tab    = bch_alloc(m*sizeof(*bch->xi_tab), &err);
1382        bch->syn       = bch_alloc(2*t*sizeof(*bch->syn), &err);
1383        bch->cache     = bch_alloc(2*t*sizeof(*bch->cache), &err);
1384        bch->elp       = bch_alloc((t+1)*sizeof(struct gf_poly_deg1), &err);
1385        bch->swap_bits = swap_bits;
1386
1387        for (i = 0; i < ARRAY_SIZE(bch->poly_2t); i++)
1388                bch->poly_2t[i] = bch_alloc(GF_POLY_SZ(2*t), &err);
1389
1390        if (err)
1391                goto fail;
1392
1393        err = build_gf_tables(bch, prim_poly);
1394        if (err)
1395                goto fail;
1396
1397        /* use generator polynomial for computing encoding tables */
1398        genpoly = compute_generator_polynomial(bch);
1399        if (genpoly == NULL)
1400                goto fail;
1401
1402        build_mod8_tables(bch, genpoly);
1403        kfree(genpoly);
1404
1405        err = build_deg2_base(bch);
1406        if (err)
1407                goto fail;
1408
1409        return bch;
1410
1411fail:
1412        bch_free(bch);
1413        return NULL;
1414}
1415EXPORT_SYMBOL_GPL(bch_init);
1416
1417/**
1418 *  bch_free - free the BCH control structure
1419 *  @bch:    BCH control structure to release
1420 */
1421void bch_free(struct bch_control *bch)
1422{
1423        unsigned int i;
1424
1425        if (bch) {
1426                kfree(bch->a_pow_tab);
1427                kfree(bch->a_log_tab);
1428                kfree(bch->mod8_tab);
1429                kfree(bch->ecc_buf);
1430                kfree(bch->ecc_buf2);
1431                kfree(bch->xi_tab);
1432                kfree(bch->syn);
1433                kfree(bch->cache);
1434                kfree(bch->elp);
1435
1436                for (i = 0; i < ARRAY_SIZE(bch->poly_2t); i++)
1437                        kfree(bch->poly_2t[i]);
1438
1439                kfree(bch);
1440        }
1441}
1442EXPORT_SYMBOL_GPL(bch_free);
1443
1444MODULE_LICENSE("GPL");
1445MODULE_AUTHOR("Ivan Djelic <ivan.djelic@parrot.com>");
1446MODULE_DESCRIPTION("Binary BCH encoder/decoder");
1447