LXR linux/drivers/mtd/nand/ecc-sw-hamming.c

   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * This file contains an ECC algorithm that detects and corrects 1 bit
   4 * errors in a 256 byte block of data.
   5 *
   6 * Copyright © 2008 Koninklijke Philips Electronics NV.
   7 *                  Author: Frans Meulenbroeks
   8 *
   9 * Completely replaces the previous ECC implementation which was written by:
  10 *   Steven J. Hill (sjhill@realitydiluted.com)
  11 *   Thomas Gleixner (tglx@linutronix.de)
  12 *
  13 * Information on how this algorithm works and how it was developed
  14 * can be found in Documentation/driver-api/mtd/nand_ecc.rst
  15 */
  16
  17#include <linux/types.h>
  18#include <linux/kernel.h>
  19#include <linux/module.h>
  20#include <linux/mtd/nand.h>
  21#include <linux/mtd/nand-ecc-sw-hamming.h>
  22#include <linux/slab.h>
  23#include <asm/byteorder.h>
  24
  25/*
  26 * invparity is a 256 byte table that contains the odd parity
  27 * for each byte. So if the number of bits in a byte is even,
  28 * the array element is 1, and when the number of bits is odd
  29 * the array eleemnt is 0.
  30 */
  31static const char invparity[256] = {
  32        1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
  33        0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
  34        0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
  35        1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
  36        0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
  37        1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
  38        1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
  39        0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
  40        0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
  41        1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
  42        1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
  43        0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
  44        1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
  45        0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
  46        0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
  47        1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1
  48};
  49
  50/*
  51 * bitsperbyte contains the number of bits per byte
  52 * this is only used for testing and repairing parity
  53 * (a precalculated value slightly improves performance)
  54 */
  55static const char bitsperbyte[256] = {
  56        0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
  57        1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
  58        1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
  59        2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
  60        1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
  61        2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
  62        2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
  63        3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
  64        1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
  65        2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
  66        2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
  67        3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
  68        2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
  69        3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
  70        3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
  71        4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
  72};
  73
  74/*
  75 * addressbits is a lookup table to filter out the bits from the xor-ed
  76 * ECC data that identify the faulty location.
  77 * this is only used for repairing parity
  78 * see the comments in nand_ecc_sw_hamming_correct for more details
  79 */
  80static const char addressbits[256] = {
  81        0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
  82        0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
  83        0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
  84        0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
  85        0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
  86        0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
  87        0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
  88        0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
  89        0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
  90        0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
  91        0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
  92        0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
  93        0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
  94        0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
  95        0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
  96        0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
  97        0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
  98        0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
  99        0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
 100        0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
 101        0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
 102        0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
 103        0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
 104        0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
 105        0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
 106        0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
 107        0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
 108        0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
 109        0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
 110        0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
 111        0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
 112        0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f
 113};
 114
 115int ecc_sw_hamming_calculate(const unsigned char *buf, unsigned int step_size,
 116                             unsigned char *code, bool sm_order)
 117{
 118        const u32 *bp = (uint32_t *)buf;
 119        const u32 eccsize_mult = (step_size == 256) ? 1 : 2;
 120        /* current value in buffer */
 121        u32 cur;
 122        /* rp0..rp17 are the various accumulated parities (per byte) */
 123        u32 rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7, rp8, rp9, rp10, rp11, rp12,
 124                rp13, rp14, rp15, rp16, rp17;
 125        /* Cumulative parity for all data */
 126        u32 par;
 127        /* Cumulative parity at the end of the loop (rp12, rp14, rp16) */
 128        u32 tmppar;
 129        int i;
 130
 131        par = 0;
 132        rp4 = 0;
 133        rp6 = 0;
 134        rp8 = 0;
 135        rp10 = 0;
 136        rp12 = 0;
 137        rp14 = 0;
 138        rp16 = 0;
 139        rp17 = 0;
 140
 141        /*
 142         * The loop is unrolled a number of times;
 143         * This avoids if statements to decide on which rp value to update
 144         * Also we process the data by longwords.
 145         * Note: passing unaligned data might give a performance penalty.
 146         * It is assumed that the buffers are aligned.
 147         * tmppar is the cumulative sum of this iteration.
 148         * needed for calculating rp12, rp14, rp16 and par
 149         * also used as a performance improvement for rp6, rp8 and rp10
 150         */
 151        for (i = 0; i < eccsize_mult << 2; i++) {
 152                cur = *bp++;
 153                tmppar = cur;
 154                rp4 ^= cur;
 155                cur = *bp++;
 156                tmppar ^= cur;
 157                rp6 ^= tmppar;
 158                cur = *bp++;
 159                tmppar ^= cur;
 160                rp4 ^= cur;
 161                cur = *bp++;
 162                tmppar ^= cur;
 163                rp8 ^= tmppar;
 164
 165                cur = *bp++;
 166                tmppar ^= cur;
 167                rp4 ^= cur;
 168                rp6 ^= cur;
 169                cur = *bp++;
 170                tmppar ^= cur;
 171                rp6 ^= cur;
 172                cur = *bp++;
 173                tmppar ^= cur;
 174                rp4 ^= cur;
 175                cur = *bp++;
 176                tmppar ^= cur;
 177                rp10 ^= tmppar;
 178
 179                cur = *bp++;
 180                tmppar ^= cur;
 181                rp4 ^= cur;
 182                rp6 ^= cur;
 183                rp8 ^= cur;
 184                cur = *bp++;
 185                tmppar ^= cur;
 186                rp6 ^= cur;
 187                rp8 ^= cur;
 188                cur = *bp++;
 189                tmppar ^= cur;
 190                rp4 ^= cur;
 191                rp8 ^= cur;
 192                cur = *bp++;
 193                tmppar ^= cur;
 194                rp8 ^= cur;
 195
 196                cur = *bp++;
 197                tmppar ^= cur;
 198                rp4 ^= cur;
 199                rp6 ^= cur;
 200                cur = *bp++;
 201                tmppar ^= cur;
 202                rp6 ^= cur;
 203                cur = *bp++;
 204                tmppar ^= cur;
 205                rp4 ^= cur;
 206                cur = *bp++;
 207                tmppar ^= cur;
 208
 209                par ^= tmppar;
 210                if ((i & 0x1) == 0)
 211                        rp12 ^= tmppar;
 212                if ((i & 0x2) == 0)
 213                        rp14 ^= tmppar;
 214                if (eccsize_mult == 2 && (i & 0x4) == 0)
 215                        rp16 ^= tmppar;
 216        }
 217
 218        /*
 219         * handle the fact that we use longword operations
 220         * we'll bring rp4..rp14..rp16 back to single byte entities by
 221         * shifting and xoring first fold the upper and lower 16 bits,
 222         * then the upper and lower 8 bits.
 223         */
 224        rp4 ^= (rp4 >> 16);
 225        rp4 ^= (rp4 >> 8);
 226        rp4 &= 0xff;
 227        rp6 ^= (rp6 >> 16);
 228        rp6 ^= (rp6 >> 8);
 229        rp6 &= 0xff;
 230        rp8 ^= (rp8 >> 16);
 231        rp8 ^= (rp8 >> 8);
 232        rp8 &= 0xff;
 233        rp10 ^= (rp10 >> 16);
 234        rp10 ^= (rp10 >> 8);
 235        rp10 &= 0xff;
 236        rp12 ^= (rp12 >> 16);
 237        rp12 ^= (rp12 >> 8);
 238        rp12 &= 0xff;
 239        rp14 ^= (rp14 >> 16);
 240        rp14 ^= (rp14 >> 8);
 241        rp14 &= 0xff;
 242        if (eccsize_mult == 2) {
 243                rp16 ^= (rp16 >> 16);
 244                rp16 ^= (rp16 >> 8);
 245                rp16 &= 0xff;
 246        }
 247
 248        /*
 249         * we also need to calculate the row parity for rp0..rp3
 250         * This is present in par, because par is now
 251         * rp3 rp3 rp2 rp2 in little endian and
 252         * rp2 rp2 rp3 rp3 in big endian
 253         * as well as
 254         * rp1 rp0 rp1 rp0 in little endian and
 255         * rp0 rp1 rp0 rp1 in big endian
 256         * First calculate rp2 and rp3
 257         */
 258#ifdef __BIG_ENDIAN
 259        rp2 = (par >> 16);
 260        rp2 ^= (rp2 >> 8);
 261        rp2 &= 0xff;
 262        rp3 = par & 0xffff;
 263        rp3 ^= (rp3 >> 8);
 264        rp3 &= 0xff;
 265#else
 266        rp3 = (par >> 16);
 267        rp3 ^= (rp3 >> 8);
 268        rp3 &= 0xff;
 269        rp2 = par & 0xffff;
 270        rp2 ^= (rp2 >> 8);
 271        rp2 &= 0xff;
 272#endif
 273
 274        /* reduce par to 16 bits then calculate rp1 and rp0 */
 275        par ^= (par >> 16);
 276#ifdef __BIG_ENDIAN
 277        rp0 = (par >> 8) & 0xff;
 278        rp1 = (par & 0xff);
 279#else
 280        rp1 = (par >> 8) & 0xff;
 281        rp0 = (par & 0xff);
 282#endif
 283
 284        /* finally reduce par to 8 bits */
 285        par ^= (par >> 8);
 286        par &= 0xff;
 287
 288        /*
 289         * and calculate rp5..rp15..rp17
 290         * note that par = rp4 ^ rp5 and due to the commutative property
 291         * of the ^ operator we can say:
 292         * rp5 = (par ^ rp4);
 293         * The & 0xff seems superfluous, but benchmarking learned that
 294         * leaving it out gives slightly worse results. No idea why, probably
 295         * it has to do with the way the pipeline in pentium is organized.
 296         */
 297        rp5 = (par ^ rp4) & 0xff;
 298        rp7 = (par ^ rp6) & 0xff;
 299        rp9 = (par ^ rp8) & 0xff;
 300        rp11 = (par ^ rp10) & 0xff;
 301        rp13 = (par ^ rp12) & 0xff;
 302        rp15 = (par ^ rp14) & 0xff;
 303        if (eccsize_mult == 2)
 304                rp17 = (par ^ rp16) & 0xff;
 305
 306        /*
 307         * Finally calculate the ECC bits.
 308         * Again here it might seem that there are performance optimisations
 309         * possible, but benchmarks showed that on the system this is developed
 310         * the code below is the fastest
 311         */
 312        if (sm_order) {
 313                code[0] = (invparity[rp7] << 7) | (invparity[rp6] << 6) |
 314                          (invparity[rp5] << 5) | (invparity[rp4] << 4) |
 315                          (invparity[rp3] << 3) | (invparity[rp2] << 2) |
 316                          (invparity[rp1] << 1) | (invparity[rp0]);
 317                code[1] = (invparity[rp15] << 7) | (invparity[rp14] << 6) |
 318                          (invparity[rp13] << 5) | (invparity[rp12] << 4) |
 319                          (invparity[rp11] << 3) | (invparity[rp10] << 2) |
 320                          (invparity[rp9] << 1) | (invparity[rp8]);
 321        } else {
 322                code[1] = (invparity[rp7] << 7) | (invparity[rp6] << 6) |
 323                          (invparity[rp5] << 5) | (invparity[rp4] << 4) |
 324                          (invparity[rp3] << 3) | (invparity[rp2] << 2) |
 325                          (invparity[rp1] << 1) | (invparity[rp0]);
 326                code[0] = (invparity[rp15] << 7) | (invparity[rp14] << 6) |
 327                          (invparity[rp13] << 5) | (invparity[rp12] << 4) |
 328                          (invparity[rp11] << 3) | (invparity[rp10] << 2) |
 329                          (invparity[rp9] << 1) | (invparity[rp8]);
 330        }
 331
 332        if (eccsize_mult == 1)
 333                code[2] =
 334                    (invparity[par & 0xf0] << 7) |
 335                    (invparity[par & 0x0f] << 6) |
 336                    (invparity[par & 0xcc] << 5) |
 337                    (invparity[par & 0x33] << 4) |
 338                    (invparity[par & 0xaa] << 3) |
 339                    (invparity[par & 0x55] << 2) |
 340                    3;
 341        else
 342                code[2] =
 343                    (invparity[par & 0xf0] << 7) |
 344                    (invparity[par & 0x0f] << 6) |
 345                    (invparity[par & 0xcc] << 5) |
 346                    (invparity[par & 0x33] << 4) |
 347                    (invparity[par & 0xaa] << 3) |
 348                    (invparity[par & 0x55] << 2) |
 349                    (invparity[rp17] << 1) |
 350                    (invparity[rp16] << 0);
 351
 352        return 0;
 353}
 354EXPORT_SYMBOL(ecc_sw_hamming_calculate);
 355
 356/**
 357 * nand_ecc_sw_hamming_calculate - Calculate 3-byte ECC for 256/512-byte block
 358 * @nand: NAND device
 359 * @buf: Input buffer with raw data
 360 * @code: Output buffer with ECC
 361 */
 362int nand_ecc_sw_hamming_calculate(struct nand_device *nand,
 363                                  const unsigned char *buf, unsigned char *code)
 364{
 365        struct nand_ecc_sw_hamming_conf *engine_conf = nand->ecc.ctx.priv;
 366        unsigned int step_size = nand->ecc.ctx.conf.step_size;
 367        bool sm_order = engine_conf ? engine_conf->sm_order : false;
 368
 369        return ecc_sw_hamming_calculate(buf, step_size, code, sm_order);
 370}
 371EXPORT_SYMBOL(nand_ecc_sw_hamming_calculate);
 372
 373int ecc_sw_hamming_correct(unsigned char *buf, unsigned char *read_ecc,
 374                           unsigned char *calc_ecc, unsigned int step_size,
 375                           bool sm_order)
 376{
 377        const u32 eccsize_mult = step_size >> 8;
 378        unsigned char b0, b1, b2, bit_addr;
 379        unsigned int byte_addr;
 380
 381        /*
 382         * b0 to b2 indicate which bit is faulty (if any)
 383         * we might need the xor result  more than once,
 384         * so keep them in a local var
 385        */
 386        if (sm_order) {
 387                b0 = read_ecc[0] ^ calc_ecc[0];
 388                b1 = read_ecc[1] ^ calc_ecc[1];
 389        } else {
 390                b0 = read_ecc[1] ^ calc_ecc[1];
 391                b1 = read_ecc[0] ^ calc_ecc[0];
 392        }
 393
 394        b2 = read_ecc[2] ^ calc_ecc[2];
 395
 396        /* check if there are any bitfaults */
 397
 398        /* repeated if statements are slightly more efficient than switch ... */
 399        /* ordered in order of likelihood */
 400
 401        if ((b0 | b1 | b2) == 0)
 402                return 0;       /* no error */
 403
 404        if ((((b0 ^ (b0 >> 1)) & 0x55) == 0x55) &&
 405            (((b1 ^ (b1 >> 1)) & 0x55) == 0x55) &&
 406            ((eccsize_mult == 1 && ((b2 ^ (b2 >> 1)) & 0x54) == 0x54) ||
 407             (eccsize_mult == 2 && ((b2 ^ (b2 >> 1)) & 0x55) == 0x55))) {
 408        /* single bit error */
 409                /*
 410                 * rp17/rp15/13/11/9/7/5/3/1 indicate which byte is the faulty
 411                 * byte, cp 5/3/1 indicate the faulty bit.
 412                 * A lookup table (called addressbits) is used to filter
 413                 * the bits from the byte they are in.
 414                 * A marginal optimisation is possible by having three
 415                 * different lookup tables.
 416                 * One as we have now (for b0), one for b2
 417                 * (that would avoid the >> 1), and one for b1 (with all values
 418                 * << 4). However it was felt that introducing two more tables
 419                 * hardly justify the gain.
 420                 *
 421                 * The b2 shift is there to get rid of the lowest two bits.
 422                 * We could also do addressbits[b2] >> 1 but for the
 423                 * performance it does not make any difference
 424                 */
 425                if (eccsize_mult == 1)
 426                        byte_addr = (addressbits[b1] << 4) + addressbits[b0];
 427                else
 428                        byte_addr = (addressbits[b2 & 0x3] << 8) +
 429                                    (addressbits[b1] << 4) + addressbits[b0];
 430                bit_addr = addressbits[b2 >> 2];
 431                /* flip the bit */
 432                buf[byte_addr] ^= (1 << bit_addr);
 433                return 1;
 434
 435        }
 436        /* count nr of bits; use table lookup, faster than calculating it */
 437        if ((bitsperbyte[b0] + bitsperbyte[b1] + bitsperbyte[b2]) == 1)
 438                return 1;       /* error in ECC data; no action needed */
 439
 440        pr_err("%s: uncorrectable ECC error\n", __func__);
 441        return -EBADMSG;
 442}
 443EXPORT_SYMBOL(ecc_sw_hamming_correct);
 444
 445/**
 446 * nand_ecc_sw_hamming_correct - Detect and correct bit error(s)
 447 * @nand: NAND device
 448 * @buf: Raw data read from the chip
 449 * @read_ecc: ECC bytes read from the chip
 450 * @calc_ecc: ECC calculated from the raw data
 451 *
 452 * Detect and correct up to 1 bit error per 256/512-byte block.
 453 */
 454int nand_ecc_sw_hamming_correct(struct nand_device *nand, unsigned char *buf,
 455                                unsigned char *read_ecc,
 456                                unsigned char *calc_ecc)
 457{
 458        struct nand_ecc_sw_hamming_conf *engine_conf = nand->ecc.ctx.priv;
 459        unsigned int step_size = nand->ecc.ctx.conf.step_size;
 460        bool sm_order = engine_conf ? engine_conf->sm_order : false;
 461
 462        return ecc_sw_hamming_correct(buf, read_ecc, calc_ecc, step_size,
 463                                      sm_order);
 464}
 465EXPORT_SYMBOL(nand_ecc_sw_hamming_correct);
 466
 467int nand_ecc_sw_hamming_init_ctx(struct nand_device *nand)
 468{
 469        struct nand_ecc_props *conf = &nand->ecc.ctx.conf;
 470        struct nand_ecc_sw_hamming_conf *engine_conf;
 471        struct mtd_info *mtd = nanddev_to_mtd(nand);
 472        int ret;
 473
 474        if (!mtd->ooblayout) {
 475                switch (mtd->oobsize) {
 476                case 8:
 477                case 16:
 478                        mtd_set_ooblayout(mtd, nand_get_small_page_ooblayout());
 479                        break;
 480                case 64:
 481                case 128:
 482                        mtd_set_ooblayout(mtd,
 483                                          nand_get_large_page_hamming_ooblayout());
 484                        break;
 485                default:
 486                        return -ENOTSUPP;
 487                }
 488        }
 489
 490        conf->engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
 491        conf->algo = NAND_ECC_ALGO_HAMMING;
 492        conf->step_size = nand->ecc.user_conf.step_size;
 493        conf->strength = 1;
 494
 495        /* Use the strongest configuration by default */
 496        if (conf->step_size != 256 && conf->step_size != 512)
 497                conf->step_size = 256;
 498
 499        engine_conf = kzalloc(sizeof(*engine_conf), GFP_KERNEL);
 500        if (!engine_conf)
 501                return -ENOMEM;
 502
 503        ret = nand_ecc_init_req_tweaking(&engine_conf->req_ctx, nand);
 504        if (ret)
 505                goto free_engine_conf;
 506
 507        engine_conf->code_size = 3;
 508        engine_conf->calc_buf = kzalloc(mtd->oobsize, GFP_KERNEL);
 509        engine_conf->code_buf = kzalloc(mtd->oobsize, GFP_KERNEL);
 510        if (!engine_conf->calc_buf || !engine_conf->code_buf) {
 511                ret = -ENOMEM;
 512                goto free_bufs;
 513        }
 514
 515        nand->ecc.ctx.priv = engine_conf;
 516        nand->ecc.ctx.nsteps = mtd->writesize / conf->step_size;
 517        nand->ecc.ctx.total = nand->ecc.ctx.nsteps * engine_conf->code_size;
 518
 519        return 0;
 520
 521free_bufs:
 522        nand_ecc_cleanup_req_tweaking(&engine_conf->req_ctx);
 523        kfree(engine_conf->calc_buf);
 524        kfree(engine_conf->code_buf);
 525free_engine_conf:
 526        kfree(engine_conf);
 527
 528        return ret;
 529}
 530EXPORT_SYMBOL(nand_ecc_sw_hamming_init_ctx);
 531
 532void nand_ecc_sw_hamming_cleanup_ctx(struct nand_device *nand)
 533{
 534        struct nand_ecc_sw_hamming_conf *engine_conf = nand->ecc.ctx.priv;
 535
 536        if (engine_conf) {
 537                nand_ecc_cleanup_req_tweaking(&engine_conf->req_ctx);
 538                kfree(engine_conf->calc_buf);
 539                kfree(engine_conf->code_buf);
 540                kfree(engine_conf);
 541        }
 542}
 543EXPORT_SYMBOL(nand_ecc_sw_hamming_cleanup_ctx);
 544
 545static int nand_ecc_sw_hamming_prepare_io_req(struct nand_device *nand,
 546                                              struct nand_page_io_req *req)
 547{
 548        struct nand_ecc_sw_hamming_conf *engine_conf = nand->ecc.ctx.priv;
 549        struct mtd_info *mtd = nanddev_to_mtd(nand);
 550        int eccsize = nand->ecc.ctx.conf.step_size;
 551        int eccbytes = engine_conf->code_size;
 552        int eccsteps = nand->ecc.ctx.nsteps;
 553        int total = nand->ecc.ctx.total;
 554        u8 *ecccalc = engine_conf->calc_buf;
 555        const u8 *data;
 556        int i;
 557
 558        /* Nothing to do for a raw operation */
 559        if (req->mode == MTD_OPS_RAW)
 560                return 0;
 561
 562        /* This engine does not provide BBM/free OOB bytes protection */
 563        if (!req->datalen)
 564                return 0;
 565
 566        nand_ecc_tweak_req(&engine_conf->req_ctx, req);
 567
 568        /* No more preparation for page read */
 569        if (req->type == NAND_PAGE_READ)
 570                return 0;
 571
 572        /* Preparation for page write: derive the ECC bytes and place them */
 573        for (i = 0, data = req->databuf.out;
 574             eccsteps;
 575             eccsteps--, i += eccbytes, data += eccsize)
 576                nand_ecc_sw_hamming_calculate(nand, data, &ecccalc[i]);
 577
 578        return mtd_ooblayout_set_eccbytes(mtd, ecccalc, (void *)req->oobbuf.out,
 579                                          0, total);
 580}
 581
 582static int nand_ecc_sw_hamming_finish_io_req(struct nand_device *nand,
 583                                             struct nand_page_io_req *req)
 584{
 585        struct nand_ecc_sw_hamming_conf *engine_conf = nand->ecc.ctx.priv;
 586        struct mtd_info *mtd = nanddev_to_mtd(nand);
 587        int eccsize = nand->ecc.ctx.conf.step_size;
 588        int total = nand->ecc.ctx.total;
 589        int eccbytes = engine_conf->code_size;
 590        int eccsteps = nand->ecc.ctx.nsteps;
 591        u8 *ecccalc = engine_conf->calc_buf;
 592        u8 *ecccode = engine_conf->code_buf;
 593        unsigned int max_bitflips = 0;
 594        u8 *data = req->databuf.in;
 595        int i, ret;
 596
 597        /* Nothing to do for a raw operation */
 598        if (req->mode == MTD_OPS_RAW)
 599                return 0;
 600
 601        /* This engine does not provide BBM/free OOB bytes protection */
 602        if (!req->datalen)
 603                return 0;
 604
 605        /* No more preparation for page write */
 606        if (req->type == NAND_PAGE_WRITE) {
 607                nand_ecc_restore_req(&engine_conf->req_ctx, req);
 608                return 0;
 609        }
 610
 611        /* Finish a page read: retrieve the (raw) ECC bytes*/
 612        ret = mtd_ooblayout_get_eccbytes(mtd, ecccode, req->oobbuf.in, 0,
 613                                         total);
 614        if (ret)
 615                return ret;
 616
 617        /* Calculate the ECC bytes */
 618        for (i = 0; eccsteps; eccsteps--, i += eccbytes, data += eccsize)
 619                nand_ecc_sw_hamming_calculate(nand, data, &ecccalc[i]);
 620
 621        /* Finish a page read: compare and correct */
 622        for (eccsteps = nand->ecc.ctx.nsteps, i = 0, data = req->databuf.in;
 623             eccsteps;
 624             eccsteps--, i += eccbytes, data += eccsize) {
 625                int stat =  nand_ecc_sw_hamming_correct(nand, data,
 626                                                        &ecccode[i],
 627                                                        &ecccalc[i]);
 628                if (stat < 0) {
 629                        mtd->ecc_stats.failed++;
 630                } else {
 631                        mtd->ecc_stats.corrected += stat;
 632                        max_bitflips = max_t(unsigned int, max_bitflips, stat);
 633                }
 634        }
 635
 636        nand_ecc_restore_req(&engine_conf->req_ctx, req);
 637
 638        return max_bitflips;
 639}
 640
 641static struct nand_ecc_engine_ops nand_ecc_sw_hamming_engine_ops = {
 642        .init_ctx = nand_ecc_sw_hamming_init_ctx,
 643        .cleanup_ctx = nand_ecc_sw_hamming_cleanup_ctx,
 644        .prepare_io_req = nand_ecc_sw_hamming_prepare_io_req,
 645        .finish_io_req = nand_ecc_sw_hamming_finish_io_req,
 646};
 647
 648static struct nand_ecc_engine nand_ecc_sw_hamming_engine = {
 649        .ops = &nand_ecc_sw_hamming_engine_ops,
 650};
 651
 652struct nand_ecc_engine *nand_ecc_sw_hamming_get_engine(void)
 653{
 654        return &nand_ecc_sw_hamming_engine;
 655}
 656EXPORT_SYMBOL(nand_ecc_sw_hamming_get_engine);
 657
 658MODULE_LICENSE("GPL");
 659MODULE_AUTHOR("Frans Meulenbroeks <fransmeulenbroeks@gmail.com>");
 660MODULE_DESCRIPTION("NAND software Hamming ECC support");
 661