linux/drivers/mtd/mtdswap.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Swap block device support for MTDs
 * Turns an MTD device into a swap device with block wear leveling
 *
 * Copyright © 2007,2011 Nokia Corporation. All rights reserved.
 *
 * Authors: Jarkko Lavinen <jarkko.lavinen@nokia.com>
 *
 * Based on Richard Purdie's earlier implementation in 2007. Background
 * support and lock-less operation written by Adrian Hunter.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mtd/mtd.h>
#include <linux/mtd/blktrans.h>
#include <linux/rbtree.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/genhd.h>
#include <linux/swap.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/device.h>
#include <linux/math64.h>

#define MTDSWAP_PREFIX "mtdswap"

/*
 * The number of free eraseblocks when GC should stop
 */
#define CLEAN_BLOCK_THRESHOLD   20

/*
 * Number of free eraseblocks below which GC can also collect low frag
 * blocks.
 */
#define LOW_FRAG_GC_THRESHOLD   5

/*
 * Wear level cost amortization. We want to do wear leveling in the
 * background without disturbing GC too much. This is done by defining a
 * maximum GC frequency: a frequency value of 6 means that 1/6 of the GC
 * passes pick an erase block based on the largest wear difference rather
 * than the largest dirtiness.
 *
 * The lower frequency, freq2, should be chosen so that the maximum erase
 * difference keeps decreasing even if a malicious application deliberately
 * tries to make erase differences large.
 */
#define MAX_ERASE_DIFF          4000
#define COLLECT_NONDIRTY_BASE   MAX_ERASE_DIFF
#define COLLECT_NONDIRTY_FREQ1  6
#define COLLECT_NONDIRTY_FREQ2  4

#define PAGE_UNDEF              UINT_MAX
#define BLOCK_UNDEF             UINT_MAX
#define BLOCK_ERROR             (UINT_MAX - 1)
#define BLOCK_MAX               (UINT_MAX - 2)
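
/*
 * Illustration (editor's note, not part of the original driver): the
 * page_data[] table maps a swap page to its current block number and uses
 * the values above as sentinels:
 *
 *	mapped = d->page_data[page];
 *	mapped <= BLOCK_MAX	-> page lives in block "mapped"
 *	mapped == BLOCK_UNDEF	-> never written; reads back as zeroes
 *	mapped == BLOCK_ERROR	-> lost to an I/O error; reads fail with -EIO
 */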

#define EBLOCK_BAD              (1 << 0)
#define EBLOCK_NOMAGIC          (1 << 1)
#define EBLOCK_BITFLIP          (1 << 2)
#define EBLOCK_FAILED           (1 << 3)
#define EBLOCK_READERR          (1 << 4)
#define EBLOCK_IDX_SHIFT        5
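
/*
 * Illustration (editor's note): bits 0-4 of swap_eb->flags hold the state
 * bits above; during the initial scan mtdswap_scan_eblks() parks the
 * destination tree index in the bits above EBLOCK_IDX_SHIFT:
 *
 *	eb->flags |= (MTDSWAP_DIRTY << EBLOCK_IDX_SHIFT);
 *	...
 *	idx = eb->flags >> EBLOCK_IDX_SHIFT;	-> recovers MTDSWAP_DIRTY
 */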

struct swap_eb {
        struct rb_node rb;
        struct rb_root *root;

        unsigned int flags;
        unsigned int active_count;
        unsigned int erase_count;
        unsigned int pad;               /* speeds up pointer decrement */
};

#define MTDSWAP_ECNT_MIN(rbroot) (rb_entry(rb_first(rbroot), struct swap_eb, \
                                rb)->erase_count)
#define MTDSWAP_ECNT_MAX(rbroot) (rb_entry(rb_last(rbroot), struct swap_eb, \
                                rb)->erase_count)

struct mtdswap_tree {
        struct rb_root root;
        unsigned int count;
};

enum {
        MTDSWAP_CLEAN,
        MTDSWAP_USED,
        MTDSWAP_LOWFRAG,
        MTDSWAP_HIFRAG,
        MTDSWAP_DIRTY,
        MTDSWAP_BITFLIP,
        MTDSWAP_FAILING,
        MTDSWAP_TREE_CNT,
};

struct mtdswap_dev {
        struct mtd_blktrans_dev *mbd_dev;
        struct mtd_info *mtd;
        struct device *dev;

        unsigned int *page_data;
        unsigned int *revmap;

        unsigned int eblks;
        unsigned int spare_eblks;
        unsigned int pages_per_eblk;
        unsigned int max_erase_count;
        struct swap_eb *eb_data;

        struct mtdswap_tree trees[MTDSWAP_TREE_CNT];

        unsigned long long sect_read_count;
        unsigned long long sect_write_count;
        unsigned long long mtd_write_count;
        unsigned long long mtd_read_count;
        unsigned long long discard_count;
        unsigned long long discard_page_count;

        unsigned int curr_write_pos;
        struct swap_eb *curr_write;

        char *page_buf;
        char *oob_buf;
};

struct mtdswap_oobdata {
        __le16 magic;
        __le32 count;
} __packed;

#define MTDSWAP_MAGIC_CLEAN     0x2095
#define MTDSWAP_MAGIC_DIRTY     (MTDSWAP_MAGIC_CLEAN + 1)
#define MTDSWAP_TYPE_CLEAN      0
#define MTDSWAP_TYPE_DIRTY      1
#define MTDSWAP_OOBSIZE         sizeof(struct mtdswap_oobdata)
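
/*
 * Illustration (editor's note): the markers live in the free OOB bytes of
 * an erase block's first two MTD pages, written with MTD_OPS_AUTO_OOB:
 *
 *	page 0 OOB: magic 0x2095 (CLEAN) + 32-bit erase count, 6 bytes
 *	page 1 OOB: magic 0x2096 (DIRTY), 2 bytes
 *
 * A freshly erased block carries only the clean marker; writing the dirty
 * marker commits the block to use (see mtdswap_write_marker()).
 */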

#define MTDSWAP_ERASE_RETRIES   3 /* Before marking erase block bad */
#define MTDSWAP_IO_RETRIES      3

enum {
        MTDSWAP_SCANNED_CLEAN,
        MTDSWAP_SCANNED_DIRTY,
        MTDSWAP_SCANNED_BITFLIP,
        MTDSWAP_SCANNED_BAD,
};

/*
 * In the worst case mtdswap_writesect() has allocated the last clean
 * page from the current block and is then pre-empted by the GC
 * thread. The thread can consume a full erase block when moving a
 * block.
 */
#define MIN_SPARE_EBLOCKS       2
#define MIN_ERASE_BLOCKS        (MIN_SPARE_EBLOCKS + 1)

#define TREE_ROOT(d, name) (&d->trees[MTDSWAP_ ## name].root)
#define TREE_EMPTY(d, name) (TREE_ROOT(d, name)->rb_node == NULL)
#define TREE_NONEMPTY(d, name) (!TREE_EMPTY(d, name))
#define TREE_COUNT(d, name) (d->trees[MTDSWAP_ ## name].count)
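
/*
 * Illustration (editor's note): the helpers above index the tree array by
 * state name, e.g.:
 *
 *	TREE_ROOT(d, CLEAN)   expands to   (&d->trees[MTDSWAP_CLEAN].root)
 *	TREE_COUNT(d, DIRTY)  expands to   (d->trees[MTDSWAP_DIRTY].count)
 */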

#define MTDSWAP_MBD_TO_MTDSWAP(dev) ((struct mtdswap_dev *)dev->priv)

static char partitions[128] = "";
module_param_string(partitions, partitions, sizeof(partitions), 0444);
MODULE_PARM_DESC(partitions, "MTD partition numbers to use as swap, "
                "e.g. partitions=\"1,3,5\"");

static unsigned int spare_eblocks = 10;
module_param(spare_eblocks, uint, 0444);
MODULE_PARM_DESC(spare_eblocks, "Percentage of spare erase blocks for "
                "garbage collection (default 10%)");

static bool header; /* false */
module_param(header, bool, 0444);
MODULE_PARM_DESC(header,
                "Include builtin swap header (default 0, without header)");
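
/*
 * Example usage (editor's sketch; the device node name is an assumption -
 * the blktrans core names the disk "mtdswap<N>" after the MTD index):
 *
 *	# modprobe mtdswap partitions="1" spare_eblocks=10 header=1
 *	# swapon /dev/mtdswap1
 *
 * With header=0 (the default) no built-in header is provided, so run
 * mkswap on the device before swapon.
 */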

static int mtdswap_gc(struct mtdswap_dev *d, unsigned int background);

static loff_t mtdswap_eb_offset(struct mtdswap_dev *d, struct swap_eb *eb)
{
        return (loff_t)(eb - d->eb_data) * d->mtd->erasesize;
}
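
/*
 * Worked example (editor's note): with a 128 KiB erase size, eb_data[2]
 * maps to media offset 2 * 0x20000 = 0x40000.
 */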

static void mtdswap_eb_detach(struct mtdswap_dev *d, struct swap_eb *eb)
{
        unsigned int oldidx;
        struct mtdswap_tree *tp;

        if (eb->root) {
                tp = container_of(eb->root, struct mtdswap_tree, root);
                oldidx = tp - &d->trees[0];

                d->trees[oldidx].count--;
                rb_erase(&eb->rb, eb->root);
        }
}

static void __mtdswap_rb_add(struct rb_root *root, struct swap_eb *eb)
{
        struct rb_node **p, *parent = NULL;
        struct swap_eb *cur;

        p = &root->rb_node;
        while (*p) {
                parent = *p;
                cur = rb_entry(parent, struct swap_eb, rb);
                if (eb->erase_count > cur->erase_count)
                        p = &(*p)->rb_right;
                else
                        p = &(*p)->rb_left;
        }

        rb_link_node(&eb->rb, parent, p);
        rb_insert_color(&eb->rb, root);
}

static void mtdswap_rb_add(struct mtdswap_dev *d, struct swap_eb *eb, int idx)
{
        struct rb_root *root;

        if (eb->root == &d->trees[idx].root)
                return;

        mtdswap_eb_detach(d, eb);
        root = &d->trees[idx].root;
        __mtdswap_rb_add(root, eb);
        eb->root = root;
        d->trees[idx].count++;
}

static struct rb_node *mtdswap_rb_index(struct rb_root *root, unsigned int idx)
{
        struct rb_node *p;
        unsigned int i;

        p = rb_first(root);
        i = 0;
        while (i < idx && p) {
                p = rb_next(p);
                i++;
        }

        return p;
}

static int mtdswap_handle_badblock(struct mtdswap_dev *d, struct swap_eb *eb)
{
        int ret;
        loff_t offset;

        d->spare_eblks--;
        eb->flags |= EBLOCK_BAD;
        mtdswap_eb_detach(d, eb);
        eb->root = NULL;

        /* badblocks not supported */
        if (!mtd_can_have_bb(d->mtd))
                return 1;

        offset = mtdswap_eb_offset(d, eb);
        dev_warn(d->dev, "Marking bad block at %08llx\n", offset);
        ret = mtd_block_markbad(d->mtd, offset);

        if (ret) {
                dev_warn(d->dev, "Mark block bad failed for block at %08llx "
                        "error %d\n", offset, ret);
                return ret;
        }

        return 1;
}

static int mtdswap_handle_write_error(struct mtdswap_dev *d, struct swap_eb *eb)
{
        unsigned int marked = eb->flags & EBLOCK_FAILED;
        struct swap_eb *curr_write = d->curr_write;

        eb->flags |= EBLOCK_FAILED;
        if (curr_write == eb) {
                d->curr_write = NULL;

                if (!marked && d->curr_write_pos != 0) {
                        mtdswap_rb_add(d, eb, MTDSWAP_FAILING);
                        return 0;
                }
        }

        return mtdswap_handle_badblock(d, eb);
}

static int mtdswap_read_oob(struct mtdswap_dev *d, loff_t from,
                        struct mtd_oob_ops *ops)
{
        int ret = mtd_read_oob(d->mtd, from, ops);

        if (mtd_is_bitflip(ret))
                return ret;

        if (ret) {
                dev_warn(d->dev, "Read OOB failed %d for block at %08llx\n",
                        ret, from);
                return ret;
        }

        if (ops->oobretlen < ops->ooblen) {
                dev_warn(d->dev, "Read OOB returned short read (%zd bytes not "
                        "%zd) for block at %08llx\n",
                        ops->oobretlen, ops->ooblen, from);
                return -EIO;
        }

        return 0;
}

static int mtdswap_read_markers(struct mtdswap_dev *d, struct swap_eb *eb)
{
        struct mtdswap_oobdata *data, *data2;
        int ret;
        loff_t offset;
        struct mtd_oob_ops ops;

        offset = mtdswap_eb_offset(d, eb);

        /* Check first if the block is bad. */
        if (mtd_can_have_bb(d->mtd) && mtd_block_isbad(d->mtd, offset))
                return MTDSWAP_SCANNED_BAD;

        ops.ooblen = 2 * d->mtd->oobavail;
        ops.oobbuf = d->oob_buf;
        ops.ooboffs = 0;
        ops.datbuf = NULL;
        ops.mode = MTD_OPS_AUTO_OOB;

        ret = mtdswap_read_oob(d, offset, &ops);

        if (ret && !mtd_is_bitflip(ret))
                return ret;

        data = (struct mtdswap_oobdata *)d->oob_buf;
        data2 = (struct mtdswap_oobdata *)
                (d->oob_buf + d->mtd->oobavail);

        if (le16_to_cpu(data->magic) == MTDSWAP_MAGIC_CLEAN) {
                eb->erase_count = le32_to_cpu(data->count);
                if (mtd_is_bitflip(ret))
                        ret = MTDSWAP_SCANNED_BITFLIP;
                else {
                        if (le16_to_cpu(data2->magic) == MTDSWAP_MAGIC_DIRTY)
                                ret = MTDSWAP_SCANNED_DIRTY;
                        else
                                ret = MTDSWAP_SCANNED_CLEAN;
                }
        } else {
                eb->flags |= EBLOCK_NOMAGIC;
                ret = MTDSWAP_SCANNED_DIRTY;
        }

        return ret;
}

static int mtdswap_write_marker(struct mtdswap_dev *d, struct swap_eb *eb,
                                u16 marker)
{
        struct mtdswap_oobdata n;
        int ret;
        loff_t offset;
        struct mtd_oob_ops ops;

        ops.ooboffs = 0;
        ops.oobbuf = (uint8_t *)&n;
        ops.mode = MTD_OPS_AUTO_OOB;
        ops.datbuf = NULL;

        if (marker == MTDSWAP_TYPE_CLEAN) {
                n.magic = cpu_to_le16(MTDSWAP_MAGIC_CLEAN);
                n.count = cpu_to_le32(eb->erase_count);
                ops.ooblen = MTDSWAP_OOBSIZE;
                offset = mtdswap_eb_offset(d, eb);
        } else {
                n.magic = cpu_to_le16(MTDSWAP_MAGIC_DIRTY);
                ops.ooblen = sizeof(n.magic);
                offset = mtdswap_eb_offset(d, eb) + d->mtd->writesize;
        }

        ret = mtd_write_oob(d->mtd, offset, &ops);

        if (ret) {
                dev_warn(d->dev, "Write OOB failed for block at %08llx "
                        "error %d\n", offset, ret);
                if (ret == -EIO || mtd_is_eccerr(ret))
                        mtdswap_handle_write_error(d, eb);
                return ret;
        }

        if (ops.oobretlen != ops.ooblen) {
                dev_warn(d->dev, "Short OOB write for block at %08llx: "
                        "%zd not %zd\n",
                        offset, ops.oobretlen, ops.ooblen);
                return -EIO;
        }

        return 0;
}

/*
 * Are there any erase blocks without a MAGIC_CLEAN header, presumably
 * because power was cut off after the erase but before the header was
 * written? If so, we need to guesstimate their erase counts.
 */
static void mtdswap_check_counts(struct mtdswap_dev *d)
{
        struct rb_root hist_root = RB_ROOT;
        struct rb_node *medrb;
        struct swap_eb *eb;
        unsigned int i, cnt, median;

        cnt = 0;
        for (i = 0; i < d->eblks; i++) {
                eb = d->eb_data + i;

                if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_BAD | EBLOCK_READERR))
                        continue;

                __mtdswap_rb_add(&hist_root, eb);
                cnt++;
        }

        if (cnt == 0)
                return;

        medrb = mtdswap_rb_index(&hist_root, cnt / 2);
        median = rb_entry(medrb, struct swap_eb, rb)->erase_count;

        d->max_erase_count = MTDSWAP_ECNT_MAX(&hist_root);

        for (i = 0; i < d->eblks; i++) {
                eb = d->eb_data + i;

                if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_READERR))
                        eb->erase_count = median;

                if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_BAD | EBLOCK_READERR))
                        continue;

                rb_erase(&eb->rb, &hist_root);
        }
}

static void mtdswap_scan_eblks(struct mtdswap_dev *d)
{
        int status;
        unsigned int i, idx;
        struct swap_eb *eb;

        for (i = 0; i < d->eblks; i++) {
                eb = d->eb_data + i;

                status = mtdswap_read_markers(d, eb);
                if (status < 0)
                        eb->flags |= EBLOCK_READERR;
                else if (status == MTDSWAP_SCANNED_BAD) {
                        eb->flags |= EBLOCK_BAD;
                        continue;
                }

                switch (status) {
                case MTDSWAP_SCANNED_CLEAN:
                        idx = MTDSWAP_CLEAN;
                        break;
                case MTDSWAP_SCANNED_DIRTY:
                case MTDSWAP_SCANNED_BITFLIP:
                        idx = MTDSWAP_DIRTY;
                        break;
                default:
                        idx = MTDSWAP_FAILING;
                }

                eb->flags |= (idx << EBLOCK_IDX_SHIFT);
        }

        mtdswap_check_counts(d);

        for (i = 0; i < d->eblks; i++) {
                eb = d->eb_data + i;

                if (eb->flags & EBLOCK_BAD)
                        continue;

                idx = eb->flags >> EBLOCK_IDX_SHIFT;
                mtdswap_rb_add(d, eb, idx);
        }
}

/*
 * Place an eraseblock into a tree corresponding to the number of active
 * pages it contains.
 */
static void mtdswap_store_eb(struct mtdswap_dev *d, struct swap_eb *eb)
{
        unsigned int weight = eb->active_count;
        unsigned int maxweight = d->pages_per_eblk;

        if (eb == d->curr_write)
                return;

        if (eb->flags & EBLOCK_BITFLIP)
                mtdswap_rb_add(d, eb, MTDSWAP_BITFLIP);
        else if (eb->flags & (EBLOCK_READERR | EBLOCK_FAILED))
                mtdswap_rb_add(d, eb, MTDSWAP_FAILING);
        else if (weight == maxweight)
                mtdswap_rb_add(d, eb, MTDSWAP_USED);
        else if (weight == 0)
                mtdswap_rb_add(d, eb, MTDSWAP_DIRTY);
        else if (weight > (maxweight/2))
                mtdswap_rb_add(d, eb, MTDSWAP_LOWFRAG);
        else
                mtdswap_rb_add(d, eb, MTDSWAP_HIFRAG);
}

static int mtdswap_erase_block(struct mtdswap_dev *d, struct swap_eb *eb)
{
        struct mtd_info *mtd = d->mtd;
        struct erase_info erase;
        unsigned int retries = 0;
        int ret;

        eb->erase_count++;
        if (eb->erase_count > d->max_erase_count)
                d->max_erase_count = eb->erase_count;

retry:
        memset(&erase, 0, sizeof(struct erase_info));
        erase.addr      = mtdswap_eb_offset(d, eb);
        erase.len       = mtd->erasesize;

        ret = mtd_erase(mtd, &erase);
        if (ret) {
                if (retries++ < MTDSWAP_ERASE_RETRIES) {
                        dev_warn(d->dev,
                                "erase of erase block %#llx on %s failed",
                                erase.addr, mtd->name);
                        yield();
                        goto retry;
                }

                dev_err(d->dev, "Cannot erase erase block %#llx on %s\n",
                        erase.addr, mtd->name);

                mtdswap_handle_badblock(d, eb);
                return -EIO;
        }

        return 0;
}

static int mtdswap_map_free_block(struct mtdswap_dev *d, unsigned int page,
                                unsigned int *block)
{
        int ret;
        struct swap_eb *old_eb = d->curr_write;
        struct rb_root *clean_root;
        struct swap_eb *eb;

        if (old_eb == NULL || d->curr_write_pos >= d->pages_per_eblk) {
                do {
                        if (TREE_EMPTY(d, CLEAN))
                                return -ENOSPC;

                        clean_root = TREE_ROOT(d, CLEAN);
                        eb = rb_entry(rb_first(clean_root), struct swap_eb, rb);
                        rb_erase(&eb->rb, clean_root);
                        eb->root = NULL;
                        TREE_COUNT(d, CLEAN)--;

                        ret = mtdswap_write_marker(d, eb, MTDSWAP_TYPE_DIRTY);
                } while (ret == -EIO || mtd_is_eccerr(ret));

                if (ret)
                        return ret;

                d->curr_write_pos = 0;
                d->curr_write = eb;
                if (old_eb)
                        mtdswap_store_eb(d, old_eb);
        }

        *block = (d->curr_write - d->eb_data) * d->pages_per_eblk +
                d->curr_write_pos;

        d->curr_write->active_count++;
        d->revmap[*block] = page;
        d->curr_write_pos++;

        return 0;
}
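
/*
 * Worked example (editor's note): block numbers are dense page slots.
 * With 64 KiB erase blocks and 4 KiB pages (pages_per_eblk == 16), slot 3
 * of erase block 5 becomes block 5 * 16 + 3 == 83, and block 83 maps back
 * to eb_data[83 / 16] == eb_data[5].
 */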

static unsigned int mtdswap_free_page_cnt(struct mtdswap_dev *d)
{
        return TREE_COUNT(d, CLEAN) * d->pages_per_eblk +
                d->pages_per_eblk - d->curr_write_pos;
}
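
/*
 * Worked example (editor's note): with pages_per_eblk == 16, four clean
 * blocks and curr_write_pos == 10, there are 4 * 16 + (16 - 10) == 70
 * free page slots; mtdswap_enough_free_pages() below requires more than
 * one erase block's worth (here > 16) of them.
 */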

static unsigned int mtdswap_enough_free_pages(struct mtdswap_dev *d)
{
        return mtdswap_free_page_cnt(d) > d->pages_per_eblk;
}

static int mtdswap_write_block(struct mtdswap_dev *d, char *buf,
                        unsigned int page, unsigned int *bp, int gc_context)
{
        struct mtd_info *mtd = d->mtd;
        struct swap_eb *eb;
        size_t retlen;
        loff_t writepos;
        int ret;

retry:
        if (!gc_context)
                while (!mtdswap_enough_free_pages(d))
                        if (mtdswap_gc(d, 0) > 0)
                                return -ENOSPC;

        ret = mtdswap_map_free_block(d, page, bp);
        eb = d->eb_data + (*bp / d->pages_per_eblk);

        if (ret == -EIO || mtd_is_eccerr(ret)) {
                d->curr_write = NULL;
                eb->active_count--;
                d->revmap[*bp] = PAGE_UNDEF;
                goto retry;
        }

        if (ret < 0)
                return ret;

        writepos = (loff_t)*bp << PAGE_SHIFT;
        ret = mtd_write(mtd, writepos, PAGE_SIZE, &retlen, buf);
        if (ret == -EIO || mtd_is_eccerr(ret)) {
                d->curr_write_pos--;
                eb->active_count--;
                d->revmap[*bp] = PAGE_UNDEF;
                mtdswap_handle_write_error(d, eb);
                goto retry;
        }

        if (ret < 0) {
                dev_err(d->dev, "Write to MTD device failed: %d (%zd written)",
                        ret, retlen);
                goto err;
        }

        if (retlen != PAGE_SIZE) {
                dev_err(d->dev, "Short write to MTD device: %zd written",
                        retlen);
                ret = -EIO;
                goto err;
        }

        return ret;

err:
        d->curr_write_pos--;
        eb->active_count--;
        d->revmap[*bp] = PAGE_UNDEF;

        return ret;
}

static int mtdswap_move_block(struct mtdswap_dev *d, unsigned int oldblock,
                unsigned int *newblock)
{
        struct mtd_info *mtd = d->mtd;
        struct swap_eb *eb, *oldeb;
        int ret;
        size_t retlen;
        unsigned int page, retries;
        loff_t readpos;

        page = d->revmap[oldblock];
        readpos = (loff_t) oldblock << PAGE_SHIFT;
        retries = 0;

retry:
        ret = mtd_read(mtd, readpos, PAGE_SIZE, &retlen, d->page_buf);

        if (ret < 0 && !mtd_is_bitflip(ret)) {
                oldeb = d->eb_data + oldblock / d->pages_per_eblk;
                oldeb->flags |= EBLOCK_READERR;

                dev_err(d->dev, "Read Error: %d (block %u)\n", ret,
                        oldblock);
                retries++;
                if (retries < MTDSWAP_IO_RETRIES)
                        goto retry;

                goto read_error;
        }

        if (retlen != PAGE_SIZE) {
                dev_err(d->dev, "Short read: %zd (block %u)\n", retlen,
                       oldblock);
                ret = -EIO;
                goto read_error;
        }

        ret = mtdswap_write_block(d, d->page_buf, page, newblock, 1);
        if (ret < 0) {
                d->page_data[page] = BLOCK_ERROR;
                dev_err(d->dev, "Write error: %d\n", ret);
                return ret;
        }

        eb = d->eb_data + *newblock / d->pages_per_eblk;
        d->page_data[page] = *newblock;
        d->revmap[oldblock] = PAGE_UNDEF;
        eb = d->eb_data + oldblock / d->pages_per_eblk;
        eb->active_count--;

        return 0;

read_error:
        d->page_data[page] = BLOCK_ERROR;
        d->revmap[oldblock] = PAGE_UNDEF;
        return ret;
}

static int mtdswap_gc_eblock(struct mtdswap_dev *d, struct swap_eb *eb)
{
        unsigned int i, block, eblk_base, newblock;
        int ret, errcode;

        errcode = 0;
        eblk_base = (eb - d->eb_data) * d->pages_per_eblk;

        for (i = 0; i < d->pages_per_eblk; i++) {
                if (d->spare_eblks < MIN_SPARE_EBLOCKS)
                        return -ENOSPC;

                block = eblk_base + i;
                if (d->revmap[block] == PAGE_UNDEF)
                        continue;

                ret = mtdswap_move_block(d, block, &newblock);
                if (ret < 0 && !errcode)
                        errcode = ret;
        }

        return errcode;
}

static int __mtdswap_choose_gc_tree(struct mtdswap_dev *d)
{
        int idx, stopat;

        if (TREE_COUNT(d, CLEAN) < LOW_FRAG_GC_THRESHOLD)
                stopat = MTDSWAP_LOWFRAG;
        else
                stopat = MTDSWAP_HIFRAG;

        for (idx = MTDSWAP_BITFLIP; idx >= stopat; idx--)
                if (d->trees[idx].root.rb_node != NULL)
                        return idx;

        return -1;
}

static int mtdswap_wlfreq(unsigned int maxdiff)
{
        unsigned int h, x, y, dist, base;

        /*
         * Calculate a linear ramp down from f1 to f2 as maxdiff goes from
         * MAX_ERASE_DIFF to MAX_ERASE_DIFF + COLLECT_NONDIRTY_BASE. Similar
         * to a triangle with height f1 - f2 and width COLLECT_NONDIRTY_BASE.
         */

        dist = maxdiff - MAX_ERASE_DIFF;
        if (dist > COLLECT_NONDIRTY_BASE)
                dist = COLLECT_NONDIRTY_BASE;

        /*
         * Modelling the slope as a right-angled triangle with base
         * COLLECT_NONDIRTY_BASE and height freq1 - freq2. The ratio y/x is
         * equal to the ratio h/base.
         */
        h = COLLECT_NONDIRTY_FREQ1 - COLLECT_NONDIRTY_FREQ2;
        base = COLLECT_NONDIRTY_BASE;

        /* x is the distance left before the ramp bottoms out; everything
         * here is unsigned, so the subtraction must go this way around. */
        x = base - dist;
        y = (x * h + base / 2) / base;

        return COLLECT_NONDIRTY_FREQ2 + y;
}
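
/*
 * Worked example (editor's note), with the defaults above (f1 = 6, f2 = 4,
 * base = 4000): maxdiff = 4000 gives dist = 0, x = 4000, y = 2 -> freq 6;
 * maxdiff = 6000 gives dist = 2000, x = 2000, y = 1 -> freq 5; and any
 * maxdiff >= 8000 clamps dist to 4000, so x = 0, y = 0 -> freq 4.
 */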

static int mtdswap_choose_wl_tree(struct mtdswap_dev *d)
{
        static unsigned int pick_cnt;
        unsigned int i, idx = -1, wear, max;
        struct rb_root *root;

        max = 0;
        for (i = 0; i <= MTDSWAP_DIRTY; i++) {
                root = &d->trees[i].root;
                if (root->rb_node == NULL)
                        continue;

                wear = d->max_erase_count - MTDSWAP_ECNT_MIN(root);
                if (wear > max) {
                        max = wear;
                        idx = i;
                }
        }

        if (max > MAX_ERASE_DIFF && pick_cnt >= mtdswap_wlfreq(max) - 1) {
                pick_cnt = 0;
                return idx;
        }

        pick_cnt++;
        return -1;
}

static int mtdswap_choose_gc_tree(struct mtdswap_dev *d,
                                unsigned int background)
{
        int idx;

        if (TREE_NONEMPTY(d, FAILING) &&
                (background || (TREE_EMPTY(d, CLEAN) && TREE_EMPTY(d, DIRTY))))
                return MTDSWAP_FAILING;

        idx = mtdswap_choose_wl_tree(d);
        if (idx >= MTDSWAP_CLEAN)
                return idx;

        return __mtdswap_choose_gc_tree(d);
}

static struct swap_eb *mtdswap_pick_gc_eblk(struct mtdswap_dev *d,
                                        unsigned int background)
{
        struct rb_root *rp = NULL;
        struct swap_eb *eb = NULL;
        int idx;

        if (background && TREE_COUNT(d, CLEAN) > CLEAN_BLOCK_THRESHOLD &&
                TREE_EMPTY(d, DIRTY) && TREE_EMPTY(d, FAILING))
                return NULL;

        idx = mtdswap_choose_gc_tree(d, background);
        if (idx < 0)
                return NULL;

        rp = &d->trees[idx].root;
        eb = rb_entry(rb_first(rp), struct swap_eb, rb);

        rb_erase(&eb->rb, rp);
        eb->root = NULL;
        d->trees[idx].count--;
        return eb;
}

static unsigned int mtdswap_test_patt(unsigned int i)
{
        return i % 2 ? 0x55555555 : 0xAAAAAAAA;
}
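
/*
 * Editor's note: 0x55 and 0xAA are complementary alternating bit patterns
 * (01010101 / 10101010), so successive test passes drive every cell
 * through both states:
 *
 *	mtdswap_test_patt(0) == 0xAAAAAAAA
 *	mtdswap_test_patt(1) == 0x55555555
 */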

static unsigned int mtdswap_eblk_passes(struct mtdswap_dev *d,
                                        struct swap_eb *eb)
{
        struct mtd_info *mtd = d->mtd;
        unsigned int test, i, j, patt, mtd_pages;
        loff_t base, pos;
        unsigned int *p1 = (unsigned int *)d->page_buf;
        unsigned char *p2 = (unsigned char *)d->oob_buf;
        struct mtd_oob_ops ops;
        int ret;

        ops.mode = MTD_OPS_AUTO_OOB;
        ops.len = mtd->writesize;
        ops.ooblen = mtd->oobavail;
        ops.ooboffs = 0;
        ops.datbuf = d->page_buf;
        ops.oobbuf = d->oob_buf;
        base = mtdswap_eb_offset(d, eb);
        mtd_pages = d->pages_per_eblk * PAGE_SIZE / mtd->writesize;

        for (test = 0; test < 2; test++) {
                pos = base;
                for (i = 0; i < mtd_pages; i++) {
                        patt = mtdswap_test_patt(test + i);
                        memset(d->page_buf, patt, mtd->writesize);
                        memset(d->oob_buf, patt, mtd->oobavail);
                        ret = mtd_write_oob(mtd, pos, &ops);
                        if (ret)
                                goto error;

                        pos += mtd->writesize;
                }

                pos = base;
                for (i = 0; i < mtd_pages; i++) {
                        ret = mtd_read_oob(mtd, pos, &ops);
                        if (ret)
                                goto error;

                        patt = mtdswap_test_patt(test + i);
                        for (j = 0; j < mtd->writesize/sizeof(int); j++)
                                if (p1[j] != patt)
                                        goto error;

                        for (j = 0; j < mtd->oobavail; j++)
                                if (p2[j] != (unsigned char)patt)
                                        goto error;

                        pos += mtd->writesize;
                }

                ret = mtdswap_erase_block(d, eb);
                if (ret)
                        goto error;
        }

        eb->flags &= ~EBLOCK_READERR;
        return 1;

error:
        mtdswap_handle_badblock(d, eb);
        return 0;
}

static int mtdswap_gc(struct mtdswap_dev *d, unsigned int background)
{
        struct swap_eb *eb;
        int ret;

        if (d->spare_eblks < MIN_SPARE_EBLOCKS)
                return 1;

        eb = mtdswap_pick_gc_eblk(d, background);
        if (!eb)
                return 1;

        ret = mtdswap_gc_eblock(d, eb);
        if (ret == -ENOSPC)
                return 1;

        if (eb->flags & EBLOCK_FAILED) {
                mtdswap_handle_badblock(d, eb);
                return 0;
        }

        eb->flags &= ~EBLOCK_BITFLIP;
        ret = mtdswap_erase_block(d, eb);
        if ((eb->flags & EBLOCK_READERR) &&
                (ret || !mtdswap_eblk_passes(d, eb)))
                return 0;

        if (ret == 0)
                ret = mtdswap_write_marker(d, eb, MTDSWAP_TYPE_CLEAN);

        if (ret == 0)
                mtdswap_rb_add(d, eb, MTDSWAP_CLEAN);
        else if (ret != -EIO && !mtd_is_eccerr(ret))
                mtdswap_rb_add(d, eb, MTDSWAP_DIRTY);

        return 0;
}

static void mtdswap_background(struct mtd_blktrans_dev *dev)
{
        struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
        int ret;

        while (1) {
                ret = mtdswap_gc(d, 1);
                if (ret || mtd_blktrans_cease_background(dev))
                        return;
        }
}

static void mtdswap_cleanup(struct mtdswap_dev *d)
{
        vfree(d->eb_data);
        vfree(d->revmap);
        vfree(d->page_data);
        kfree(d->oob_buf);
        kfree(d->page_buf);
}

static int mtdswap_flush(struct mtd_blktrans_dev *dev)
{
        struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);

        mtd_sync(d->mtd);
        return 0;
}

static unsigned int mtdswap_badblocks(struct mtd_info *mtd, uint64_t size)
{
        loff_t offset;
        unsigned int badcnt;

        badcnt = 0;

        if (mtd_can_have_bb(mtd))
                for (offset = 0; offset < size; offset += mtd->erasesize)
                        if (mtd_block_isbad(mtd, offset))
                                badcnt++;

        return badcnt;
}

static int mtdswap_writesect(struct mtd_blktrans_dev *dev,
                        unsigned long page, char *buf)
{
        struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
        unsigned int newblock, mapped;
        struct swap_eb *eb;
        int ret;

        d->sect_write_count++;

        if (d->spare_eblks < MIN_SPARE_EBLOCKS)
                return -ENOSPC;

        if (header) {
                /* Ignore writes to the header page */
                if (unlikely(page == 0))
                        return 0;

                page--;
        }

        mapped = d->page_data[page];
        if (mapped <= BLOCK_MAX) {
                eb = d->eb_data + (mapped / d->pages_per_eblk);
                eb->active_count--;
                mtdswap_store_eb(d, eb);
                d->page_data[page] = BLOCK_UNDEF;
                d->revmap[mapped] = PAGE_UNDEF;
        }

        ret = mtdswap_write_block(d, buf, page, &newblock, 0);
        d->mtd_write_count++;

        if (ret < 0)
                return ret;

        eb = d->eb_data + (newblock / d->pages_per_eblk);
        d->page_data[page] = newblock;

        return 0;
}

/* Provide a dummy swap header for the kernel */
static int mtdswap_auto_header(struct mtdswap_dev *d, char *buf)
{
        union swap_header *hd = (union swap_header *)(buf);

        memset(buf, 0, PAGE_SIZE - 10);

        hd->info.version = 1;
        hd->info.last_page = d->mbd_dev->size - 1;
        hd->info.nr_badpages = 0;

        memcpy(buf + PAGE_SIZE - 10, "SWAPSPACE2", 10);

        return 0;
}
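
/*
 * Worked example (editor's note): the generated page mimics what mkswap
 * writes - a version-1 swap header with the "SWAPSPACE2" signature in the
 * last 10 bytes of the page. For a device exposing 65536 page-sized
 * sectors (256 MiB with 4 KiB pages), last_page would be 65535.
 */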

static int mtdswap_readsect(struct mtd_blktrans_dev *dev,
                        unsigned long page, char *buf)
{
        struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
        struct mtd_info *mtd = d->mtd;
        unsigned int realblock, retries;
        loff_t readpos;
        struct swap_eb *eb;
        size_t retlen;
        int ret;

        d->sect_read_count++;

        if (header) {
                if (unlikely(page == 0))
                        return mtdswap_auto_header(d, buf);

                page--;
        }

        realblock = d->page_data[page];
        if (realblock > BLOCK_MAX) {
                memset(buf, 0x0, PAGE_SIZE);
                if (realblock == BLOCK_UNDEF)
                        return 0;
                else
                        return -EIO;
        }

        eb = d->eb_data + (realblock / d->pages_per_eblk);
        BUG_ON(d->revmap[realblock] == PAGE_UNDEF);

        readpos = (loff_t)realblock << PAGE_SHIFT;
        retries = 0;

retry:
        ret = mtd_read(mtd, readpos, PAGE_SIZE, &retlen, buf);

        d->mtd_read_count++;
        if (mtd_is_bitflip(ret)) {
                eb->flags |= EBLOCK_BITFLIP;
                mtdswap_rb_add(d, eb, MTDSWAP_BITFLIP);
                ret = 0;
        }

        if (ret < 0) {
                dev_err(d->dev, "Read error %d\n", ret);
                eb->flags |= EBLOCK_READERR;
                mtdswap_rb_add(d, eb, MTDSWAP_FAILING);
                retries++;
                if (retries < MTDSWAP_IO_RETRIES)
                        goto retry;

                return ret;
        }

        if (retlen != PAGE_SIZE) {
                dev_err(d->dev, "Short read %zd\n", retlen);
                return -EIO;
        }

        return 0;
}

static int mtdswap_discard(struct mtd_blktrans_dev *dev, unsigned long first,
                        unsigned nr_pages)
{
        struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
        unsigned long page;
        struct swap_eb *eb;
        unsigned int mapped;

        d->discard_count++;

        for (page = first; page < first + nr_pages; page++) {
                mapped = d->page_data[page];
                if (mapped <= BLOCK_MAX) {
                        eb = d->eb_data + (mapped / d->pages_per_eblk);
                        eb->active_count--;
                        mtdswap_store_eb(d, eb);
                        d->page_data[page] = BLOCK_UNDEF;
                        d->revmap[mapped] = PAGE_UNDEF;
                        d->discard_page_count++;
                } else if (mapped == BLOCK_ERROR) {
                        d->page_data[page] = BLOCK_UNDEF;
                        d->discard_page_count++;
                }
        }

        return 0;
}

static int mtdswap_show(struct seq_file *s, void *data)
{
        struct mtdswap_dev *d = (struct mtdswap_dev *) s->private;
        unsigned long sum;
        unsigned int count[MTDSWAP_TREE_CNT];
        unsigned int min[MTDSWAP_TREE_CNT];
        unsigned int max[MTDSWAP_TREE_CNT];
        unsigned int i, cw = 0, cwp = 0, cwecount = 0, bb_cnt, mapped, pages;
        uint64_t use_size;
        static const char * const name[] = {
                "clean", "used", "low", "high", "dirty", "bitflip", "failing"
        };

        mutex_lock(&d->mbd_dev->lock);

        for (i = 0; i < MTDSWAP_TREE_CNT; i++) {
                struct rb_root *root = &d->trees[i].root;

                if (root->rb_node) {
                        count[i] = d->trees[i].count;
                        min[i] = MTDSWAP_ECNT_MIN(root);
                        max[i] = MTDSWAP_ECNT_MAX(root);
                } else
                        count[i] = 0;
        }

        if (d->curr_write) {
                cw = 1;
                cwp = d->curr_write_pos;
                cwecount = d->curr_write->erase_count;
        }

        sum = 0;
        for (i = 0; i < d->eblks; i++)
                sum += d->eb_data[i].erase_count;

        use_size = (uint64_t)d->eblks * d->mtd->erasesize;
        bb_cnt = mtdswap_badblocks(d->mtd, use_size);

        mapped = 0;
        pages = d->mbd_dev->size;
        for (i = 0; i < pages; i++)
                if (d->page_data[i] != BLOCK_UNDEF)
                        mapped++;

        mutex_unlock(&d->mbd_dev->lock);

        for (i = 0; i < MTDSWAP_TREE_CNT; i++) {
                if (!count[i])
                        continue;

                if (min[i] != max[i])
                        seq_printf(s, "%s:\t%5d erase blocks, erased min %d, "
                                "max %d times\n",
                                name[i], count[i], min[i], max[i]);
                else
                        seq_printf(s, "%s:\t%5d erase blocks, all erased %d "
                                "times\n", name[i], count[i], min[i]);
        }

        if (bb_cnt)
                seq_printf(s, "bad:\t%5u erase blocks\n", bb_cnt);

        if (cw)
                seq_printf(s, "current erase block: %u pages used, %u free, "
                        "erased %u times\n",
                        cwp, d->pages_per_eblk - cwp, cwecount);

        seq_printf(s, "total erasures: %lu\n", sum);

        seq_puts(s, "\n");

        seq_printf(s, "mtdswap_readsect count: %llu\n", d->sect_read_count);
        seq_printf(s, "mtdswap_writesect count: %llu\n", d->sect_write_count);
        seq_printf(s, "mtdswap_discard count: %llu\n", d->discard_count);
        seq_printf(s, "mtd read count: %llu\n", d->mtd_read_count);
        seq_printf(s, "mtd write count: %llu\n", d->mtd_write_count);
        seq_printf(s, "discarded pages count: %llu\n", d->discard_page_count);

        seq_puts(s, "\n");
        seq_printf(s, "total pages: %u\n", pages);
        seq_printf(s, "pages mapped: %u\n", mapped);

        return 0;
}
DEFINE_SHOW_ATTRIBUTE(mtdswap);

static int mtdswap_add_debugfs(struct mtdswap_dev *d)
{
        struct dentry *root = d->mtd->dbg.dfs_dir;
        struct dentry *dent;

        if (!IS_ENABLED(CONFIG_DEBUG_FS))
                return 0;

        if (IS_ERR_OR_NULL(root))
                return -1;

        dent = debugfs_create_file("mtdswap_stats", S_IRUSR, root, d,
                                &mtdswap_fops);
        if (!dent) {
                dev_err(d->dev, "debugfs_create_file failed\n");
                return -1;
        }

        return 0;
}
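
/*
 * Editor's note: with the usual MTD debugfs layout the statistics from
 * mtdswap_show() can then be read from
 * /sys/kernel/debug/mtd/mtd<N>/mtdswap_stats (path is an assumption based
 * on dbg.dfs_dir above).
 */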

static int mtdswap_init(struct mtdswap_dev *d, unsigned int eblocks,
                        unsigned int spare_cnt)
{
        struct mtd_info *mtd = d->mbd_dev->mtd;
        unsigned int i, eblk_bytes, pages, blocks;
        int ret = -ENOMEM;

        d->mtd = mtd;
        d->eblks = eblocks;
        d->spare_eblks = spare_cnt;
        d->pages_per_eblk = mtd->erasesize >> PAGE_SHIFT;

        pages = d->mbd_dev->size;
        blocks = eblocks * d->pages_per_eblk;

        for (i = 0; i < MTDSWAP_TREE_CNT; i++)
                d->trees[i].root = RB_ROOT;

        d->page_data = vmalloc(array_size(pages, sizeof(int)));
        if (!d->page_data)
                goto page_data_fail;

        d->revmap = vmalloc(array_size(blocks, sizeof(int)));
        if (!d->revmap)
                goto revmap_fail;

        eblk_bytes = sizeof(struct swap_eb)*d->eblks;
        d->eb_data = vzalloc(eblk_bytes);
        if (!d->eb_data)
                goto eb_data_fail;

        for (i = 0; i < pages; i++)
                d->page_data[i] = BLOCK_UNDEF;

        for (i = 0; i < blocks; i++)
                d->revmap[i] = PAGE_UNDEF;

        d->page_buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
        if (!d->page_buf)
                goto page_buf_fail;

        d->oob_buf = kmalloc_array(2, mtd->oobavail, GFP_KERNEL);
        if (!d->oob_buf)
                goto oob_buf_fail;

        mtdswap_scan_eblks(d);

        return 0;

oob_buf_fail:
        kfree(d->page_buf);
page_buf_fail:
        vfree(d->eb_data);
eb_data_fail:
        vfree(d->revmap);
revmap_fail:
        vfree(d->page_data);
page_data_fail:
        printk(KERN_ERR "%s: init failed (%d)\n", MTDSWAP_PREFIX, ret);
        return ret;
}

static void mtdswap_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
{
        struct mtdswap_dev *d;
        struct mtd_blktrans_dev *mbd_dev;
        char *parts;
        char *this_opt;
        unsigned long part;
        unsigned int eblocks, eavailable, bad_blocks, spare_cnt;
        uint64_t swap_size, use_size, size_limit;
        int ret;

        parts = &partitions[0];
        if (!*parts)
                return;

        while ((this_opt = strsep(&parts, ",")) != NULL) {
                if (kstrtoul(this_opt, 0, &part) < 0)
                        return;

                if (mtd->index == part)
                        break;
        }

        if (mtd->index != part)
                return;

        if (mtd->erasesize < PAGE_SIZE || mtd->erasesize % PAGE_SIZE) {
                printk(KERN_ERR "%s: Erase size %u not multiple of PAGE_SIZE "
                        "%lu\n", MTDSWAP_PREFIX, mtd->erasesize, PAGE_SIZE);
                return;
        }

        if (PAGE_SIZE % mtd->writesize || mtd->writesize > PAGE_SIZE) {
                printk(KERN_ERR "%s: PAGE_SIZE %lu not multiple of write size"
                        " %u\n", MTDSWAP_PREFIX, PAGE_SIZE, mtd->writesize);
                return;
        }

        if (!mtd->oobsize || mtd->oobavail < MTDSWAP_OOBSIZE) {
                printk(KERN_ERR "%s: Not enough free bytes in OOB, "
                        "%d available, %zu needed.\n",
                        MTDSWAP_PREFIX, mtd->oobavail, MTDSWAP_OOBSIZE);
                return;
        }

        if (spare_eblocks > 100)
                spare_eblocks = 100;

        use_size = mtd->size;
        size_limit = (uint64_t) BLOCK_MAX * PAGE_SIZE;

        if (mtd->size > size_limit) {
                printk(KERN_WARNING "%s: Device too large. Limiting size to "
                        "%llu bytes\n", MTDSWAP_PREFIX, size_limit);
                use_size = size_limit;
        }

        eblocks = mtd_div_by_eb(use_size, mtd);
        use_size = (uint64_t)eblocks * mtd->erasesize;
        bad_blocks = mtdswap_badblocks(mtd, use_size);
        eavailable = eblocks - bad_blocks;

        if (eavailable < MIN_ERASE_BLOCKS) {
                printk(KERN_ERR "%s: Not enough erase blocks. %u available, "
                        "%d needed\n", MTDSWAP_PREFIX, eavailable,
                        MIN_ERASE_BLOCKS);
                return;
        }

        spare_cnt = div_u64((uint64_t)eavailable * spare_eblocks, 100);

        if (spare_cnt < MIN_SPARE_EBLOCKS)
                spare_cnt = MIN_SPARE_EBLOCKS;

        if (spare_cnt > eavailable - 1)
                spare_cnt = eavailable - 1;

        swap_size = (uint64_t)(eavailable - spare_cnt) * mtd->erasesize +
                (header ? PAGE_SIZE : 0);
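
        /*
         * Worked example (editor's note): with 1000 usable erase blocks of
         * 128 KiB and the default spare_eblocks = 10, spare_cnt = 100 and
         * swap_size = 900 * 128 KiB = 115200 KiB, plus one page when the
         * built-in header is enabled.
         */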

        printk(KERN_INFO "%s: Enabling MTD swap on device %lu, size %llu KB, "
                "%u spare, %u bad blocks\n",
                MTDSWAP_PREFIX, part, swap_size / 1024, spare_cnt, bad_blocks);

        d = kzalloc(sizeof(struct mtdswap_dev), GFP_KERNEL);
        if (!d)
                return;

        mbd_dev = kzalloc(sizeof(struct mtd_blktrans_dev), GFP_KERNEL);
        if (!mbd_dev) {
                kfree(d);
                return;
        }

        d->mbd_dev = mbd_dev;
        mbd_dev->priv = d;

        mbd_dev->mtd = mtd;
        mbd_dev->devnum = mtd->index;
        mbd_dev->size = swap_size >> PAGE_SHIFT;
        mbd_dev->tr = tr;

        if (!(mtd->flags & MTD_WRITEABLE))
                mbd_dev->readonly = 1;

        if (mtdswap_init(d, eblocks, spare_cnt) < 0)
                goto init_failed;

        if (add_mtd_blktrans_dev(mbd_dev) < 0)
                goto cleanup;

        d->dev = disk_to_dev(mbd_dev->disk);

        ret = mtdswap_add_debugfs(d);
        if (ret < 0)
                goto debugfs_failed;

        return;

debugfs_failed:
        del_mtd_blktrans_dev(mbd_dev);

cleanup:
        mtdswap_cleanup(d);

init_failed:
        kfree(mbd_dev);
        kfree(d);
}

static void mtdswap_remove_dev(struct mtd_blktrans_dev *dev)
{
        struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);

        del_mtd_blktrans_dev(dev);
        mtdswap_cleanup(d);
        kfree(d);
}

static struct mtd_blktrans_ops mtdswap_ops = {
        .name           = "mtdswap",
        .major          = 0,
        .part_bits      = 0,
        .blksize        = PAGE_SIZE,
        .flush          = mtdswap_flush,
        .readsect       = mtdswap_readsect,
        .writesect      = mtdswap_writesect,
        .discard        = mtdswap_discard,
        .background     = mtdswap_background,
        .add_mtd        = mtdswap_add_mtd,
        .remove_dev     = mtdswap_remove_dev,
        .owner          = THIS_MODULE,
};

static int __init mtdswap_modinit(void)
{
        return register_mtd_blktrans(&mtdswap_ops);
}

static void __exit mtdswap_modexit(void)
{
        deregister_mtd_blktrans(&mtdswap_ops);
}

module_init(mtdswap_modinit);
module_exit(mtdswap_modexit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jarkko Lavinen <jarkko.lavinen@nokia.com>");
MODULE_DESCRIPTION("Block device access to an MTD suitable for using as "
                "swap space");