uboot/drivers/mtd/ubi/wl.c
   1/*
   2 * Copyright (c) International Business Machines Corp., 2006
   3 *
   4 * SPDX-License-Identifier:     GPL-2.0+
   5 *
   6 * Authors: Artem Bityutskiy (Битюцкий Артём), Thomas Gleixner
   7 */
   8
   9/*
  10 * UBI wear-leveling sub-system.
  11 *
  12 * This sub-system is responsible for wear-leveling. It works in terms of
  13 * physical eraseblocks and erase counters and knows nothing about logical
  14 * eraseblocks, volumes, etc. From this sub-system's perspective all physical
  15 * eraseblocks are of two types - used and free. Used physical eraseblocks are
  16 * those that were "get" by the 'ubi_wl_get_peb()' function, and free physical
  17 * eraseblocks are those that were put by the 'ubi_wl_put_peb()' function.
  18 *
   19 * Physical eraseblocks returned by 'ubi_wl_get_peb()' have only the erase counter
  20 * header. The rest of the physical eraseblock contains only %0xFF bytes.
  21 *
  22 * When physical eraseblocks are returned to the WL sub-system by means of the
  23 * 'ubi_wl_put_peb()' function, they are scheduled for erasure. The erasure is
  24 * done asynchronously in context of the per-UBI device background thread,
  25 * which is also managed by the WL sub-system.
  26 *
  27 * The wear-leveling is ensured by means of moving the contents of used
  28 * physical eraseblocks with low erase counter to free physical eraseblocks
  29 * with high erase counter.
  30 *
  31 * If the WL sub-system fails to erase a physical eraseblock, it marks it as
  32 * bad.
  33 *
  34 * This sub-system is also responsible for scrubbing. If a bit-flip is detected
  35 * in a physical eraseblock, it has to be moved. Technically this is the same
  36 * as moving it for wear-leveling reasons.
  37 *
  38 * As it was said, for the UBI sub-system all physical eraseblocks are either
   39 * "free" or "used". Free eraseblocks are kept in the @wl->free RB-tree, while
  40 * used eraseblocks are kept in @wl->used, @wl->erroneous, or @wl->scrub
  41 * RB-trees, as well as (temporarily) in the @wl->pq queue.
  42 *
  43 * When the WL sub-system returns a physical eraseblock, the physical
  44 * eraseblock is protected from being moved for some "time". For this reason,
  45 * the physical eraseblock is not directly moved from the @wl->free tree to the
  46 * @wl->used tree. There is a protection queue in between where this
  47 * physical eraseblock is temporarily stored (@wl->pq).
  48 *
  49 * All this protection stuff is needed because:
  50 *  o we don't want to move physical eraseblocks just after we have given them
  51 *    to the user; instead, we first want to let users fill them up with data;
  52 *
  53 *  o there is a chance that the user will put the physical eraseblock very
  54 *    soon, so it makes sense not to move it for some time, but wait.
  55 *
   56 * Physical eraseblocks stay protected only for a limited time. But the "time" is
   57 * measured in erase cycles in this case. This is implemented with the help of the
  58 * protection queue. Eraseblocks are put to the tail of this queue when they
  59 * are returned by the 'ubi_wl_get_peb()', and eraseblocks are removed from the
  60 * head of the queue on each erase operation (for any eraseblock). So the
   61 * length of the queue defines how many (global) erase cycles PEBs are protected.
  62 *
  63 * To put it differently, each physical eraseblock has 2 main states: free and
  64 * used. The former state corresponds to the @wl->free tree. The latter state
   65 * is split up into several sub-states:
  66 * o the WL movement is allowed (@wl->used tree);
  67 * o the WL movement is disallowed (@wl->erroneous) because the PEB is
  68 *   erroneous - e.g., there was a read error;
  69 * o the WL movement is temporarily prohibited (@wl->pq queue);
  70 * o scrubbing is needed (@wl->scrub tree).
  71 *
  72 * Depending on the sub-state, wear-leveling entries of the used physical
  73 * eraseblocks may be kept in one of those structures.
  74 *
  75 * Note, in this implementation, we keep a small in-RAM object for each physical
  76 * eraseblock. This is surely not a scalable solution. But it appears to be good
  77 * enough for moderately large flashes and it is simple. In future, one may
  78 * re-work this sub-system and make it more scalable.
  79 *
  80 * At the moment this sub-system does not utilize the sequence number, which
  81 * was introduced relatively recently. But it would be wise to do this because
   82 * the sequence number of a logical eraseblock characterizes how old it is. For
  83 * example, when we move a PEB with low erase counter, and we need to pick the
  84 * target PEB, we pick a PEB with the highest EC if our PEB is "old" and we
   85 * pick target PEB with an average EC if our PEB is not very "old". This is
   86 * room for future re-work of the WL sub-system.
  87 */
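
     /*
      * To illustrate the description above, a single PEB roughly cycles
      * through the following states (the fastmap pools used by some
      * configurations add one more indirection but do not change the
      * picture):
      *
      *   @wl->free  -- 'ubi_wl_get_peb()' --> @wl->pq (protected)
      *   @wl->pq    -- ~UBI_PROT_QUEUE_LEN erase operations --> @wl->used
      *   @wl->used  -- 'ubi_wl_put_peb()' or a WL move --> scheduled for erasure
      *   erased PEB -- erase worker --> back to @wl->free
      */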
  88
  89#ifndef __UBOOT__
  90#include <linux/slab.h>
  91#include <linux/crc32.h>
  92#include <linux/freezer.h>
  93#include <linux/kthread.h>
  94#else
  95#include <ubi_uboot.h>
  96#endif
  97
  98#include "ubi.h"
  99
 100/* Number of physical eraseblocks reserved for wear-leveling purposes */
 101#define WL_RESERVED_PEBS 1
 102
 103/*
 104 * Maximum difference between two erase counters. If this threshold is
 105 * exceeded, the WL sub-system starts moving data from used physical
 106 * eraseblocks with low erase counter to free physical eraseblocks with high
 107 * erase counter.
 108 */
 109#define UBI_WL_THRESHOLD CONFIG_MTD_UBI_WL_THRESHOLD
 110
 111/*
 112 * When a physical eraseblock is moved, the WL sub-system has to pick the target
 113 * physical eraseblock to move to. The simplest way would be just to pick the
 114 * one with the highest erase counter. But in certain workloads this could lead
  115 * to unlimited wear of one or a few physical eraseblocks. Indeed, imagine a
 116 * situation when the picked physical eraseblock is constantly erased after the
 117 * data is written to it. So, we have a constant which limits the highest erase
 118 * counter of the free physical eraseblock to pick. Namely, the WL sub-system
 119 * does not pick eraseblocks with erase counter greater than the lowest erase
 120 * counter plus %WL_FREE_MAX_DIFF.
 121 */
 122#define WL_FREE_MAX_DIFF (2*UBI_WL_THRESHOLD)
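
     /*
      * For illustration only, with assumed numbers: if
      * CONFIG_MTD_UBI_WL_THRESHOLD is 4096 (a commonly used default),
      * WL_FREE_MAX_DIFF is 8192. With a smallest free erase counter of 100,
      * 'find_wl_entry()' called with %WL_FREE_MAX_DIFF will therefore never
      * hand out a free PEB whose erase counter is 8292 or higher.
      */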
 123
 124/*
 125 * Maximum number of consecutive background thread failures which is enough to
 126 * switch to read-only mode.
 127 */
 128#define WL_MAX_FAILURES 32
 129
 130static int self_check_ec(struct ubi_device *ubi, int pnum, int ec);
 131static int self_check_in_wl_tree(const struct ubi_device *ubi,
 132                                 struct ubi_wl_entry *e, struct rb_root *root);
 133static int self_check_in_pq(const struct ubi_device *ubi,
 134                            struct ubi_wl_entry *e);
 135
 136#ifdef CONFIG_MTD_UBI_FASTMAP
 137#ifndef __UBOOT__
 138/**
 139 * update_fastmap_work_fn - calls ubi_update_fastmap from a work queue
 140 * @wrk: the work description object
 141 */
 142static void update_fastmap_work_fn(struct work_struct *wrk)
 143{
 144        struct ubi_device *ubi = container_of(wrk, struct ubi_device, fm_work);
 145        ubi_update_fastmap(ubi);
 146}
 147#endif
 148
 149/**
  150 * ubi_is_fm_block - returns 1 if a PEB is currently used in a fastmap.
  151 * @ubi: UBI device description object
  152 * @pnum: the PEB to be checked
 153 */
 154static int ubi_is_fm_block(struct ubi_device *ubi, int pnum)
 155{
 156        int i;
 157
 158        if (!ubi->fm)
 159                return 0;
 160
 161        for (i = 0; i < ubi->fm->used_blocks; i++)
 162                if (ubi->fm->e[i]->pnum == pnum)
 163                        return 1;
 164
 165        return 0;
 166}
 167#else
 168static int ubi_is_fm_block(struct ubi_device *ubi, int pnum)
 169{
 170        return 0;
 171}
 172#endif
 173
 174/**
 175 * wl_tree_add - add a wear-leveling entry to a WL RB-tree.
 176 * @e: the wear-leveling entry to add
 177 * @root: the root of the tree
 178 *
 179 * Note, we use (erase counter, physical eraseblock number) pairs as keys in
 180 * the @ubi->used and @ubi->free RB-trees.
 181 */
 182static void wl_tree_add(struct ubi_wl_entry *e, struct rb_root *root)
 183{
 184        struct rb_node **p, *parent = NULL;
 185
 186        p = &root->rb_node;
 187        while (*p) {
 188                struct ubi_wl_entry *e1;
 189
 190                parent = *p;
 191                e1 = rb_entry(parent, struct ubi_wl_entry, u.rb);
 192
 193                if (e->ec < e1->ec)
 194                        p = &(*p)->rb_left;
 195                else if (e->ec > e1->ec)
 196                        p = &(*p)->rb_right;
 197                else {
 198                        ubi_assert(e->pnum != e1->pnum);
 199                        if (e->pnum < e1->pnum)
 200                                p = &(*p)->rb_left;
 201                        else
 202                                p = &(*p)->rb_right;
 203                }
 204        }
 205
 206        rb_link_node(&e->u.rb, parent, p);
 207        rb_insert_color(&e->u.rb, root);
 208}
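
     /*
      * Example of the resulting ordering (the values are illustrative):
      * entries are sorted by erase counter first and by PEB number second,
      * so (EC 5, PEB 12) < (EC 5, PEB 30) < (EC 7, PEB 3). rb_first() of
      * such a tree therefore yields the least worn entry, with ties broken
      * by the lowest physical eraseblock number.
      */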
 209
 210/**
 211 * do_work - do one pending work.
 212 * @ubi: UBI device description object
 213 *
 214 * This function returns zero in case of success and a negative error code in
 215 * case of failure.
 216 */
 217static int do_work(struct ubi_device *ubi)
 218{
 219        int err;
 220        struct ubi_work *wrk;
 221
 222        cond_resched();
 223
 224        /*
 225         * @ubi->work_sem is used to synchronize with the workers. Workers take
 226         * it in read mode, so many of them may be doing works at a time. But
 227         * the queue flush code has to be sure the whole queue of works is
 228         * done, and it takes the mutex in write mode.
 229         */
 230        down_read(&ubi->work_sem);
 231        spin_lock(&ubi->wl_lock);
 232        if (list_empty(&ubi->works)) {
 233                spin_unlock(&ubi->wl_lock);
 234                up_read(&ubi->work_sem);
 235                return 0;
 236        }
 237
 238        wrk = list_entry(ubi->works.next, struct ubi_work, list);
 239        list_del(&wrk->list);
 240        ubi->works_count -= 1;
 241        ubi_assert(ubi->works_count >= 0);
 242        spin_unlock(&ubi->wl_lock);
 243
 244        /*
 245         * Call the worker function. Do not touch the work structure
 246         * after this call as it will have been freed or reused by that
 247         * time by the worker function.
 248         */
 249        err = wrk->func(ubi, wrk, 0);
 250        if (err)
 251                ubi_err("work failed with error code %d", err);
 252        up_read(&ubi->work_sem);
 253
 254        return err;
 255}
 256
 257/**
 258 * produce_free_peb - produce a free physical eraseblock.
 259 * @ubi: UBI device description object
 260 *
 261 * This function tries to make a free PEB by means of synchronous execution of
  262 * pending works. This may be needed if, for example, the background thread is
 263 * disabled. Returns zero in case of success and a negative error code in case
 264 * of failure.
 265 */
 266static int produce_free_peb(struct ubi_device *ubi)
 267{
 268        int err;
 269
 270        while (!ubi->free.rb_node) {
 271                spin_unlock(&ubi->wl_lock);
 272
 273                dbg_wl("do one work synchronously");
 274                err = do_work(ubi);
 275
 276                spin_lock(&ubi->wl_lock);
 277                if (err)
 278                        return err;
 279        }
 280
 281        return 0;
 282}
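
     /*
      * Note on the locking above: @ubi->wl_lock is dropped around the
      * 'do_work()' call because the worker functions (e.g. 'erase_worker()')
      * take the same spinlock themselves; the caller of 'produce_free_peb()'
      * holds @ubi->wl_lock, and the lock is re-taken before the loop
      * condition is evaluated again.
      */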
 283
 284/**
 285 * in_wl_tree - check if wear-leveling entry is present in a WL RB-tree.
 286 * @e: the wear-leveling entry to check
 287 * @root: the root of the tree
 288 *
 289 * This function returns non-zero if @e is in the @root RB-tree and zero if it
 290 * is not.
 291 */
 292static int in_wl_tree(struct ubi_wl_entry *e, struct rb_root *root)
 293{
 294        struct rb_node *p;
 295
 296        p = root->rb_node;
 297        while (p) {
 298                struct ubi_wl_entry *e1;
 299
 300                e1 = rb_entry(p, struct ubi_wl_entry, u.rb);
 301
 302                if (e->pnum == e1->pnum) {
 303                        ubi_assert(e == e1);
 304                        return 1;
 305                }
 306
 307                if (e->ec < e1->ec)
 308                        p = p->rb_left;
 309                else if (e->ec > e1->ec)
 310                        p = p->rb_right;
 311                else {
 312                        ubi_assert(e->pnum != e1->pnum);
 313                        if (e->pnum < e1->pnum)
 314                                p = p->rb_left;
 315                        else
 316                                p = p->rb_right;
 317                }
 318        }
 319
 320        return 0;
 321}
 322
 323/**
 324 * prot_queue_add - add physical eraseblock to the protection queue.
 325 * @ubi: UBI device description object
 326 * @e: the physical eraseblock to add
 327 *
 328 * This function adds @e to the tail of the protection queue @ubi->pq, where
 329 * @e will stay for %UBI_PROT_QUEUE_LEN erase operations and will be
 330 * temporarily protected from the wear-leveling worker. Note, @wl->lock has to
 331 * be locked.
 332 */
 333static void prot_queue_add(struct ubi_device *ubi, struct ubi_wl_entry *e)
 334{
 335        int pq_tail = ubi->pq_head - 1;
 336
 337        if (pq_tail < 0)
 338                pq_tail = UBI_PROT_QUEUE_LEN - 1;
 339        ubi_assert(pq_tail >= 0 && pq_tail < UBI_PROT_QUEUE_LEN);
 340        list_add_tail(&e->u.list, &ubi->pq[pq_tail]);
 341        dbg_wl("added PEB %d EC %d to the protection queue", e->pnum, e->ec);
 342}
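
     /*
      * A small sketch of the indexing above, assuming %UBI_PROT_QUEUE_LEN is
      * 10 (the value used by mainline UBI): with @ubi->pq_head at 0, a newly
      * protected PEB is appended to list @ubi->pq[9]. 'serve_prot_queue()'
      * drains @ubi->pq[pq_head] after every erase and then advances
      * @ubi->pq_head, so this entry is released after roughly
      * %UBI_PROT_QUEUE_LEN erase operations, as described above.
      */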
 343
 344/**
 345 * find_wl_entry - find wear-leveling entry closest to certain erase counter.
 346 * @ubi: UBI device description object
 347 * @root: the RB-tree where to look for
 348 * @diff: maximum possible difference from the smallest erase counter
 349 *
 350 * This function looks for a wear leveling entry with erase counter closest to
 351 * min + @diff, where min is the smallest erase counter.
 352 */
 353static struct ubi_wl_entry *find_wl_entry(struct ubi_device *ubi,
 354                                          struct rb_root *root, int diff)
 355{
 356        struct rb_node *p;
 357        struct ubi_wl_entry *e, *prev_e = NULL;
 358        int max;
 359
 360        e = rb_entry(rb_first(root), struct ubi_wl_entry, u.rb);
 361        max = e->ec + diff;
 362
 363        p = root->rb_node;
 364        while (p) {
 365                struct ubi_wl_entry *e1;
 366
 367                e1 = rb_entry(p, struct ubi_wl_entry, u.rb);
 368                if (e1->ec >= max)
 369                        p = p->rb_left;
 370                else {
 371                        p = p->rb_right;
 372                        prev_e = e;
 373                        e = e1;
 374                }
 375        }
 376
 377        /* If no fastmap has been written and this WL entry can be used
 378         * as anchor PEB, hold it back and return the second best WL entry
 379         * such that fastmap can use the anchor PEB later. */
 380        if (prev_e && !ubi->fm_disabled &&
 381            !ubi->fm && e->pnum < UBI_FM_MAX_START)
 382                return prev_e;
 383
 384        return e;
 385}
 386
 387/**
 388 * find_mean_wl_entry - find wear-leveling entry with medium erase counter.
 389 * @ubi: UBI device description object
 390 * @root: the RB-tree where to look for
 391 *
  392 * This function looks for a wear leveling entry with a medium erase counter,
  393 * but not greater than or equal to the lowest erase counter plus
  394 * %WL_FREE_MAX_DIFF/2.
 395 */
 396static struct ubi_wl_entry *find_mean_wl_entry(struct ubi_device *ubi,
 397                                               struct rb_root *root)
 398{
 399        struct ubi_wl_entry *e, *first, *last;
 400
 401        first = rb_entry(rb_first(root), struct ubi_wl_entry, u.rb);
 402        last = rb_entry(rb_last(root), struct ubi_wl_entry, u.rb);
 403
 404        if (last->ec - first->ec < WL_FREE_MAX_DIFF) {
 405                e = rb_entry(root->rb_node, struct ubi_wl_entry, u.rb);
 406
 407#ifdef CONFIG_MTD_UBI_FASTMAP
 408                /* If no fastmap has been written and this WL entry can be used
 409                 * as anchor PEB, hold it back and return the second best
 410                 * WL entry such that fastmap can use the anchor PEB later. */
 411                if (e && !ubi->fm_disabled && !ubi->fm &&
 412                    e->pnum < UBI_FM_MAX_START)
 413                        e = rb_entry(rb_next(root->rb_node),
 414                                     struct ubi_wl_entry, u.rb);
 415#endif
 416        } else
 417                e = find_wl_entry(ubi, root, WL_FREE_MAX_DIFF/2);
 418
 419        return e;
 420}
 421
 422#ifdef CONFIG_MTD_UBI_FASTMAP
 423/**
  424 * find_anchor_wl_entry - find a wear-leveling entry to be used as anchor PEB.
 425 * @root: the RB-tree where to look for
 426 */
 427static struct ubi_wl_entry *find_anchor_wl_entry(struct rb_root *root)
 428{
 429        struct rb_node *p;
 430        struct ubi_wl_entry *e, *victim = NULL;
 431        int max_ec = UBI_MAX_ERASECOUNTER;
 432
 433        ubi_rb_for_each_entry(p, e, root, u.rb) {
 434                if (e->pnum < UBI_FM_MAX_START && e->ec < max_ec) {
 435                        victim = e;
 436                        max_ec = e->ec;
 437                }
 438        }
 439
 440        return victim;
 441}
 442
 443static int anchor_pebs_avalible(struct rb_root *root)
 444{
 445        struct rb_node *p;
 446        struct ubi_wl_entry *e;
 447
 448        ubi_rb_for_each_entry(p, e, root, u.rb)
 449                if (e->pnum < UBI_FM_MAX_START)
 450                        return 1;
 451
 452        return 0;
 453}
 454
 455/**
 456 * ubi_wl_get_fm_peb - find a physical erase block with a given maximal number.
 457 * @ubi: UBI device description object
 458 * @anchor: This PEB will be used as anchor PEB by fastmap
 459 *
 460 * The function returns a physical erase block with a given maximal number
 461 * and removes it from the wl subsystem.
 462 * Must be called with wl_lock held!
 463 */
 464struct ubi_wl_entry *ubi_wl_get_fm_peb(struct ubi_device *ubi, int anchor)
 465{
 466        struct ubi_wl_entry *e = NULL;
 467
 468        if (!ubi->free.rb_node || (ubi->free_count - ubi->beb_rsvd_pebs < 1))
 469                goto out;
 470
 471        if (anchor)
 472                e = find_anchor_wl_entry(&ubi->free);
 473        else
 474                e = find_mean_wl_entry(ubi, &ubi->free);
 475
 476        if (!e)
 477                goto out;
 478
 479        self_check_in_wl_tree(ubi, e, &ubi->free);
 480
  481        /* remove it from the free list,
  482         * the wl subsystem no longer knows this erase block */
 483        rb_erase(&e->u.rb, &ubi->free);
 484        ubi->free_count--;
 485out:
 486        return e;
 487}
 488#endif
 489
 490/**
 491 * __wl_get_peb - get a physical eraseblock.
 492 * @ubi: UBI device description object
 493 *
 494 * This function returns a physical eraseblock in case of success and a
 495 * negative error code in case of failure.
 496 */
 497static int __wl_get_peb(struct ubi_device *ubi)
 498{
 499        int err;
 500        struct ubi_wl_entry *e;
 501
 502retry:
 503        if (!ubi->free.rb_node) {
 504                if (ubi->works_count == 0) {
 505                        ubi_err("no free eraseblocks");
 506                        ubi_assert(list_empty(&ubi->works));
 507                        return -ENOSPC;
 508                }
 509
 510                err = produce_free_peb(ubi);
 511                if (err < 0)
 512                        return err;
 513                goto retry;
 514        }
 515
 516        e = find_mean_wl_entry(ubi, &ubi->free);
 517        if (!e) {
 518                ubi_err("no free eraseblocks");
 519                return -ENOSPC;
 520        }
 521
 522        self_check_in_wl_tree(ubi, e, &ubi->free);
 523
 524        /*
 525         * Move the physical eraseblock to the protection queue where it will
 526         * be protected from being moved for some time.
 527         */
 528        rb_erase(&e->u.rb, &ubi->free);
 529        ubi->free_count--;
 530        dbg_wl("PEB %d EC %d", e->pnum, e->ec);
 531#ifndef CONFIG_MTD_UBI_FASTMAP
  532        /* We have to enqueue e only if fastmap is disabled;
  533         * if fastmap is enabled, prot_queue_add() will be called by
 534         * ubi_wl_get_peb() after removing e from the pool. */
 535        prot_queue_add(ubi, e);
 536#endif
 537        return e->pnum;
 538}
 539
 540#ifdef CONFIG_MTD_UBI_FASTMAP
 541/**
  542 * return_unused_pool_pebs - returns unused PEBs to the free tree.
 543 * @ubi: UBI device description object
 544 * @pool: fastmap pool description object
 545 */
 546static void return_unused_pool_pebs(struct ubi_device *ubi,
 547                                    struct ubi_fm_pool *pool)
 548{
 549        int i;
 550        struct ubi_wl_entry *e;
 551
 552        for (i = pool->used; i < pool->size; i++) {
 553                e = ubi->lookuptbl[pool->pebs[i]];
 554                wl_tree_add(e, &ubi->free);
 555                ubi->free_count++;
 556        }
 557}
 558
 559/**
  560 * refill_wl_pool - refills the fastmap pool used by the
 561 * WL sub-system.
 562 * @ubi: UBI device description object
 563 */
 564static void refill_wl_pool(struct ubi_device *ubi)
 565{
 566        struct ubi_wl_entry *e;
 567        struct ubi_fm_pool *pool = &ubi->fm_wl_pool;
 568
 569        return_unused_pool_pebs(ubi, pool);
 570
 571        for (pool->size = 0; pool->size < pool->max_size; pool->size++) {
 572                if (!ubi->free.rb_node ||
 573                   (ubi->free_count - ubi->beb_rsvd_pebs < 5))
 574                        break;
 575
 576                e = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF);
 577                self_check_in_wl_tree(ubi, e, &ubi->free);
 578                rb_erase(&e->u.rb, &ubi->free);
 579                ubi->free_count--;
 580
 581                pool->pebs[pool->size] = e->pnum;
 582        }
 583        pool->used = 0;
 584}
 585
 586/**
  587 * refill_wl_user_pool - refills the fastmap pool used by ubi_wl_get_peb.
 588 * @ubi: UBI device description object
 589 */
 590static void refill_wl_user_pool(struct ubi_device *ubi)
 591{
 592        struct ubi_fm_pool *pool = &ubi->fm_pool;
 593
 594        return_unused_pool_pebs(ubi, pool);
 595
 596        for (pool->size = 0; pool->size < pool->max_size; pool->size++) {
 597                pool->pebs[pool->size] = __wl_get_peb(ubi);
 598                if (pool->pebs[pool->size] < 0)
 599                        break;
 600        }
 601        pool->used = 0;
 602}
 603
 604/**
 605 * ubi_refill_pools - refills all fastmap PEB pools.
 606 * @ubi: UBI device description object
 607 */
 608void ubi_refill_pools(struct ubi_device *ubi)
 609{
 610        spin_lock(&ubi->wl_lock);
 611        refill_wl_pool(ubi);
 612        refill_wl_user_pool(ubi);
 613        spin_unlock(&ubi->wl_lock);
 614}
 615
  616/* ubi_wl_get_peb - works exactly like __wl_get_peb but keeps track of
 617 * the fastmap pool.
 618 */
 619int ubi_wl_get_peb(struct ubi_device *ubi)
 620{
 621        int ret;
 622        struct ubi_fm_pool *pool = &ubi->fm_pool;
 623        struct ubi_fm_pool *wl_pool = &ubi->fm_wl_pool;
 624
 625        if (!pool->size || !wl_pool->size || pool->used == pool->size ||
 626            wl_pool->used == wl_pool->size)
 627                ubi_update_fastmap(ubi);
 628
  629        /* we did not get a single free PEB */
 630        if (!pool->size)
 631                ret = -ENOSPC;
 632        else {
 633                spin_lock(&ubi->wl_lock);
 634                ret = pool->pebs[pool->used++];
 635                prot_queue_add(ubi, ubi->lookuptbl[ret]);
 636                spin_unlock(&ubi->wl_lock);
 637        }
 638
 639        return ret;
 640}
 641
 642/* get_peb_for_wl - returns a PEB to be used internally by the WL sub-system.
 643 *
 644 * @ubi: UBI device description object
 645 */
 646static struct ubi_wl_entry *get_peb_for_wl(struct ubi_device *ubi)
 647{
 648        struct ubi_fm_pool *pool = &ubi->fm_wl_pool;
 649        int pnum;
 650
 651        if (pool->used == pool->size || !pool->size) {
 652                /* We cannot update the fastmap here because this
 653                 * function is called in atomic context.
 654                 * Let's fail here and refill/update it as soon as possible. */
 655#ifndef __UBOOT__
 656                schedule_work(&ubi->fm_work);
 657#else
 658                /* In U-Boot we must call this directly */
 659                ubi_update_fastmap(ubi);
 660#endif
 661                return NULL;
 662        } else {
 663                pnum = pool->pebs[pool->used++];
 664                return ubi->lookuptbl[pnum];
 665        }
 666}
 667#else
 668static struct ubi_wl_entry *get_peb_for_wl(struct ubi_device *ubi)
 669{
 670        struct ubi_wl_entry *e;
 671
 672        e = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF);
 673        self_check_in_wl_tree(ubi, e, &ubi->free);
 674        ubi->free_count--;
 675        ubi_assert(ubi->free_count >= 0);
 676        rb_erase(&e->u.rb, &ubi->free);
 677
 678        return e;
 679}
 680
 681int ubi_wl_get_peb(struct ubi_device *ubi)
 682{
 683        int peb, err;
 684
 685        spin_lock(&ubi->wl_lock);
 686        peb = __wl_get_peb(ubi);
 687        spin_unlock(&ubi->wl_lock);
 688
 689        if (peb < 0)
 690                return peb;
 691
 692        err = ubi_self_check_all_ff(ubi, peb, ubi->vid_hdr_aloffset,
 693                                    ubi->peb_size - ubi->vid_hdr_aloffset);
 694        if (err) {
 695                ubi_err("new PEB %d does not contain all 0xFF bytes", peb);
 696                return err;
 697        }
 698
 699        return peb;
 700}
 701#endif
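
     /*
      * Illustrative caller-side sketch (not code from this file): the EBA
      * sub-system typically pairs the two entry points roughly like this,
      * where 'vol_id' and 'lnum' describe the LEB that ends up on the PEB:
      *
      *        pnum = ubi_wl_get_peb(ubi);
      *        if (pnum < 0)
      *                return pnum;
      *        ... write the VID header and data to @pnum ...
      *        ... later, when the LEB is unmapped or re-mapped ...
      *        err = ubi_wl_put_peb(ubi, vol_id, lnum, pnum, 0);
      */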
 702
 703/**
 704 * prot_queue_del - remove a physical eraseblock from the protection queue.
 705 * @ubi: UBI device description object
 706 * @pnum: the physical eraseblock to remove
 707 *
 708 * This function deletes PEB @pnum from the protection queue and returns zero
 709 * in case of success and %-ENODEV if the PEB was not found.
 710 */
 711static int prot_queue_del(struct ubi_device *ubi, int pnum)
 712{
 713        struct ubi_wl_entry *e;
 714
 715        e = ubi->lookuptbl[pnum];
 716        if (!e)
 717                return -ENODEV;
 718
 719        if (self_check_in_pq(ubi, e))
 720                return -ENODEV;
 721
 722        list_del(&e->u.list);
 723        dbg_wl("deleted PEB %d from the protection queue", e->pnum);
 724        return 0;
 725}
 726
 727/**
 728 * sync_erase - synchronously erase a physical eraseblock.
 729 * @ubi: UBI device description object
  730 * @e: the physical eraseblock to erase
 731 * @torture: if the physical eraseblock has to be tortured
 732 *
 733 * This function returns zero in case of success and a negative error code in
 734 * case of failure.
 735 */
 736static int sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
 737                      int torture)
 738{
 739        int err;
 740        struct ubi_ec_hdr *ec_hdr;
 741        unsigned long long ec = e->ec;
 742
 743        dbg_wl("erase PEB %d, old EC %llu", e->pnum, ec);
 744
 745        err = self_check_ec(ubi, e->pnum, e->ec);
 746        if (err)
 747                return -EINVAL;
 748
 749        ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS);
 750        if (!ec_hdr)
 751                return -ENOMEM;
 752
 753        err = ubi_io_sync_erase(ubi, e->pnum, torture);
 754        if (err < 0)
 755                goto out_free;
 756
 757        ec += err;
 758        if (ec > UBI_MAX_ERASECOUNTER) {
 759                /*
 760                 * Erase counter overflow. Upgrade UBI and use 64-bit
 761                 * erase counters internally.
 762                 */
 763                ubi_err("erase counter overflow at PEB %d, EC %llu",
 764                        e->pnum, ec);
 765                err = -EINVAL;
 766                goto out_free;
 767        }
 768
 769        dbg_wl("erased PEB %d, new EC %llu", e->pnum, ec);
 770
 771        ec_hdr->ec = cpu_to_be64(ec);
 772
 773        err = ubi_io_write_ec_hdr(ubi, e->pnum, ec_hdr);
 774        if (err)
 775                goto out_free;
 776
 777        e->ec = ec;
 778        spin_lock(&ubi->wl_lock);
 779        if (e->ec > ubi->max_ec)
 780                ubi->max_ec = e->ec;
 781        spin_unlock(&ubi->wl_lock);
 782
 783out_free:
 784        kfree(ec_hdr);
 785        return err;
 786}
 787
 788/**
 789 * serve_prot_queue - check if it is time to stop protecting PEBs.
 790 * @ubi: UBI device description object
 791 *
  792 * This function is called after each erase operation and removes PEBs from the
  793 * head of the protection queue. These PEBs have been protected for long enough
 794 * and should be moved to the used tree.
 795 */
 796static void serve_prot_queue(struct ubi_device *ubi)
 797{
 798        struct ubi_wl_entry *e, *tmp;
 799        int count;
 800
 801        /*
  802         * There may be several protected physical eraseblocks to remove,
 803         * process them all.
 804         */
 805repeat:
 806        count = 0;
 807        spin_lock(&ubi->wl_lock);
 808        list_for_each_entry_safe(e, tmp, &ubi->pq[ubi->pq_head], u.list) {
 809                dbg_wl("PEB %d EC %d protection over, move to used tree",
 810                        e->pnum, e->ec);
 811
 812                list_del(&e->u.list);
 813                wl_tree_add(e, &ubi->used);
 814                if (count++ > 32) {
 815                        /*
 816                         * Let's be nice and avoid holding the spinlock for
 817                         * too long.
 818                         */
 819                        spin_unlock(&ubi->wl_lock);
 820                        cond_resched();
 821                        goto repeat;
 822                }
 823        }
 824
 825        ubi->pq_head += 1;
 826        if (ubi->pq_head == UBI_PROT_QUEUE_LEN)
 827                ubi->pq_head = 0;
 828        ubi_assert(ubi->pq_head >= 0 && ubi->pq_head < UBI_PROT_QUEUE_LEN);
 829        spin_unlock(&ubi->wl_lock);
 830}
 831
 832/**
 833 * __schedule_ubi_work - schedule a work.
 834 * @ubi: UBI device description object
 835 * @wrk: the work to schedule
 836 *
 837 * This function adds a work defined by @wrk to the tail of the pending works
  838 * list. Can only be used if @ubi->work_sem is already held in read mode!
 839 */
 840static void __schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk)
 841{
 842        spin_lock(&ubi->wl_lock);
 843        list_add_tail(&wrk->list, &ubi->works);
 844        ubi_assert(ubi->works_count >= 0);
 845        ubi->works_count += 1;
 846#ifndef __UBOOT__
 847        if (ubi->thread_enabled && !ubi_dbg_is_bgt_disabled(ubi))
 848                wake_up_process(ubi->bgt_thread);
 849#else
 850        /*
 851         * U-Boot special: We have no bgt_thread in U-Boot!
 852         * So just call do_work() here directly.
 853         */
 854        do_work(ubi);
 855#endif
 856        spin_unlock(&ubi->wl_lock);
 857}
 858
 859/**
 860 * schedule_ubi_work - schedule a work.
 861 * @ubi: UBI device description object
 862 * @wrk: the work to schedule
 863 *
 864 * This function adds a work defined by @wrk to the tail of the pending works
 865 * list.
 866 */
 867static void schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk)
 868{
 869        down_read(&ubi->work_sem);
 870        __schedule_ubi_work(ubi, wrk);
 871        up_read(&ubi->work_sem);
 872}
 873
 874static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
 875                        int cancel);
 876
 877#ifdef CONFIG_MTD_UBI_FASTMAP
 878/**
 879 * ubi_is_erase_work - checks whether a work is erase work.
 880 * @wrk: The work object to be checked
 881 */
 882int ubi_is_erase_work(struct ubi_work *wrk)
 883{
 884        return wrk->func == erase_worker;
 885}
 886#endif
 887
 888/**
 889 * schedule_erase - schedule an erase work.
 890 * @ubi: UBI device description object
 891 * @e: the WL entry of the physical eraseblock to erase
 892 * @vol_id: the volume ID that last used this PEB
 893 * @lnum: the last used logical eraseblock number for the PEB
 894 * @torture: if the physical eraseblock has to be tortured
 895 *
  896 * This function returns zero in case of success and %-ENOMEM in case of
 897 * failure.
 898 */
 899static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
 900                          int vol_id, int lnum, int torture)
 901{
 902        struct ubi_work *wl_wrk;
 903
 904        ubi_assert(e);
 905        ubi_assert(!ubi_is_fm_block(ubi, e->pnum));
 906
 907        dbg_wl("schedule erasure of PEB %d, EC %d, torture %d",
 908               e->pnum, e->ec, torture);
 909
 910        wl_wrk = kmalloc(sizeof(struct ubi_work), GFP_NOFS);
 911        if (!wl_wrk)
 912                return -ENOMEM;
 913
 914        wl_wrk->func = &erase_worker;
 915        wl_wrk->e = e;
 916        wl_wrk->vol_id = vol_id;
 917        wl_wrk->lnum = lnum;
 918        wl_wrk->torture = torture;
 919
 920        schedule_ubi_work(ubi, wl_wrk);
 921        return 0;
 922}
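
     /*
      * For example (a sketch, not a quote from a caller), the fastmap code
      * below returns a PEB via 'schedule_erase(ubi, e, vol_id, lnum, torture)'
      * in 'ubi_wl_put_fm_peb()'; the actual erasure then happens in
      * 'erase_worker()', either from the background thread or, in U-Boot,
      * from the direct 'do_work()' call in '__schedule_ubi_work()'.
      */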
 923
 924/**
 925 * do_sync_erase - run the erase worker synchronously.
 926 * @ubi: UBI device description object
 927 * @e: the WL entry of the physical eraseblock to erase
 928 * @vol_id: the volume ID that last used this PEB
 929 * @lnum: the last used logical eraseblock number for the PEB
 930 * @torture: if the physical eraseblock has to be tortured
 931 *
  932 */
 933static int do_sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
 934                         int vol_id, int lnum, int torture)
 935{
 936        struct ubi_work *wl_wrk;
 937
 938        dbg_wl("sync erase of PEB %i", e->pnum);
 939
 940        wl_wrk = kmalloc(sizeof(struct ubi_work), GFP_NOFS);
 941        if (!wl_wrk)
 942                return -ENOMEM;
 943
 944        wl_wrk->e = e;
 945        wl_wrk->vol_id = vol_id;
 946        wl_wrk->lnum = lnum;
 947        wl_wrk->torture = torture;
 948
 949        return erase_worker(ubi, wl_wrk, 0);
 950}
 951
 952#ifdef CONFIG_MTD_UBI_FASTMAP
 953/**
 954 * ubi_wl_put_fm_peb - returns a PEB used in a fastmap to the wear-leveling
 955 * sub-system.
 956 * see: ubi_wl_put_peb()
 957 *
 958 * @ubi: UBI device description object
 959 * @fm_e: physical eraseblock to return
 960 * @lnum: the last used logical eraseblock number for the PEB
 961 * @torture: if this physical eraseblock has to be tortured
 962 */
 963int ubi_wl_put_fm_peb(struct ubi_device *ubi, struct ubi_wl_entry *fm_e,
 964                      int lnum, int torture)
 965{
 966        struct ubi_wl_entry *e;
 967        int vol_id, pnum = fm_e->pnum;
 968
 969        dbg_wl("PEB %d", pnum);
 970
 971        ubi_assert(pnum >= 0);
 972        ubi_assert(pnum < ubi->peb_count);
 973
 974        spin_lock(&ubi->wl_lock);
 975        e = ubi->lookuptbl[pnum];
 976
 977        /* This can happen if we recovered from a fastmap the very
  978         * first time and are now writing a new one. In this case the wl system
 979         * has never seen any PEB used by the original fastmap.
 980         */
 981        if (!e) {
 982                e = fm_e;
 983                ubi_assert(e->ec >= 0);
 984                ubi->lookuptbl[pnum] = e;
 985        } else {
 986                e->ec = fm_e->ec;
 987                kfree(fm_e);
 988        }
 989
 990        spin_unlock(&ubi->wl_lock);
 991
 992        vol_id = lnum ? UBI_FM_DATA_VOLUME_ID : UBI_FM_SB_VOLUME_ID;
 993        return schedule_erase(ubi, e, vol_id, lnum, torture);
 994}
 995#endif
 996
 997/**
 998 * wear_leveling_worker - wear-leveling worker function.
 999 * @ubi: UBI device description object
1000 * @wrk: the work object
1001 * @cancel: non-zero if the worker has to free memory and exit
1002 *
1003 * This function copies a more worn out physical eraseblock to a less worn out
1004 * one. Returns zero in case of success and a negative error code in case of
1005 * failure.
1006 */
1007static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
1008                                int cancel)
1009{
1010        int err, scrubbing = 0, torture = 0, protect = 0, erroneous = 0;
1011        int vol_id = -1, uninitialized_var(lnum);
1012#ifdef CONFIG_MTD_UBI_FASTMAP
1013        int anchor = wrk->anchor;
1014#endif
1015        struct ubi_wl_entry *e1, *e2;
1016        struct ubi_vid_hdr *vid_hdr;
1017
1018        kfree(wrk);
1019        if (cancel)
1020                return 0;
1021
1022        vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS);
1023        if (!vid_hdr)
1024                return -ENOMEM;
1025
1026        mutex_lock(&ubi->move_mutex);
1027        spin_lock(&ubi->wl_lock);
1028        ubi_assert(!ubi->move_from && !ubi->move_to);
1029        ubi_assert(!ubi->move_to_put);
1030
1031        if (!ubi->free.rb_node ||
1032            (!ubi->used.rb_node && !ubi->scrub.rb_node)) {
1033                /*
1034                 * No free physical eraseblocks? Well, they must be waiting in
1035                 * the queue to be erased. Cancel movement - it will be
1036                 * triggered again when a free physical eraseblock appears.
1037                 *
1038                 * No used physical eraseblocks? They must be temporarily
1039                 * protected from being moved. They will be moved to the
1040                 * @ubi->used tree later and the wear-leveling will be
1041                 * triggered again.
1042                 */
1043                dbg_wl("cancel WL, a list is empty: free %d, used %d",
1044                       !ubi->free.rb_node, !ubi->used.rb_node);
1045                goto out_cancel;
1046        }
1047
1048#ifdef CONFIG_MTD_UBI_FASTMAP
1049        /* Check whether we need to produce an anchor PEB */
1050        if (!anchor)
1051                anchor = !anchor_pebs_avalible(&ubi->free);
1052
1053        if (anchor) {
1054                e1 = find_anchor_wl_entry(&ubi->used);
1055                if (!e1)
1056                        goto out_cancel;
1057                e2 = get_peb_for_wl(ubi);
1058                if (!e2)
1059                        goto out_cancel;
1060
1061                self_check_in_wl_tree(ubi, e1, &ubi->used);
1062                rb_erase(&e1->u.rb, &ubi->used);
1063                dbg_wl("anchor-move PEB %d to PEB %d", e1->pnum, e2->pnum);
1064        } else if (!ubi->scrub.rb_node) {
1065#else
1066        if (!ubi->scrub.rb_node) {
1067#endif
1068                /*
1069                 * Now pick the least worn-out used physical eraseblock and a
1070                 * highly worn-out free physical eraseblock. If the erase
 1071                 * counters differ enough, start wear-leveling.
1072                 */
1073                e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb);
1074                e2 = get_peb_for_wl(ubi);
1075                if (!e2)
1076                        goto out_cancel;
1077
1078                if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) {
1079                        dbg_wl("no WL needed: min used EC %d, max free EC %d",
1080                               e1->ec, e2->ec);
1081
1082                        /* Give the unused PEB back */
1083                        wl_tree_add(e2, &ubi->free);
1084                        ubi->free_count++;
1085                        goto out_cancel;
1086                }
1087                self_check_in_wl_tree(ubi, e1, &ubi->used);
1088                rb_erase(&e1->u.rb, &ubi->used);
1089                dbg_wl("move PEB %d EC %d to PEB %d EC %d",
1090                       e1->pnum, e1->ec, e2->pnum, e2->ec);
1091        } else {
1092                /* Perform scrubbing */
1093                scrubbing = 1;
1094                e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, u.rb);
1095                e2 = get_peb_for_wl(ubi);
1096                if (!e2)
1097                        goto out_cancel;
1098
1099                self_check_in_wl_tree(ubi, e1, &ubi->scrub);
1100                rb_erase(&e1->u.rb, &ubi->scrub);
1101                dbg_wl("scrub PEB %d to PEB %d", e1->pnum, e2->pnum);
1102        }
1103
1104        ubi->move_from = e1;
1105        ubi->move_to = e2;
1106        spin_unlock(&ubi->wl_lock);
1107
1108        /*
1109         * Now we are going to copy physical eraseblock @e1->pnum to @e2->pnum.
1110         * We so far do not know which logical eraseblock our physical
1111         * eraseblock (@e1) belongs to. We have to read the volume identifier
1112         * header first.
1113         *
1114         * Note, we are protected from this PEB being unmapped and erased. The
1115         * 'ubi_wl_put_peb()' would wait for moving to be finished if the PEB
1116         * which is being moved was unmapped.
1117         */
1118
1119        err = ubi_io_read_vid_hdr(ubi, e1->pnum, vid_hdr, 0);
1120        if (err && err != UBI_IO_BITFLIPS) {
1121                if (err == UBI_IO_FF) {
1122                        /*
 1123                         * We are trying to move a PEB without a VID header. UBI
 1124                         * always writes VID headers shortly after the PEB was
1125                         * given, so we have a situation when it has not yet
1126                         * had a chance to write it, because it was preempted.
1127                         * So add this PEB to the protection queue so far,
1128                         * because presumably more data will be written there
1129                         * (including the missing VID header), and then we'll
1130                         * move it.
1131                         */
1132                        dbg_wl("PEB %d has no VID header", e1->pnum);
1133                        protect = 1;
1134                        goto out_not_moved;
1135                } else if (err == UBI_IO_FF_BITFLIPS) {
1136                        /*
1137                         * The same situation as %UBI_IO_FF, but bit-flips were
1138                         * detected. It is better to schedule this PEB for
1139                         * scrubbing.
1140                         */
1141                        dbg_wl("PEB %d has no VID header but has bit-flips",
1142                               e1->pnum);
1143                        scrubbing = 1;
1144                        goto out_not_moved;
1145                }
1146
1147                ubi_err("error %d while reading VID header from PEB %d",
1148                        err, e1->pnum);
1149                goto out_error;
1150        }
1151
1152        vol_id = be32_to_cpu(vid_hdr->vol_id);
1153        lnum = be32_to_cpu(vid_hdr->lnum);
1154
1155        err = ubi_eba_copy_leb(ubi, e1->pnum, e2->pnum, vid_hdr);
1156        if (err) {
1157                if (err == MOVE_CANCEL_RACE) {
1158                        /*
1159                         * The LEB has not been moved because the volume is
1160                         * being deleted or the PEB has been put meanwhile. We
1161                         * should prevent this PEB from being selected for
1162                         * wear-leveling movement again, so put it to the
1163                         * protection queue.
1164                         */
1165                        protect = 1;
1166                        goto out_not_moved;
1167                }
1168                if (err == MOVE_RETRY) {
1169                        scrubbing = 1;
1170                        goto out_not_moved;
1171                }
1172                if (err == MOVE_TARGET_BITFLIPS || err == MOVE_TARGET_WR_ERR ||
1173                    err == MOVE_TARGET_RD_ERR) {
1174                        /*
1175                         * Target PEB had bit-flips or write error - torture it.
1176                         */
1177                        torture = 1;
1178                        goto out_not_moved;
1179                }
1180
1181                if (err == MOVE_SOURCE_RD_ERR) {
1182                        /*
1183                         * An error happened while reading the source PEB. Do
1184                         * not switch to R/O mode in this case, and give the
1185                         * upper layers a possibility to recover from this,
1186                         * e.g. by unmapping corresponding LEB. Instead, just
1187                         * put this PEB to the @ubi->erroneous list to prevent
1188                         * UBI from trying to move it over and over again.
1189                         */
1190                        if (ubi->erroneous_peb_count > ubi->max_erroneous) {
1191                                ubi_err("too many erroneous eraseblocks (%d)",
1192                                        ubi->erroneous_peb_count);
1193                                goto out_error;
1194                        }
1195                        erroneous = 1;
1196                        goto out_not_moved;
1197                }
1198
1199                if (err < 0)
1200                        goto out_error;
1201
1202                ubi_assert(0);
1203        }
1204
1205        /* The PEB has been successfully moved */
1206        if (scrubbing)
1207                ubi_msg("scrubbed PEB %d (LEB %d:%d), data moved to PEB %d",
1208                        e1->pnum, vol_id, lnum, e2->pnum);
1209        ubi_free_vid_hdr(ubi, vid_hdr);
1210
1211        spin_lock(&ubi->wl_lock);
1212        if (!ubi->move_to_put) {
1213                wl_tree_add(e2, &ubi->used);
1214                e2 = NULL;
1215        }
1216        ubi->move_from = ubi->move_to = NULL;
1217        ubi->move_to_put = ubi->wl_scheduled = 0;
1218        spin_unlock(&ubi->wl_lock);
1219
1220        err = do_sync_erase(ubi, e1, vol_id, lnum, 0);
1221        if (err) {
1222                kmem_cache_free(ubi_wl_entry_slab, e1);
1223                if (e2)
1224                        kmem_cache_free(ubi_wl_entry_slab, e2);
1225                goto out_ro;
1226        }
1227
1228        if (e2) {
1229                /*
1230                 * Well, the target PEB was put meanwhile, schedule it for
1231                 * erasure.
1232                 */
1233                dbg_wl("PEB %d (LEB %d:%d) was put meanwhile, erase",
1234                       e2->pnum, vol_id, lnum);
1235                err = do_sync_erase(ubi, e2, vol_id, lnum, 0);
1236                if (err) {
1237                        kmem_cache_free(ubi_wl_entry_slab, e2);
1238                        goto out_ro;
1239                }
1240        }
1241
1242        dbg_wl("done");
1243        mutex_unlock(&ubi->move_mutex);
1244        return 0;
1245
1246        /*
 1247         * For some reason the LEB was not moved, might be an error, might be
1248         * something else. @e1 was not changed, so return it back. @e2 might
1249         * have been changed, schedule it for erasure.
1250         */
1251out_not_moved:
1252        if (vol_id != -1)
1253                dbg_wl("cancel moving PEB %d (LEB %d:%d) to PEB %d (%d)",
1254                       e1->pnum, vol_id, lnum, e2->pnum, err);
1255        else
1256                dbg_wl("cancel moving PEB %d to PEB %d (%d)",
1257                       e1->pnum, e2->pnum, err);
1258        spin_lock(&ubi->wl_lock);
1259        if (protect)
1260                prot_queue_add(ubi, e1);
1261        else if (erroneous) {
1262                wl_tree_add(e1, &ubi->erroneous);
1263                ubi->erroneous_peb_count += 1;
1264        } else if (scrubbing)
1265                wl_tree_add(e1, &ubi->scrub);
1266        else
1267                wl_tree_add(e1, &ubi->used);
1268        ubi_assert(!ubi->move_to_put);
1269        ubi->move_from = ubi->move_to = NULL;
1270        ubi->wl_scheduled = 0;
1271        spin_unlock(&ubi->wl_lock);
1272
1273        ubi_free_vid_hdr(ubi, vid_hdr);
1274        err = do_sync_erase(ubi, e2, vol_id, lnum, torture);
1275        if (err) {
1276                kmem_cache_free(ubi_wl_entry_slab, e2);
1277                goto out_ro;
1278        }
1279        mutex_unlock(&ubi->move_mutex);
1280        return 0;
1281
1282out_error:
 1283        if (vol_id != -1)
 1284                ubi_err("error %d while moving PEB %d (LEB %d:%d) to PEB %d",
 1285                        err, e1->pnum, vol_id, lnum, e2->pnum);
 1286        else
 1287                ubi_err("error %d while moving PEB %d to PEB %d",
 1288                        err, e1->pnum, e2->pnum);
1289        spin_lock(&ubi->wl_lock);
1290        ubi->move_from = ubi->move_to = NULL;
1291        ubi->move_to_put = ubi->wl_scheduled = 0;
1292        spin_unlock(&ubi->wl_lock);
1293
1294        ubi_free_vid_hdr(ubi, vid_hdr);
1295        kmem_cache_free(ubi_wl_entry_slab, e1);
1296        kmem_cache_free(ubi_wl_entry_slab, e2);
1297
1298out_ro:
1299        ubi_ro_mode(ubi);
1300        mutex_unlock(&ubi->move_mutex);
1301        ubi_assert(err != 0);
1302        return err < 0 ? err : -EIO;
1303
1304out_cancel:
1305        ubi->wl_scheduled = 0;
1306        spin_unlock(&ubi->wl_lock);
1307        mutex_unlock(&ubi->move_mutex);
1308        ubi_free_vid_hdr(ubi, vid_hdr);
1309        return 0;
1310}
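
     /*
      * To summarize the 'out_not_moved' handling above (a restatement, not
      * new behaviour): @e1 goes back to @ubi->pq when 'protect' is set, to
      * @ubi->erroneous on a source read error, to @ubi->scrub when bit-flips
      * were seen, and to @ubi->used otherwise, while @e2 is always handed to
      * 'do_sync_erase()'.
      */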
1311
1312/**
1313 * ensure_wear_leveling - schedule wear-leveling if it is needed.
1314 * @ubi: UBI device description object
1315 * @nested: set to non-zero if this function is called from UBI worker
1316 *
1317 * This function checks if it is time to start wear-leveling and schedules it
1318 * if yes. This function returns zero in case of success and a negative error
1319 * code in case of failure.
1320 */
1321static int ensure_wear_leveling(struct ubi_device *ubi, int nested)
1322{
1323        int err = 0;
1324        struct ubi_wl_entry *e1;
1325        struct ubi_wl_entry *e2;
1326        struct ubi_work *wrk;
1327
1328        spin_lock(&ubi->wl_lock);
1329        if (ubi->wl_scheduled)
1330                /* Wear-leveling is already in the work queue */
1331                goto out_unlock;
1332
1333        /*
 1334         * If the ubi->scrub tree is not empty, scrubbing is needed, and the
 1335         * WL worker has to be scheduled anyway.
1336         */
1337        if (!ubi->scrub.rb_node) {
1338                if (!ubi->used.rb_node || !ubi->free.rb_node)
1339                        /* No physical eraseblocks - no deal */
1340                        goto out_unlock;
1341
1342                /*
1343                 * We schedule wear-leveling only if the difference between the
1344                 * lowest erase counter of used physical eraseblocks and a high
1345                 * erase counter of free physical eraseblocks is greater than
1346                 * %UBI_WL_THRESHOLD.
1347                 */
1348                e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb);
1349                e2 = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF);
1350
1351                if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD))
1352                        goto out_unlock;
1353                dbg_wl("schedule wear-leveling");
1354        } else
1355                dbg_wl("schedule scrubbing");
1356
1357        ubi->wl_scheduled = 1;
1358        spin_unlock(&ubi->wl_lock);
1359
1360        wrk = kmalloc(sizeof(struct ubi_work), GFP_NOFS);
1361        if (!wrk) {
1362                err = -ENOMEM;
1363                goto out_cancel;
1364        }
1365
1366        wrk->anchor = 0;
1367        wrk->func = &wear_leveling_worker;
1368        if (nested)
1369                __schedule_ubi_work(ubi, wrk);
1370        else
1371                schedule_ubi_work(ubi, wrk);
1372        return err;
1373
1374out_cancel:
1375        spin_lock(&ubi->wl_lock);
1376        ubi->wl_scheduled = 0;
1377out_unlock:
1378        spin_unlock(&ubi->wl_lock);
1379        return err;
1380}
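
     /*
      * A worked example of the trigger above, with assumed numbers: if
      * %UBI_WL_THRESHOLD is 4096, the least worn used PEB has EC 10 and the
      * most worn free PEB that 'find_wl_entry()' may return has EC 5000,
      * then 5000 - 10 >= 4096 holds and a wear-leveling work is queued. With
      * a free EC of 4000 the difference is only 3990 and nothing is
      * scheduled.
      */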
1381
1382#ifdef CONFIG_MTD_UBI_FASTMAP
1383/**
1384 * ubi_ensure_anchor_pebs - schedule wear-leveling to produce an anchor PEB.
1385 * @ubi: UBI device description object
1386 */
1387int ubi_ensure_anchor_pebs(struct ubi_device *ubi)
1388{
1389        struct ubi_work *wrk;
1390
1391        spin_lock(&ubi->wl_lock);
1392        if (ubi->wl_scheduled) {
1393                spin_unlock(&ubi->wl_lock);
1394                return 0;
1395        }
1396        ubi->wl_scheduled = 1;
1397        spin_unlock(&ubi->wl_lock);
1398
1399        wrk = kmalloc(sizeof(struct ubi_work), GFP_NOFS);
1400        if (!wrk) {
1401                spin_lock(&ubi->wl_lock);
1402                ubi->wl_scheduled = 0;
1403                spin_unlock(&ubi->wl_lock);
1404                return -ENOMEM;
1405        }
1406
1407        wrk->anchor = 1;
1408        wrk->func = &wear_leveling_worker;
1409        schedule_ubi_work(ubi, wrk);
1410        return 0;
1411}
1412#endif
1413
1414/**
1415 * erase_worker - physical eraseblock erase worker function.
1416 * @ubi: UBI device description object
1417 * @wl_wrk: the work object
1418 * @cancel: non-zero if the worker has to free memory and exit
1419 *
 1420 * This function erases a physical eraseblock and performs torture testing if
 1421 * needed. It also takes care of marking the physical eraseblock bad if
1422 * needed. Returns zero in case of success and a negative error code in case of
1423 * failure.
1424 */
1425static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
1426                        int cancel)
1427{
1428        struct ubi_wl_entry *e = wl_wrk->e;
1429        int pnum = e->pnum;
1430        int vol_id = wl_wrk->vol_id;
1431        int lnum = wl_wrk->lnum;
1432        int err, available_consumed = 0;
1433
1434        if (cancel) {
1435                dbg_wl("cancel erasure of PEB %d EC %d", pnum, e->ec);
1436                kfree(wl_wrk);
1437                kmem_cache_free(ubi_wl_entry_slab, e);
1438                return 0;
1439        }
1440
1441        dbg_wl("erase PEB %d EC %d LEB %d:%d",
1442               pnum, e->ec, wl_wrk->vol_id, wl_wrk->lnum);
1443
1444        ubi_assert(!ubi_is_fm_block(ubi, e->pnum));
1445
1446        err = sync_erase(ubi, e, wl_wrk->torture);
1447        if (!err) {
1448                /* Fine, we've erased it successfully */
1449                kfree(wl_wrk);
1450
1451                spin_lock(&ubi->wl_lock);
1452                wl_tree_add(e, &ubi->free);
1453                ubi->free_count++;
1454                spin_unlock(&ubi->wl_lock);
1455
1456                /*
 1457                 * One more erase operation has happened, take care of
1458                 * protected physical eraseblocks.
1459                 */
1460                serve_prot_queue(ubi);
1461
 1462                /* And take care of wear-leveling */
1463                err = ensure_wear_leveling(ubi, 1);
1464                return err;
1465        }
1466
1467        ubi_err("failed to erase PEB %d, error %d", pnum, err);
1468        kfree(wl_wrk);
1469
1470        if (err == -EINTR || err == -ENOMEM || err == -EAGAIN ||
1471            err == -EBUSY) {
1472                int err1;
1473
 1474                /* Re-schedule the PEB for erasure */
1475                err1 = schedule_erase(ubi, e, vol_id, lnum, 0);
1476                if (err1) {
1477                        err = err1;
1478                        goto out_ro;
1479                }
1480                return err;
1481        }
1482
1483        kmem_cache_free(ubi_wl_entry_slab, e);
1484        if (err != -EIO)
1485                /*
1486                 * If this is not %-EIO, we have no idea what to do. Scheduling
1487                 * this physical eraseblock for erasure again would cause
 1488                 * errors again and again. Well, let's switch to R/O mode.
1489                 */
1490                goto out_ro;
1491
1492        /* It is %-EIO, the PEB went bad */
1493
1494        if (!ubi->bad_allowed) {
1495                ubi_err("bad physical eraseblock %d detected", pnum);
1496                goto out_ro;
1497        }
1498
1499        spin_lock(&ubi->volumes_lock);
1500        if (ubi->beb_rsvd_pebs == 0) {
1501                if (ubi->avail_pebs == 0) {
1502                        spin_unlock(&ubi->volumes_lock);
1503                        ubi_err("no reserved/available physical eraseblocks");
1504                        goto out_ro;
1505                }
1506                ubi->avail_pebs -= 1;
1507                available_consumed = 1;
1508        }
1509        spin_unlock(&ubi->volumes_lock);
1510
1511        ubi_msg("mark PEB %d as bad", pnum);
1512        err = ubi_io_mark_bad(ubi, pnum);
1513        if (err)
1514                goto out_ro;
1515
1516        spin_lock(&ubi->volumes_lock);
1517        if (ubi->beb_rsvd_pebs > 0) {
1518                if (available_consumed) {
1519                        /*
1520                         * The number of reserved PEBs increased since we last
1521                         * checked.
1522                         */
1523                        ubi->avail_pebs += 1;
1524                        available_consumed = 0;
1525                }
1526                ubi->beb_rsvd_pebs -= 1;
1527        }
1528        ubi->bad_peb_count += 1;
1529        ubi->good_peb_count -= 1;
1530        ubi_calculate_reserved(ubi);
1531        if (available_consumed)
1532                ubi_warn("no PEBs in the reserved pool, used an available PEB");
1533        else if (ubi->beb_rsvd_pebs)
1534                ubi_msg("%d PEBs left in the reserve", ubi->beb_rsvd_pebs);
1535        else
1536                ubi_warn("last PEB from the reserve was used");
1537        spin_unlock(&ubi->volumes_lock);
1538
1539        return err;
1540
1541out_ro:
1542        if (available_consumed) {
1543                spin_lock(&ubi->volumes_lock);
1544                ubi->avail_pebs += 1;
1545                spin_unlock(&ubi->volumes_lock);
1546        }
1547        ubi_ro_mode(ubi);
1548        return err;
1549}
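
/*
 * A simplified sketch of the failure handling in erase_worker() above
 * (illustration only, using the same variables as the function): transient
 * errors re-queue the erase, %-EIO marks the PEB bad when bad blocks are
 * allowed, and anything else switches the device to read-only mode:
 *
 *	if (err == -EINTR || err == -ENOMEM || err == -EAGAIN || err == -EBUSY)
 *		schedule_erase(ubi, e, vol_id, lnum, 0);
 *	else if (err == -EIO && ubi->bad_allowed)
 *		ubi_io_mark_bad(ubi, pnum);
 *	else
 *		ubi_ro_mode(ubi);
 */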
1550
1551/**
1552 * ubi_wl_put_peb - return a PEB to the wear-leveling sub-system.
1553 * @ubi: UBI device description object
1554 * @vol_id: the volume ID that last used this PEB
1555 * @lnum: the last used logical eraseblock number for the PEB
1556 * @pnum: physical eraseblock to return
1557 * @torture: if this physical eraseblock has to be tortured
1558 *
1559 * This function is called to return physical eraseblock @pnum to the pool of
1560 * free physical eraseblocks. The @torture flag has to be set if an I/O error
1561 * occurred on this PEB and it has to be tested. This function returns zero
1562 * in case of success, and a negative error code in case of failure.
1563 */
1564int ubi_wl_put_peb(struct ubi_device *ubi, int vol_id, int lnum,
1565                   int pnum, int torture)
1566{
1567        int err;
1568        struct ubi_wl_entry *e;
1569
1570        dbg_wl("PEB %d", pnum);
1571        ubi_assert(pnum >= 0);
1572        ubi_assert(pnum < ubi->peb_count);
1573
1574retry:
1575        spin_lock(&ubi->wl_lock);
1576        e = ubi->lookuptbl[pnum];
1577        if (e == ubi->move_from) {
1578                /*
1579                 * User is putting the physical eraseblock which was selected to
1580                 * be moved. It will be scheduled for erasure in the
1581                 * wear-leveling worker.
1582                 */
1583                dbg_wl("PEB %d is being moved, wait", pnum);
1584                spin_unlock(&ubi->wl_lock);
1585
1586                /* Wait for the WL worker by taking the @ubi->move_mutex */
1587                mutex_lock(&ubi->move_mutex);
1588                mutex_unlock(&ubi->move_mutex);
1589                goto retry;
1590        } else if (e == ubi->move_to) {
1591                /*
1592                 * User is putting the physical eraseblock which was selected
1593                 * as the target of a data move. This may happen if the EBA
1594                 * sub-system has already re-mapped the LEB in
1595                 * 'ubi_eba_copy_leb()' but the WL sub-system has not yet put
1596                 * the PEB to the "used" tree, although it is about to do so.
1597                 * So we just set a flag which tells the WL worker that the
1598                 * PEB is not needed anymore and should be scheduled for erasure.
1599                 */
1600                dbg_wl("PEB %d is the target of data moving", pnum);
1601                ubi_assert(!ubi->move_to_put);
1602                ubi->move_to_put = 1;
1603                spin_unlock(&ubi->wl_lock);
1604                return 0;
1605        } else {
1606                if (in_wl_tree(e, &ubi->used)) {
1607                        self_check_in_wl_tree(ubi, e, &ubi->used);
1608                        rb_erase(&e->u.rb, &ubi->used);
1609                } else if (in_wl_tree(e, &ubi->scrub)) {
1610                        self_check_in_wl_tree(ubi, e, &ubi->scrub);
1611                        rb_erase(&e->u.rb, &ubi->scrub);
1612                } else if (in_wl_tree(e, &ubi->erroneous)) {
1613                        self_check_in_wl_tree(ubi, e, &ubi->erroneous);
1614                        rb_erase(&e->u.rb, &ubi->erroneous);
1615                        ubi->erroneous_peb_count -= 1;
1616                        ubi_assert(ubi->erroneous_peb_count >= 0);
1617                        /* Erroneous PEBs should be tortured */
1618                        torture = 1;
1619                } else {
1620                        err = prot_queue_del(ubi, e->pnum);
1621                        if (err) {
1622                                ubi_err("PEB %d not found", pnum);
1623                                ubi_ro_mode(ubi);
1624                                spin_unlock(&ubi->wl_lock);
1625                                return err;
1626                        }
1627                }
1628        }
1629        spin_unlock(&ubi->wl_lock);
1630
1631        err = schedule_erase(ubi, e, vol_id, lnum, torture);
1632        if (err) {
1633                spin_lock(&ubi->wl_lock);
1634                wl_tree_add(e, &ubi->used);
1635                spin_unlock(&ubi->wl_lock);
1636        }
1637
1638        return err;
1639}
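
/*
 * A minimal usage sketch, not taken from a real caller: a sub-system which
 * no longer needs physical eraseblock 'pnum', last used by LEB
 * 'vol_id':'lnum', would return it like this:
 *
 *	err = ubi_wl_put_peb(ubi, vol_id, lnum, pnum, 0);
 *	if (err)
 *		return err;
 *
 * Passing 1 as the last argument instead requests torture testing, which is
 * what callers do after an I/O error on this PEB.
 */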
1640
1641/**
1642 * ubi_wl_scrub_peb - schedule a physical eraseblock for scrubbing.
1643 * @ubi: UBI device description object
1644 * @pnum: the physical eraseblock to schedule
1645 *
1646 * If a bit-flip in a physical eraseblock is detected, this physical eraseblock
1647 * needs scrubbing. This function schedules the physical eraseblock for
1648 * scrubbing, which is done in the background. It returns zero in case of
1649 * success and a negative error code in case of failure.
1650 */
1651int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum)
1652{
1653        struct ubi_wl_entry *e;
1654
1655        ubi_msg("schedule PEB %d for scrubbing", pnum);
1656
1657retry:
1658        spin_lock(&ubi->wl_lock);
1659        e = ubi->lookuptbl[pnum];
1660        if (e == ubi->move_from || in_wl_tree(e, &ubi->scrub) ||
1661                                   in_wl_tree(e, &ubi->erroneous)) {
1662                spin_unlock(&ubi->wl_lock);
1663                return 0;
1664        }
1665
1666        if (e == ubi->move_to) {
1667                /*
1668                 * This physical eraseblock was used to move data to. The data
1669                 * was moved but the PEB was not yet inserted into the proper
1670                 * tree. We should just wait a little and let the WL worker
1671                 * proceed.
1672                 */
1673                spin_unlock(&ubi->wl_lock);
1674                dbg_wl("the PEB %d is not in proper tree, retry", pnum);
1675                yield();
1676                goto retry;
1677        }
1678
1679        if (in_wl_tree(e, &ubi->used)) {
1680                self_check_in_wl_tree(ubi, e, &ubi->used);
1681                rb_erase(&e->u.rb, &ubi->used);
1682        } else {
1683                int err;
1684
1685                err = prot_queue_del(ubi, e->pnum);
1686                if (err) {
1687                        ubi_err("PEB %d not found", pnum);
1688                        ubi_ro_mode(ubi);
1689                        spin_unlock(&ubi->wl_lock);
1690                        return err;
1691                }
1692        }
1693
1694        wl_tree_add(e, &ubi->scrub);
1695        spin_unlock(&ubi->wl_lock);
1696
1697        /*
1698         * Technically scrubbing is the same as wear-leveling, so it is done
1699         * by the WL worker.
1700         */
1701        return ensure_wear_leveling(ubi, 0);
1702}
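
/*
 * A minimal usage sketch, not taken from a real caller: a read path which
 * got %UBI_IO_BITFLIPS for physical eraseblock 'pnum' would hand it over
 * for scrubbing:
 *
 *	if (bitflips) {
 *		err = ubi_wl_scrub_peb(ubi, pnum);
 *		if (err)
 *			return err;
 *	}
 *
 * The actual data move and erasure happen later in the background thread.
 */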
1703
1704/**
1705 * ubi_wl_flush - flush all pending works.
1706 * @ubi: UBI device description object
1707 * @vol_id: the volume id to flush for
1708 * @lnum: the logical eraseblock number to flush for
1709 *
1710 * This function executes all pending works for a particular volume id /
1711 * logical eraseblock number pair. If either value is set to %UBI_ALL, then it
1712 * acts as a wildcard for all of the corresponding volume numbers or logical
1713 * eraseblock numbers. It returns zero in case of success and a negative error
1714 * code in case of failure.
1715 */
1716int ubi_wl_flush(struct ubi_device *ubi, int vol_id, int lnum)
1717{
1718        int err = 0;
1719        int found = 1;
1720
1721        /*
1722         * Run the pending works which match the given volume ID and LEB
1723         * number, re-scanning the list from the head after each one.
1724         */
1725        dbg_wl("flush pending work for LEB %d:%d (%d pending works)",
1726               vol_id, lnum, ubi->works_count);
1727
1728        while (found) {
1729                struct ubi_work *wrk;
1730                found = 0;
1731
1732                down_read(&ubi->work_sem);
1733                spin_lock(&ubi->wl_lock);
1734                list_for_each_entry(wrk, &ubi->works, list) {
1735                        if ((vol_id == UBI_ALL || wrk->vol_id == vol_id) &&
1736                            (lnum == UBI_ALL || wrk->lnum == lnum)) {
1737                                list_del(&wrk->list);
1738                                ubi->works_count -= 1;
1739                                ubi_assert(ubi->works_count >= 0);
1740                                spin_unlock(&ubi->wl_lock);
1741
1742                                err = wrk->func(ubi, wrk, 0);
1743                                if (err) {
1744                                        up_read(&ubi->work_sem);
1745                                        return err;
1746                                }
1747
1748                                spin_lock(&ubi->wl_lock);
1749                                found = 1;
1750                                break;
1751                        }
1752                }
1753                spin_unlock(&ubi->wl_lock);
1754                up_read(&ubi->work_sem);
1755        }
1756
1757        /*
1758         * Make sure all the works which are being done in parallel are
1759         * finished.
1760         */
1761        down_write(&ubi->work_sem);
1762        up_write(&ubi->work_sem);
1763
1764        return err;
1765}
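
/*
 * A minimal usage sketch: passing %UBI_ALL for both @vol_id and @lnum
 * flushes every pending work, which is what a caller would do before
 * detaching the device:
 *
 *	err = ubi_wl_flush(ubi, UBI_ALL, UBI_ALL);
 *	if (err)
 *		return err;
 */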
1766
1767/**
1768 * tree_destroy - destroy an RB-tree.
1769 * @root: the root of the tree to destroy
1770 */
1771static void tree_destroy(struct rb_root *root)
1772{
1773        struct rb_node *rb;
1774        struct ubi_wl_entry *e;
1775
1776        rb = root->rb_node;
1777        while (rb) {
1778                if (rb->rb_left)
1779                        rb = rb->rb_left;
1780                else if (rb->rb_right)
1781                        rb = rb->rb_right;
1782                else {
1783                        e = rb_entry(rb, struct ubi_wl_entry, u.rb);
1784
1785                        rb = rb_parent(rb);
1786                        if (rb) {
1787                                if (rb->rb_left == &e->u.rb)
1788                                        rb->rb_left = NULL;
1789                                else
1790                                        rb->rb_right = NULL;
1791                        }
1792
1793                        kmem_cache_free(ubi_wl_entry_slab, e);
1794                }
1795        }
1796}
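
/*
 * A worked example of the iterative post-order walk above (hypothetical
 * three-node tree): the loop descends to the left leaf and frees it, clears
 * the parent's ->rb_left pointer, then does the same for the right leaf,
 * and finally frees the root once both of its child pointers are NULL. No
 * recursion or extra memory is needed, which matters because these trees
 * may hold an entry for every PEB on the device.
 */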
1797
1798/**
1799 * ubi_thread - UBI background thread.
1800 * @u: the UBI device description object pointer
1801 */
1802int ubi_thread(void *u)
1803{
1804        int failures = 0;
1805        struct ubi_device *ubi = u;
1806
1807        ubi_msg("background thread \"%s\" started, PID %d",
1808                ubi->bgt_name, task_pid_nr(current));
1809
1810        set_freezable();
1811        for (;;) {
1812                int err;
1813
1814                if (kthread_should_stop())
1815                        break;
1816
1817                if (try_to_freeze())
1818                        continue;
1819
1820                spin_lock(&ubi->wl_lock);
1821                if (list_empty(&ubi->works) || ubi->ro_mode ||
1822                    !ubi->thread_enabled || ubi_dbg_is_bgt_disabled(ubi)) {
1823                        set_current_state(TASK_INTERRUPTIBLE);
1824                        spin_unlock(&ubi->wl_lock);
1825                        schedule();
1826                        continue;
1827                }
1828                spin_unlock(&ubi->wl_lock);
1829
1830                err = do_work(ubi);
1831                if (err) {
1832                        ubi_err("%s: work failed with error code %d",
1833                                ubi->bgt_name, err);
1834                        if (failures++ > WL_MAX_FAILURES) {
1835                                /*
1836                                 * Too many failures, disable the thread and
1837                                 * switch to read-only mode.
1838                                 */
1839                                ubi_msg("%s: %d consecutive failures",
1840                                        ubi->bgt_name, WL_MAX_FAILURES);
1841                                ubi_ro_mode(ubi);
1842                                ubi->thread_enabled = 0;
1843                                continue;
1844                        }
1845                } else
1846                        failures = 0;
1847
1848                cond_resched();
1849        }
1850
1851        dbg_wl("background thread \"%s\" is killed", ubi->bgt_name);
1852        return 0;
1853}
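
/*
 * A worked example of the failure handling above (hypothetical run): if
 * do_work() fails more than WL_MAX_FAILURES times in a row, the thread
 * switches the device to read-only mode and clears @ubi->thread_enabled, so
 * from then on it only sleeps. A single successful do_work() in between
 * resets 'failures' to zero, so sporadic errors do not accumulate.
 */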
1854
1855/**
1856 * cancel_pending - cancel all pending works.
1857 * @ubi: UBI device description object
1858 */
1859static void cancel_pending(struct ubi_device *ubi)
1860{
1861        while (!list_empty(&ubi->works)) {
1862                struct ubi_work *wrk;
1863
1864                wrk = list_entry(ubi->works.next, struct ubi_work, list);
1865                list_del(&wrk->list);
1866                wrk->func(ubi, wrk, 1);
1867                ubi->works_count -= 1;
1868                ubi_assert(ubi->works_count >= 0);
1869        }
1870}
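
/*
 * Note that cancellation is delegated to the work function itself via the
 * last argument. For example, erase_worker() above simply frees the work
 * structure and the wear-leveling entry when called with 'cancel' set,
 * instead of touching the flash.
 */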
1871
1872/**
1873 * ubi_wl_init - initialize the WL sub-system using attaching information.
1874 * @ubi: UBI device description object
1875 * @ai: attaching information
1876 *
1877 * This function returns zero in case of success, and a negative error code in
1878 * case of failure.
1879 */
1880int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai)
1881{
1882        int err, i, reserved_pebs, found_pebs = 0;
1883        struct rb_node *rb1, *rb2;
1884        struct ubi_ainf_volume *av;
1885        struct ubi_ainf_peb *aeb, *tmp;
1886        struct ubi_wl_entry *e;
1887
1888        ubi->used = ubi->erroneous = ubi->free = ubi->scrub = RB_ROOT;
1889        spin_lock_init(&ubi->wl_lock);
1890        mutex_init(&ubi->move_mutex);
1891        init_rwsem(&ubi->work_sem);
1892        ubi->max_ec = ai->max_ec;
1893        INIT_LIST_HEAD(&ubi->works);
1894#ifndef __UBOOT__
1895#ifdef CONFIG_MTD_UBI_FASTMAP
1896        INIT_WORK(&ubi->fm_work, update_fastmap_work_fn);
1897#endif
1898#endif
1899
1900        sprintf(ubi->bgt_name, UBI_BGT_NAME_PATTERN, ubi->ubi_num);
1901
1902        err = -ENOMEM;
1903        ubi->lookuptbl = kzalloc(ubi->peb_count * sizeof(void *), GFP_KERNEL);
1904        if (!ubi->lookuptbl)
1905                return err;
1906
1907        for (i = 0; i < UBI_PROT_QUEUE_LEN; i++)
1908                INIT_LIST_HEAD(&ubi->pq[i]);
1909        ubi->pq_head = 0;
1910
1911        list_for_each_entry_safe(aeb, tmp, &ai->erase, u.list) {
1912                cond_resched();
1913
1914                e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
1915                if (!e)
1916                        goto out_free;
1917
1918                e->pnum = aeb->pnum;
1919                e->ec = aeb->ec;
1920                ubi_assert(!ubi_is_fm_block(ubi, e->pnum));
1921                ubi->lookuptbl[e->pnum] = e;
1922                if (schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0)) {
1923                        kmem_cache_free(ubi_wl_entry_slab, e);
1924                        goto out_free;
1925                }
1926
1927                found_pebs++;
1928        }
1929
1930        ubi->free_count = 0;
1931        list_for_each_entry(aeb, &ai->free, u.list) {
1932                cond_resched();
1933
1934                e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
1935                if (!e)
1936                        goto out_free;
1937
1938                e->pnum = aeb->pnum;
1939                e->ec = aeb->ec;
1940                ubi_assert(e->ec >= 0);
1941                ubi_assert(!ubi_is_fm_block(ubi, e->pnum));
1942
1943                wl_tree_add(e, &ubi->free);
1944                ubi->free_count++;
1945
1946                ubi->lookuptbl[e->pnum] = e;
1947
1948                found_pebs++;
1949        }
1950
1951        ubi_rb_for_each_entry(rb1, av, &ai->volumes, rb) {
1952                ubi_rb_for_each_entry(rb2, aeb, &av->root, u.rb) {
1953                        cond_resched();
1954
1955                        e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
1956                        if (!e)
1957                                goto out_free;
1958
1959                        e->pnum = aeb->pnum;
1960                        e->ec = aeb->ec;
1961                        ubi->lookuptbl[e->pnum] = e;
1962
1963                        if (!aeb->scrub) {
1964                                dbg_wl("add PEB %d EC %d to the used tree",
1965                                       e->pnum, e->ec);
1966                                wl_tree_add(e, &ubi->used);
1967                        } else {
1968                                dbg_wl("add PEB %d EC %d to the scrub tree",
1969                                       e->pnum, e->ec);
1970                                wl_tree_add(e, &ubi->scrub);
1971                        }
1972
1973                        found_pebs++;
1974                }
1975        }
1976
1977        dbg_wl("found %i PEBs", found_pebs);
1978
1979        if (ubi->fm)
1980                ubi_assert(ubi->good_peb_count ==
1981                           found_pebs + ubi->fm->used_blocks);
1982        else
1983                ubi_assert(ubi->good_peb_count == found_pebs);
1984
1985        reserved_pebs = WL_RESERVED_PEBS;
1986#ifdef CONFIG_MTD_UBI_FASTMAP
1987        /* Reserve enough LEBs to store two fastmaps. */
1988        reserved_pebs += (ubi->fm_size / ubi->leb_size) * 2;
1989#endif
1990
1991        if (ubi->avail_pebs < reserved_pebs) {
1992                ubi_err("not enough physical eraseblocks (%d, need %d)",
1993                        ubi->avail_pebs, reserved_pebs);
1994                if (ubi->corr_peb_count)
1995                        ubi_err("%d PEBs are corrupted and not used",
1996                                ubi->corr_peb_count);
1997                goto out_free;
1998        }
1999        ubi->avail_pebs -= reserved_pebs;
2000        ubi->rsvd_pebs += reserved_pebs;
2001
2002        /* Schedule wear-leveling if needed */
2003        err = ensure_wear_leveling(ubi, 0);
2004        if (err)
2005                goto out_free;
2006
2007        return 0;
2008
2009out_free:
2010        cancel_pending(ubi);
2011        tree_destroy(&ubi->used);
2012        tree_destroy(&ubi->free);
2013        tree_destroy(&ubi->scrub);
2014        kfree(ubi->lookuptbl);
2015        return err;
2016}
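
/*
 * A worked example of the reservation arithmetic above (hypothetical
 * geometry): with CONFIG_MTD_UBI_FASTMAP enabled and a fastmap of
 * @ubi->fm_size equal to 4 * @ubi->leb_size, 8 extra PEBs are reserved for
 * the two fastmap copies on top of WL_RESERVED_PEBS. Attaching fails with
 * the "not enough physical eraseblocks" error above if @ubi->avail_pebs
 * cannot cover that total.
 */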
2017
2018/**
2019 * protection_queue_destroy - destroy the protection queue.
2020 * @ubi: UBI device description object
2021 */
2022static void protection_queue_destroy(struct ubi_device *ubi)
2023{
2024        int i;
2025        struct ubi_wl_entry *e, *tmp;
2026
2027        for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i) {
2028                list_for_each_entry_safe(e, tmp, &ubi->pq[i], u.list) {
2029                        list_del(&e->u.list);
2030                        kmem_cache_free(ubi_wl_entry_slab, e);
2031                }
2032        }
2033}
2034
2035/**
2036 * ubi_wl_close - close the wear-leveling sub-system.
2037 * @ubi: UBI device description object
2038 */
2039void ubi_wl_close(struct ubi_device *ubi)
2040{
2041        dbg_wl("close the WL sub-system");
2042        cancel_pending(ubi);
2043        protection_queue_destroy(ubi);
2044        tree_destroy(&ubi->used);
2045        tree_destroy(&ubi->erroneous);
2046        tree_destroy(&ubi->free);
2047        tree_destroy(&ubi->scrub);
2048        kfree(ubi->lookuptbl);
2049}
2050
2051/**
2052 * self_check_ec - make sure that the erase counter of a PEB is correct.
2053 * @ubi: UBI device description object
2054 * @pnum: the physical eraseblock number to check
2055 * @ec: the erase counter to check
2056 *
2057 * This function returns zero if the erase counter of physical eraseblock @pnum
2058 * is equivalent to @ec, %1 if not, and a negative error code in case of
2059 * failure.
2060 */
2061static int self_check_ec(struct ubi_device *ubi, int pnum, int ec)
2062{
2063        int err;
2064        long long read_ec;
2065        struct ubi_ec_hdr *ec_hdr;
2066
2067        if (!ubi_dbg_chk_gen(ubi))
2068                return 0;
2069
2070        ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS);
2071        if (!ec_hdr)
2072                return -ENOMEM;
2073
2074        err = ubi_io_read_ec_hdr(ubi, pnum, ec_hdr, 0);
2075        if (err && err != UBI_IO_BITFLIPS) {
2076                /* The header does not have to exist */
2077                err = 0;
2078                goto out_free;
2079        }
2080
2081        read_ec = be64_to_cpu(ec_hdr->ec);
2082        if (ec != read_ec && read_ec - ec > 1) {
2083                ubi_err("self-check failed for PEB %d", pnum);
2084                ubi_err("read EC is %lld, should be %d", read_ec, ec);
2085                dump_stack();
2086                err = 1;
2087        } else
2088                err = 0;
2089
2090out_free:
2091        kfree(ec_hdr);
2092        return err;
2093}
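
/*
 * A worked example of the tolerance in the check above (hypothetical
 * numbers): with an expected counter of 100, an on-flash value of 100 or
 * 101 passes, while 102 triggers the "self-check failed" error, because the
 * check only complains when the on-flash counter is more than one ahead of
 * the expected value.
 */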
2094
2095/**
2096 * self_check_in_wl_tree - check that wear-leveling entry is in WL RB-tree.
2097 * @ubi: UBI device description object
2098 * @e: the wear-leveling entry to check
2099 * @root: the root of the tree
2100 *
2101 * This function returns zero if @e is in the @root RB-tree and %-EINVAL if it
2102 * is not.
2103 */
2104static int self_check_in_wl_tree(const struct ubi_device *ubi,
2105                                 struct ubi_wl_entry *e, struct rb_root *root)
2106{
2107        if (!ubi_dbg_chk_gen(ubi))
2108                return 0;
2109
2110        if (in_wl_tree(e, root))
2111                return 0;
2112
2113        ubi_err("self-check failed for PEB %d, EC %d, RB-tree %p",
2114                e->pnum, e->ec, root);
2115        dump_stack();
2116        return -EINVAL;
2117}
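
/*
 * A minimal usage sketch: callers invoke this check right before removing
 * an entry from a tree, as ubi_wl_put_peb() does:
 *
 *	self_check_in_wl_tree(ubi, e, &ubi->used);
 *	rb_erase(&e->u.rb, &ubi->used);
 */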
2118
2119/**
2120 * self_check_in_pq - check if wear-leveling entry is in the protection
2121 *                        queue.
2122 * @ubi: UBI device description object
2123 * @e: the wear-leveling entry to check
2124 *
2125 * This function returns zero if @e is in @ubi->pq and %-EINVAL if it is not.
2126 */
2127static int self_check_in_pq(const struct ubi_device *ubi,
2128                            struct ubi_wl_entry *e)
2129{
2130        struct ubi_wl_entry *p;
2131        int i;
2132
2133        if (!ubi_dbg_chk_gen(ubi))
2134                return 0;
2135
2136        for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i)
2137                list_for_each_entry(p, &ubi->pq[i], u.list)
2138                        if (p == e)
2139                                return 0;
2140
2141        ubi_err("self-check failed for PEB %d, EC %d, Protect queue",
2142                e->pnum, e->ec);
2143        dump_stack();
2144        return -EINVAL;
2145}
2146