linux/drivers/mtd/ubi/eba.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) International Business Machines Corp., 2006
   3 *
   4 * This program is free software; you can redistribute it and/or modify
   5 * it under the terms of the GNU General Public License as published by
   6 * the Free Software Foundation; either version 2 of the License, or
   7 * (at your option) any later version.
   8 *
   9 * This program is distributed in the hope that it will be useful,
  10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
  12 * the GNU General Public License for more details.
  13 *
  14 * You should have received a copy of the GNU General Public License
  15 * along with this program; if not, write to the Free Software
  16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  17 *
  18 * Author: Artem Bityutskiy (Битюцкий Артём)
  19 */
  20
  21/*
  22 * The UBI Eraseblock Association (EBA) unit.
  23 *
  24 * This unit is responsible for I/O to/from logical eraseblock.
  25 *
  26 * Although in this implementation the EBA table is fully kept and managed in
  27 * RAM, which assumes poor scalability, it might be (partially) maintained on
  28 * flash in future implementations.
  29 *
  30 * The EBA unit implements per-logical eraseblock locking. Before accessing a
  31 * logical eraseblock it is locked for reading or writing. The per-logical
  32 * eraseblock locking is implemented by means of the lock tree. The lock tree
  33 * is an RB-tree which refers all the currently locked logical eraseblocks. The
  34 * lock tree elements are &struct ltree_entry objects. They are indexed by
  35 * (@vol_id, @lnum) pairs.
  36 *
  37 * EBA also maintains the global sequence counter which is incremented each
  38 * time a logical eraseblock is mapped to a physical eraseblock and it is
  39 * stored in the volume identifier header. This means that each VID header has
  40 * a unique sequence number. The sequence number is only increased and we
  41 * assume 64 bits is enough to never overflow.
  42 */
  43
  44#include <linux/slab.h>
  45#include <linux/crc32.h>
  46#include <linux/err.h>
  47#include "ubi.h"
  48
/*
 * Number of physical eraseblocks reserved for the atomic LEB change
 * operation.
 */
#define EBA_RESERVED_PEBS 1
  51
/**
 * struct ltree_entry - an entry in the lock tree.
 * @rb: links RB-tree nodes
 * @vol_id: volume ID of the locked logical eraseblock
 * @lnum: locked logical eraseblock number
 * @users: how many tasks are using this logical eraseblock or wait for it
 * @mutex: read/write mutex to implement read/write access serialization to
 * the (@vol_id, @lnum) logical eraseblock
 *
 * When a logical eraseblock is being locked, the corresponding
 * &struct ltree_entry object is inserted into the lock tree (@ubi->ltree).
 * The tree is ordered by @vol_id first and by @lnum second. The entry is
 * removed from the tree and freed when @users drops to zero in one of the
 * unlock helpers.
 */
struct ltree_entry {
	struct rb_node rb;		/* node in @ubi->ltree */
	int vol_id;			/* primary sort key */
	int lnum;			/* secondary sort key */
	int users;			/* modified under @ubi->ltree_lock */
	struct rw_semaphore mutex;	/* the per-LEB read/write lock itself */
};
  71
/*
 * Slab cache for lock-tree entries (&struct ltree_entry objects are
 * allocated from and returned to this cache).
 */
static struct kmem_cache *ltree_slab;
  74
  75/**
  76 * next_sqnum - get next sequence number.
  77 * @ubi: UBI device description object
  78 *
  79 * This function returns next sequence number to use, which is just the current
  80 * global sequence counter value. It also increases the global sequence
  81 * counter.
  82 */
  83static unsigned long long next_sqnum(struct ubi_device *ubi)
  84{
  85        unsigned long long sqnum;
  86
  87        spin_lock(&ubi->ltree_lock);
  88        sqnum = ubi->global_sqnum++;
  89        spin_unlock(&ubi->ltree_lock);
  90
  91        return sqnum;
  92}
  93
  94/**
  95 * ubi_get_compat - get compatibility flags of a volume.
  96 * @ubi: UBI device description object
  97 * @vol_id: volume ID
  98 *
  99 * This function returns compatibility flags for an internal volume. User
 100 * volumes have no compatibility flags, so %0 is returned.
 101 */
 102static int ubi_get_compat(const struct ubi_device *ubi, int vol_id)
 103{
 104        if (vol_id == UBI_LAYOUT_VOL_ID)
 105                return UBI_LAYOUT_VOLUME_COMPAT;
 106        return 0;
 107}
 108
 109/**
 110 * ltree_lookup - look up the lock tree.
 111 * @ubi: UBI device description object
 112 * @vol_id: volume ID
 113 * @lnum: logical eraseblock number
 114 *
 115 * This function returns a pointer to the corresponding &struct ltree_entry
 116 * object if the logical eraseblock is locked and %NULL if it is not.
 117 * @ubi->ltree_lock has to be locked.
 118 */
 119static struct ltree_entry *ltree_lookup(struct ubi_device *ubi, int vol_id,
 120                                        int lnum)
 121{
 122        struct rb_node *p;
 123
 124        p = ubi->ltree.rb_node;
 125        while (p) {
 126                struct ltree_entry *le;
 127
 128                le = rb_entry(p, struct ltree_entry, rb);
 129
 130                if (vol_id < le->vol_id)
 131                        p = p->rb_left;
 132                else if (vol_id > le->vol_id)
 133                        p = p->rb_right;
 134                else {
 135                        if (lnum < le->lnum)
 136                                p = p->rb_left;
 137                        else if (lnum > le->lnum)
 138                                p = p->rb_right;
 139                        else
 140                                return le;
 141                }
 142        }
 143
 144        return NULL;
 145}
 146
 147/**
 148 * ltree_add_entry - add new entry to the lock tree.
 149 * @ubi: UBI device description object
 150 * @vol_id: volume ID
 151 * @lnum: logical eraseblock number
 152 *
 153 * This function adds new entry for logical eraseblock (@vol_id, @lnum) to the
 154 * lock tree. If such entry is already there, its usage counter is increased.
 155 * Returns pointer to the lock tree entry or %-ENOMEM if memory allocation
 156 * failed.
 157 */
 158static struct ltree_entry *ltree_add_entry(struct ubi_device *ubi, int vol_id,
 159                                           int lnum)
 160{
 161        struct ltree_entry *le, *le1, *le_free;
 162
 163        le = kmem_cache_alloc(ltree_slab, GFP_NOFS);
 164        if (!le)
 165                return ERR_PTR(-ENOMEM);
 166
 167        le->vol_id = vol_id;
 168        le->lnum = lnum;
 169
 170        spin_lock(&ubi->ltree_lock);
 171        le1 = ltree_lookup(ubi, vol_id, lnum);
 172
 173        if (le1) {
 174                /*
 175                 * This logical eraseblock is already locked. The newly
 176                 * allocated lock entry is not needed.
 177                 */
 178                le_free = le;
 179                le = le1;
 180        } else {
 181                struct rb_node **p, *parent = NULL;
 182
 183                /*
 184                 * No lock entry, add the newly allocated one to the
 185                 * @ubi->ltree RB-tree.
 186                 */
 187                le_free = NULL;
 188
 189                p = &ubi->ltree.rb_node;
 190                while (*p) {
 191                        parent = *p;
 192                        le1 = rb_entry(parent, struct ltree_entry, rb);
 193
 194                        if (vol_id < le1->vol_id)
 195                                p = &(*p)->rb_left;
 196                        else if (vol_id > le1->vol_id)
 197                                p = &(*p)->rb_right;
 198                        else {
 199                                ubi_assert(lnum != le1->lnum);
 200                                if (lnum < le1->lnum)
 201                                        p = &(*p)->rb_left;
 202                                else
 203                                        p = &(*p)->rb_right;
 204                        }
 205                }
 206
 207                rb_link_node(&le->rb, parent, p);
 208                rb_insert_color(&le->rb, &ubi->ltree);
 209        }
 210        le->users += 1;
 211        spin_unlock(&ubi->ltree_lock);
 212
 213        if (le_free)
 214                kmem_cache_free(ltree_slab, le_free);
 215
 216        return le;
 217}
 218
 219/**
 220 * leb_read_lock - lock logical eraseblock for reading.
 221 * @ubi: UBI device description object
 222 * @vol_id: volume ID
 223 * @lnum: logical eraseblock number
 224 *
 225 * This function locks a logical eraseblock for reading. Returns zero in case
 226 * of success and a negative error code in case of failure.
 227 */
 228static int leb_read_lock(struct ubi_device *ubi, int vol_id, int lnum)
 229{
 230        struct ltree_entry *le;
 231
 232        le = ltree_add_entry(ubi, vol_id, lnum);
 233        if (IS_ERR(le))
 234                return PTR_ERR(le);
 235        down_read(&le->mutex);
 236        return 0;
 237}
 238
 239/**
 240 * leb_read_unlock - unlock logical eraseblock.
 241 * @ubi: UBI device description object
 242 * @vol_id: volume ID
 243 * @lnum: logical eraseblock number
 244 */
 245static void leb_read_unlock(struct ubi_device *ubi, int vol_id, int lnum)
 246{
 247        int free = 0;
 248        struct ltree_entry *le;
 249
 250        spin_lock(&ubi->ltree_lock);
 251        le = ltree_lookup(ubi, vol_id, lnum);
 252        le->users -= 1;
 253        ubi_assert(le->users >= 0);
 254        if (le->users == 0) {
 255                rb_erase(&le->rb, &ubi->ltree);
 256                free = 1;
 257        }
 258        spin_unlock(&ubi->ltree_lock);
 259
 260        up_read(&le->mutex);
 261        if (free)
 262                kmem_cache_free(ltree_slab, le);
 263}
 264
 265/**
 266 * leb_write_lock - lock logical eraseblock for writing.
 267 * @ubi: UBI device description object
 268 * @vol_id: volume ID
 269 * @lnum: logical eraseblock number
 270 *
 271 * This function locks a logical eraseblock for writing. Returns zero in case
 272 * of success and a negative error code in case of failure.
 273 */
 274static int leb_write_lock(struct ubi_device *ubi, int vol_id, int lnum)
 275{
 276        struct ltree_entry *le;
 277
 278        le = ltree_add_entry(ubi, vol_id, lnum);
 279        if (IS_ERR(le))
 280                return PTR_ERR(le);
 281        down_write(&le->mutex);
 282        return 0;
 283}
 284
 285/**
 286 * leb_write_unlock - unlock logical eraseblock.
 287 * @ubi: UBI device description object
 288 * @vol_id: volume ID
 289 * @lnum: logical eraseblock number
 290 */
 291static void leb_write_unlock(struct ubi_device *ubi, int vol_id, int lnum)
 292{
 293        int free;
 294        struct ltree_entry *le;
 295
 296        spin_lock(&ubi->ltree_lock);
 297        le = ltree_lookup(ubi, vol_id, lnum);
 298        le->users -= 1;
 299        ubi_assert(le->users >= 0);
 300        if (le->users == 0) {
 301                rb_erase(&le->rb, &ubi->ltree);
 302                free = 1;
 303        } else
 304                free = 0;
 305        spin_unlock(&ubi->ltree_lock);
 306
 307        up_write(&le->mutex);
 308        if (free)
 309                kmem_cache_free(ltree_slab, le);
 310}
 311
 312/**
 313 * ubi_eba_unmap_leb - un-map logical eraseblock.
 314 * @ubi: UBI device description object
 315 * @vol_id: volume ID
 316 * @lnum: logical eraseblock number
 317 *
 318 * This function un-maps logical eraseblock @lnum and schedules corresponding
 319 * physical eraseblock for erasure. Returns zero in case of success and a
 320 * negative error code in case of failure.
 321 */
 322int ubi_eba_unmap_leb(struct ubi_device *ubi, int vol_id, int lnum)
 323{
 324        int idx = vol_id2idx(ubi, vol_id), err, pnum;
 325        struct ubi_volume *vol = ubi->volumes[idx];
 326
 327        if (ubi->ro_mode)
 328                return -EROFS;
 329
 330        err = leb_write_lock(ubi, vol_id, lnum);
 331        if (err)
 332                return err;
 333
 334        pnum = vol->eba_tbl[lnum];
 335        if (pnum < 0)
 336                /* This logical eraseblock is already unmapped */
 337                goto out_unlock;
 338
 339        dbg_eba("erase LEB %d:%d, PEB %d", vol_id, lnum, pnum);
 340
 341        vol->eba_tbl[lnum] = UBI_LEB_UNMAPPED;
 342        err = ubi_wl_put_peb(ubi, pnum, 0);
 343
 344out_unlock:
 345        leb_write_unlock(ubi, vol_id, lnum);
 346        return err;
 347}
 348
 349/**
 350 * ubi_eba_read_leb - read data.
 351 * @ubi: UBI device description object
 352 * @vol_id: volume ID
 353 * @lnum: logical eraseblock number
 354 * @buf: buffer to store the read data
 355 * @offset: offset from where to read
 356 * @len: how many bytes to read
 357 * @check: data CRC check flag
 358 *
 359 * If the logical eraseblock @lnum is unmapped, @buf is filled with 0xFF
 360 * bytes. The @check flag only makes sense for static volumes and forces
 361 * eraseblock data CRC checking.
 362 *
 363 * In case of success this function returns zero. In case of a static volume,
 364 * if data CRC mismatches - %-EBADMSG is returned. %-EBADMSG may also be
 365 * returned for any volume type if an ECC error was detected by the MTD device
 366 * driver. Other negative error cored may be returned in case of other errors.
 367 */
 368int ubi_eba_read_leb(struct ubi_device *ubi, int vol_id, int lnum, void *buf,
 369                     int offset, int len, int check)
 370{
 371        int err, pnum, scrub = 0, idx = vol_id2idx(ubi, vol_id);
 372        struct ubi_vid_hdr *vid_hdr;
 373        struct ubi_volume *vol = ubi->volumes[idx];
 374        uint32_t uninitialized_var(crc);
 375
 376        err = leb_read_lock(ubi, vol_id, lnum);
 377        if (err)
 378                return err;
 379
 380        pnum = vol->eba_tbl[lnum];
 381        if (pnum < 0) {
 382                /*
 383                 * The logical eraseblock is not mapped, fill the whole buffer
 384                 * with 0xFF bytes. The exception is static volumes for which
 385                 * it is an error to read unmapped logical eraseblocks.
 386                 */
 387                dbg_eba("read %d bytes from offset %d of LEB %d:%d (unmapped)",
 388                        len, offset, vol_id, lnum);
 389                leb_read_unlock(ubi, vol_id, lnum);
 390                ubi_assert(vol->vol_type != UBI_STATIC_VOLUME);
 391                memset(buf, 0xFF, len);
 392                return 0;
 393        }
 394
 395        dbg_eba("read %d bytes from offset %d of LEB %d:%d, PEB %d",
 396                len, offset, vol_id, lnum, pnum);
 397
 398        if (vol->vol_type == UBI_DYNAMIC_VOLUME)
 399                check = 0;
 400
 401retry:
 402        if (check) {
 403                vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS);
 404                if (!vid_hdr) {
 405                        err = -ENOMEM;
 406                        goto out_unlock;
 407                }
 408
 409                err = ubi_io_read_vid_hdr(ubi, pnum, vid_hdr, 1);
 410                if (err && err != UBI_IO_BITFLIPS) {
 411                        if (err > 0) {
 412                                /*
 413                                 * The header is either absent or corrupted.
 414                                 * The former case means there is a bug -
 415                                 * switch to read-only mode just in case.
 416                                 * The latter case means a real corruption - we
 417                                 * may try to recover data. FIXME: but this is
 418                                 * not implemented.
 419                                 */
 420                                if (err == UBI_IO_BAD_VID_HDR) {
 421                                        ubi_warn("bad VID header at PEB %d, LEB"
 422                                                 "%d:%d", pnum, vol_id, lnum);
 423                                        err = -EBADMSG;
 424                                } else
 425                                        ubi_ro_mode(ubi);
 426                        }
 427                        goto out_free;
 428                } else if (err == UBI_IO_BITFLIPS)
 429                        scrub = 1;
 430
 431                ubi_assert(lnum < be32_to_cpu(vid_hdr->used_ebs));
 432                ubi_assert(len == be32_to_cpu(vid_hdr->data_size));
 433
 434                crc = be32_to_cpu(vid_hdr->data_crc);
 435                ubi_free_vid_hdr(ubi, vid_hdr);
 436        }
 437
 438        err = ubi_io_read_data(ubi, buf, pnum, offset, len);
 439        if (err) {
 440                if (err == UBI_IO_BITFLIPS) {
 441                        scrub = 1;
 442                        err = 0;
 443                } else if (err == -EBADMSG) {
 444                        if (vol->vol_type == UBI_DYNAMIC_VOLUME)
 445                                goto out_unlock;
 446                        scrub = 1;
 447                        if (!check) {
 448                                ubi_msg("force data checking");
 449                                check = 1;
 450                                goto retry;
 451                        }
 452                } else
 453                        goto out_unlock;
 454        }
 455
 456        if (check) {
 457                uint32_t crc1 = crc32(UBI_CRC32_INIT, buf, len);
 458                if (crc1 != crc) {
 459                        ubi_warn("CRC error: calculated %#08x, must be %#08x",
 460                                 crc1, crc);
 461                        err = -EBADMSG;
 462                        goto out_unlock;
 463                }
 464        }
 465
 466        if (scrub)
 467                err = ubi_wl_scrub_peb(ubi, pnum);
 468
 469        leb_read_unlock(ubi, vol_id, lnum);
 470        return err;
 471
 472out_free:
 473        ubi_free_vid_hdr(ubi, vid_hdr);
 474out_unlock:
 475        leb_read_unlock(ubi, vol_id, lnum);
 476        return err;
 477}
 478
 479/**
 480 * recover_peb - recover from write failure.
 481 * @ubi: UBI device description object
 482 * @pnum: the physical eraseblock to recover
 483 * @vol_id: volume ID
 484 * @lnum: logical eraseblock number
 485 * @buf: data which was not written because of the write failure
 486 * @offset: offset of the failed write
 487 * @len: how many bytes should have been written
 488 *
 489 * This function is called in case of a write failure and moves all good data
 490 * from the potentially bad physical eraseblock to a good physical eraseblock.
 491 * This function also writes the data which was not written due to the failure.
 492 * Returns new physical eraseblock number in case of success, and a negative
 493 * error code in case of failure.
 494 */
 495static int recover_peb(struct ubi_device *ubi, int pnum, int vol_id, int lnum,
 496                       const void *buf, int offset, int len)
 497{
 498        int err, idx = vol_id2idx(ubi, vol_id), new_pnum, data_size, tries = 0;
 499        struct ubi_volume *vol = ubi->volumes[idx];
 500        struct ubi_vid_hdr *vid_hdr;
 501
 502        vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS);
 503        if (!vid_hdr) {
 504                return -ENOMEM;
 505        }
 506
 507        mutex_lock(&ubi->buf_mutex);
 508
 509retry:
 510        new_pnum = ubi_wl_get_peb(ubi, UBI_UNKNOWN);
 511        if (new_pnum < 0) {
 512                mutex_unlock(&ubi->buf_mutex);
 513                ubi_free_vid_hdr(ubi, vid_hdr);
 514                return new_pnum;
 515        }
 516
 517        ubi_msg("recover PEB %d, move data to PEB %d", pnum, new_pnum);
 518
 519        err = ubi_io_read_vid_hdr(ubi, pnum, vid_hdr, 1);
 520        if (err && err != UBI_IO_BITFLIPS) {
 521                if (err > 0)
 522                        err = -EIO;
 523                goto out_put;
 524        }
 525
 526        vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi));
 527        err = ubi_io_write_vid_hdr(ubi, new_pnum, vid_hdr);
 528        if (err)
 529                goto write_error;
 530
 531        data_size = offset + len;
 532        memset(ubi->peb_buf1 + offset, 0xFF, len);
 533
 534        /* Read everything before the area where the write failure happened */
 535        if (offset > 0) {
 536                err = ubi_io_read_data(ubi, ubi->peb_buf1, pnum, 0, offset);
 537                if (err && err != UBI_IO_BITFLIPS)
 538                        goto out_put;
 539        }
 540
 541        memcpy(ubi->peb_buf1 + offset, buf, len);
 542
 543        err = ubi_io_write_data(ubi, ubi->peb_buf1, new_pnum, 0, data_size);
 544        if (err)
 545                goto write_error;
 546
 547        mutex_unlock(&ubi->buf_mutex);
 548        ubi_free_vid_hdr(ubi, vid_hdr);
 549
 550        vol->eba_tbl[lnum] = new_pnum;
 551        ubi_wl_put_peb(ubi, pnum, 1);
 552
 553        ubi_msg("data was successfully recovered");
 554        return 0;
 555
 556out_put:
 557        mutex_unlock(&ubi->buf_mutex);
 558        ubi_wl_put_peb(ubi, new_pnum, 1);
 559        ubi_free_vid_hdr(ubi, vid_hdr);
 560        return err;
 561
 562write_error:
 563        /*
 564         * Bad luck? This physical eraseblock is bad too? Crud. Let's try to
 565         * get another one.
 566         */
 567        ubi_warn("failed to write to PEB %d", new_pnum);
 568        ubi_wl_put_peb(ubi, new_pnum, 1);
 569        if (++tries > UBI_IO_RETRIES) {
 570                mutex_unlock(&ubi->buf_mutex);
 571                ubi_free_vid_hdr(ubi, vid_hdr);
 572                return err;
 573        }
 574        ubi_msg("try again");
 575        goto retry;
 576}
 577
 578/**
 579 * ubi_eba_write_leb - write data to dynamic volume.
 580 * @ubi: UBI device description object
 581 * @vol_id: volume ID
 582 * @lnum: logical eraseblock number
 583 * @buf: the data to write
 584 * @offset: offset within the logical eraseblock where to write
 585 * @len: how many bytes to write
 586 * @dtype: data type
 587 *
 588 * This function writes data to logical eraseblock @lnum of a dynamic volume
 589 * @vol_id. Returns zero in case of success and a negative error code in case
 590 * of failure. In case of error, it is possible that something was still
 591 * written to the flash media, but may be some garbage.
 592 */
 593int ubi_eba_write_leb(struct ubi_device *ubi, int vol_id, int lnum,
 594                      const void *buf, int offset, int len, int dtype)
 595{
 596        int idx = vol_id2idx(ubi, vol_id), err, pnum, tries = 0;
 597        struct ubi_volume *vol = ubi->volumes[idx];
 598        struct ubi_vid_hdr *vid_hdr;
 599
 600        if (ubi->ro_mode)
 601                return -EROFS;
 602
 603        err = leb_write_lock(ubi, vol_id, lnum);
 604        if (err)
 605                return err;
 606
 607        pnum = vol->eba_tbl[lnum];
 608        if (pnum >= 0) {
 609                dbg_eba("write %d bytes at offset %d of LEB %d:%d, PEB %d",
 610                        len, offset, vol_id, lnum, pnum);
 611
 612                err = ubi_io_write_data(ubi, buf, pnum, offset, len);
 613                if (err) {
 614                        ubi_warn("failed to write data to PEB %d", pnum);
 615                        if (err == -EIO && ubi->bad_allowed)
 616                                err = recover_peb(ubi, pnum, vol_id, lnum, buf, offset, len);
 617                        if (err)
 618                                ubi_ro_mode(ubi);
 619                }
 620                leb_write_unlock(ubi, vol_id, lnum);
 621                return err;
 622        }
 623
 624        /*
 625         * The logical eraseblock is not mapped. We have to get a free physical
 626         * eraseblock and write the volume identifier header there first.
 627         */
 628        vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS);
 629        if (!vid_hdr) {
 630                leb_write_unlock(ubi, vol_id, lnum);
 631                return -ENOMEM;
 632        }
 633
 634        vid_hdr->vol_type = UBI_VID_DYNAMIC;
 635        vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi));
 636        vid_hdr->vol_id = cpu_to_be32(vol_id);
 637        vid_hdr->lnum = cpu_to_be32(lnum);
 638        vid_hdr->compat = ubi_get_compat(ubi, vol_id);
 639        vid_hdr->data_pad = cpu_to_be32(vol->data_pad);
 640
 641retry:
 642        pnum = ubi_wl_get_peb(ubi, dtype);
 643        if (pnum < 0) {
 644                ubi_free_vid_hdr(ubi, vid_hdr);
 645                leb_write_unlock(ubi, vol_id, lnum);
 646                return pnum;
 647        }
 648
 649        dbg_eba("write VID hdr and %d bytes at offset %d of LEB %d:%d, PEB %d",
 650                len, offset, vol_id, lnum, pnum);
 651
 652        err = ubi_io_write_vid_hdr(ubi, pnum, vid_hdr);
 653        if (err) {
 654                ubi_warn("failed to write VID header to LEB %d:%d, PEB %d",
 655                         vol_id, lnum, pnum);
 656                goto write_error;
 657        }
 658
 659        err = ubi_io_write_data(ubi, buf, pnum, offset, len);
 660        if (err) {
 661                ubi_warn("failed to write %d bytes at offset %d of LEB %d:%d, "
 662                         "PEB %d", len, offset, vol_id, lnum, pnum);
 663                goto write_error;
 664        }
 665
 666        vol->eba_tbl[lnum] = pnum;
 667
 668        leb_write_unlock(ubi, vol_id, lnum);
 669        ubi_free_vid_hdr(ubi, vid_hdr);
 670        return 0;
 671
 672write_error:
 673        if (err != -EIO || !ubi->bad_allowed) {
 674                ubi_ro_mode(ubi);
 675                leb_write_unlock(ubi, vol_id, lnum);
 676                ubi_free_vid_hdr(ubi, vid_hdr);
 677                return err;
 678        }
 679
 680        /*
 681         * Fortunately, this is the first write operation to this physical
 682         * eraseblock, so just put it and request a new one. We assume that if
 683         * this physical eraseblock went bad, the erase code will handle that.
 684         */
 685        err = ubi_wl_put_peb(ubi, pnum, 1);
 686        if (err || ++tries > UBI_IO_RETRIES) {
 687                ubi_ro_mode(ubi);
 688                leb_write_unlock(ubi, vol_id, lnum);
 689                ubi_free_vid_hdr(ubi, vid_hdr);
 690                return err;
 691        }
 692
 693        vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi));
 694        ubi_msg("try another PEB");
 695        goto retry;
 696}
 697
 698/**
 699 * ubi_eba_write_leb_st - write data to static volume.
 700 * @ubi: UBI device description object
 701 * @vol_id: volume ID
 702 * @lnum: logical eraseblock number
 703 * @buf: data to write
 704 * @len: how many bytes to write
 705 * @dtype: data type
 706 * @used_ebs: how many logical eraseblocks will this volume contain
 707 *
 708 * This function writes data to logical eraseblock @lnum of static volume
 709 * @vol_id. The @used_ebs argument should contain total number of logical
 710 * eraseblock in this static volume.
 711 *
 712 * When writing to the last logical eraseblock, the @len argument doesn't have
 713 * to be aligned to the minimal I/O unit size. Instead, it has to be equivalent
 714 * to the real data size, although the @buf buffer has to contain the
 715 * alignment. In all other cases, @len has to be aligned.
 716 *
 717 * It is prohibited to write more then once to logical eraseblocks of static
 718 * volumes. This function returns zero in case of success and a negative error
 719 * code in case of failure.
 720 */
 721int ubi_eba_write_leb_st(struct ubi_device *ubi, int vol_id, int lnum,
 722                         const void *buf, int len, int dtype, int used_ebs)
 723{
 724        int err, pnum, tries = 0, data_size = len;
 725        int idx = vol_id2idx(ubi, vol_id);
 726        struct ubi_volume *vol = ubi->volumes[idx];
 727        struct ubi_vid_hdr *vid_hdr;
 728        uint32_t crc;
 729
 730        if (ubi->ro_mode)
 731                return -EROFS;
 732
 733        if (lnum == used_ebs - 1)
 734                /* If this is the last LEB @len may be unaligned */
 735                len = ALIGN(data_size, ubi->min_io_size);
 736        else
 737                ubi_assert(len % ubi->min_io_size == 0);
 738
 739        vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS);
 740        if (!vid_hdr)
 741                return -ENOMEM;
 742
 743        err = leb_write_lock(ubi, vol_id, lnum);
 744        if (err) {
 745                ubi_free_vid_hdr(ubi, vid_hdr);
 746                return err;
 747        }
 748
 749        vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi));
 750        vid_hdr->vol_id = cpu_to_be32(vol_id);
 751        vid_hdr->lnum = cpu_to_be32(lnum);
 752        vid_hdr->compat = ubi_get_compat(ubi, vol_id);
 753        vid_hdr->data_pad = cpu_to_be32(vol->data_pad);
 754
 755        crc = crc32(UBI_CRC32_INIT, buf, data_size);
 756        vid_hdr->vol_type = UBI_VID_STATIC;
 757        vid_hdr->data_size = cpu_to_be32(data_size);
 758        vid_hdr->used_ebs = cpu_to_be32(used_ebs);
 759        vid_hdr->data_crc = cpu_to_be32(crc);
 760
 761retry:
 762        pnum = ubi_wl_get_peb(ubi, dtype);
 763        if (pnum < 0) {
 764                ubi_free_vid_hdr(ubi, vid_hdr);
 765                leb_write_unlock(ubi, vol_id, lnum);
 766                return pnum;
 767        }
 768
 769        dbg_eba("write VID hdr and %d bytes at LEB %d:%d, PEB %d, used_ebs %d",
 770                len, vol_id, lnum, pnum, used_ebs);
 771
 772        err = ubi_io_write_vid_hdr(ubi, pnum, vid_hdr);
 773        if (err) {
 774                ubi_warn("failed to write VID header to LEB %d:%d, PEB %d",
 775                         vol_id, lnum, pnum);
 776                goto write_error;
 777        }
 778
 779        err = ubi_io_write_data(ubi, buf, pnum, 0, len);
 780        if (err) {
 781                ubi_warn("failed to write %d bytes of data to PEB %d",
 782                         len, pnum);
 783                goto write_error;
 784        }
 785
 786        ubi_assert(vol->eba_tbl[lnum] < 0);
 787        vol->eba_tbl[lnum] = pnum;
 788
 789        leb_write_unlock(ubi, vol_id, lnum);
 790        ubi_free_vid_hdr(ubi, vid_hdr);
 791        return 0;
 792
 793write_error:
 794        if (err != -EIO || !ubi->bad_allowed) {
 795                /*
 796                 * This flash device does not admit of bad eraseblocks or
 797                 * something nasty and unexpected happened. Switch to read-only
 798                 * mode just in case.
 799                 */
 800                ubi_ro_mode(ubi);
 801                leb_write_unlock(ubi, vol_id, lnum);
 802                ubi_free_vid_hdr(ubi, vid_hdr);
 803                return err;
 804        }
 805
 806        err = ubi_wl_put_peb(ubi, pnum, 1);
 807        if (err || ++tries > UBI_IO_RETRIES) {
 808                ubi_ro_mode(ubi);
 809                leb_write_unlock(ubi, vol_id, lnum);
 810                ubi_free_vid_hdr(ubi, vid_hdr);
 811                return err;
 812        }
 813
 814        vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi));
 815        ubi_msg("try another PEB");
 816        goto retry;
 817}
 818
 819/*
 820 * ubi_eba_atomic_leb_change - change logical eraseblock atomically.
 821 * @ubi: UBI device description object
 822 * @vol_id: volume ID
 823 * @lnum: logical eraseblock number
 824 * @buf: data to write
 825 * @len: how many bytes to write
 826 * @dtype: data type
 827 *
 828 * This function changes the contents of a logical eraseblock atomically. @buf
 829 * has to contain new logical eraseblock data, and @len - the length of the
 830 * data, which has to be aligned. This function guarantees that in case of an
 831 * unclean reboot the old contents is preserved. Returns zero in case of
 832 * success and a negative error code in case of failure.
 833 *
 834 * UBI reserves one LEB for the "atomic LEB change" operation, so only one
 835 * LEB change may be done at a time. This is ensured by @ubi->alc_mutex.
 836 */
 837int ubi_eba_atomic_leb_change(struct ubi_device *ubi, int vol_id, int lnum,
 838                              const void *buf, int len, int dtype)
 839{
 840        int err, pnum, tries = 0, idx = vol_id2idx(ubi, vol_id);
 841        struct ubi_volume *vol = ubi->volumes[idx];
 842        struct ubi_vid_hdr *vid_hdr;
 843        uint32_t crc;
 844
 845        if (ubi->ro_mode)
 846                return -EROFS;
 847
 848        vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS);
 849        if (!vid_hdr)
 850                return -ENOMEM;
 851
 852        mutex_lock(&ubi->alc_mutex);
 853        err = leb_write_lock(ubi, vol_id, lnum);
 854        if (err)
 855                goto out_mutex;
 856
 857        vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi));
 858        vid_hdr->vol_id = cpu_to_be32(vol_id);
 859        vid_hdr->lnum = cpu_to_be32(lnum);
 860        vid_hdr->compat = ubi_get_compat(ubi, vol_id);
 861        vid_hdr->data_pad = cpu_to_be32(vol->data_pad);
 862
 863        crc = crc32(UBI_CRC32_INIT, buf, len);
 864        vid_hdr->vol_type = UBI_VID_DYNAMIC;
 865        vid_hdr->data_size = cpu_to_be32(len);
 866        vid_hdr->copy_flag = 1;
 867        vid_hdr->data_crc = cpu_to_be32(crc);
 868
 869retry:
 870        pnum = ubi_wl_get_peb(ubi, dtype);
 871        if (pnum < 0) {
 872                err = pnum;
 873                goto out_leb_unlock;
 874        }
 875
 876        dbg_eba("change LEB %d:%d, PEB %d, write VID hdr to PEB %d",
 877                vol_id, lnum, vol->eba_tbl[lnum], pnum);
 878
 879        err = ubi_io_write_vid_hdr(ubi, pnum, vid_hdr);
 880        if (err) {
 881                ubi_warn("failed to write VID header to LEB %d:%d, PEB %d",
 882                         vol_id, lnum, pnum);
 883                goto write_error;
 884        }
 885
 886        err = ubi_io_write_data(ubi, buf, pnum, 0, len);
 887        if (err) {
 888                ubi_warn("failed to write %d bytes of data to PEB %d",
 889                         len, pnum);
 890                goto write_error;
 891        }
 892
 893        if (vol->eba_tbl[lnum] >= 0) {
 894                err = ubi_wl_put_peb(ubi, vol->eba_tbl[lnum], 1);
 895                if (err)
 896                        goto out_leb_unlock;
 897        }
 898
 899        vol->eba_tbl[lnum] = pnum;
 900
 901out_leb_unlock:
 902        leb_write_unlock(ubi, vol_id, lnum);
 903out_mutex:
 904        mutex_unlock(&ubi->alc_mutex);
 905        ubi_free_vid_hdr(ubi, vid_hdr);
 906        return err;
 907
 908write_error:
 909        if (err != -EIO || !ubi->bad_allowed) {
 910                /*
 911                 * This flash device does not admit of bad eraseblocks or
 912                 * something nasty and unexpected happened. Switch to read-only
 913                 * mode just in case.
 914                 */
 915                ubi_ro_mode(ubi);
 916                goto out_leb_unlock;
 917        }
 918
 919        err = ubi_wl_put_peb(ubi, pnum, 1);
 920        if (err || ++tries > UBI_IO_RETRIES) {
 921                ubi_ro_mode(ubi);
 922                goto out_leb_unlock;
 923        }
 924
 925        vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi));
 926        ubi_msg("try another PEB");
 927        goto retry;
 928}
 929
 930/**
 931 * ltree_entry_ctor - lock tree entries slab cache constructor.
 932 * @obj: the lock-tree entry to construct
 933 * @cache: the lock tree entry slab cache
 934 * @flags: constructor flags
 935 */
 936static void ltree_entry_ctor(struct kmem_cache *cache, void *obj)
 937{
 938        struct ltree_entry *le = obj;
 939
 940        le->users = 0;
 941        init_rwsem(&le->mutex);
 942}
 943
 944/**
 945 * ubi_eba_copy_leb - copy logical eraseblock.
 946 * @ubi: UBI device description object
 947 * @from: physical eraseblock number from where to copy
 948 * @to: physical eraseblock number where to copy
 949 * @vid_hdr: VID header of the @from physical eraseblock
 950 *
 951 * This function copies logical eraseblock from physical eraseblock @from to
 952 * physical eraseblock @to. The @vid_hdr buffer may be changed by this
 953 * function. Returns zero in case of success, %UBI_IO_BITFLIPS if the operation
 954 * was canceled because bit-flips were detected at the target PEB, and a
 955 * negative error code in case of failure.
 956 */
 957int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
 958                     struct ubi_vid_hdr *vid_hdr)
 959{
 960        int err, vol_id, lnum, data_size, aldata_size, pnum, idx;
 961        struct ubi_volume *vol;
 962        uint32_t crc;
 963
 964        vol_id = be32_to_cpu(vid_hdr->vol_id);
 965        lnum = be32_to_cpu(vid_hdr->lnum);
 966
 967        dbg_eba("copy LEB %d:%d, PEB %d to PEB %d", vol_id, lnum, from, to);
 968
 969        if (vid_hdr->vol_type == UBI_VID_STATIC) {
 970                data_size = be32_to_cpu(vid_hdr->data_size);
 971                aldata_size = ALIGN(data_size, ubi->min_io_size);
 972        } else
 973                data_size = aldata_size =
 974                            ubi->leb_size - be32_to_cpu(vid_hdr->data_pad);
 975
 976        /*
 977         * We do not want anybody to write to this logical eraseblock while we
 978         * are moving it, so we lock it.
 979         */
 980        err = leb_write_lock(ubi, vol_id, lnum);
 981        if (err)
 982                return err;
 983
 984        mutex_lock(&ubi->buf_mutex);
 985
 986        /*
 987         * But the logical eraseblock might have been put by this time.
 988         * Cancel if it is true.
 989         */
 990        idx = vol_id2idx(ubi, vol_id);
 991
 992        /*
 993         * We may race with volume deletion/re-size, so we have to hold
 994         * @ubi->volumes_lock.
 995         */
 996        spin_lock(&ubi->volumes_lock);
 997        vol = ubi->volumes[idx];
 998        if (!vol) {
 999                dbg_eba("volume %d was removed meanwhile", vol_id);
1000                spin_unlock(&ubi->volumes_lock);
1001                goto out_unlock;
1002        }
1003
1004        pnum = vol->eba_tbl[lnum];
1005        if (pnum != from) {
1006                dbg_eba("LEB %d:%d is no longer mapped to PEB %d, mapped to "
1007                        "PEB %d, cancel", vol_id, lnum, from, pnum);
1008                spin_unlock(&ubi->volumes_lock);
1009                goto out_unlock;
1010        }
1011        spin_unlock(&ubi->volumes_lock);
1012
1013        /* OK, now the LEB is locked and we can safely start moving it */
1014
1015        dbg_eba("read %d bytes of data", aldata_size);
1016        err = ubi_io_read_data(ubi, ubi->peb_buf1, from, 0, aldata_size);
1017        if (err && err != UBI_IO_BITFLIPS) {
1018                ubi_warn("error %d while reading data from PEB %d",
1019                         err, from);
1020                goto out_unlock;
1021        }
1022
1023        /*
1024         * Now we have got to calculate how much data we have to to copy. In
1025         * case of a static volume it is fairly easy - the VID header contains
1026         * the data size. In case of a dynamic volume it is more difficult - we
1027         * have to read the contents, cut 0xFF bytes from the end and copy only
1028         * the first part. We must do this to avoid writing 0xFF bytes as it
1029         * may have some side-effects. And not only this. It is important not
1030         * to include those 0xFFs to CRC because later the they may be filled
1031         * by data.
1032         */
1033        if (vid_hdr->vol_type == UBI_VID_DYNAMIC)
1034                aldata_size = data_size =
1035                        ubi_calc_data_len(ubi, ubi->peb_buf1, data_size);
1036
1037        cond_resched();
1038        crc = crc32(UBI_CRC32_INIT, ubi->peb_buf1, data_size);
1039        cond_resched();
1040
1041        /*
1042         * It may turn out to me that the whole @from physical eraseblock
1043         * contains only 0xFF bytes. Then we have to only write the VID header
1044         * and do not write any data. This also means we should not set
1045         * @vid_hdr->copy_flag, @vid_hdr->data_size, and @vid_hdr->data_crc.
1046         */
1047        if (data_size > 0) {
1048                vid_hdr->copy_flag = 1;
1049                vid_hdr->data_size = cpu_to_be32(data_size);
1050                vid_hdr->data_crc = cpu_to_be32(crc);
1051        }
1052        vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi));
1053
1054        err = ubi_io_write_vid_hdr(ubi, to, vid_hdr);
1055        if (err)
1056                goto out_unlock;
1057
1058        cond_resched();
1059
1060        /* Read the VID header back and check if it was written correctly */
1061        err = ubi_io_read_vid_hdr(ubi, to, vid_hdr, 1);
1062        if (err) {
1063                if (err != UBI_IO_BITFLIPS)
1064                        ubi_warn("cannot read VID header back from PEB %d", to);
1065                goto out_unlock;
1066        }
1067
1068        if (data_size > 0) {
1069                err = ubi_io_write_data(ubi, ubi->peb_buf1, to, 0, aldata_size);
1070                if (err)
1071                        goto out_unlock;
1072
1073                cond_resched();
1074
1075                /*
1076                 * We've written the data and are going to read it back to make
1077                 * sure it was written correctly.
1078                 */
1079
1080                err = ubi_io_read_data(ubi, ubi->peb_buf2, to, 0, aldata_size);
1081                if (err) {
1082                        if (err != UBI_IO_BITFLIPS)
1083                                ubi_warn("cannot read data back from PEB %d",
1084                                         to);
1085                        goto out_unlock;
1086                }
1087
1088                cond_resched();
1089
1090                if (memcmp(ubi->peb_buf1, ubi->peb_buf2, aldata_size)) {
1091                        ubi_warn("read data back from PEB %d - it is different",
1092                                 to);
1093                        goto out_unlock;
1094                }
1095        }
1096
1097        ubi_assert(vol->eba_tbl[lnum] == from);
1098        vol->eba_tbl[lnum] = to;
1099
1100out_unlock:
1101        mutex_unlock(&ubi->buf_mutex);
1102        leb_write_unlock(ubi, vol_id, lnum);
1103        return err;
1104}
1105
1106/**
1107 * ubi_eba_init_scan - initialize the EBA unit using scanning information.
1108 * @ubi: UBI device description object
1109 * @si: scanning information
1110 *
1111 * This function returns zero in case of success and a negative error code in
1112 * case of failure.
1113 */
1114int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si)
1115{
1116        int i, j, err, num_volumes;
1117        struct ubi_scan_volume *sv;
1118        struct ubi_volume *vol;
1119        struct ubi_scan_leb *seb;
1120        struct rb_node *rb;
1121
1122        dbg_eba("initialize EBA unit");
1123
1124        spin_lock_init(&ubi->ltree_lock);
1125        mutex_init(&ubi->alc_mutex);
1126        ubi->ltree = RB_ROOT;
1127
1128        if (ubi_devices_cnt == 0) {
1129                ltree_slab = kmem_cache_create("ubi_ltree_slab",
1130                                               sizeof(struct ltree_entry), 0,
1131                                               0, &ltree_entry_ctor);
1132                if (!ltree_slab)
1133                        return -ENOMEM;
1134        }
1135
1136        ubi->global_sqnum = si->max_sqnum + 1;
1137        num_volumes = ubi->vtbl_slots + UBI_INT_VOL_COUNT;
1138
1139        for (i = 0; i < num_volumes; i++) {
1140                vol = ubi->volumes[i];
1141                if (!vol)
1142                        continue;
1143
1144                cond_resched();
1145
1146                vol->eba_tbl = kmalloc(vol->reserved_pebs * sizeof(int),
1147                                       GFP_KERNEL);
1148                if (!vol->eba_tbl) {
1149                        err = -ENOMEM;
1150                        goto out_free;
1151                }
1152
1153                for (j = 0; j < vol->reserved_pebs; j++)
1154                        vol->eba_tbl[j] = UBI_LEB_UNMAPPED;
1155
1156                sv = ubi_scan_find_sv(si, idx2vol_id(ubi, i));
1157                if (!sv)
1158                        continue;
1159
1160                ubi_rb_for_each_entry(rb, seb, &sv->root, u.rb) {
1161                        if (seb->lnum >= vol->reserved_pebs)
1162                                /*
1163                                 * This may happen in case of an unclean reboot
1164                                 * during re-size.
1165                                 */
1166                                ubi_scan_move_to_list(sv, seb, &si->erase);
1167                        vol->eba_tbl[seb->lnum] = seb->pnum;
1168                }
1169        }
1170
1171        if (ubi->bad_allowed) {
1172                ubi_calculate_reserved(ubi);
1173
1174                if (ubi->avail_pebs < ubi->beb_rsvd_level) {
1175                        /* No enough free physical eraseblocks */
1176                        ubi->beb_rsvd_pebs = ubi->avail_pebs;
1177                        ubi_warn("cannot reserve enough PEBs for bad PEB "
1178                                 "handling, reserved %d, need %d",
1179                                 ubi->beb_rsvd_pebs, ubi->beb_rsvd_level);
1180                } else
1181                        ubi->beb_rsvd_pebs = ubi->beb_rsvd_level;
1182
1183                ubi->avail_pebs -= ubi->beb_rsvd_pebs;
1184                ubi->rsvd_pebs  += ubi->beb_rsvd_pebs;
1185        }
1186
1187        if (ubi->avail_pebs < EBA_RESERVED_PEBS) {
1188                ubi_err("no enough physical eraseblocks (%d, need %d)",
1189                        ubi->avail_pebs, EBA_RESERVED_PEBS);
1190                err = -ENOSPC;
1191                goto out_free;
1192        }
1193        ubi->avail_pebs -= EBA_RESERVED_PEBS;
1194        ubi->rsvd_pebs += EBA_RESERVED_PEBS;
1195
1196        dbg_eba("EBA unit is initialized");
1197        return 0;
1198
1199out_free:
1200        for (i = 0; i < num_volumes; i++) {
1201                if (!ubi->volumes[i])
1202                        continue;
1203                kfree(ubi->volumes[i]->eba_tbl);
1204        }
1205        if (ubi_devices_cnt == 0)
1206                kmem_cache_destroy(ltree_slab);
1207        return err;
1208}
1209
1210/**
1211 * ubi_eba_close - close EBA unit.
1212 * @ubi: UBI device description object
1213 */
1214void ubi_eba_close(const struct ubi_device *ubi)
1215{
1216        int i, num_volumes = ubi->vtbl_slots + UBI_INT_VOL_COUNT;
1217
1218        dbg_eba("close EBA unit");
1219
1220        for (i = 0; i < num_volumes; i++) {
1221                if (!ubi->volumes[i])
1222                        continue;
1223                kfree(ubi->volumes[i]->eba_tbl);
1224        }
1225        if (ubi_devices_cnt == 1)
1226                kmem_cache_destroy(ltree_slab);
1227}
1228