uboot/fs/ubifs/recovery.c
<<
>>
Prefs
   1/*
   2 * This file is part of UBIFS.
   3 *
   4 * Copyright (C) 2006-2008 Nokia Corporation
   5 *
   6 * SPDX-License-Identifier:     GPL-2.0+
   7 *
   8 * Authors: Adrian Hunter
   9 *          Artem Bityutskiy (Битюцкий Артём)
  10 */
  11
  12/*
  13 * This file implements functions needed to recover from unclean un-mounts.
  14 * When UBIFS is mounted, it checks a flag on the master node to determine if
  15 * an un-mount was completed successfully. If not, the process of mounting
  16 * incorporates additional checking and fixing of on-flash data structures.
  17 * UBIFS always cleans away all remnants of an unclean un-mount, so that
  18 * errors do not accumulate. However UBIFS defers recovery if it is mounted
  19 * read-only, and the flash is not modified in that case.
  20 *
  21 * The general UBIFS approach to the recovery is that it recovers from
  22 * corruptions which could be caused by power cuts, but it refuses to recover
  23 * from corruption caused by other reasons. And UBIFS tries to distinguish
  24 * between these 2 reasons of corruptions and silently recover in the former
  25 * case and loudly complain in the latter case.
  26 *
  27 * UBIFS writes only to erased LEBs, so it writes only to the flash space
  28 * containing only 0xFFs. UBIFS also always writes strictly from the beginning
  29 * of the LEB to the end. And UBIFS assumes that the underlying flash media
  30 * writes in @c->max_write_size bytes at a time.
  31 *
  32 * Hence, if UBIFS finds a corrupted node at offset X, it expects only the min.
  33 * I/O unit corresponding to offset X to contain corrupted data, all the
  34 * following min. I/O units have to contain empty space (all 0xFFs). If this is
  35 * not true, the corruption cannot be the result of a power cut, and UBIFS
  36 * refuses to mount.
  37 */
  38
  39#ifndef __UBOOT__
  40#include <linux/crc32.h>
  41#include <linux/slab.h>
  42#else
  43#include <linux/err.h>
  44#endif
  45#include "ubifs.h"
  46
  47/**
  48 * is_empty - determine whether a buffer is empty (contains all 0xff).
  49 * @buf: buffer to clean
  50 * @len: length of buffer
  51 *
  52 * This function returns %1 if the buffer is empty (contains all 0xff) otherwise
  53 * %0 is returned.
  54 */
  55static int is_empty(void *buf, int len)
  56{
  57        uint8_t *p = buf;
  58        int i;
  59
  60        for (i = 0; i < len; i++)
  61                if (*p++ != 0xff)
  62                        return 0;
  63        return 1;
  64}
  65
  66/**
  67 * first_non_ff - find offset of the first non-0xff byte.
  68 * @buf: buffer to search in
  69 * @len: length of buffer
  70 *
  71 * This function returns offset of the first non-0xff byte in @buf or %-1 if
  72 * the buffer contains only 0xff bytes.
  73 */
  74static int first_non_ff(void *buf, int len)
  75{
  76        uint8_t *p = buf;
  77        int i;
  78
  79        for (i = 0; i < len; i++)
  80                if (*p++ != 0xff)
  81                        return i;
  82        return -1;
  83}
  84
  85/**
  86 * get_master_node - get the last valid master node allowing for corruption.
  87 * @c: UBIFS file-system description object
  88 * @lnum: LEB number
  89 * @pbuf: buffer containing the LEB read, is returned here
  90 * @mst: master node, if found, is returned here
  91 * @cor: corruption, if found, is returned here
  92 *
  93 * This function allocates a buffer, reads the LEB into it, and finds and
  94 * returns the last valid master node allowing for one area of corruption.
  95 * The corrupt area, if there is one, must be consistent with the assumption
  96 * that it is the result of an unclean unmount while the master node was being
  97 * written. Under those circumstances, it is valid to use the previously written
  98 * master node.
  99 *
 100 * This function returns %0 on success and a negative error code on failure.
 101 */
 102static int get_master_node(const struct ubifs_info *c, int lnum, void **pbuf,
 103                           struct ubifs_mst_node **mst, void **cor)
 104{
 105        const int sz = c->mst_node_alsz;
 106        int err, offs, len;
 107        void *sbuf, *buf;
 108
 109        sbuf = vmalloc(c->leb_size);
 110        if (!sbuf)
 111                return -ENOMEM;
 112
 113        err = ubifs_leb_read(c, lnum, sbuf, 0, c->leb_size, 0);
 114        if (err && err != -EBADMSG)
 115                goto out_free;
 116
 117        /* Find the first position that is definitely not a node */
 118        offs = 0;
 119        buf = sbuf;
 120        len = c->leb_size;
 121        while (offs + UBIFS_MST_NODE_SZ <= c->leb_size) {
 122                struct ubifs_ch *ch = buf;
 123
 124                if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC)
 125                        break;
 126                offs += sz;
 127                buf  += sz;
 128                len  -= sz;
 129        }
 130        /* See if there was a valid master node before that */
 131        if (offs) {
 132                int ret;
 133
 134                offs -= sz;
 135                buf  -= sz;
 136                len  += sz;
 137                ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
 138                if (ret != SCANNED_A_NODE && offs) {
 139                        /* Could have been corruption so check one place back */
 140                        offs -= sz;
 141                        buf  -= sz;
 142                        len  += sz;
 143                        ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
 144                        if (ret != SCANNED_A_NODE)
 145                                /*
 146                                 * We accept only one area of corruption because
 147                                 * we are assuming that it was caused while
 148                                 * trying to write a master node.
 149                                 */
 150                                goto out_err;
 151                }
 152                if (ret == SCANNED_A_NODE) {
 153                        struct ubifs_ch *ch = buf;
 154
 155                        if (ch->node_type != UBIFS_MST_NODE)
 156                                goto out_err;
 157                        dbg_rcvry("found a master node at %d:%d", lnum, offs);
 158                        *mst = buf;
 159                        offs += sz;
 160                        buf  += sz;
 161                        len  -= sz;
 162                }
 163        }
 164        /* Check for corruption */
 165        if (offs < c->leb_size) {
 166                if (!is_empty(buf, min_t(int, len, sz))) {
 167                        *cor = buf;
 168                        dbg_rcvry("found corruption at %d:%d", lnum, offs);
 169                }
 170                offs += sz;
 171                buf  += sz;
 172                len  -= sz;
 173        }
 174        /* Check remaining empty space */
 175        if (offs < c->leb_size)
 176                if (!is_empty(buf, len))
 177                        goto out_err;
 178        *pbuf = sbuf;
 179        return 0;
 180
 181out_err:
 182        err = -EINVAL;
 183out_free:
 184        vfree(sbuf);
 185        *mst = NULL;
 186        *cor = NULL;
 187        return err;
 188}
 189
 190/**
 191 * write_rcvrd_mst_node - write recovered master node.
 192 * @c: UBIFS file-system description object
 193 * @mst: master node
 194 *
 195 * This function returns %0 on success and a negative error code on failure.
 196 */
 197static int write_rcvrd_mst_node(struct ubifs_info *c,
 198                                struct ubifs_mst_node *mst)
 199{
 200        int err = 0, lnum = UBIFS_MST_LNUM, sz = c->mst_node_alsz;
 201        __le32 save_flags;
 202
 203        dbg_rcvry("recovery");
 204
 205        save_flags = mst->flags;
 206        mst->flags |= cpu_to_le32(UBIFS_MST_RCVRY);
 207
 208        ubifs_prepare_node(c, mst, UBIFS_MST_NODE_SZ, 1);
 209        err = ubifs_leb_change(c, lnum, mst, sz);
 210        if (err)
 211                goto out;
 212        err = ubifs_leb_change(c, lnum + 1, mst, sz);
 213        if (err)
 214                goto out;
 215out:
 216        mst->flags = save_flags;
 217        return err;
 218}
 219
 220/**
 221 * ubifs_recover_master_node - recover the master node.
 222 * @c: UBIFS file-system description object
 223 *
 224 * This function recovers the master node from corruption that may occur due to
 225 * an unclean unmount.
 226 *
 227 * This function returns %0 on success and a negative error code on failure.
 228 */
 229int ubifs_recover_master_node(struct ubifs_info *c)
 230{
 231        void *buf1 = NULL, *buf2 = NULL, *cor1 = NULL, *cor2 = NULL;
 232        struct ubifs_mst_node *mst1 = NULL, *mst2 = NULL, *mst;
 233        const int sz = c->mst_node_alsz;
 234        int err, offs1, offs2;
 235
 236        dbg_rcvry("recovery");
 237
 238        err = get_master_node(c, UBIFS_MST_LNUM, &buf1, &mst1, &cor1);
 239        if (err)
 240                goto out_free;
 241
 242        err = get_master_node(c, UBIFS_MST_LNUM + 1, &buf2, &mst2, &cor2);
 243        if (err)
 244                goto out_free;
 245
 246        if (mst1) {
 247                offs1 = (void *)mst1 - buf1;
 248                if ((le32_to_cpu(mst1->flags) & UBIFS_MST_RCVRY) &&
 249                    (offs1 == 0 && !cor1)) {
 250                        /*
 251                         * mst1 was written by recovery at offset 0 with no
 252                         * corruption.
 253                         */
 254                        dbg_rcvry("recovery recovery");
 255                        mst = mst1;
 256                } else if (mst2) {
 257                        offs2 = (void *)mst2 - buf2;
 258                        if (offs1 == offs2) {
 259                                /* Same offset, so must be the same */
 260                                if (memcmp((void *)mst1 + UBIFS_CH_SZ,
 261                                           (void *)mst2 + UBIFS_CH_SZ,
 262                                           UBIFS_MST_NODE_SZ - UBIFS_CH_SZ))
 263                                        goto out_err;
 264                                mst = mst1;
 265                        } else if (offs2 + sz == offs1) {
 266                                /* 1st LEB was written, 2nd was not */
 267                                if (cor1)
 268                                        goto out_err;
 269                                mst = mst1;
 270                        } else if (offs1 == 0 &&
 271                                   c->leb_size - offs2 - sz < sz) {
 272                                /* 1st LEB was unmapped and written, 2nd not */
 273                                if (cor1)
 274                                        goto out_err;
 275                                mst = mst1;
 276                        } else
 277                                goto out_err;
 278                } else {
 279                        /*
 280                         * 2nd LEB was unmapped and about to be written, so
 281                         * there must be only one master node in the first LEB
 282                         * and no corruption.
 283                         */
 284                        if (offs1 != 0 || cor1)
 285                                goto out_err;
 286                        mst = mst1;
 287                }
 288        } else {
 289                if (!mst2)
 290                        goto out_err;
 291                /*
 292                 * 1st LEB was unmapped and about to be written, so there must
 293                 * be no room left in 2nd LEB.
 294                 */
 295                offs2 = (void *)mst2 - buf2;
 296                if (offs2 + sz + sz <= c->leb_size)
 297                        goto out_err;
 298                mst = mst2;
 299        }
 300
 301        ubifs_msg(c, "recovered master node from LEB %d",
 302                  (mst == mst1 ? UBIFS_MST_LNUM : UBIFS_MST_LNUM + 1));
 303
 304        memcpy(c->mst_node, mst, UBIFS_MST_NODE_SZ);
 305
 306        if (c->ro_mount) {
 307                /* Read-only mode. Keep a copy for switching to rw mode */
 308                c->rcvrd_mst_node = kmalloc(sz, GFP_KERNEL);
 309                if (!c->rcvrd_mst_node) {
 310                        err = -ENOMEM;
 311                        goto out_free;
 312                }
 313                memcpy(c->rcvrd_mst_node, c->mst_node, UBIFS_MST_NODE_SZ);
 314
 315                /*
 316                 * We had to recover the master node, which means there was an
 317                 * unclean reboot. However, it is possible that the master node
 318                 * is clean at this point, i.e., %UBIFS_MST_DIRTY is not set.
 319                 * E.g., consider the following chain of events:
 320                 *
 321                 * 1. UBIFS was cleanly unmounted, so the master node is clean
 322                 * 2. UBIFS is being mounted R/W and starts changing the master
 323                 *    node in the first (%UBIFS_MST_LNUM). A power cut happens,
 324                 *    so this LEB ends up with some amount of garbage at the
 325                 *    end.
 326                 * 3. UBIFS is being mounted R/O. We reach this place and
 327                 *    recover the master node from the second LEB
 328                 *    (%UBIFS_MST_LNUM + 1). But we cannot update the media
 329                 *    because we are being mounted R/O. We have to defer the
 330                 *    operation.
 331                 * 4. However, this master node (@c->mst_node) is marked as
 332                 *    clean (since the step 1). And if we just return, the
 333                 *    mount code will be confused and won't recover the master
 334                 *    node when it is re-mounter R/W later.
 335                 *
 336                 *    Thus, to force the recovery by marking the master node as
 337                 *    dirty.
 338                 */
 339                c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
 340#ifndef __UBOOT__
 341        } else {
 342                /* Write the recovered master node */
 343                c->max_sqnum = le64_to_cpu(mst->ch.sqnum) - 1;
 344                err = write_rcvrd_mst_node(c, c->mst_node);
 345                if (err)
 346                        goto out_free;
 347#endif
 348        }
 349
 350        vfree(buf2);
 351        vfree(buf1);
 352
 353        return 0;
 354
 355out_err:
 356        err = -EINVAL;
 357out_free:
 358        ubifs_err(c, "failed to recover master node");
 359        if (mst1) {
 360                ubifs_err(c, "dumping first master node");
 361                ubifs_dump_node(c, mst1);
 362        }
 363        if (mst2) {
 364                ubifs_err(c, "dumping second master node");
 365                ubifs_dump_node(c, mst2);
 366        }
 367        vfree(buf2);
 368        vfree(buf1);
 369        return err;
 370}
 371
 372/**
 373 * ubifs_write_rcvrd_mst_node - write the recovered master node.
 374 * @c: UBIFS file-system description object
 375 *
 376 * This function writes the master node that was recovered during mounting in
 377 * read-only mode and must now be written because we are remounting rw.
 378 *
 379 * This function returns %0 on success and a negative error code on failure.
 380 */
 381int ubifs_write_rcvrd_mst_node(struct ubifs_info *c)
 382{
 383        int err;
 384
 385        if (!c->rcvrd_mst_node)
 386                return 0;
 387        c->rcvrd_mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
 388        c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
 389        err = write_rcvrd_mst_node(c, c->rcvrd_mst_node);
 390        if (err)
 391                return err;
 392        kfree(c->rcvrd_mst_node);
 393        c->rcvrd_mst_node = NULL;
 394        return 0;
 395}
 396
 397/**
 398 * is_last_write - determine if an offset was in the last write to a LEB.
 399 * @c: UBIFS file-system description object
 400 * @buf: buffer to check
 401 * @offs: offset to check
 402 *
 403 * This function returns %1 if @offs was in the last write to the LEB whose data
 404 * is in @buf, otherwise %0 is returned. The determination is made by checking
 405 * for subsequent empty space starting from the next @c->max_write_size
 406 * boundary.
 407 */
 408static int is_last_write(const struct ubifs_info *c, void *buf, int offs)
 409{
 410        int empty_offs, check_len;
 411        uint8_t *p;
 412
 413        /*
 414         * Round up to the next @c->max_write_size boundary i.e. @offs is in
 415         * the last wbuf written. After that should be empty space.
 416         */
 417        empty_offs = ALIGN(offs + 1, c->max_write_size);
 418        check_len = c->leb_size - empty_offs;
 419        p = buf + empty_offs - offs;
 420        return is_empty(p, check_len);
 421}
 422
 423/**
 424 * clean_buf - clean the data from an LEB sitting in a buffer.
 425 * @c: UBIFS file-system description object
 426 * @buf: buffer to clean
 427 * @lnum: LEB number to clean
 428 * @offs: offset from which to clean
 429 * @len: length of buffer
 430 *
 431 * This function pads up to the next min_io_size boundary (if there is one) and
 432 * sets empty space to all 0xff. @buf, @offs and @len are updated to the next
 433 * @c->min_io_size boundary.
 434 */
 435static void clean_buf(const struct ubifs_info *c, void **buf, int lnum,
 436                      int *offs, int *len)
 437{
 438        int empty_offs, pad_len;
 439
 440        lnum = lnum;
 441        dbg_rcvry("cleaning corruption at %d:%d", lnum, *offs);
 442
 443        ubifs_assert(!(*offs & 7));
 444        empty_offs = ALIGN(*offs, c->min_io_size);
 445        pad_len = empty_offs - *offs;
 446        ubifs_pad(c, *buf, pad_len);
 447        *offs += pad_len;
 448        *buf += pad_len;
 449        *len -= pad_len;
 450        memset(*buf, 0xff, c->leb_size - empty_offs);
 451}
 452
 453/**
 454 * no_more_nodes - determine if there are no more nodes in a buffer.
 455 * @c: UBIFS file-system description object
 456 * @buf: buffer to check
 457 * @len: length of buffer
 458 * @lnum: LEB number of the LEB from which @buf was read
 459 * @offs: offset from which @buf was read
 460 *
 461 * This function ensures that the corrupted node at @offs is the last thing
 462 * written to a LEB. This function returns %1 if more data is not found and
 463 * %0 if more data is found.
 464 */
 465static int no_more_nodes(const struct ubifs_info *c, void *buf, int len,
 466                        int lnum, int offs)
 467{
 468        struct ubifs_ch *ch = buf;
 469        int skip, dlen = le32_to_cpu(ch->len);
 470
 471        /* Check for empty space after the corrupt node's common header */
 472        skip = ALIGN(offs + UBIFS_CH_SZ, c->max_write_size) - offs;
 473        if (is_empty(buf + skip, len - skip))
 474                return 1;
 475        /*
 476         * The area after the common header size is not empty, so the common
 477         * header must be intact. Check it.
 478         */
 479        if (ubifs_check_node(c, buf, lnum, offs, 1, 0) != -EUCLEAN) {
 480                dbg_rcvry("unexpected bad common header at %d:%d", lnum, offs);
 481                return 0;
 482        }
 483        /* Now we know the corrupt node's length we can skip over it */
 484        skip = ALIGN(offs + dlen, c->max_write_size) - offs;
 485        /* After which there should be empty space */
 486        if (is_empty(buf + skip, len - skip))
 487                return 1;
 488        dbg_rcvry("unexpected data at %d:%d", lnum, offs + skip);
 489        return 0;
 490}
 491
 492/**
 493 * fix_unclean_leb - fix an unclean LEB.
 494 * @c: UBIFS file-system description object
 495 * @sleb: scanned LEB information
 496 * @start: offset where scan started
 497 */
 498static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
 499                           int start)
 500{
 501        int lnum = sleb->lnum, endpt = start;
 502
 503        /* Get the end offset of the last node we are keeping */
 504        if (!list_empty(&sleb->nodes)) {
 505                struct ubifs_scan_node *snod;
 506
 507                snod = list_entry(sleb->nodes.prev,
 508                                  struct ubifs_scan_node, list);
 509                endpt = snod->offs + snod->len;
 510        }
 511
 512        if (c->ro_mount && !c->remounting_rw) {
 513                /* Add to recovery list */
 514                struct ubifs_unclean_leb *ucleb;
 515
 516                dbg_rcvry("need to fix LEB %d start %d endpt %d",
 517                          lnum, start, sleb->endpt);
 518                ucleb = kzalloc(sizeof(struct ubifs_unclean_leb), GFP_NOFS);
 519                if (!ucleb)
 520                        return -ENOMEM;
 521                ucleb->lnum = lnum;
 522                ucleb->endpt = endpt;
 523                list_add_tail(&ucleb->list, &c->unclean_leb_list);
 524#ifndef __UBOOT__
 525        } else {
 526                /* Write the fixed LEB back to flash */
 527                int err;
 528
 529                dbg_rcvry("fixing LEB %d start %d endpt %d",
 530                          lnum, start, sleb->endpt);
 531                if (endpt == 0) {
 532                        err = ubifs_leb_unmap(c, lnum);
 533                        if (err)
 534                                return err;
 535                } else {
 536                        int len = ALIGN(endpt, c->min_io_size);
 537
 538                        if (start) {
 539                                err = ubifs_leb_read(c, lnum, sleb->buf, 0,
 540                                                     start, 1);
 541                                if (err)
 542                                        return err;
 543                        }
 544                        /* Pad to min_io_size */
 545                        if (len > endpt) {
 546                                int pad_len = len - ALIGN(endpt, 8);
 547
 548                                if (pad_len > 0) {
 549                                        void *buf = sleb->buf + len - pad_len;
 550
 551                                        ubifs_pad(c, buf, pad_len);
 552                                }
 553                        }
 554                        err = ubifs_leb_change(c, lnum, sleb->buf, len);
 555                        if (err)
 556                                return err;
 557                }
 558#endif
 559        }
 560        return 0;
 561}
 562
 563/**
 564 * drop_last_group - drop the last group of nodes.
 565 * @sleb: scanned LEB information
 566 * @offs: offset of dropped nodes is returned here
 567 *
 568 * This is a helper function for 'ubifs_recover_leb()' which drops the last
 569 * group of nodes of the scanned LEB.
 570 */
 571static void drop_last_group(struct ubifs_scan_leb *sleb, int *offs)
 572{
 573        while (!list_empty(&sleb->nodes)) {
 574                struct ubifs_scan_node *snod;
 575                struct ubifs_ch *ch;
 576
 577                snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node,
 578                                  list);
 579                ch = snod->node;
 580                if (ch->group_type != UBIFS_IN_NODE_GROUP)
 581                        break;
 582
 583                dbg_rcvry("dropping grouped node at %d:%d",
 584                          sleb->lnum, snod->offs);
 585                *offs = snod->offs;
 586                list_del(&snod->list);
 587                kfree(snod);
 588                sleb->nodes_cnt -= 1;
 589        }
 590}
 591
 592/**
 593 * drop_last_node - drop the last node.
 594 * @sleb: scanned LEB information
 595 * @offs: offset of dropped nodes is returned here
 596 *
 597 * This is a helper function for 'ubifs_recover_leb()' which drops the last
 598 * node of the scanned LEB.
 599 */
 600static void drop_last_node(struct ubifs_scan_leb *sleb, int *offs)
 601{
 602        struct ubifs_scan_node *snod;
 603
 604        if (!list_empty(&sleb->nodes)) {
 605                snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node,
 606                                  list);
 607
 608                dbg_rcvry("dropping last node at %d:%d",
 609                          sleb->lnum, snod->offs);
 610                *offs = snod->offs;
 611                list_del(&snod->list);
 612                kfree(snod);
 613                sleb->nodes_cnt -= 1;
 614        }
 615}
 616
 617/**
 618 * ubifs_recover_leb - scan and recover a LEB.
 619 * @c: UBIFS file-system description object
 620 * @lnum: LEB number
 621 * @offs: offset
 622 * @sbuf: LEB-sized buffer to use
 623 * @jhead: journal head number this LEB belongs to (%-1 if the LEB does not
 624 *         belong to any journal head)
 625 *
 626 * This function does a scan of a LEB, but caters for errors that might have
 627 * been caused by the unclean unmount from which we are attempting to recover.
 628 * Returns the scanned information on success and a negative error code on
 629 * failure.
 630 */
 631struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
 632                                         int offs, void *sbuf, int jhead)
 633{
 634        int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit;
 635        int grouped = jhead == -1 ? 0 : c->jheads[jhead].grouped;
 636        struct ubifs_scan_leb *sleb;
 637        void *buf = sbuf + offs;
 638
 639        dbg_rcvry("%d:%d, jhead %d, grouped %d", lnum, offs, jhead, grouped);
 640
 641        sleb = ubifs_start_scan(c, lnum, offs, sbuf);
 642        if (IS_ERR(sleb))
 643                return sleb;
 644
 645        ubifs_assert(len >= 8);
 646        while (len >= 8) {
 647                dbg_scan("look at LEB %d:%d (%d bytes left)",
 648                         lnum, offs, len);
 649
 650                cond_resched();
 651
 652                /*
 653                 * Scan quietly until there is an error from which we cannot
 654                 * recover
 655                 */
 656                ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
 657                if (ret == SCANNED_A_NODE) {
 658                        /* A valid node, and not a padding node */
 659                        struct ubifs_ch *ch = buf;
 660                        int node_len;
 661
 662                        err = ubifs_add_snod(c, sleb, buf, offs);
 663                        if (err)
 664                                goto error;
 665                        node_len = ALIGN(le32_to_cpu(ch->len), 8);
 666                        offs += node_len;
 667                        buf += node_len;
 668                        len -= node_len;
 669                } else if (ret > 0) {
 670                        /* Padding bytes or a valid padding node */
 671                        offs += ret;
 672                        buf += ret;
 673                        len -= ret;
 674                } else if (ret == SCANNED_EMPTY_SPACE ||
 675                           ret == SCANNED_GARBAGE     ||
 676                           ret == SCANNED_A_BAD_PAD_NODE ||
 677                           ret == SCANNED_A_CORRUPT_NODE) {
 678                        dbg_rcvry("found corruption (%d) at %d:%d",
 679                                  ret, lnum, offs);
 680                        break;
 681                } else {
 682                        ubifs_err(c, "unexpected return value %d", ret);
 683                        err = -EINVAL;
 684                        goto error;
 685                }
 686        }
 687
 688        if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) {
 689                if (!is_last_write(c, buf, offs))
 690                        goto corrupted_rescan;
 691        } else if (ret == SCANNED_A_CORRUPT_NODE) {
 692                if (!no_more_nodes(c, buf, len, lnum, offs))
 693                        goto corrupted_rescan;
 694        } else if (!is_empty(buf, len)) {
 695                if (!is_last_write(c, buf, offs)) {
 696                        int corruption = first_non_ff(buf, len);
 697
 698                        /*
 699                         * See header comment for this file for more
 700                         * explanations about the reasons we have this check.
 701                         */
 702                        ubifs_err(c, "corrupt empty space LEB %d:%d, corruption starts at %d",
 703                                  lnum, offs, corruption);
 704                        /* Make sure we dump interesting non-0xFF data */
 705                        offs += corruption;
 706                        buf += corruption;
 707                        goto corrupted;
 708                }
 709        }
 710
 711        min_io_unit = round_down(offs, c->min_io_size);
 712        if (grouped)
 713                /*
 714                 * If nodes are grouped, always drop the incomplete group at
 715                 * the end.
 716                 */
 717                drop_last_group(sleb, &offs);
 718
 719        if (jhead == GCHD) {
 720                /*
 721                 * If this LEB belongs to the GC head then while we are in the
 722                 * middle of the same min. I/O unit keep dropping nodes. So
 723                 * basically, what we want is to make sure that the last min.
 724                 * I/O unit where we saw the corruption is dropped completely
 725                 * with all the uncorrupted nodes which may possibly sit there.
 726                 *
 727                 * In other words, let's name the min. I/O unit where the
 728                 * corruption starts B, and the previous min. I/O unit A. The
 729                 * below code tries to deal with a situation when half of B
 730                 * contains valid nodes or the end of a valid node, and the
 731                 * second half of B contains corrupted data or garbage. This
 732                 * means that UBIFS had been writing to B just before the power
 733                 * cut happened. I do not know how realistic is this scenario
 734                 * that half of the min. I/O unit had been written successfully
 735                 * and the other half not, but this is possible in our 'failure
 736                 * mode emulation' infrastructure at least.
 737                 *
 738                 * So what is the problem, why we need to drop those nodes? Why
 739                 * can't we just clean-up the second half of B by putting a
 740                 * padding node there? We can, and this works fine with one
 741                 * exception which was reproduced with power cut emulation
 742                 * testing and happens extremely rarely.
 743                 *
 744                 * Imagine the file-system is full, we run GC which starts
 745                 * moving valid nodes from LEB X to LEB Y (obviously, LEB Y is
 746                 * the current GC head LEB). The @c->gc_lnum is -1, which means
 747                 * that GC will retain LEB X and will try to continue. Imagine
 748                 * that LEB X is currently the dirtiest LEB, and the amount of
 749                 * used space in LEB Y is exactly the same as amount of free
 750                 * space in LEB X.
 751                 *
 752                 * And a power cut happens when nodes are moved from LEB X to
 753                 * LEB Y. We are here trying to recover LEB Y which is the GC
 754                 * head LEB. We find the min. I/O unit B as described above.
 755                 * Then we clean-up LEB Y by padding min. I/O unit. And later
 756                 * 'ubifs_rcvry_gc_commit()' function fails, because it cannot
 757                 * find a dirty LEB which could be GC'd into LEB Y! Even LEB X
 758                 * does not match because the amount of valid nodes there does
 759                 * not fit the free space in LEB Y any more! And this is
 760                 * because of the padding node which we added to LEB Y. The
 761                 * user-visible effect of this which I once observed and
 762                 * analysed is that we cannot mount the file-system with
 763                 * -ENOSPC error.
 764                 *
 765                 * So obviously, to make sure that situation does not happen we
 766                 * should free min. I/O unit B in LEB Y completely and the last
 767                 * used min. I/O unit in LEB Y should be A. This is basically
 768                 * what the below code tries to do.
 769                 */
 770                while (offs > min_io_unit)
 771                        drop_last_node(sleb, &offs);
 772        }
 773
 774        buf = sbuf + offs;
 775        len = c->leb_size - offs;
 776
 777        clean_buf(c, &buf, lnum, &offs, &len);
 778        ubifs_end_scan(c, sleb, lnum, offs);
 779
 780        err = fix_unclean_leb(c, sleb, start);
 781        if (err)
 782                goto error;
 783
 784        return sleb;
 785
 786corrupted_rescan:
 787        /* Re-scan the corrupted data with verbose messages */
 788        ubifs_err(c, "corruption %d", ret);
 789        ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
 790corrupted:
 791        ubifs_scanned_corruption(c, lnum, offs, buf);
 792        err = -EUCLEAN;
 793error:
 794        ubifs_err(c, "LEB %d scanning failed", lnum);
 795        ubifs_scan_destroy(sleb);
 796        return ERR_PTR(err);
 797}
 798
 799/**
 800 * get_cs_sqnum - get commit start sequence number.
 801 * @c: UBIFS file-system description object
 802 * @lnum: LEB number of commit start node
 803 * @offs: offset of commit start node
 804 * @cs_sqnum: commit start sequence number is returned here
 805 *
 806 * This function returns %0 on success and a negative error code on failure.
 807 */
 808static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs,
 809                        unsigned long long *cs_sqnum)
 810{
 811        struct ubifs_cs_node *cs_node = NULL;
 812        int err, ret;
 813
 814        dbg_rcvry("at %d:%d", lnum, offs);
 815        cs_node = kmalloc(UBIFS_CS_NODE_SZ, GFP_KERNEL);
 816        if (!cs_node)
 817                return -ENOMEM;
 818        if (c->leb_size - offs < UBIFS_CS_NODE_SZ)
 819                goto out_err;
 820        err = ubifs_leb_read(c, lnum, (void *)cs_node, offs,
 821                             UBIFS_CS_NODE_SZ, 0);
 822        if (err && err != -EBADMSG)
 823                goto out_free;
 824        ret = ubifs_scan_a_node(c, cs_node, UBIFS_CS_NODE_SZ, lnum, offs, 0);
 825        if (ret != SCANNED_A_NODE) {
 826                ubifs_err(c, "Not a valid node");
 827                goto out_err;
 828        }
 829        if (cs_node->ch.node_type != UBIFS_CS_NODE) {
 830                ubifs_err(c, "Node a CS node, type is %d", cs_node->ch.node_type);
 831                goto out_err;
 832        }
 833        if (le64_to_cpu(cs_node->cmt_no) != c->cmt_no) {
 834                ubifs_err(c, "CS node cmt_no %llu != current cmt_no %llu",
 835                          (unsigned long long)le64_to_cpu(cs_node->cmt_no),
 836                          c->cmt_no);
 837                goto out_err;
 838        }
 839        *cs_sqnum = le64_to_cpu(cs_node->ch.sqnum);
 840        dbg_rcvry("commit start sqnum %llu", *cs_sqnum);
 841        kfree(cs_node);
 842        return 0;
 843
 844out_err:
 845        err = -EINVAL;
 846out_free:
 847        ubifs_err(c, "failed to get CS sqnum");
 848        kfree(cs_node);
 849        return err;
 850}
 851
 852/**
 853 * ubifs_recover_log_leb - scan and recover a log LEB.
 854 * @c: UBIFS file-system description object
 855 * @lnum: LEB number
 856 * @offs: offset
 857 * @sbuf: LEB-sized buffer to use
 858 *
 859 * This function does a scan of a LEB, but caters for errors that might have
 860 * been caused by unclean reboots from which we are attempting to recover
 861 * (assume that only the last log LEB can be corrupted by an unclean reboot).
 862 *
 863 * This function returns %0 on success and a negative error code on failure.
 864 */
 865struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
 866                                             int offs, void *sbuf)
 867{
 868        struct ubifs_scan_leb *sleb;
 869        int next_lnum;
 870
 871        dbg_rcvry("LEB %d", lnum);
 872        next_lnum = lnum + 1;
 873        if (next_lnum >= UBIFS_LOG_LNUM + c->log_lebs)
 874                next_lnum = UBIFS_LOG_LNUM;
 875        if (next_lnum != c->ltail_lnum) {
 876                /*
 877                 * We can only recover at the end of the log, so check that the
 878                 * next log LEB is empty or out of date.
 879                 */
 880                sleb = ubifs_scan(c, next_lnum, 0, sbuf, 0);
 881                if (IS_ERR(sleb))
 882                        return sleb;
 883                if (sleb->nodes_cnt) {
 884                        struct ubifs_scan_node *snod;
 885                        unsigned long long cs_sqnum = c->cs_sqnum;
 886
 887                        snod = list_entry(sleb->nodes.next,
 888                                          struct ubifs_scan_node, list);
 889                        if (cs_sqnum == 0) {
 890                                int err;
 891
 892                                err = get_cs_sqnum(c, lnum, offs, &cs_sqnum);
 893                                if (err) {
 894                                        ubifs_scan_destroy(sleb);
 895                                        return ERR_PTR(err);
 896                                }
 897                        }
 898                        if (snod->sqnum > cs_sqnum) {
 899                                ubifs_err(c, "unrecoverable log corruption in LEB %d",
 900                                          lnum);
 901                                ubifs_scan_destroy(sleb);
 902                                return ERR_PTR(-EUCLEAN);
 903                        }
 904                }
 905                ubifs_scan_destroy(sleb);
 906        }
 907        return ubifs_recover_leb(c, lnum, offs, sbuf, -1);
 908}
 909
 910/**
 911 * recover_head - recover a head.
 912 * @c: UBIFS file-system description object
 913 * @lnum: LEB number of head to recover
 914 * @offs: offset of head to recover
 915 * @sbuf: LEB-sized buffer to use
 916 *
 917 * This function ensures that there is no data on the flash at a head location.
 918 *
 919 * This function returns %0 on success and a negative error code on failure.
 920 */
 921static int recover_head(struct ubifs_info *c, int lnum, int offs, void *sbuf)
 922{
 923        int len = c->max_write_size, err;
 924
 925        if (offs + len > c->leb_size)
 926                len = c->leb_size - offs;
 927
 928        if (!len)
 929                return 0;
 930
 931        /* Read at the head location and check it is empty flash */
 932        err = ubifs_leb_read(c, lnum, sbuf, offs, len, 1);
 933        if (err || !is_empty(sbuf, len)) {
 934                dbg_rcvry("cleaning head at %d:%d", lnum, offs);
 935                if (offs == 0)
 936                        return ubifs_leb_unmap(c, lnum);
 937                err = ubifs_leb_read(c, lnum, sbuf, 0, offs, 1);
 938                if (err)
 939                        return err;
 940                return ubifs_leb_change(c, lnum, sbuf, offs);
 941        }
 942
 943        return 0;
 944}
 945
 946/**
 947 * ubifs_recover_inl_heads - recover index and LPT heads.
 948 * @c: UBIFS file-system description object
 949 * @sbuf: LEB-sized buffer to use
 950 *
 951 * This function ensures that there is no data on the flash at the index and
 952 * LPT head locations.
 953 *
 954 * This deals with the recovery of a half-completed journal commit. UBIFS is
 955 * careful never to overwrite the last version of the index or the LPT. Because
 956 * the index and LPT are wandering trees, data from a half-completed commit will
 957 * not be referenced anywhere in UBIFS. The data will be either in LEBs that are
 958 * assumed to be empty and will be unmapped anyway before use, or in the index
 959 * and LPT heads.
 960 *
 961 * This function returns %0 on success and a negative error code on failure.
 962 */
 963int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf)
 964{
 965        int err;
 966
 967        ubifs_assert(!c->ro_mount || c->remounting_rw);
 968
 969        dbg_rcvry("checking index head at %d:%d", c->ihead_lnum, c->ihead_offs);
 970        err = recover_head(c, c->ihead_lnum, c->ihead_offs, sbuf);
 971        if (err)
 972                return err;
 973
 974        dbg_rcvry("checking LPT head at %d:%d", c->nhead_lnum, c->nhead_offs);
 975
 976        return recover_head(c, c->nhead_lnum, c->nhead_offs, sbuf);
 977}
 978
 979/**
 980 * clean_an_unclean_leb - read and write a LEB to remove corruption.
 981 * @c: UBIFS file-system description object
 982 * @ucleb: unclean LEB information
 983 * @sbuf: LEB-sized buffer to use
 984 *
 985 * This function reads a LEB up to a point pre-determined by the mount recovery,
 986 * checks the nodes, and writes the result back to the flash, thereby cleaning
 987 * off any following corruption, or non-fatal ECC errors.
 988 *
 989 * This function returns %0 on success and a negative error code on failure.
 990 */
 991static int clean_an_unclean_leb(struct ubifs_info *c,
 992                                struct ubifs_unclean_leb *ucleb, void *sbuf)
 993{
 994        int err, lnum = ucleb->lnum, offs = 0, len = ucleb->endpt, quiet = 1;
 995        void *buf = sbuf;
 996
 997        dbg_rcvry("LEB %d len %d", lnum, len);
 998
 999        if (len == 0) {
1000                /* Nothing to read, just unmap it */
1001                return ubifs_leb_unmap(c, lnum);
1002        }
1003
1004        err = ubifs_leb_read(c, lnum, buf, offs, len, 0);
1005        if (err && err != -EBADMSG)
1006                return err;
1007
1008        while (len >= 8) {
1009                int ret;
1010
1011                cond_resched();
1012
1013                /* Scan quietly until there is an error */
1014                ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet);
1015
1016                if (ret == SCANNED_A_NODE) {
1017                        /* A valid node, and not a padding node */
1018                        struct ubifs_ch *ch = buf;
1019                        int node_len;
1020
1021                        node_len = ALIGN(le32_to_cpu(ch->len), 8);
1022                        offs += node_len;
1023                        buf += node_len;
1024                        len -= node_len;
1025                        continue;
1026                }
1027
1028                if (ret > 0) {
1029                        /* Padding bytes or a valid padding node */
1030                        offs += ret;
1031                        buf += ret;
1032                        len -= ret;
1033                        continue;
1034                }
1035
1036                if (ret == SCANNED_EMPTY_SPACE) {
1037                        ubifs_err(c, "unexpected empty space at %d:%d",
1038                                  lnum, offs);
1039                        return -EUCLEAN;
1040                }
1041
1042                if (quiet) {
1043                        /* Redo the last scan but noisily */
1044                        quiet = 0;
1045                        continue;
1046                }
1047
1048                ubifs_scanned_corruption(c, lnum, offs, buf);
1049                return -EUCLEAN;
1050        }
1051
1052        /* Pad to min_io_size */
1053        len = ALIGN(ucleb->endpt, c->min_io_size);
1054        if (len > ucleb->endpt) {
1055                int pad_len = len - ALIGN(ucleb->endpt, 8);
1056
1057                if (pad_len > 0) {
1058                        buf = c->sbuf + len - pad_len;
1059                        ubifs_pad(c, buf, pad_len);
1060                }
1061        }
1062
1063        /* Write back the LEB atomically */
1064        err = ubifs_leb_change(c, lnum, sbuf, len);
1065        if (err)
1066                return err;
1067
1068        dbg_rcvry("cleaned LEB %d", lnum);
1069
1070        return 0;
1071}
1072
1073/**
1074 * ubifs_clean_lebs - clean LEBs recovered during read-only mount.
1075 * @c: UBIFS file-system description object
1076 * @sbuf: LEB-sized buffer to use
1077 *
1078 * This function cleans a LEB identified during recovery that needs to be
1079 * written but was not because UBIFS was mounted read-only. This happens when
1080 * remounting to read-write mode.
1081 *
1082 * This function returns %0 on success and a negative error code on failure.
1083 */
1084int ubifs_clean_lebs(struct ubifs_info *c, void *sbuf)
1085{
1086        dbg_rcvry("recovery");
1087        while (!list_empty(&c->unclean_leb_list)) {
1088                struct ubifs_unclean_leb *ucleb;
1089                int err;
1090
1091                ucleb = list_entry(c->unclean_leb_list.next,
1092                                   struct ubifs_unclean_leb, list);
1093                err = clean_an_unclean_leb(c, ucleb, sbuf);
1094                if (err)
1095                        return err;
1096                list_del(&ucleb->list);
1097                kfree(ucleb);
1098        }
1099        return 0;
1100}
1101
1102#ifndef __UBOOT__
1103/**
1104 * grab_empty_leb - grab an empty LEB to use as GC LEB and run commit.
1105 * @c: UBIFS file-system description object
1106 *
1107 * This is a helper function for 'ubifs_rcvry_gc_commit()' which grabs an empty
1108 * LEB to be used as GC LEB (@c->gc_lnum), and then runs the commit. Returns
1109 * zero in case of success and a negative error code in case of failure.
1110 */
1111static int grab_empty_leb(struct ubifs_info *c)
1112{
1113        int lnum, err;
1114
1115        /*
1116         * Note, it is very important to first search for an empty LEB and then
1117         * run the commit, not vice-versa. The reason is that there might be
1118         * only one empty LEB at the moment, the one which has been the
1119         * @c->gc_lnum just before the power cut happened. During the regular
1120         * UBIFS operation (not now) @c->gc_lnum is marked as "taken", so no
1121         * one but GC can grab it. But at this moment this single empty LEB is
1122         * not marked as taken, so if we run commit - what happens? Right, the
1123         * commit will grab it and write the index there. Remember that the
1124         * index always expands as long as there is free space, and it only
1125         * starts consolidating when we run out of space.
1126         *
1127         * IOW, if we run commit now, we might not be able to find a free LEB
1128         * after this.
1129         */
1130        lnum = ubifs_find_free_leb_for_idx(c);
1131        if (lnum < 0) {
1132                ubifs_err(c, "could not find an empty LEB");
1133                ubifs_dump_lprops(c);
1134                ubifs_dump_budg(c, &c->bi);
1135                return lnum;
1136        }
1137
1138        /* Reset the index flag */
1139        err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
1140                                  LPROPS_INDEX, 0);
1141        if (err)
1142                return err;
1143
1144        c->gc_lnum = lnum;
1145        dbg_rcvry("found empty LEB %d, run commit", lnum);
1146
1147        return ubifs_run_commit(c);
1148}
1149
1150/**
1151 * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit.
1152 * @c: UBIFS file-system description object
1153 *
1154 * Out-of-place garbage collection requires always one empty LEB with which to
1155 * start garbage collection. The LEB number is recorded in c->gc_lnum and is
1156 * written to the master node on unmounting. In the case of an unclean unmount
1157 * the value of gc_lnum recorded in the master node is out of date and cannot
1158 * be used. Instead, recovery must allocate an empty LEB for this purpose.
1159 * However, there may not be enough empty space, in which case it must be
1160 * possible to GC the dirtiest LEB into the GC head LEB.
1161 *
1162 * This function also runs the commit which causes the TNC updates from
1163 * size-recovery and orphans to be written to the flash. That is important to
1164 * ensure correct replay order for subsequent mounts.
1165 *
1166 * This function returns %0 on success and a negative error code on failure.
1167 */
1168int ubifs_rcvry_gc_commit(struct ubifs_info *c)
1169{
1170        struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
1171        struct ubifs_lprops lp;
1172        int err;
1173
1174        dbg_rcvry("GC head LEB %d, offs %d", wbuf->lnum, wbuf->offs);
1175
1176        c->gc_lnum = -1;
1177        if (wbuf->lnum == -1 || wbuf->offs == c->leb_size)
1178                return grab_empty_leb(c);
1179
1180        err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2);
1181        if (err) {
1182                if (err != -ENOSPC)
1183                        return err;
1184
1185                dbg_rcvry("could not find a dirty LEB");
1186                return grab_empty_leb(c);
1187        }
1188
1189        ubifs_assert(!(lp.flags & LPROPS_INDEX));
1190        ubifs_assert(lp.free + lp.dirty >= wbuf->offs);
1191
1192        /*
1193         * We run the commit before garbage collection otherwise subsequent
1194         * mounts will see the GC and orphan deletion in a different order.
1195         */
1196        dbg_rcvry("committing");
1197        err = ubifs_run_commit(c);
1198        if (err)
1199                return err;
1200
1201        dbg_rcvry("GC'ing LEB %d", lp.lnum);
1202        mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
1203        err = ubifs_garbage_collect_leb(c, &lp);
1204        if (err >= 0) {
1205                int err2 = ubifs_wbuf_sync_nolock(wbuf);
1206
1207                if (err2)
1208                        err = err2;
1209        }
1210        mutex_unlock(&wbuf->io_mutex);
1211        if (err < 0) {
1212                ubifs_err(c, "GC failed, error %d", err);
1213                if (err == -EAGAIN)
1214                        err = -EINVAL;
1215                return err;
1216        }
1217
1218        ubifs_assert(err == LEB_RETAINED);
1219        if (err != LEB_RETAINED)
1220                return -EINVAL;
1221
1222        err = ubifs_leb_unmap(c, c->gc_lnum);
1223        if (err)
1224                return err;
1225
1226        dbg_rcvry("allocated LEB %d for GC", lp.lnum);
1227        return 0;
1228}
1229#else
1230int ubifs_rcvry_gc_commit(struct ubifs_info *c)
1231{
1232        return 0;
1233}
1234#endif
1235
1236/**
1237 * struct size_entry - inode size information for recovery.
1238 * @rb: link in the RB-tree of sizes
1239 * @inum: inode number
1240 * @i_size: size on inode
1241 * @d_size: maximum size based on data nodes
1242 * @exists: indicates whether the inode exists
1243 * @inode: inode if pinned in memory awaiting rw mode to fix it
1244 */
1245struct size_entry {
1246        struct rb_node rb;
1247        ino_t inum;
1248        loff_t i_size;
1249        loff_t d_size;
1250        int exists;
1251        struct inode *inode;
1252};
1253
1254/**
1255 * add_ino - add an entry to the size tree.
1256 * @c: UBIFS file-system description object
1257 * @inum: inode number
1258 * @i_size: size on inode
1259 * @d_size: maximum size based on data nodes
1260 * @exists: indicates whether the inode exists
1261 */
1262static int add_ino(struct ubifs_info *c, ino_t inum, loff_t i_size,
1263                   loff_t d_size, int exists)
1264{
1265        struct rb_node **p = &c->size_tree.rb_node, *parent = NULL;
1266        struct size_entry *e;
1267
1268        while (*p) {
1269                parent = *p;
1270                e = rb_entry(parent, struct size_entry, rb);
1271                if (inum < e->inum)
1272                        p = &(*p)->rb_left;
1273                else
1274                        p = &(*p)->rb_right;
1275        }
1276
1277        e = kzalloc(sizeof(struct size_entry), GFP_KERNEL);
1278        if (!e)
1279                return -ENOMEM;
1280
1281        e->inum = inum;
1282        e->i_size = i_size;
1283        e->d_size = d_size;
1284        e->exists = exists;
1285
1286        rb_link_node(&e->rb, parent, p);
1287        rb_insert_color(&e->rb, &c->size_tree);
1288
1289        return 0;
1290}
1291
1292/**
1293 * find_ino - find an entry on the size tree.
1294 * @c: UBIFS file-system description object
1295 * @inum: inode number
1296 */
1297static struct size_entry *find_ino(struct ubifs_info *c, ino_t inum)
1298{
1299        struct rb_node *p = c->size_tree.rb_node;
1300        struct size_entry *e;
1301
1302        while (p) {
1303                e = rb_entry(p, struct size_entry, rb);
1304                if (inum < e->inum)
1305                        p = p->rb_left;
1306                else if (inum > e->inum)
1307                        p = p->rb_right;
1308                else
1309                        return e;
1310        }
1311        return NULL;
1312}
1313
1314/**
1315 * remove_ino - remove an entry from the size tree.
1316 * @c: UBIFS file-system description object
1317 * @inum: inode number
1318 */
1319static void remove_ino(struct ubifs_info *c, ino_t inum)
1320{
1321        struct size_entry *e = find_ino(c, inum);
1322
1323        if (!e)
1324                return;
1325        rb_erase(&e->rb, &c->size_tree);
1326        kfree(e);
1327}
1328
1329/**
1330 * ubifs_destroy_size_tree - free resources related to the size tree.
1331 * @c: UBIFS file-system description object
1332 */
1333void ubifs_destroy_size_tree(struct ubifs_info *c)
1334{
1335        struct size_entry *e, *n;
1336
1337        rbtree_postorder_for_each_entry_safe(e, n, &c->size_tree, rb) {
1338                if (e->inode)
1339                        iput(e->inode);
1340                kfree(e);
1341        }
1342
1343        c->size_tree = RB_ROOT;
1344}
1345
1346/**
1347 * ubifs_recover_size_accum - accumulate inode sizes for recovery.
1348 * @c: UBIFS file-system description object
1349 * @key: node key
1350 * @deletion: node is for a deletion
1351 * @new_size: inode size
1352 *
1353 * This function has two purposes:
1354 *     1) to ensure there are no data nodes that fall outside the inode size
1355 *     2) to ensure there are no data nodes for inodes that do not exist
1356 * To accomplish those purposes, a rb-tree is constructed containing an entry
1357 * for each inode number in the journal that has not been deleted, and recording
1358 * the size from the inode node, the maximum size of any data node (also altered
1359 * by truncations) and a flag indicating a inode number for which no inode node
1360 * was present in the journal.
1361 *
1362 * Note that there is still the possibility that there are data nodes that have
1363 * been committed that are beyond the inode size, however the only way to find
1364 * them would be to scan the entire index. Alternatively, some provision could
1365 * be made to record the size of inodes at the start of commit, which would seem
1366 * very cumbersome for a scenario that is quite unlikely and the only negative
1367 * consequence of which is wasted space.
1368 *
1369 * This functions returns %0 on success and a negative error code on failure.
1370 */
1371int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key,
1372                             int deletion, loff_t new_size)
1373{
1374        ino_t inum = key_inum(c, key);
1375        struct size_entry *e;
1376        int err;
1377
1378        switch (key_type(c, key)) {
1379        case UBIFS_INO_KEY:
1380                if (deletion)
1381                        remove_ino(c, inum);
1382                else {
1383                        e = find_ino(c, inum);
1384                        if (e) {
1385                                e->i_size = new_size;
1386                                e->exists = 1;
1387                        } else {
1388                                err = add_ino(c, inum, new_size, 0, 1);
1389                                if (err)
1390                                        return err;
1391                        }
1392                }
1393                break;
1394        case UBIFS_DATA_KEY:
1395                e = find_ino(c, inum);
1396                if (e) {
1397                        if (new_size > e->d_size)
1398                                e->d_size = new_size;
1399                } else {
1400                        err = add_ino(c, inum, 0, new_size, 0);
1401                        if (err)
1402                                return err;
1403                }
1404                break;
1405        case UBIFS_TRUN_KEY:
1406                e = find_ino(c, inum);
1407                if (e)
1408                        e->d_size = new_size;
1409                break;
1410        }
1411        return 0;
1412}
1413
1414#ifndef __UBOOT__
1415/**
1416 * fix_size_in_place - fix inode size in place on flash.
1417 * @c: UBIFS file-system description object
1418 * @e: inode size information for recovery
1419 */
1420static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e)
1421{
1422        struct ubifs_ino_node *ino = c->sbuf;
1423        unsigned char *p;
1424        union ubifs_key key;
1425        int err, lnum, offs, len;
1426        loff_t i_size;
1427        uint32_t crc;
1428
1429        /* Locate the inode node LEB number and offset */
1430        ino_key_init(c, &key, e->inum);
1431        err = ubifs_tnc_locate(c, &key, ino, &lnum, &offs);
1432        if (err)
1433                goto out;
1434        /*
1435         * If the size recorded on the inode node is greater than the size that
1436         * was calculated from nodes in the journal then don't change the inode.
1437         */
1438        i_size = le64_to_cpu(ino->size);
1439        if (i_size >= e->d_size)
1440                return 0;
1441        /* Read the LEB */
1442        err = ubifs_leb_read(c, lnum, c->sbuf, 0, c->leb_size, 1);
1443        if (err)
1444                goto out;
1445        /* Change the size field and recalculate the CRC */
1446        ino = c->sbuf + offs;
1447        ino->size = cpu_to_le64(e->d_size);
1448        len = le32_to_cpu(ino->ch.len);
1449        crc = crc32(UBIFS_CRC32_INIT, (void *)ino + 8, len - 8);
1450        ino->ch.crc = cpu_to_le32(crc);
1451        /* Work out where data in the LEB ends and free space begins */
1452        p = c->sbuf;
1453        len = c->leb_size - 1;
1454        while (p[len] == 0xff)
1455                len -= 1;
1456        len = ALIGN(len + 1, c->min_io_size);
1457        /* Atomically write the fixed LEB back again */
1458        err = ubifs_leb_change(c, lnum, c->sbuf, len);
1459        if (err)
1460                goto out;
1461        dbg_rcvry("inode %lu at %d:%d size %lld -> %lld",
1462                  (unsigned long)e->inum, lnum, offs, i_size, e->d_size);
1463        return 0;
1464
1465out:
1466        ubifs_warn(c, "inode %lu failed to fix size %lld -> %lld error %d",
1467                   (unsigned long)e->inum, e->i_size, e->d_size, err);
1468        return err;
1469}
1470#endif
1471
1472/**
1473 * ubifs_recover_size - recover inode size.
1474 * @c: UBIFS file-system description object
1475 *
1476 * This function attempts to fix inode size discrepancies identified by the
1477 * 'ubifs_recover_size_accum()' function.
1478 *
1479 * This functions returns %0 on success and a negative error code on failure.
1480 */
1481int ubifs_recover_size(struct ubifs_info *c)
1482{
1483        struct rb_node *this = rb_first(&c->size_tree);
1484
1485        while (this) {
1486                struct size_entry *e;
1487                int err;
1488
1489                e = rb_entry(this, struct size_entry, rb);
1490                if (!e->exists) {
1491                        union ubifs_key key;
1492
1493                        ino_key_init(c, &key, e->inum);
1494                        err = ubifs_tnc_lookup(c, &key, c->sbuf);
1495                        if (err && err != -ENOENT)
1496                                return err;
1497                        if (err == -ENOENT) {
1498                                /* Remove data nodes that have no inode */
1499                                dbg_rcvry("removing ino %lu",
1500                                          (unsigned long)e->inum);
1501                                err = ubifs_tnc_remove_ino(c, e->inum);
1502                                if (err)
1503                                        return err;
1504                        } else {
1505                                struct ubifs_ino_node *ino = c->sbuf;
1506
1507                                e->exists = 1;
1508                                e->i_size = le64_to_cpu(ino->size);
1509                        }
1510                }
1511
1512                if (e->exists && e->i_size < e->d_size) {
1513                        if (c->ro_mount) {
1514                                /* Fix the inode size and pin it in memory */
1515                                struct inode *inode;
1516                                struct ubifs_inode *ui;
1517
1518                                ubifs_assert(!e->inode);
1519
1520                                inode = ubifs_iget(c->vfs_sb, e->inum);
1521                                if (IS_ERR(inode))
1522                                        return PTR_ERR(inode);
1523
1524                                ui = ubifs_inode(inode);
1525                                if (inode->i_size < e->d_size) {
1526                                        dbg_rcvry("ino %lu size %lld -> %lld",
1527                                                  (unsigned long)e->inum,
1528                                                  inode->i_size, e->d_size);
1529                                        inode->i_size = e->d_size;
1530                                        ui->ui_size = e->d_size;
1531                                        ui->synced_i_size = e->d_size;
1532                                        e->inode = inode;
1533                                        this = rb_next(this);
1534                                        continue;
1535                                }
1536                                iput(inode);
1537#ifndef __UBOOT__
1538                        } else {
1539                                /* Fix the size in place */
1540                                err = fix_size_in_place(c, e);
1541                                if (err)
1542                                        return err;
1543                                if (e->inode)
1544                                        iput(e->inode);
1545#endif
1546                        }
1547                }
1548
1549                this = rb_next(this);
1550                rb_erase(&e->rb, &c->size_tree);
1551                kfree(e);
1552        }
1553
1554        return 0;
1555}
1556