uboot/fs/ubifs/recovery.c
<<
>>
Prefs
   1/*
   2 * This file is part of UBIFS.
   3 *
   4 * Copyright (C) 2006-2008 Nokia Corporation
   5 *
   6 * This program is free software; you can redistribute it and/or modify it
   7 * under the terms of the GNU General Public License version 2 as published by
   8 * the Free Software Foundation.
   9 *
  10 * This program is distributed in the hope that it will be useful, but WITHOUT
  11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  13 * more details.
  14 *
  15 * You should have received a copy of the GNU General Public License along with
  16 * this program; if not, write to the Free Software Foundation, Inc., 51
  17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  18 *
  19 * Authors: Adrian Hunter
  20 *          Artem Bityutskiy (Битюцкий Артём)
  21 */
  22
  23/*
  24 * This file implements functions needed to recover from unclean un-mounts.
  25 * When UBIFS is mounted, it checks a flag on the master node to determine if
  26 * an un-mount was completed sucessfully. If not, the process of mounting
  27 * incorparates additional checking and fixing of on-flash data structures.
  28 * UBIFS always cleans away all remnants of an unclean un-mount, so that
  29 * errors do not accumulate. However UBIFS defers recovery if it is mounted
  30 * read-only, and the flash is not modified in that case.
  31 */
  32
  33#include "ubifs.h"
  34
  35/**
  36 * is_empty - determine whether a buffer is empty (contains all 0xff).
  37 * @buf: buffer to clean
  38 * @len: length of buffer
  39 *
  40 * This function returns %1 if the buffer is empty (contains all 0xff) otherwise
  41 * %0 is returned.
  42 */
  43static int is_empty(void *buf, int len)
  44{
  45        uint8_t *p = buf;
  46        int i;
  47
  48        for (i = 0; i < len; i++)
  49                if (*p++ != 0xff)
  50                        return 0;
  51        return 1;
  52}
  53
  54/**
  55 * get_master_node - get the last valid master node allowing for corruption.
  56 * @c: UBIFS file-system description object
  57 * @lnum: LEB number
  58 * @pbuf: buffer containing the LEB read, is returned here
  59 * @mst: master node, if found, is returned here
  60 * @cor: corruption, if found, is returned here
  61 *
  62 * This function allocates a buffer, reads the LEB into it, and finds and
  63 * returns the last valid master node allowing for one area of corruption.
  64 * The corrupt area, if there is one, must be consistent with the assumption
  65 * that it is the result of an unclean unmount while the master node was being
  66 * written. Under those circumstances, it is valid to use the previously written
  67 * master node.
  68 *
  69 * This function returns %0 on success and a negative error code on failure.
  70 */
  71static int get_master_node(const struct ubifs_info *c, int lnum, void **pbuf,
  72                           struct ubifs_mst_node **mst, void **cor)
  73{
  74        const int sz = c->mst_node_alsz;
  75        int err, offs, len;
  76        void *sbuf, *buf;
  77
  78        sbuf = vmalloc(c->leb_size);
  79        if (!sbuf)
  80                return -ENOMEM;
  81
  82        err = ubi_read(c->ubi, lnum, sbuf, 0, c->leb_size);
  83        if (err && err != -EBADMSG)
  84                goto out_free;
  85
  86        /* Find the first position that is definitely not a node */
  87        offs = 0;
  88        buf = sbuf;
  89        len = c->leb_size;
  90        while (offs + UBIFS_MST_NODE_SZ <= c->leb_size) {
  91                struct ubifs_ch *ch = buf;
  92
  93                if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC)
  94                        break;
  95                offs += sz;
  96                buf  += sz;
  97                len  -= sz;
  98        }
  99        /* See if there was a valid master node before that */
 100        if (offs) {
 101                int ret;
 102
 103                offs -= sz;
 104                buf  -= sz;
 105                len  += sz;
 106                ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
 107                if (ret != SCANNED_A_NODE && offs) {
 108                        /* Could have been corruption so check one place back */
 109                        offs -= sz;
 110                        buf  -= sz;
 111                        len  += sz;
 112                        ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
 113                        if (ret != SCANNED_A_NODE)
 114                                /*
 115                                 * We accept only one area of corruption because
 116                                 * we are assuming that it was caused while
 117                                 * trying to write a master node.
 118                                 */
 119                                goto out_err;
 120                }
 121                if (ret == SCANNED_A_NODE) {
 122                        struct ubifs_ch *ch = buf;
 123
 124                        if (ch->node_type != UBIFS_MST_NODE)
 125                                goto out_err;
 126                        dbg_rcvry("found a master node at %d:%d", lnum, offs);
 127                        *mst = buf;
 128                        offs += sz;
 129                        buf  += sz;
 130                        len  -= sz;
 131                }
 132        }
 133        /* Check for corruption */
 134        if (offs < c->leb_size) {
 135                if (!is_empty(buf, min_t(int, len, sz))) {
 136                        *cor = buf;
 137                        dbg_rcvry("found corruption at %d:%d", lnum, offs);
 138                }
 139                offs += sz;
 140                buf  += sz;
 141                len  -= sz;
 142        }
 143        /* Check remaining empty space */
 144        if (offs < c->leb_size)
 145                if (!is_empty(buf, len))
 146                        goto out_err;
 147        *pbuf = sbuf;
 148        return 0;
 149
 150out_err:
 151        err = -EINVAL;
 152out_free:
 153        vfree(sbuf);
 154        *mst = NULL;
 155        *cor = NULL;
 156        return err;
 157}
 158
 159/**
 160 * write_rcvrd_mst_node - write recovered master node.
 161 * @c: UBIFS file-system description object
 162 * @mst: master node
 163 *
 164 * This function returns %0 on success and a negative error code on failure.
 165 */
 166static int write_rcvrd_mst_node(struct ubifs_info *c,
 167                                struct ubifs_mst_node *mst)
 168{
 169        int err = 0, lnum = UBIFS_MST_LNUM, sz = c->mst_node_alsz;
 170        __le32 save_flags;
 171
 172        dbg_rcvry("recovery");
 173
 174        save_flags = mst->flags;
 175        mst->flags |= cpu_to_le32(UBIFS_MST_RCVRY);
 176
 177        ubifs_prepare_node(c, mst, UBIFS_MST_NODE_SZ, 1);
 178        err = ubi_leb_change(c->ubi, lnum, mst, sz, UBI_SHORTTERM);
 179        if (err)
 180                goto out;
 181        err = ubi_leb_change(c->ubi, lnum + 1, mst, sz, UBI_SHORTTERM);
 182        if (err)
 183                goto out;
 184out:
 185        mst->flags = save_flags;
 186        return err;
 187}
 188
 189/**
 190 * ubifs_recover_master_node - recover the master node.
 191 * @c: UBIFS file-system description object
 192 *
 193 * This function recovers the master node from corruption that may occur due to
 194 * an unclean unmount.
 195 *
 196 * This function returns %0 on success and a negative error code on failure.
 197 */
 198int ubifs_recover_master_node(struct ubifs_info *c)
 199{
 200        void *buf1 = NULL, *buf2 = NULL, *cor1 = NULL, *cor2 = NULL;
 201        struct ubifs_mst_node *mst1 = NULL, *mst2 = NULL, *mst;
 202        const int sz = c->mst_node_alsz;
 203        int err, offs1, offs2;
 204
 205        dbg_rcvry("recovery");
 206
 207        err = get_master_node(c, UBIFS_MST_LNUM, &buf1, &mst1, &cor1);
 208        if (err)
 209                goto out_free;
 210
 211        err = get_master_node(c, UBIFS_MST_LNUM + 1, &buf2, &mst2, &cor2);
 212        if (err)
 213                goto out_free;
 214
 215        if (mst1) {
 216                offs1 = (void *)mst1 - buf1;
 217                if ((le32_to_cpu(mst1->flags) & UBIFS_MST_RCVRY) &&
 218                    (offs1 == 0 && !cor1)) {
 219                        /*
 220                         * mst1 was written by recovery at offset 0 with no
 221                         * corruption.
 222                         */
 223                        dbg_rcvry("recovery recovery");
 224                        mst = mst1;
 225                } else if (mst2) {
 226                        offs2 = (void *)mst2 - buf2;
 227                        if (offs1 == offs2) {
 228                                /* Same offset, so must be the same */
 229                                if (memcmp((void *)mst1 + UBIFS_CH_SZ,
 230                                           (void *)mst2 + UBIFS_CH_SZ,
 231                                           UBIFS_MST_NODE_SZ - UBIFS_CH_SZ))
 232                                        goto out_err;
 233                                mst = mst1;
 234                        } else if (offs2 + sz == offs1) {
 235                                /* 1st LEB was written, 2nd was not */
 236                                if (cor1)
 237                                        goto out_err;
 238                                mst = mst1;
 239                        } else if (offs1 == 0 && offs2 + sz >= c->leb_size) {
 240                                /* 1st LEB was unmapped and written, 2nd not */
 241                                if (cor1)
 242                                        goto out_err;
 243                                mst = mst1;
 244                        } else
 245                                goto out_err;
 246                } else {
 247                        /*
 248                         * 2nd LEB was unmapped and about to be written, so
 249                         * there must be only one master node in the first LEB
 250                         * and no corruption.
 251                         */
 252                        if (offs1 != 0 || cor1)
 253                                goto out_err;
 254                        mst = mst1;
 255                }
 256        } else {
 257                if (!mst2)
 258                        goto out_err;
 259                /*
 260                 * 1st LEB was unmapped and about to be written, so there must
 261                 * be no room left in 2nd LEB.
 262                 */
 263                offs2 = (void *)mst2 - buf2;
 264                if (offs2 + sz + sz <= c->leb_size)
 265                        goto out_err;
 266                mst = mst2;
 267        }
 268
 269        dbg_rcvry("recovered master node from LEB %d",
 270                  (mst == mst1 ? UBIFS_MST_LNUM : UBIFS_MST_LNUM + 1));
 271
 272        memcpy(c->mst_node, mst, UBIFS_MST_NODE_SZ);
 273
 274        if ((c->vfs_sb->s_flags & MS_RDONLY)) {
 275                /* Read-only mode. Keep a copy for switching to rw mode */
 276                c->rcvrd_mst_node = kmalloc(sz, GFP_KERNEL);
 277                if (!c->rcvrd_mst_node) {
 278                        err = -ENOMEM;
 279                        goto out_free;
 280                }
 281                memcpy(c->rcvrd_mst_node, c->mst_node, UBIFS_MST_NODE_SZ);
 282        }
 283
 284        vfree(buf2);
 285        vfree(buf1);
 286
 287        return 0;
 288
 289out_err:
 290        err = -EINVAL;
 291out_free:
 292        ubifs_err("failed to recover master node");
 293        if (mst1) {
 294                dbg_err("dumping first master node");
 295                dbg_dump_node(c, mst1);
 296        }
 297        if (mst2) {
 298                dbg_err("dumping second master node");
 299                dbg_dump_node(c, mst2);
 300        }
 301        vfree(buf2);
 302        vfree(buf1);
 303        return err;
 304}
 305
 306/**
 307 * ubifs_write_rcvrd_mst_node - write the recovered master node.
 308 * @c: UBIFS file-system description object
 309 *
 310 * This function writes the master node that was recovered during mounting in
 311 * read-only mode and must now be written because we are remounting rw.
 312 *
 313 * This function returns %0 on success and a negative error code on failure.
 314 */
 315int ubifs_write_rcvrd_mst_node(struct ubifs_info *c)
 316{
 317        int err;
 318
 319        if (!c->rcvrd_mst_node)
 320                return 0;
 321        c->rcvrd_mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
 322        c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
 323        err = write_rcvrd_mst_node(c, c->rcvrd_mst_node);
 324        if (err)
 325                return err;
 326        kfree(c->rcvrd_mst_node);
 327        c->rcvrd_mst_node = NULL;
 328        return 0;
 329}
 330
 331/**
 332 * is_last_write - determine if an offset was in the last write to a LEB.
 333 * @c: UBIFS file-system description object
 334 * @buf: buffer to check
 335 * @offs: offset to check
 336 *
 337 * This function returns %1 if @offs was in the last write to the LEB whose data
 338 * is in @buf, otherwise %0 is returned.  The determination is made by checking
 339 * for subsequent empty space starting from the next min_io_size boundary (or a
 340 * bit less than the common header size if min_io_size is one).
 341 */
 342static int is_last_write(const struct ubifs_info *c, void *buf, int offs)
 343{
 344        int empty_offs;
 345        int check_len;
 346        uint8_t *p;
 347
 348        if (c->min_io_size == 1) {
 349                check_len = c->leb_size - offs;
 350                p = buf + check_len;
 351                for (; check_len > 0; check_len--)
 352                        if (*--p != 0xff)
 353                                break;
 354                /*
 355                 * 'check_len' is the size of the corruption which cannot be
 356                 * more than the size of 1 node if it was caused by an unclean
 357                 * unmount.
 358                 */
 359                if (check_len > UBIFS_MAX_NODE_SZ)
 360                        return 0;
 361                return 1;
 362        }
 363
 364        /*
 365         * Round up to the next c->min_io_size boundary i.e. 'offs' is in the
 366         * last wbuf written. After that should be empty space.
 367         */
 368        empty_offs = ALIGN(offs + 1, c->min_io_size);
 369        check_len = c->leb_size - empty_offs;
 370        p = buf + empty_offs - offs;
 371
 372        for (; check_len > 0; check_len--)
 373                if (*p++ != 0xff)
 374                        return 0;
 375        return 1;
 376}
 377
 378/**
 379 * clean_buf - clean the data from an LEB sitting in a buffer.
 380 * @c: UBIFS file-system description object
 381 * @buf: buffer to clean
 382 * @lnum: LEB number to clean
 383 * @offs: offset from which to clean
 384 * @len: length of buffer
 385 *
 386 * This function pads up to the next min_io_size boundary (if there is one) and
 387 * sets empty space to all 0xff. @buf, @offs and @len are updated to the next
 388 * min_io_size boundary (if there is one).
 389 */
 390static void clean_buf(const struct ubifs_info *c, void **buf, int lnum,
 391                      int *offs, int *len)
 392{
 393        int empty_offs, pad_len;
 394
 395        lnum = lnum;
 396        dbg_rcvry("cleaning corruption at %d:%d", lnum, *offs);
 397
 398        if (c->min_io_size == 1) {
 399                memset(*buf, 0xff, c->leb_size - *offs);
 400                return;
 401        }
 402
 403        ubifs_assert(!(*offs & 7));
 404        empty_offs = ALIGN(*offs, c->min_io_size);
 405        pad_len = empty_offs - *offs;
 406        ubifs_pad(c, *buf, pad_len);
 407        *offs += pad_len;
 408        *buf += pad_len;
 409        *len -= pad_len;
 410        memset(*buf, 0xff, c->leb_size - empty_offs);
 411}
 412
 413/**
 414 * no_more_nodes - determine if there are no more nodes in a buffer.
 415 * @c: UBIFS file-system description object
 416 * @buf: buffer to check
 417 * @len: length of buffer
 418 * @lnum: LEB number of the LEB from which @buf was read
 419 * @offs: offset from which @buf was read
 420 *
 421 * This function ensures that the corrupted node at @offs is the last thing
 422 * written to a LEB. This function returns %1 if more data is not found and
 423 * %0 if more data is found.
 424 */
 425static int no_more_nodes(const struct ubifs_info *c, void *buf, int len,
 426                        int lnum, int offs)
 427{
 428        struct ubifs_ch *ch = buf;
 429        int skip, dlen = le32_to_cpu(ch->len);
 430
 431        /* Check for empty space after the corrupt node's common header */
 432        skip = ALIGN(offs + UBIFS_CH_SZ, c->min_io_size) - offs;
 433        if (is_empty(buf + skip, len - skip))
 434                return 1;
 435        /*
 436         * The area after the common header size is not empty, so the common
 437         * header must be intact. Check it.
 438         */
 439        if (ubifs_check_node(c, buf, lnum, offs, 1, 0) != -EUCLEAN) {
 440                dbg_rcvry("unexpected bad common header at %d:%d", lnum, offs);
 441                return 0;
 442        }
 443        /* Now we know the corrupt node's length we can skip over it */
 444        skip = ALIGN(offs + dlen, c->min_io_size) - offs;
 445        /* After which there should be empty space */
 446        if (is_empty(buf + skip, len - skip))
 447                return 1;
 448        dbg_rcvry("unexpected data at %d:%d", lnum, offs + skip);
 449        return 0;
 450}
 451
 452/**
 453 * fix_unclean_leb - fix an unclean LEB.
 454 * @c: UBIFS file-system description object
 455 * @sleb: scanned LEB information
 456 * @start: offset where scan started
 457 */
 458static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
 459                           int start)
 460{
 461        int lnum = sleb->lnum, endpt = start;
 462
 463        /* Get the end offset of the last node we are keeping */
 464        if (!list_empty(&sleb->nodes)) {
 465                struct ubifs_scan_node *snod;
 466
 467                snod = list_entry(sleb->nodes.prev,
 468                                  struct ubifs_scan_node, list);
 469                endpt = snod->offs + snod->len;
 470        }
 471
 472        if ((c->vfs_sb->s_flags & MS_RDONLY) && !c->remounting_rw) {
 473                /* Add to recovery list */
 474                struct ubifs_unclean_leb *ucleb;
 475
 476                dbg_rcvry("need to fix LEB %d start %d endpt %d",
 477                          lnum, start, sleb->endpt);
 478                ucleb = kzalloc(sizeof(struct ubifs_unclean_leb), GFP_NOFS);
 479                if (!ucleb)
 480                        return -ENOMEM;
 481                ucleb->lnum = lnum;
 482                ucleb->endpt = endpt;
 483                list_add_tail(&ucleb->list, &c->unclean_leb_list);
 484        }
 485        return 0;
 486}
 487
 488/**
 489 * drop_incomplete_group - drop nodes from an incomplete group.
 490 * @sleb: scanned LEB information
 491 * @offs: offset of dropped nodes is returned here
 492 *
 493 * This function returns %1 if nodes are dropped and %0 otherwise.
 494 */
 495static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs)
 496{
 497        int dropped = 0;
 498
 499        while (!list_empty(&sleb->nodes)) {
 500                struct ubifs_scan_node *snod;
 501                struct ubifs_ch *ch;
 502
 503                snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node,
 504                                  list);
 505                ch = snod->node;
 506                if (ch->group_type != UBIFS_IN_NODE_GROUP)
 507                        return dropped;
 508                dbg_rcvry("dropping node at %d:%d", sleb->lnum, snod->offs);
 509                *offs = snod->offs;
 510                list_del(&snod->list);
 511                kfree(snod);
 512                sleb->nodes_cnt -= 1;
 513                dropped = 1;
 514        }
 515        return dropped;
 516}
 517
 518/**
 519 * ubifs_recover_leb - scan and recover a LEB.
 520 * @c: UBIFS file-system description object
 521 * @lnum: LEB number
 522 * @offs: offset
 523 * @sbuf: LEB-sized buffer to use
 524 * @grouped: nodes may be grouped for recovery
 525 *
 526 * This function does a scan of a LEB, but caters for errors that might have
 527 * been caused by the unclean unmount from which we are attempting to recover.
 528 *
 529 * This function returns %0 on success and a negative error code on failure.
 530 */
 531struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
 532                                         int offs, void *sbuf, int grouped)
 533{
 534        int err, len = c->leb_size - offs, need_clean = 0, quiet = 1;
 535        int empty_chkd = 0, start = offs;
 536        struct ubifs_scan_leb *sleb;
 537        void *buf = sbuf + offs;
 538
 539        dbg_rcvry("%d:%d", lnum, offs);
 540
 541        sleb = ubifs_start_scan(c, lnum, offs, sbuf);
 542        if (IS_ERR(sleb))
 543                return sleb;
 544
 545        if (sleb->ecc)
 546                need_clean = 1;
 547
 548        while (len >= 8) {
 549                int ret;
 550
 551                dbg_scan("look at LEB %d:%d (%d bytes left)",
 552                         lnum, offs, len);
 553
 554                cond_resched();
 555
 556                /*
 557                 * Scan quietly until there is an error from which we cannot
 558                 * recover
 559                 */
 560                ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet);
 561
 562                if (ret == SCANNED_A_NODE) {
 563                        /* A valid node, and not a padding node */
 564                        struct ubifs_ch *ch = buf;
 565                        int node_len;
 566
 567                        err = ubifs_add_snod(c, sleb, buf, offs);
 568                        if (err)
 569                                goto error;
 570                        node_len = ALIGN(le32_to_cpu(ch->len), 8);
 571                        offs += node_len;
 572                        buf += node_len;
 573                        len -= node_len;
 574                        continue;
 575                }
 576
 577                if (ret > 0) {
 578                        /* Padding bytes or a valid padding node */
 579                        offs += ret;
 580                        buf += ret;
 581                        len -= ret;
 582                        continue;
 583                }
 584
 585                if (ret == SCANNED_EMPTY_SPACE) {
 586                        if (!is_empty(buf, len)) {
 587                                if (!is_last_write(c, buf, offs))
 588                                        break;
 589                                clean_buf(c, &buf, lnum, &offs, &len);
 590                                need_clean = 1;
 591                        }
 592                        empty_chkd = 1;
 593                        break;
 594                }
 595
 596                if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE)
 597                        if (is_last_write(c, buf, offs)) {
 598                                clean_buf(c, &buf, lnum, &offs, &len);
 599                                need_clean = 1;
 600                                empty_chkd = 1;
 601                                break;
 602                        }
 603
 604                if (ret == SCANNED_A_CORRUPT_NODE)
 605                        if (no_more_nodes(c, buf, len, lnum, offs)) {
 606                                clean_buf(c, &buf, lnum, &offs, &len);
 607                                need_clean = 1;
 608                                empty_chkd = 1;
 609                                break;
 610                        }
 611
 612                if (quiet) {
 613                        /* Redo the last scan but noisily */
 614                        quiet = 0;
 615                        continue;
 616                }
 617
 618                switch (ret) {
 619                case SCANNED_GARBAGE:
 620                        dbg_err("garbage");
 621                        goto corrupted;
 622                case SCANNED_A_CORRUPT_NODE:
 623                case SCANNED_A_BAD_PAD_NODE:
 624                        dbg_err("bad node");
 625                        goto corrupted;
 626                default:
 627                        dbg_err("unknown");
 628                        goto corrupted;
 629                }
 630        }
 631
 632        if (!empty_chkd && !is_empty(buf, len)) {
 633                if (is_last_write(c, buf, offs)) {
 634                        clean_buf(c, &buf, lnum, &offs, &len);
 635                        need_clean = 1;
 636                } else {
 637                        ubifs_err("corrupt empty space at LEB %d:%d",
 638                                  lnum, offs);
 639                        goto corrupted;
 640                }
 641        }
 642
 643        /* Drop nodes from incomplete group */
 644        if (grouped && drop_incomplete_group(sleb, &offs)) {
 645                buf = sbuf + offs;
 646                len = c->leb_size - offs;
 647                clean_buf(c, &buf, lnum, &offs, &len);
 648                need_clean = 1;
 649        }
 650
 651        if (offs % c->min_io_size) {
 652                clean_buf(c, &buf, lnum, &offs, &len);
 653                need_clean = 1;
 654        }
 655
 656        ubifs_end_scan(c, sleb, lnum, offs);
 657
 658        if (need_clean) {
 659                err = fix_unclean_leb(c, sleb, start);
 660                if (err)
 661                        goto error;
 662        }
 663
 664        return sleb;
 665
 666corrupted:
 667        ubifs_scanned_corruption(c, lnum, offs, buf);
 668        err = -EUCLEAN;
 669error:
 670        ubifs_err("LEB %d scanning failed", lnum);
 671        ubifs_scan_destroy(sleb);
 672        return ERR_PTR(err);
 673}
 674
 675/**
 676 * get_cs_sqnum - get commit start sequence number.
 677 * @c: UBIFS file-system description object
 678 * @lnum: LEB number of commit start node
 679 * @offs: offset of commit start node
 680 * @cs_sqnum: commit start sequence number is returned here
 681 *
 682 * This function returns %0 on success and a negative error code on failure.
 683 */
 684static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs,
 685                        unsigned long long *cs_sqnum)
 686{
 687        struct ubifs_cs_node *cs_node = NULL;
 688        int err, ret;
 689
 690        dbg_rcvry("at %d:%d", lnum, offs);
 691        cs_node = kmalloc(UBIFS_CS_NODE_SZ, GFP_KERNEL);
 692        if (!cs_node)
 693                return -ENOMEM;
 694        if (c->leb_size - offs < UBIFS_CS_NODE_SZ)
 695                goto out_err;
 696        err = ubi_read(c->ubi, lnum, (void *)cs_node, offs, UBIFS_CS_NODE_SZ);
 697        if (err && err != -EBADMSG)
 698                goto out_free;
 699        ret = ubifs_scan_a_node(c, cs_node, UBIFS_CS_NODE_SZ, lnum, offs, 0);
 700        if (ret != SCANNED_A_NODE) {
 701                dbg_err("Not a valid node");
 702                goto out_err;
 703        }
 704        if (cs_node->ch.node_type != UBIFS_CS_NODE) {
 705                dbg_err("Node a CS node, type is %d", cs_node->ch.node_type);
 706                goto out_err;
 707        }
 708        if (le64_to_cpu(cs_node->cmt_no) != c->cmt_no) {
 709                dbg_err("CS node cmt_no %llu != current cmt_no %llu",
 710                        (unsigned long long)le64_to_cpu(cs_node->cmt_no),
 711                        c->cmt_no);
 712                goto out_err;
 713        }
 714        *cs_sqnum = le64_to_cpu(cs_node->ch.sqnum);
 715        dbg_rcvry("commit start sqnum %llu", *cs_sqnum);
 716        kfree(cs_node);
 717        return 0;
 718
 719out_err:
 720        err = -EINVAL;
 721out_free:
 722        ubifs_err("failed to get CS sqnum");
 723        kfree(cs_node);
 724        return err;
 725}
 726
 727/**
 728 * ubifs_recover_log_leb - scan and recover a log LEB.
 729 * @c: UBIFS file-system description object
 730 * @lnum: LEB number
 731 * @offs: offset
 732 * @sbuf: LEB-sized buffer to use
 733 *
 734 * This function does a scan of a LEB, but caters for errors that might have
 735 * been caused by the unclean unmount from which we are attempting to recover.
 736 *
 737 * This function returns %0 on success and a negative error code on failure.
 738 */
 739struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
 740                                             int offs, void *sbuf)
 741{
 742        struct ubifs_scan_leb *sleb;
 743        int next_lnum;
 744
 745        dbg_rcvry("LEB %d", lnum);
 746        next_lnum = lnum + 1;
 747        if (next_lnum >= UBIFS_LOG_LNUM + c->log_lebs)
 748                next_lnum = UBIFS_LOG_LNUM;
 749        if (next_lnum != c->ltail_lnum) {
 750                /*
 751                 * We can only recover at the end of the log, so check that the
 752                 * next log LEB is empty or out of date.
 753                 */
 754                sleb = ubifs_scan(c, next_lnum, 0, sbuf);
 755                if (IS_ERR(sleb))
 756                        return sleb;
 757                if (sleb->nodes_cnt) {
 758                        struct ubifs_scan_node *snod;
 759                        unsigned long long cs_sqnum = c->cs_sqnum;
 760
 761                        snod = list_entry(sleb->nodes.next,
 762                                          struct ubifs_scan_node, list);
 763                        if (cs_sqnum == 0) {
 764                                int err;
 765
 766                                err = get_cs_sqnum(c, lnum, offs, &cs_sqnum);
 767                                if (err) {
 768                                        ubifs_scan_destroy(sleb);
 769                                        return ERR_PTR(err);
 770                                }
 771                        }
 772                        if (snod->sqnum > cs_sqnum) {
 773                                ubifs_err("unrecoverable log corruption "
 774                                          "in LEB %d", lnum);
 775                                ubifs_scan_destroy(sleb);
 776                                return ERR_PTR(-EUCLEAN);
 777                        }
 778                }
 779                ubifs_scan_destroy(sleb);
 780        }
 781        return ubifs_recover_leb(c, lnum, offs, sbuf, 0);
 782}
 783
 784/**
 785 * recover_head - recover a head.
 786 * @c: UBIFS file-system description object
 787 * @lnum: LEB number of head to recover
 788 * @offs: offset of head to recover
 789 * @sbuf: LEB-sized buffer to use
 790 *
 791 * This function ensures that there is no data on the flash at a head location.
 792 *
 793 * This function returns %0 on success and a negative error code on failure.
 794 */
 795static int recover_head(const struct ubifs_info *c, int lnum, int offs,
 796                        void *sbuf)
 797{
 798        int len, err, need_clean = 0;
 799
 800        if (c->min_io_size > 1)
 801                len = c->min_io_size;
 802        else
 803                len = 512;
 804        if (offs + len > c->leb_size)
 805                len = c->leb_size - offs;
 806
 807        if (!len)
 808                return 0;
 809
 810        /* Read at the head location and check it is empty flash */
 811        err = ubi_read(c->ubi, lnum, sbuf, offs, len);
 812        if (err)
 813                need_clean = 1;
 814        else {
 815                uint8_t *p = sbuf;
 816
 817                while (len--)
 818                        if (*p++ != 0xff) {
 819                                need_clean = 1;
 820                                break;
 821                        }
 822        }
 823
 824        if (need_clean) {
 825                dbg_rcvry("cleaning head at %d:%d", lnum, offs);
 826                if (offs == 0)
 827                        return ubifs_leb_unmap(c, lnum);
 828                err = ubi_read(c->ubi, lnum, sbuf, 0, offs);
 829                if (err)
 830                        return err;
 831                return ubi_leb_change(c->ubi, lnum, sbuf, offs, UBI_UNKNOWN);
 832        }
 833
 834        return 0;
 835}
 836
 837/**
 838 * ubifs_recover_inl_heads - recover index and LPT heads.
 839 * @c: UBIFS file-system description object
 840 * @sbuf: LEB-sized buffer to use
 841 *
 842 * This function ensures that there is no data on the flash at the index and
 843 * LPT head locations.
 844 *
 845 * This deals with the recovery of a half-completed journal commit. UBIFS is
 846 * careful never to overwrite the last version of the index or the LPT. Because
 847 * the index and LPT are wandering trees, data from a half-completed commit will
 848 * not be referenced anywhere in UBIFS. The data will be either in LEBs that are
 849 * assumed to be empty and will be unmapped anyway before use, or in the index
 850 * and LPT heads.
 851 *
 852 * This function returns %0 on success and a negative error code on failure.
 853 */
 854int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf)
 855{
 856        int err;
 857
 858        ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY) || c->remounting_rw);
 859
 860        dbg_rcvry("checking index head at %d:%d", c->ihead_lnum, c->ihead_offs);
 861        err = recover_head(c, c->ihead_lnum, c->ihead_offs, sbuf);
 862        if (err)
 863                return err;
 864
 865        dbg_rcvry("checking LPT head at %d:%d", c->nhead_lnum, c->nhead_offs);
 866        err = recover_head(c, c->nhead_lnum, c->nhead_offs, sbuf);
 867        if (err)
 868                return err;
 869
 870        return 0;
 871}
 872
 873/**
 874 *  clean_an_unclean_leb - read and write a LEB to remove corruption.
 875 * @c: UBIFS file-system description object
 876 * @ucleb: unclean LEB information
 877 * @sbuf: LEB-sized buffer to use
 878 *
 879 * This function reads a LEB up to a point pre-determined by the mount recovery,
 880 * checks the nodes, and writes the result back to the flash, thereby cleaning
 881 * off any following corruption, or non-fatal ECC errors.
 882 *
 883 * This function returns %0 on success and a negative error code on failure.
 884 */
 885static int clean_an_unclean_leb(const struct ubifs_info *c,
 886                                struct ubifs_unclean_leb *ucleb, void *sbuf)
 887{
 888        int err, lnum = ucleb->lnum, offs = 0, len = ucleb->endpt, quiet = 1;
 889        void *buf = sbuf;
 890
 891        dbg_rcvry("LEB %d len %d", lnum, len);
 892
 893        if (len == 0) {
 894                /* Nothing to read, just unmap it */
 895                err = ubifs_leb_unmap(c, lnum);
 896                if (err)
 897                        return err;
 898                return 0;
 899        }
 900
 901        err = ubi_read(c->ubi, lnum, buf, offs, len);
 902        if (err && err != -EBADMSG)
 903                return err;
 904
 905        while (len >= 8) {
 906                int ret;
 907
 908                cond_resched();
 909
 910                /* Scan quietly until there is an error */
 911                ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet);
 912
 913                if (ret == SCANNED_A_NODE) {
 914                        /* A valid node, and not a padding node */
 915                        struct ubifs_ch *ch = buf;
 916                        int node_len;
 917
 918                        node_len = ALIGN(le32_to_cpu(ch->len), 8);
 919                        offs += node_len;
 920                        buf += node_len;
 921                        len -= node_len;
 922                        continue;
 923                }
 924
 925                if (ret > 0) {
 926                        /* Padding bytes or a valid padding node */
 927                        offs += ret;
 928                        buf += ret;
 929                        len -= ret;
 930                        continue;
 931                }
 932
 933                if (ret == SCANNED_EMPTY_SPACE) {
 934                        ubifs_err("unexpected empty space at %d:%d",
 935                                  lnum, offs);
 936                        return -EUCLEAN;
 937                }
 938
 939                if (quiet) {
 940                        /* Redo the last scan but noisily */
 941                        quiet = 0;
 942                        continue;
 943                }
 944
 945                ubifs_scanned_corruption(c, lnum, offs, buf);
 946                return -EUCLEAN;
 947        }
 948
 949        /* Pad to min_io_size */
 950        len = ALIGN(ucleb->endpt, c->min_io_size);
 951        if (len > ucleb->endpt) {
 952                int pad_len = len - ALIGN(ucleb->endpt, 8);
 953
 954                if (pad_len > 0) {
 955                        buf = c->sbuf + len - pad_len;
 956                        ubifs_pad(c, buf, pad_len);
 957                }
 958        }
 959
 960        /* Write back the LEB atomically */
 961        err = ubi_leb_change(c->ubi, lnum, sbuf, len, UBI_UNKNOWN);
 962        if (err)
 963                return err;
 964
 965        dbg_rcvry("cleaned LEB %d", lnum);
 966
 967        return 0;
 968}
 969
 970/**
 971 * ubifs_clean_lebs - clean LEBs recovered during read-only mount.
 972 * @c: UBIFS file-system description object
 973 * @sbuf: LEB-sized buffer to use
 974 *
 975 * This function cleans a LEB identified during recovery that needs to be
 976 * written but was not because UBIFS was mounted read-only. This happens when
 977 * remounting to read-write mode.
 978 *
 979 * This function returns %0 on success and a negative error code on failure.
 980 */
 981int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf)
 982{
 983        dbg_rcvry("recovery");
 984        while (!list_empty(&c->unclean_leb_list)) {
 985                struct ubifs_unclean_leb *ucleb;
 986                int err;
 987
 988                ucleb = list_entry(c->unclean_leb_list.next,
 989                                   struct ubifs_unclean_leb, list);
 990                err = clean_an_unclean_leb(c, ucleb, sbuf);
 991                if (err)
 992                        return err;
 993                list_del(&ucleb->list);
 994                kfree(ucleb);
 995        }
 996        return 0;
 997}
 998
 999/**
1000 * struct size_entry - inode size information for recovery.
1001 * @rb: link in the RB-tree of sizes
1002 * @inum: inode number
1003 * @i_size: size on inode
1004 * @d_size: maximum size based on data nodes
1005 * @exists: indicates whether the inode exists
1006 * @inode: inode if pinned in memory awaiting rw mode to fix it
1007 */
1008struct size_entry {
1009        struct rb_node rb;
1010        ino_t inum;
1011        loff_t i_size;
1012        loff_t d_size;
1013        int exists;
1014        struct inode *inode;
1015};
1016
1017/**
1018 * add_ino - add an entry to the size tree.
1019 * @c: UBIFS file-system description object
1020 * @inum: inode number
1021 * @i_size: size on inode
1022 * @d_size: maximum size based on data nodes
1023 * @exists: indicates whether the inode exists
1024 */
1025static int add_ino(struct ubifs_info *c, ino_t inum, loff_t i_size,
1026                   loff_t d_size, int exists)
1027{
1028        struct rb_node **p = &c->size_tree.rb_node, *parent = NULL;
1029        struct size_entry *e;
1030
1031        while (*p) {
1032                parent = *p;
1033                e = rb_entry(parent, struct size_entry, rb);
1034                if (inum < e->inum)
1035                        p = &(*p)->rb_left;
1036                else
1037                        p = &(*p)->rb_right;
1038        }
1039
1040        e = kzalloc(sizeof(struct size_entry), GFP_KERNEL);
1041        if (!e)
1042                return -ENOMEM;
1043
1044        e->inum = inum;
1045        e->i_size = i_size;
1046        e->d_size = d_size;
1047        e->exists = exists;
1048
1049        rb_link_node(&e->rb, parent, p);
1050        rb_insert_color(&e->rb, &c->size_tree);
1051
1052        return 0;
1053}
1054
1055/**
1056 * find_ino - find an entry on the size tree.
1057 * @c: UBIFS file-system description object
1058 * @inum: inode number
1059 */
1060static struct size_entry *find_ino(struct ubifs_info *c, ino_t inum)
1061{
1062        struct rb_node *p = c->size_tree.rb_node;
1063        struct size_entry *e;
1064
1065        while (p) {
1066                e = rb_entry(p, struct size_entry, rb);
1067                if (inum < e->inum)
1068                        p = p->rb_left;
1069                else if (inum > e->inum)
1070                        p = p->rb_right;
1071                else
1072                        return e;
1073        }
1074        return NULL;
1075}
1076
1077/**
1078 * remove_ino - remove an entry from the size tree.
1079 * @c: UBIFS file-system description object
1080 * @inum: inode number
1081 */
1082static void remove_ino(struct ubifs_info *c, ino_t inum)
1083{
1084        struct size_entry *e = find_ino(c, inum);
1085
1086        if (!e)
1087                return;
1088        rb_erase(&e->rb, &c->size_tree);
1089        kfree(e);
1090}
1091
1092/**
1093 * ubifs_recover_size_accum - accumulate inode sizes for recovery.
1094 * @c: UBIFS file-system description object
1095 * @key: node key
1096 * @deletion: node is for a deletion
1097 * @new_size: inode size
1098 *
1099 * This function has two purposes:
1100 *     1) to ensure there are no data nodes that fall outside the inode size
1101 *     2) to ensure there are no data nodes for inodes that do not exist
1102 * To accomplish those purposes, a rb-tree is constructed containing an entry
1103 * for each inode number in the journal that has not been deleted, and recording
1104 * the size from the inode node, the maximum size of any data node (also altered
1105 * by truncations) and a flag indicating a inode number for which no inode node
1106 * was present in the journal.
1107 *
1108 * Note that there is still the possibility that there are data nodes that have
1109 * been committed that are beyond the inode size, however the only way to find
1110 * them would be to scan the entire index. Alternatively, some provision could
1111 * be made to record the size of inodes at the start of commit, which would seem
1112 * very cumbersome for a scenario that is quite unlikely and the only negative
1113 * consequence of which is wasted space.
1114 *
1115 * This functions returns %0 on success and a negative error code on failure.
1116 */
1117int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key,
1118                             int deletion, loff_t new_size)
1119{
1120        ino_t inum = key_inum(c, key);
1121        struct size_entry *e;
1122        int err;
1123
1124        switch (key_type(c, key)) {
1125        case UBIFS_INO_KEY:
1126                if (deletion)
1127                        remove_ino(c, inum);
1128                else {
1129                        e = find_ino(c, inum);
1130                        if (e) {
1131                                e->i_size = new_size;
1132                                e->exists = 1;
1133                        } else {
1134                                err = add_ino(c, inum, new_size, 0, 1);
1135                                if (err)
1136                                        return err;
1137                        }
1138                }
1139                break;
1140        case UBIFS_DATA_KEY:
1141                e = find_ino(c, inum);
1142                if (e) {
1143                        if (new_size > e->d_size)
1144                                e->d_size = new_size;
1145                } else {
1146                        err = add_ino(c, inum, 0, new_size, 0);
1147                        if (err)
1148                                return err;
1149                }
1150                break;
1151        case UBIFS_TRUN_KEY:
1152                e = find_ino(c, inum);
1153                if (e)
1154                        e->d_size = new_size;
1155                break;
1156        }
1157        return 0;
1158}
1159
1160/**
1161 * ubifs_recover_size - recover inode size.
1162 * @c: UBIFS file-system description object
1163 *
1164 * This function attempts to fix inode size discrepancies identified by the
1165 * 'ubifs_recover_size_accum()' function.
1166 *
1167 * This functions returns %0 on success and a negative error code on failure.
1168 */
1169int ubifs_recover_size(struct ubifs_info *c)
1170{
1171        struct rb_node *this = rb_first(&c->size_tree);
1172
1173        while (this) {
1174                struct size_entry *e;
1175                int err;
1176
1177                e = rb_entry(this, struct size_entry, rb);
1178                if (!e->exists) {
1179                        union ubifs_key key;
1180
1181                        ino_key_init(c, &key, e->inum);
1182                        err = ubifs_tnc_lookup(c, &key, c->sbuf);
1183                        if (err && err != -ENOENT)
1184                                return err;
1185                        if (err == -ENOENT) {
1186                                /* Remove data nodes that have no inode */
1187                                dbg_rcvry("removing ino %lu",
1188                                          (unsigned long)e->inum);
1189                                err = ubifs_tnc_remove_ino(c, e->inum);
1190                                if (err)
1191                                        return err;
1192                        } else {
1193                                struct ubifs_ino_node *ino = c->sbuf;
1194
1195                                e->exists = 1;
1196                                e->i_size = le64_to_cpu(ino->size);
1197                        }
1198                }
1199                if (e->exists && e->i_size < e->d_size) {
1200                        if (!e->inode && (c->vfs_sb->s_flags & MS_RDONLY)) {
1201                                /* Fix the inode size and pin it in memory */
1202                                struct inode *inode;
1203
1204                                inode = ubifs_iget(c->vfs_sb, e->inum);
1205                                if (IS_ERR(inode))
1206                                        return PTR_ERR(inode);
1207                                if (inode->i_size < e->d_size) {
1208                                        dbg_rcvry("ino %lu size %lld -> %lld",
1209                                                  (unsigned long)e->inum,
1210                                                  e->d_size, inode->i_size);
1211                                        inode->i_size = e->d_size;
1212                                        ubifs_inode(inode)->ui_size = e->d_size;
1213                                        e->inode = inode;
1214                                        this = rb_next(this);
1215                                        continue;
1216                                }
1217                                iput(inode);
1218                        }
1219                }
1220                this = rb_next(this);
1221                rb_erase(&e->rb, &c->size_tree);
1222                kfree(e);
1223        }
1224        return 0;
1225}
1226