uboot/fs/ubifs/replay.c
<<
>>
Prefs
   1/*
   2 * This file is part of UBIFS.
   3 *
   4 * Copyright (C) 2006-2008 Nokia Corporation.
   5 *
   6 * SPDX-License-Identifier:     GPL-2.0+
   7 *
   8 * Authors: Adrian Hunter
   9 *          Artem Bityutskiy (Битюцкий Артём)
  10 */
  11
  12/*
  13 * This file contains journal replay code. It runs when the file-system is being
  14 * mounted and requires no locking.
  15 *
  16 * The larger is the journal, the longer it takes to scan it, so the longer it
  17 * takes to mount UBIFS. This is why the journal has limited size which may be
  18 * changed depending on the system requirements. But a larger journal gives
  19 * faster I/O speed because it writes the index less frequently. So this is a
  20 * trade-off. Also, the journal is indexed by the in-memory index (TNC), so the
  21 * larger is the journal, the more memory its index may consume.
  22 */
  23
  24#ifdef __UBOOT__
  25#include <linux/compat.h>
  26#include <linux/err.h>
  27#endif
  28#include "ubifs.h"
  29#include <linux/list_sort.h>
  30
  31/**
  32 * struct replay_entry - replay list entry.
  33 * @lnum: logical eraseblock number of the node
  34 * @offs: node offset
  35 * @len: node length
  36 * @deletion: non-zero if this entry corresponds to a node deletion
  37 * @sqnum: node sequence number
  38 * @list: links the replay list
  39 * @key: node key
  40 * @nm: directory entry name
  41 * @old_size: truncation old size
  42 * @new_size: truncation new size
  43 *
  44 * The replay process first scans all buds and builds the replay list, then
  45 * sorts the replay list in nodes sequence number order, and then inserts all
  46 * the replay entries to the TNC.
  47 */
  48struct replay_entry {
  49        int lnum;
  50        int offs;
  51        int len;
  52        unsigned int deletion:1;
  53        unsigned long long sqnum;
  54        struct list_head list;
  55        union ubifs_key key;
  56        union {
  57                struct qstr nm;
  58                struct {
  59                        loff_t old_size;
  60                        loff_t new_size;
  61                };
  62        };
  63};
  64
  65/**
  66 * struct bud_entry - entry in the list of buds to replay.
  67 * @list: next bud in the list
  68 * @bud: bud description object
  69 * @sqnum: reference node sequence number
  70 * @free: free bytes in the bud
  71 * @dirty: dirty bytes in the bud
  72 */
  73struct bud_entry {
  74        struct list_head list;
  75        struct ubifs_bud *bud;
  76        unsigned long long sqnum;
  77        int free;
  78        int dirty;
  79};
  80
  81#ifndef __UBOOT__
  82/**
  83 * set_bud_lprops - set free and dirty space used by a bud.
  84 * @c: UBIFS file-system description object
  85 * @b: bud entry which describes the bud
  86 *
  87 * This function makes sure the LEB properties of bud @b are set correctly
  88 * after the replay. Returns zero in case of success and a negative error code
  89 * in case of failure.
  90 */
  91static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b)
  92{
  93        const struct ubifs_lprops *lp;
  94        int err = 0, dirty;
  95
  96        ubifs_get_lprops(c);
  97
  98        lp = ubifs_lpt_lookup_dirty(c, b->bud->lnum);
  99        if (IS_ERR(lp)) {
 100                err = PTR_ERR(lp);
 101                goto out;
 102        }
 103
 104        dirty = lp->dirty;
 105        if (b->bud->start == 0 && (lp->free != c->leb_size || lp->dirty != 0)) {
 106                /*
 107                 * The LEB was added to the journal with a starting offset of
 108                 * zero which means the LEB must have been empty. The LEB
 109                 * property values should be @lp->free == @c->leb_size and
 110                 * @lp->dirty == 0, but that is not the case. The reason is that
 111                 * the LEB had been garbage collected before it became the bud,
 112                 * and there was not commit inbetween. The garbage collector
 113                 * resets the free and dirty space without recording it
 114                 * anywhere except lprops, so if there was no commit then
 115                 * lprops does not have that information.
 116                 *
 117                 * We do not need to adjust free space because the scan has told
 118                 * us the exact value which is recorded in the replay entry as
 119                 * @b->free.
 120                 *
 121                 * However we do need to subtract from the dirty space the
 122                 * amount of space that the garbage collector reclaimed, which
 123                 * is the whole LEB minus the amount of space that was free.
 124                 */
 125                dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum,
 126                        lp->free, lp->dirty);
 127                dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum,
 128                        lp->free, lp->dirty);
 129                dirty -= c->leb_size - lp->free;
 130                /*
 131                 * If the replay order was perfect the dirty space would now be
 132                 * zero. The order is not perfect because the journal heads
 133                 * race with each other. This is not a problem but is does mean
 134                 * that the dirty space may temporarily exceed c->leb_size
 135                 * during the replay.
 136                 */
 137                if (dirty != 0)
 138                        dbg_mnt("LEB %d lp: %d free %d dirty replay: %d free %d dirty",
 139                                b->bud->lnum, lp->free, lp->dirty, b->free,
 140                                b->dirty);
 141        }
 142        lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty,
 143                             lp->flags | LPROPS_TAKEN, 0);
 144        if (IS_ERR(lp)) {
 145                err = PTR_ERR(lp);
 146                goto out;
 147        }
 148
 149        /* Make sure the journal head points to the latest bud */
 150        err = ubifs_wbuf_seek_nolock(&c->jheads[b->bud->jhead].wbuf,
 151                                     b->bud->lnum, c->leb_size - b->free);
 152
 153out:
 154        ubifs_release_lprops(c);
 155        return err;
 156}
 157
 158/**
 159 * set_buds_lprops - set free and dirty space for all replayed buds.
 160 * @c: UBIFS file-system description object
 161 *
 162 * This function sets LEB properties for all replayed buds. Returns zero in
 163 * case of success and a negative error code in case of failure.
 164 */
 165static int set_buds_lprops(struct ubifs_info *c)
 166{
 167        struct bud_entry *b;
 168        int err;
 169
 170        list_for_each_entry(b, &c->replay_buds, list) {
 171                err = set_bud_lprops(c, b);
 172                if (err)
 173                        return err;
 174        }
 175
 176        return 0;
 177}
 178
 179/**
 180 * trun_remove_range - apply a replay entry for a truncation to the TNC.
 181 * @c: UBIFS file-system description object
 182 * @r: replay entry of truncation
 183 */
 184static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r)
 185{
 186        unsigned min_blk, max_blk;
 187        union ubifs_key min_key, max_key;
 188        ino_t ino;
 189
 190        min_blk = r->new_size / UBIFS_BLOCK_SIZE;
 191        if (r->new_size & (UBIFS_BLOCK_SIZE - 1))
 192                min_blk += 1;
 193
 194        max_blk = r->old_size / UBIFS_BLOCK_SIZE;
 195        if ((r->old_size & (UBIFS_BLOCK_SIZE - 1)) == 0)
 196                max_blk -= 1;
 197
 198        ino = key_inum(c, &r->key);
 199
 200        data_key_init(c, &min_key, ino, min_blk);
 201        data_key_init(c, &max_key, ino, max_blk);
 202
 203        return ubifs_tnc_remove_range(c, &min_key, &max_key);
 204}
 205
 206/**
 207 * apply_replay_entry - apply a replay entry to the TNC.
 208 * @c: UBIFS file-system description object
 209 * @r: replay entry to apply
 210 *
 211 * Apply a replay entry to the TNC.
 212 */
 213static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
 214{
 215        int err;
 216
 217        dbg_mntk(&r->key, "LEB %d:%d len %d deletion %d sqnum %llu key ",
 218                 r->lnum, r->offs, r->len, r->deletion, r->sqnum);
 219
 220        /* Set c->replay_sqnum to help deal with dangling branches. */
 221        c->replay_sqnum = r->sqnum;
 222
 223        if (is_hash_key(c, &r->key)) {
 224                if (r->deletion)
 225                        err = ubifs_tnc_remove_nm(c, &r->key, &r->nm);
 226                else
 227                        err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs,
 228                                               r->len, &r->nm);
 229        } else {
 230                if (r->deletion)
 231                        switch (key_type(c, &r->key)) {
 232                        case UBIFS_INO_KEY:
 233                        {
 234                                ino_t inum = key_inum(c, &r->key);
 235
 236                                err = ubifs_tnc_remove_ino(c, inum);
 237                                break;
 238                        }
 239                        case UBIFS_TRUN_KEY:
 240                                err = trun_remove_range(c, r);
 241                                break;
 242                        default:
 243                                err = ubifs_tnc_remove(c, &r->key);
 244                                break;
 245                        }
 246                else
 247                        err = ubifs_tnc_add(c, &r->key, r->lnum, r->offs,
 248                                            r->len);
 249                if (err)
 250                        return err;
 251
 252                if (c->need_recovery)
 253                        err = ubifs_recover_size_accum(c, &r->key, r->deletion,
 254                                                       r->new_size);
 255        }
 256
 257        return err;
 258}
 259
 260/**
 261 * replay_entries_cmp - compare 2 replay entries.
 262 * @priv: UBIFS file-system description object
 263 * @a: first replay entry
 264 * @a: second replay entry
 265 *
 266 * This is a comparios function for 'list_sort()' which compares 2 replay
 267 * entries @a and @b by comparing their sequence numer.  Returns %1 if @a has
 268 * greater sequence number and %-1 otherwise.
 269 */
 270static int replay_entries_cmp(void *priv, struct list_head *a,
 271                              struct list_head *b)
 272{
 273        struct replay_entry *ra, *rb;
 274
 275        cond_resched();
 276        if (a == b)
 277                return 0;
 278
 279        ra = list_entry(a, struct replay_entry, list);
 280        rb = list_entry(b, struct replay_entry, list);
 281        ubifs_assert(ra->sqnum != rb->sqnum);
 282        if (ra->sqnum > rb->sqnum)
 283                return 1;
 284        return -1;
 285}
 286
 287/**
 288 * apply_replay_list - apply the replay list to the TNC.
 289 * @c: UBIFS file-system description object
 290 *
 291 * Apply all entries in the replay list to the TNC. Returns zero in case of
 292 * success and a negative error code in case of failure.
 293 */
 294static int apply_replay_list(struct ubifs_info *c)
 295{
 296        struct replay_entry *r;
 297        int err;
 298
 299        list_sort(c, &c->replay_list, &replay_entries_cmp);
 300
 301        list_for_each_entry(r, &c->replay_list, list) {
 302                cond_resched();
 303
 304                err = apply_replay_entry(c, r);
 305                if (err)
 306                        return err;
 307        }
 308
 309        return 0;
 310}
 311
 312/**
 313 * destroy_replay_list - destroy the replay.
 314 * @c: UBIFS file-system description object
 315 *
 316 * Destroy the replay list.
 317 */
 318static void destroy_replay_list(struct ubifs_info *c)
 319{
 320        struct replay_entry *r, *tmp;
 321
 322        list_for_each_entry_safe(r, tmp, &c->replay_list, list) {
 323                if (is_hash_key(c, &r->key))
 324                        kfree(r->nm.name);
 325                list_del(&r->list);
 326                kfree(r);
 327        }
 328}
 329
 330/**
 331 * insert_node - insert a node to the replay list
 332 * @c: UBIFS file-system description object
 333 * @lnum: node logical eraseblock number
 334 * @offs: node offset
 335 * @len: node length
 336 * @key: node key
 337 * @sqnum: sequence number
 338 * @deletion: non-zero if this is a deletion
 339 * @used: number of bytes in use in a LEB
 340 * @old_size: truncation old size
 341 * @new_size: truncation new size
 342 *
 343 * This function inserts a scanned non-direntry node to the replay list. The
 344 * replay list contains @struct replay_entry elements, and we sort this list in
 345 * sequence number order before applying it. The replay list is applied at the
 346 * very end of the replay process. Since the list is sorted in sequence number
 347 * order, the older modifications are applied first. This function returns zero
 348 * in case of success and a negative error code in case of failure.
 349 */
 350static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
 351                       union ubifs_key *key, unsigned long long sqnum,
 352                       int deletion, int *used, loff_t old_size,
 353                       loff_t new_size)
 354{
 355        struct replay_entry *r;
 356
 357        dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs);
 358
 359        if (key_inum(c, key) >= c->highest_inum)
 360                c->highest_inum = key_inum(c, key);
 361
 362        r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
 363        if (!r)
 364                return -ENOMEM;
 365
 366        if (!deletion)
 367                *used += ALIGN(len, 8);
 368        r->lnum = lnum;
 369        r->offs = offs;
 370        r->len = len;
 371        r->deletion = !!deletion;
 372        r->sqnum = sqnum;
 373        key_copy(c, key, &r->key);
 374        r->old_size = old_size;
 375        r->new_size = new_size;
 376
 377        list_add_tail(&r->list, &c->replay_list);
 378        return 0;
 379}
 380
 381/**
 382 * insert_dent - insert a directory entry node into the replay list.
 383 * @c: UBIFS file-system description object
 384 * @lnum: node logical eraseblock number
 385 * @offs: node offset
 386 * @len: node length
 387 * @key: node key
 388 * @name: directory entry name
 389 * @nlen: directory entry name length
 390 * @sqnum: sequence number
 391 * @deletion: non-zero if this is a deletion
 392 * @used: number of bytes in use in a LEB
 393 *
 394 * This function inserts a scanned directory entry node or an extended
 395 * attribute entry to the replay list. Returns zero in case of success and a
 396 * negative error code in case of failure.
 397 */
 398static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
 399                       union ubifs_key *key, const char *name, int nlen,
 400                       unsigned long long sqnum, int deletion, int *used)
 401{
 402        struct replay_entry *r;
 403        char *nbuf;
 404
 405        dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs);
 406        if (key_inum(c, key) >= c->highest_inum)
 407                c->highest_inum = key_inum(c, key);
 408
 409        r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
 410        if (!r)
 411                return -ENOMEM;
 412
 413        nbuf = kmalloc(nlen + 1, GFP_KERNEL);
 414        if (!nbuf) {
 415                kfree(r);
 416                return -ENOMEM;
 417        }
 418
 419        if (!deletion)
 420                *used += ALIGN(len, 8);
 421        r->lnum = lnum;
 422        r->offs = offs;
 423        r->len = len;
 424        r->deletion = !!deletion;
 425        r->sqnum = sqnum;
 426        key_copy(c, key, &r->key);
 427        r->nm.len = nlen;
 428        memcpy(nbuf, name, nlen);
 429        nbuf[nlen] = '\0';
 430        r->nm.name = nbuf;
 431
 432        list_add_tail(&r->list, &c->replay_list);
 433        return 0;
 434}
 435#endif
 436
 437/**
 438 * ubifs_validate_entry - validate directory or extended attribute entry node.
 439 * @c: UBIFS file-system description object
 440 * @dent: the node to validate
 441 *
 442 * This function validates directory or extended attribute entry node @dent.
 443 * Returns zero if the node is all right and a %-EINVAL if not.
 444 */
 445int ubifs_validate_entry(struct ubifs_info *c,
 446                         const struct ubifs_dent_node *dent)
 447{
 448        int key_type = key_type_flash(c, dent->key);
 449        int nlen = le16_to_cpu(dent->nlen);
 450
 451        if (le32_to_cpu(dent->ch.len) != nlen + UBIFS_DENT_NODE_SZ + 1 ||
 452            dent->type >= UBIFS_ITYPES_CNT ||
 453            nlen > UBIFS_MAX_NLEN || dent->name[nlen] != 0 ||
 454            strnlen(dent->name, nlen) != nlen ||
 455            le64_to_cpu(dent->inum) > MAX_INUM) {
 456                ubifs_err("bad %s node", key_type == UBIFS_DENT_KEY ?
 457                          "directory entry" : "extended attribute entry");
 458                return -EINVAL;
 459        }
 460
 461        if (key_type != UBIFS_DENT_KEY && key_type != UBIFS_XENT_KEY) {
 462                ubifs_err("bad key type %d", key_type);
 463                return -EINVAL;
 464        }
 465
 466        return 0;
 467}
 468
 469#ifndef __UBOOT__
 470/**
 471 * is_last_bud - check if the bud is the last in the journal head.
 472 * @c: UBIFS file-system description object
 473 * @bud: bud description object
 474 *
 475 * This function checks if bud @bud is the last bud in its journal head. This
 476 * information is then used by 'replay_bud()' to decide whether the bud can
 477 * have corruptions or not. Indeed, only last buds can be corrupted by power
 478 * cuts. Returns %1 if this is the last bud, and %0 if not.
 479 */
 480static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud)
 481{
 482        struct ubifs_jhead *jh = &c->jheads[bud->jhead];
 483        struct ubifs_bud *next;
 484        uint32_t data;
 485        int err;
 486
 487        if (list_is_last(&bud->list, &jh->buds_list))
 488                return 1;
 489
 490        /*
 491         * The following is a quirk to make sure we work correctly with UBIFS
 492         * images used with older UBIFS.
 493         *
 494         * Normally, the last bud will be the last in the journal head's list
 495         * of bud. However, there is one exception if the UBIFS image belongs
 496         * to older UBIFS. This is fairly unlikely: one would need to use old
 497         * UBIFS, then have a power cut exactly at the right point, and then
 498         * try to mount this image with new UBIFS.
 499         *
 500         * The exception is: it is possible to have 2 buds A and B, A goes
 501         * before B, and B is the last, bud B is contains no data, and bud A is
 502         * corrupted at the end. The reason is that in older versions when the
 503         * journal code switched the next bud (from A to B), it first added a
 504         * log reference node for the new bud (B), and only after this it
 505         * synchronized the write-buffer of current bud (A). But later this was
 506         * changed and UBIFS started to always synchronize the write-buffer of
 507         * the bud (A) before writing the log reference for the new bud (B).
 508         *
 509         * But because older UBIFS always synchronized A's write-buffer before
 510         * writing to B, we can recognize this exceptional situation but
 511         * checking the contents of bud B - if it is empty, then A can be
 512         * treated as the last and we can recover it.
 513         *
 514         * TODO: remove this piece of code in a couple of years (today it is
 515         * 16.05.2011).
 516         */
 517        next = list_entry(bud->list.next, struct ubifs_bud, list);
 518        if (!list_is_last(&next->list, &jh->buds_list))
 519                return 0;
 520
 521        err = ubifs_leb_read(c, next->lnum, (char *)&data, next->start, 4, 1);
 522        if (err)
 523                return 0;
 524
 525        return data == 0xFFFFFFFF;
 526}
 527
 528/**
 529 * replay_bud - replay a bud logical eraseblock.
 530 * @c: UBIFS file-system description object
 531 * @b: bud entry which describes the bud
 532 *
 533 * This function replays bud @bud, recovers it if needed, and adds all nodes
 534 * from this bud to the replay list. Returns zero in case of success and a
 535 * negative error code in case of failure.
 536 */
 537static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
 538{
 539        int is_last = is_last_bud(c, b->bud);
 540        int err = 0, used = 0, lnum = b->bud->lnum, offs = b->bud->start;
 541        struct ubifs_scan_leb *sleb;
 542        struct ubifs_scan_node *snod;
 543
 544        dbg_mnt("replay bud LEB %d, head %d, offs %d, is_last %d",
 545                lnum, b->bud->jhead, offs, is_last);
 546
 547        if (c->need_recovery && is_last)
 548                /*
 549                 * Recover only last LEBs in the journal heads, because power
 550                 * cuts may cause corruptions only in these LEBs, because only
 551                 * these LEBs could possibly be written to at the power cut
 552                 * time.
 553                 */
 554                sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, b->bud->jhead);
 555        else
 556                sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0);
 557        if (IS_ERR(sleb))
 558                return PTR_ERR(sleb);
 559
 560        /*
 561         * The bud does not have to start from offset zero - the beginning of
 562         * the 'lnum' LEB may contain previously committed data. One of the
 563         * things we have to do in replay is to correctly update lprops with
 564         * newer information about this LEB.
 565         *
 566         * At this point lprops thinks that this LEB has 'c->leb_size - offs'
 567         * bytes of free space because it only contain information about
 568         * committed data.
 569         *
 570         * But we know that real amount of free space is 'c->leb_size -
 571         * sleb->endpt', and the space in the 'lnum' LEB between 'offs' and
 572         * 'sleb->endpt' is used by bud data. We have to correctly calculate
 573         * how much of these data are dirty and update lprops with this
 574         * information.
 575         *
 576         * The dirt in that LEB region is comprised of padding nodes, deletion
 577         * nodes, truncation nodes and nodes which are obsoleted by subsequent
 578         * nodes in this LEB. So instead of calculating clean space, we
 579         * calculate used space ('used' variable).
 580         */
 581
 582        list_for_each_entry(snod, &sleb->nodes, list) {
 583                int deletion = 0;
 584
 585                cond_resched();
 586
 587                if (snod->sqnum >= SQNUM_WATERMARK) {
 588                        ubifs_err("file system's life ended");
 589                        goto out_dump;
 590                }
 591
 592                if (snod->sqnum > c->max_sqnum)
 593                        c->max_sqnum = snod->sqnum;
 594
 595                switch (snod->type) {
 596                case UBIFS_INO_NODE:
 597                {
 598                        struct ubifs_ino_node *ino = snod->node;
 599                        loff_t new_size = le64_to_cpu(ino->size);
 600
 601                        if (le32_to_cpu(ino->nlink) == 0)
 602                                deletion = 1;
 603                        err = insert_node(c, lnum, snod->offs, snod->len,
 604                                          &snod->key, snod->sqnum, deletion,
 605                                          &used, 0, new_size);
 606                        break;
 607                }
 608                case UBIFS_DATA_NODE:
 609                {
 610                        struct ubifs_data_node *dn = snod->node;
 611                        loff_t new_size = le32_to_cpu(dn->size) +
 612                                          key_block(c, &snod->key) *
 613                                          UBIFS_BLOCK_SIZE;
 614
 615                        err = insert_node(c, lnum, snod->offs, snod->len,
 616                                          &snod->key, snod->sqnum, deletion,
 617                                          &used, 0, new_size);
 618                        break;
 619                }
 620                case UBIFS_DENT_NODE:
 621                case UBIFS_XENT_NODE:
 622                {
 623                        struct ubifs_dent_node *dent = snod->node;
 624
 625                        err = ubifs_validate_entry(c, dent);
 626                        if (err)
 627                                goto out_dump;
 628
 629                        err = insert_dent(c, lnum, snod->offs, snod->len,
 630                                          &snod->key, dent->name,
 631                                          le16_to_cpu(dent->nlen), snod->sqnum,
 632                                          !le64_to_cpu(dent->inum), &used);
 633                        break;
 634                }
 635                case UBIFS_TRUN_NODE:
 636                {
 637                        struct ubifs_trun_node *trun = snod->node;
 638                        loff_t old_size = le64_to_cpu(trun->old_size);
 639                        loff_t new_size = le64_to_cpu(trun->new_size);
 640                        union ubifs_key key;
 641
 642                        /* Validate truncation node */
 643                        if (old_size < 0 || old_size > c->max_inode_sz ||
 644                            new_size < 0 || new_size > c->max_inode_sz ||
 645                            old_size <= new_size) {
 646                                ubifs_err("bad truncation node");
 647                                goto out_dump;
 648                        }
 649
 650                        /*
 651                         * Create a fake truncation key just to use the same
 652                         * functions which expect nodes to have keys.
 653                         */
 654                        trun_key_init(c, &key, le32_to_cpu(trun->inum));
 655                        err = insert_node(c, lnum, snod->offs, snod->len,
 656                                          &key, snod->sqnum, 1, &used,
 657                                          old_size, new_size);
 658                        break;
 659                }
 660                default:
 661                        ubifs_err("unexpected node type %d in bud LEB %d:%d",
 662                                  snod->type, lnum, snod->offs);
 663                        err = -EINVAL;
 664                        goto out_dump;
 665                }
 666                if (err)
 667                        goto out;
 668        }
 669
 670        ubifs_assert(ubifs_search_bud(c, lnum));
 671        ubifs_assert(sleb->endpt - offs >= used);
 672        ubifs_assert(sleb->endpt % c->min_io_size == 0);
 673
 674        b->dirty = sleb->endpt - offs - used;
 675        b->free = c->leb_size - sleb->endpt;
 676        dbg_mnt("bud LEB %d replied: dirty %d, free %d",
 677                lnum, b->dirty, b->free);
 678
 679out:
 680        ubifs_scan_destroy(sleb);
 681        return err;
 682
 683out_dump:
 684        ubifs_err("bad node is at LEB %d:%d", lnum, snod->offs);
 685        ubifs_dump_node(c, snod->node);
 686        ubifs_scan_destroy(sleb);
 687        return -EINVAL;
 688}
 689
 690/**
 691 * replay_buds - replay all buds.
 692 * @c: UBIFS file-system description object
 693 *
 694 * This function returns zero in case of success and a negative error code in
 695 * case of failure.
 696 */
 697static int replay_buds(struct ubifs_info *c)
 698{
 699        struct bud_entry *b;
 700        int err;
 701        unsigned long long prev_sqnum = 0;
 702
 703        list_for_each_entry(b, &c->replay_buds, list) {
 704                err = replay_bud(c, b);
 705                if (err)
 706                        return err;
 707
 708                ubifs_assert(b->sqnum > prev_sqnum);
 709                prev_sqnum = b->sqnum;
 710        }
 711
 712        return 0;
 713}
 714
 715/**
 716 * destroy_bud_list - destroy the list of buds to replay.
 717 * @c: UBIFS file-system description object
 718 */
 719static void destroy_bud_list(struct ubifs_info *c)
 720{
 721        struct bud_entry *b;
 722
 723        while (!list_empty(&c->replay_buds)) {
 724                b = list_entry(c->replay_buds.next, struct bud_entry, list);
 725                list_del(&b->list);
 726                kfree(b);
 727        }
 728}
 729
 730/**
 731 * add_replay_bud - add a bud to the list of buds to replay.
 732 * @c: UBIFS file-system description object
 733 * @lnum: bud logical eraseblock number to replay
 734 * @offs: bud start offset
 735 * @jhead: journal head to which this bud belongs
 736 * @sqnum: reference node sequence number
 737 *
 738 * This function returns zero in case of success and a negative error code in
 739 * case of failure.
 740 */
 741static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
 742                          unsigned long long sqnum)
 743{
 744        struct ubifs_bud *bud;
 745        struct bud_entry *b;
 746
 747        dbg_mnt("add replay bud LEB %d:%d, head %d", lnum, offs, jhead);
 748
 749        bud = kmalloc(sizeof(struct ubifs_bud), GFP_KERNEL);
 750        if (!bud)
 751                return -ENOMEM;
 752
 753        b = kmalloc(sizeof(struct bud_entry), GFP_KERNEL);
 754        if (!b) {
 755                kfree(bud);
 756                return -ENOMEM;
 757        }
 758
 759        bud->lnum = lnum;
 760        bud->start = offs;
 761        bud->jhead = jhead;
 762        ubifs_add_bud(c, bud);
 763
 764        b->bud = bud;
 765        b->sqnum = sqnum;
 766        list_add_tail(&b->list, &c->replay_buds);
 767
 768        return 0;
 769}
 770
 771/**
 772 * validate_ref - validate a reference node.
 773 * @c: UBIFS file-system description object
 774 * @ref: the reference node to validate
 775 * @ref_lnum: LEB number of the reference node
 776 * @ref_offs: reference node offset
 777 *
 778 * This function returns %1 if a bud reference already exists for the LEB. %0 is
 779 * returned if the reference node is new, otherwise %-EINVAL is returned if
 780 * validation failed.
 781 */
 782static int validate_ref(struct ubifs_info *c, const struct ubifs_ref_node *ref)
 783{
 784        struct ubifs_bud *bud;
 785        int lnum = le32_to_cpu(ref->lnum);
 786        unsigned int offs = le32_to_cpu(ref->offs);
 787        unsigned int jhead = le32_to_cpu(ref->jhead);
 788
 789        /*
 790         * ref->offs may point to the end of LEB when the journal head points
 791         * to the end of LEB and we write reference node for it during commit.
 792         * So this is why we require 'offs > c->leb_size'.
 793         */
 794        if (jhead >= c->jhead_cnt || lnum >= c->leb_cnt ||
 795            lnum < c->main_first || offs > c->leb_size ||
 796            offs & (c->min_io_size - 1))
 797                return -EINVAL;
 798
 799        /* Make sure we have not already looked at this bud */
 800        bud = ubifs_search_bud(c, lnum);
 801        if (bud) {
 802                if (bud->jhead == jhead && bud->start <= offs)
 803                        return 1;
 804                ubifs_err("bud at LEB %d:%d was already referred", lnum, offs);
 805                return -EINVAL;
 806        }
 807
 808        return 0;
 809}
 810
 811/**
 812 * replay_log_leb - replay a log logical eraseblock.
 813 * @c: UBIFS file-system description object
 814 * @lnum: log logical eraseblock to replay
 815 * @offs: offset to start replaying from
 816 * @sbuf: scan buffer
 817 *
 818 * This function replays a log LEB and returns zero in case of success, %1 if
 819 * this is the last LEB in the log, and a negative error code in case of
 820 * failure.
 821 */
 822static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
 823{
 824        int err;
 825        struct ubifs_scan_leb *sleb;
 826        struct ubifs_scan_node *snod;
 827        const struct ubifs_cs_node *node;
 828
 829        dbg_mnt("replay log LEB %d:%d", lnum, offs);
 830        sleb = ubifs_scan(c, lnum, offs, sbuf, c->need_recovery);
 831        if (IS_ERR(sleb)) {
 832                if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery)
 833                        return PTR_ERR(sleb);
 834                /*
 835                 * Note, the below function will recover this log LEB only if
 836                 * it is the last, because unclean reboots can possibly corrupt
 837                 * only the tail of the log.
 838                 */
 839                sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf);
 840                if (IS_ERR(sleb))
 841                        return PTR_ERR(sleb);
 842        }
 843
 844        if (sleb->nodes_cnt == 0) {
 845                err = 1;
 846                goto out;
 847        }
 848
 849        node = sleb->buf;
 850        snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list);
 851        if (c->cs_sqnum == 0) {
 852                /*
 853                 * This is the first log LEB we are looking at, make sure that
 854                 * the first node is a commit start node. Also record its
 855                 * sequence number so that UBIFS can determine where the log
 856                 * ends, because all nodes which were have higher sequence
 857                 * numbers.
 858                 */
 859                if (snod->type != UBIFS_CS_NODE) {
 860                        ubifs_err("first log node at LEB %d:%d is not CS node",
 861                                  lnum, offs);
 862                        goto out_dump;
 863                }
 864                if (le64_to_cpu(node->cmt_no) != c->cmt_no) {
 865                        ubifs_err("first CS node at LEB %d:%d has wrong commit number %llu expected %llu",
 866                                  lnum, offs,
 867                                  (unsigned long long)le64_to_cpu(node->cmt_no),
 868                                  c->cmt_no);
 869                        goto out_dump;
 870                }
 871
 872                c->cs_sqnum = le64_to_cpu(node->ch.sqnum);
 873                dbg_mnt("commit start sqnum %llu", c->cs_sqnum);
 874        }
 875
 876        if (snod->sqnum < c->cs_sqnum) {
 877                /*
 878                 * This means that we reached end of log and now
 879                 * look to the older log data, which was already
 880                 * committed but the eraseblock was not erased (UBIFS
 881                 * only un-maps it). So this basically means we have to
 882                 * exit with "end of log" code.
 883                 */
 884                err = 1;
 885                goto out;
 886        }
 887
 888        /* Make sure the first node sits at offset zero of the LEB */
 889        if (snod->offs != 0) {
 890                ubifs_err("first node is not at zero offset");
 891                goto out_dump;
 892        }
 893
 894        list_for_each_entry(snod, &sleb->nodes, list) {
 895                cond_resched();
 896
 897                if (snod->sqnum >= SQNUM_WATERMARK) {
 898                        ubifs_err("file system's life ended");
 899                        goto out_dump;
 900                }
 901
 902                if (snod->sqnum < c->cs_sqnum) {
 903                        ubifs_err("bad sqnum %llu, commit sqnum %llu",
 904                                  snod->sqnum, c->cs_sqnum);
 905                        goto out_dump;
 906                }
 907
 908                if (snod->sqnum > c->max_sqnum)
 909                        c->max_sqnum = snod->sqnum;
 910
 911                switch (snod->type) {
 912                case UBIFS_REF_NODE: {
 913                        const struct ubifs_ref_node *ref = snod->node;
 914
 915                        err = validate_ref(c, ref);
 916                        if (err == 1)
 917                                break; /* Already have this bud */
 918                        if (err)
 919                                goto out_dump;
 920
 921                        err = add_replay_bud(c, le32_to_cpu(ref->lnum),
 922                                             le32_to_cpu(ref->offs),
 923                                             le32_to_cpu(ref->jhead),
 924                                             snod->sqnum);
 925                        if (err)
 926                                goto out;
 927
 928                        break;
 929                }
 930                case UBIFS_CS_NODE:
 931                        /* Make sure it sits at the beginning of LEB */
 932                        if (snod->offs != 0) {
 933                                ubifs_err("unexpected node in log");
 934                                goto out_dump;
 935                        }
 936                        break;
 937                default:
 938                        ubifs_err("unexpected node in log");
 939                        goto out_dump;
 940                }
 941        }
 942
 943        if (sleb->endpt || c->lhead_offs >= c->leb_size) {
 944                c->lhead_lnum = lnum;
 945                c->lhead_offs = sleb->endpt;
 946        }
 947
 948        err = !sleb->endpt;
 949out:
 950        ubifs_scan_destroy(sleb);
 951        return err;
 952
 953out_dump:
 954        ubifs_err("log error detected while replaying the log at LEB %d:%d",
 955                  lnum, offs + snod->offs);
 956        ubifs_dump_node(c, snod->node);
 957        ubifs_scan_destroy(sleb);
 958        return -EINVAL;
 959}
 960
 961/**
 962 * take_ihead - update the status of the index head in lprops to 'taken'.
 963 * @c: UBIFS file-system description object
 964 *
 965 * This function returns the amount of free space in the index head LEB or a
 966 * negative error code.
 967 */
 968static int take_ihead(struct ubifs_info *c)
 969{
 970        const struct ubifs_lprops *lp;
 971        int err, free;
 972
 973        ubifs_get_lprops(c);
 974
 975        lp = ubifs_lpt_lookup_dirty(c, c->ihead_lnum);
 976        if (IS_ERR(lp)) {
 977                err = PTR_ERR(lp);
 978                goto out;
 979        }
 980
 981        free = lp->free;
 982
 983        lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC,
 984                             lp->flags | LPROPS_TAKEN, 0);
 985        if (IS_ERR(lp)) {
 986                err = PTR_ERR(lp);
 987                goto out;
 988        }
 989
 990        err = free;
 991out:
 992        ubifs_release_lprops(c);
 993        return err;
 994}
 995
 996/**
 997 * ubifs_replay_journal - replay journal.
 998 * @c: UBIFS file-system description object
 999 *
1000 * This function scans the journal, replays and cleans it up. It makes sure all
1001 * memory data structures related to uncommitted journal are built (dirty TNC
1002 * tree, tree of buds, modified lprops, etc).
1003 */
1004int ubifs_replay_journal(struct ubifs_info *c)
1005{
1006        int err, lnum, free;
1007
1008        BUILD_BUG_ON(UBIFS_TRUN_KEY > 5);
1009
1010        /* Update the status of the index head in lprops to 'taken' */
1011        free = take_ihead(c);
1012        if (free < 0)
1013                return free; /* Error code */
1014
1015        if (c->ihead_offs != c->leb_size - free) {
1016                ubifs_err("bad index head LEB %d:%d", c->ihead_lnum,
1017                          c->ihead_offs);
1018                return -EINVAL;
1019        }
1020
1021        dbg_mnt("start replaying the journal");
1022        c->replaying = 1;
1023        lnum = c->ltail_lnum = c->lhead_lnum;
1024
1025        do {
1026                err = replay_log_leb(c, lnum, 0, c->sbuf);
1027                if (err == 1)
1028                        /* We hit the end of the log */
1029                        break;
1030                if (err)
1031                        goto out;
1032                lnum = ubifs_next_log_lnum(c, lnum);
1033        } while (lnum != c->ltail_lnum);
1034
1035        err = replay_buds(c);
1036        if (err)
1037                goto out;
1038
1039        err = apply_replay_list(c);
1040        if (err)
1041                goto out;
1042
1043        err = set_buds_lprops(c);
1044        if (err)
1045                goto out;
1046
1047        /*
1048         * UBIFS budgeting calculations use @c->bi.uncommitted_idx variable
1049         * to roughly estimate index growth. Things like @c->bi.min_idx_lebs
1050         * depend on it. This means we have to initialize it to make sure
1051         * budgeting works properly.
1052         */
1053        c->bi.uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt);
1054        c->bi.uncommitted_idx *= c->max_idx_node_sz;
1055
1056        ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery);
1057        dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, highest_inum %lu",
1058                c->lhead_lnum, c->lhead_offs, c->max_sqnum,
1059                (unsigned long)c->highest_inum);
1060out:
1061        destroy_replay_list(c);
1062        destroy_bud_list(c);
1063        c->replaying = 0;
1064        return err;
1065}
1066#endif
1067