linux/fs/nilfs2/recovery.c
/*
 * recovery.c - NILFS recovery logic
 *
 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * Written by Ryusuke Konishi.
 */

#include <linux/buffer_head.h>
#include <linux/blkdev.h>
#include <linux/swap.h>
#include <linux/slab.h>
#include <linux/crc32.h>
#include "nilfs.h"
#include "segment.h"
#include "sufile.h"
#include "page.h"
#include "segbuf.h"

/*
 * Segment check result
 */
enum {
        NILFS_SEG_VALID,
        NILFS_SEG_NO_SUPER_ROOT,
        NILFS_SEG_FAIL_IO,
        NILFS_SEG_FAIL_MAGIC,
        NILFS_SEG_FAIL_SEQ,
        NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT,
        NILFS_SEG_FAIL_CHECKSUM_FULL,
        NILFS_SEG_FAIL_CONSISTENCY,
};

/* work structure for recovery */
struct nilfs_recovery_block {
        ino_t ino;              /*
                                 * Inode number of the file that this block
                                 * belongs to
                                 */
        sector_t blocknr;       /* block number */
        __u64 vblocknr;         /* virtual block number */
        unsigned long blkoff;   /* File offset of the data block (in blocks) */
        struct list_head list;
};


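/*
 * nilfs_warn_segment_error - report a segment check failure
 *
 * Logs a message describing the NILFS_SEG_* result given in @err and
 * converts it to a negative errno (-EIO for I/O failures, -EINVAL for
 * everything else) for callers that need an error code.
 */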
static int nilfs_warn_segment_error(struct super_block *sb, int err)
{
        const char *msg = NULL;

        switch (err) {
        case NILFS_SEG_FAIL_IO:
                nilfs_msg(sb, KERN_ERR, "I/O error reading segment");
                return -EIO;
        case NILFS_SEG_FAIL_MAGIC:
                msg = "Magic number mismatch";
                break;
        case NILFS_SEG_FAIL_SEQ:
                msg = "Sequence number mismatch";
                break;
        case NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT:
                msg = "Checksum error in super root";
                break;
        case NILFS_SEG_FAIL_CHECKSUM_FULL:
                msg = "Checksum error in segment payload";
                break;
        case NILFS_SEG_FAIL_CONSISTENCY:
                msg = "Inconsistency found";
                break;
        case NILFS_SEG_NO_SUPER_ROOT:
                msg = "No super root in the last segment";
                break;
        default:
                nilfs_msg(sb, KERN_ERR, "unrecognized segment error %d", err);
                return -EINVAL;
        }
        nilfs_msg(sb, KERN_WARNING, "invalid segment: %s", msg);
        return -EINVAL;
}

/**
 * nilfs_compute_checksum - compute checksum of blocks continuously
 * @nilfs: nilfs object
 * @bhs: buffer head of start block
 * @sum: place to store result
 * @offset: offset bytes in the first block
 * @check_bytes: number of bytes to be checked
 * @start: DBN of start block
 * @nblock: number of blocks to be checked
 */
static int nilfs_compute_checksum(struct the_nilfs *nilfs,
                                  struct buffer_head *bhs, u32 *sum,
                                  unsigned long offset, u64 check_bytes,
                                  sector_t start, unsigned long nblock)
{
        unsigned int blocksize = nilfs->ns_blocksize;
        unsigned long size;
        u32 crc;

        BUG_ON(offset >= blocksize);
        check_bytes -= offset;
        size = min_t(u64, check_bytes, blocksize - offset);
        crc = crc32_le(nilfs->ns_crc_seed,
                       (unsigned char *)bhs->b_data + offset, size);
        if (--nblock > 0) {
                do {
                        struct buffer_head *bh;

                        bh = __bread(nilfs->ns_bdev, ++start, blocksize);
                        if (!bh)
                                return -EIO;
                        check_bytes -= size;
                        size = min_t(u64, check_bytes, blocksize);
                        crc = crc32_le(crc, bh->b_data, size);
                        brelse(bh);
                } while (--nblock > 0);
        }
        *sum = crc;
        return 0;
}
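
/*
 * Note: both callers below start the checksum just past the checksum field
 * being verified (they pass its size as @offset), so the stored checksum
 * itself is excluded from the checked area.
 */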

/**
 * nilfs_read_super_root_block - read super root block
 * @nilfs: nilfs object
 * @sr_block: disk block number of the super root block
 * @pbh: address of a buffer_head pointer to return super root buffer
 * @check: CRC check flag
 */
int nilfs_read_super_root_block(struct the_nilfs *nilfs, sector_t sr_block,
                                struct buffer_head **pbh, int check)
{
        struct buffer_head *bh_sr;
        struct nilfs_super_root *sr;
        u32 crc;
        int ret;

        *pbh = NULL;
        bh_sr = __bread(nilfs->ns_bdev, sr_block, nilfs->ns_blocksize);
        if (unlikely(!bh_sr)) {
                ret = NILFS_SEG_FAIL_IO;
                goto failed;
        }

        sr = (struct nilfs_super_root *)bh_sr->b_data;
        if (check) {
                unsigned int bytes = le16_to_cpu(sr->sr_bytes);

                if (bytes == 0 || bytes > nilfs->ns_blocksize) {
                        ret = NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT;
                        goto failed_bh;
                }
                if (nilfs_compute_checksum(
                            nilfs, bh_sr, &crc, sizeof(sr->sr_sum), bytes,
                            sr_block, 1)) {
                        ret = NILFS_SEG_FAIL_IO;
                        goto failed_bh;
                }
                if (crc != le32_to_cpu(sr->sr_sum)) {
                        ret = NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT;
                        goto failed_bh;
                }
        }
        *pbh = bh_sr;
        return 0;

 failed_bh:
        brelse(bh_sr);

 failed:
        return nilfs_warn_segment_error(nilfs->ns_sb, ret);
}

/**
 * nilfs_read_log_header - read summary header of the specified log
 * @nilfs: nilfs object
 * @start_blocknr: start block number of the log
 * @sum: pointer to return segment summary structure
 */
static struct buffer_head *
nilfs_read_log_header(struct the_nilfs *nilfs, sector_t start_blocknr,
                      struct nilfs_segment_summary **sum)
{
        struct buffer_head *bh_sum;

        bh_sum = __bread(nilfs->ns_bdev, start_blocknr, nilfs->ns_blocksize);
        if (bh_sum)
                *sum = (struct nilfs_segment_summary *)bh_sum->b_data;
        return bh_sum;
}

/**
 * nilfs_validate_log - verify consistency of log
 * @nilfs: nilfs object
 * @seg_seq: sequence number of segment
 * @bh_sum: buffer head of summary block
 * @sum: segment summary struct
 */
static int nilfs_validate_log(struct the_nilfs *nilfs, u64 seg_seq,
                              struct buffer_head *bh_sum,
                              struct nilfs_segment_summary *sum)
{
        unsigned long nblock;
        u32 crc;
        int ret;

        ret = NILFS_SEG_FAIL_MAGIC;
        if (le32_to_cpu(sum->ss_magic) != NILFS_SEGSUM_MAGIC)
                goto out;

        ret = NILFS_SEG_FAIL_SEQ;
        if (le64_to_cpu(sum->ss_seq) != seg_seq)
                goto out;

        nblock = le32_to_cpu(sum->ss_nblocks);
        ret = NILFS_SEG_FAIL_CONSISTENCY;
        if (unlikely(nblock == 0 || nblock > nilfs->ns_blocks_per_segment))
                /* This limits the number of blocks read in the CRC check */
                goto out;

        ret = NILFS_SEG_FAIL_IO;
        if (nilfs_compute_checksum(nilfs, bh_sum, &crc, sizeof(sum->ss_datasum),
                                   ((u64)nblock << nilfs->ns_blocksize_bits),
                                   bh_sum->b_blocknr, nblock))
                goto out;

        ret = NILFS_SEG_FAIL_CHECKSUM_FULL;
        if (crc != le32_to_cpu(sum->ss_datasum))
                goto out;
        ret = 0;
out:
        return ret;
}

/**
 * nilfs_read_summary_info - read an item on summary blocks of a log
 * @nilfs: nilfs object
 * @pbh: the current buffer head on summary blocks [in, out]
 * @offset: the current byte offset on summary blocks [in, out]
 * @bytes: byte size of the item to be read
 */
static void *nilfs_read_summary_info(struct the_nilfs *nilfs,
                                     struct buffer_head **pbh,
                                     unsigned int *offset, unsigned int bytes)
{
        void *ptr;
        sector_t blocknr;

        BUG_ON((*pbh)->b_size < *offset);
        if (bytes > (*pbh)->b_size - *offset) {
                blocknr = (*pbh)->b_blocknr;
                brelse(*pbh);
                *pbh = __bread(nilfs->ns_bdev, blocknr + 1,
                               nilfs->ns_blocksize);
                if (unlikely(!*pbh))
                        return NULL;
                *offset = 0;
        }
        ptr = (*pbh)->b_data + *offset;
        *offset += bytes;
        return ptr;
}

/**
 * nilfs_skip_summary_info - skip items on summary blocks of a log
 * @nilfs: nilfs object
 * @pbh: the current buffer head on summary blocks [in, out]
 * @offset: the current byte offset on summary blocks [in, out]
 * @bytes: byte size of the item to be skipped
 * @count: number of items to be skipped
 */
static void nilfs_skip_summary_info(struct the_nilfs *nilfs,
                                    struct buffer_head **pbh,
                                    unsigned int *offset, unsigned int bytes,
                                    unsigned long count)
{
        unsigned int rest_item_in_current_block
                = ((*pbh)->b_size - *offset) / bytes;

        if (count <= rest_item_in_current_block) {
                *offset += bytes * count;
        } else {
                sector_t blocknr = (*pbh)->b_blocknr;
                unsigned int nitem_per_block = (*pbh)->b_size / bytes;
                unsigned int bcnt;

                count -= rest_item_in_current_block;
                bcnt = DIV_ROUND_UP(count, nitem_per_block);
                *offset = bytes * (count - (bcnt - 1) * nitem_per_block);

                brelse(*pbh);
                *pbh = __bread(nilfs->ns_bdev, blocknr + bcnt,
                               nilfs->ns_blocksize);
        }
}

/**
 * nilfs_scan_dsync_log - get block information of a log written for data sync
 * @nilfs: nilfs object
 * @start_blocknr: start block number of the log
 * @sum: log summary information
 * @head: list head to add nilfs_recovery_block struct
 */
static int nilfs_scan_dsync_log(struct the_nilfs *nilfs, sector_t start_blocknr,
                                struct nilfs_segment_summary *sum,
                                struct list_head *head)
{
        struct buffer_head *bh;
        unsigned int offset;
        u32 nfinfo, sumbytes;
        sector_t blocknr;
        ino_t ino;
        int err = -EIO;

        nfinfo = le32_to_cpu(sum->ss_nfinfo);
        if (!nfinfo)
                return 0;

        sumbytes = le32_to_cpu(sum->ss_sumbytes);
        blocknr = start_blocknr + DIV_ROUND_UP(sumbytes, nilfs->ns_blocksize);
        bh = __bread(nilfs->ns_bdev, start_blocknr, nilfs->ns_blocksize);
        if (unlikely(!bh))
                goto out;

        offset = le16_to_cpu(sum->ss_bytes);
        for (;;) {
                unsigned long nblocks, ndatablk, nnodeblk;
                struct nilfs_finfo *finfo;

                finfo = nilfs_read_summary_info(nilfs, &bh, &offset,
                                                sizeof(*finfo));
                if (unlikely(!finfo))
                        goto out;

                ino = le64_to_cpu(finfo->fi_ino);
                nblocks = le32_to_cpu(finfo->fi_nblocks);
                ndatablk = le32_to_cpu(finfo->fi_ndatablk);
                nnodeblk = nblocks - ndatablk;

                while (ndatablk-- > 0) {
                        struct nilfs_recovery_block *rb;
                        struct nilfs_binfo_v *binfo;

                        binfo = nilfs_read_summary_info(nilfs, &bh, &offset,
                                                        sizeof(*binfo));
                        if (unlikely(!binfo))
                                goto out;

                        rb = kmalloc(sizeof(*rb), GFP_NOFS);
                        if (unlikely(!rb)) {
                                err = -ENOMEM;
                                goto out;
                        }
                        rb->ino = ino;
                        rb->blocknr = blocknr++;
                        rb->vblocknr = le64_to_cpu(binfo->bi_vblocknr);
                        rb->blkoff = le64_to_cpu(binfo->bi_blkoff);
                        /* INIT_LIST_HEAD(&rb->list); */
                        list_add_tail(&rb->list, head);
                }
                if (--nfinfo == 0)
                        break;
                blocknr += nnodeblk; /* always 0 for data sync logs */
                nilfs_skip_summary_info(nilfs, &bh, &offset, sizeof(__le64),
                                        nnodeblk);
                if (unlikely(!bh))
                        goto out;
        }
        err = 0;
 out:
        brelse(bh);   /* brelse(NULL) is just ignored */
        return err;
}

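/* Free all nilfs_recovery_block work items queued on @head */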
static void dispose_recovery_list(struct list_head *head)
{
        while (!list_empty(head)) {
                struct nilfs_recovery_block *rb;

                rb = list_first_entry(head, struct nilfs_recovery_block, list);
                list_del(&rb->list);
                kfree(rb);
        }
}

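/*
 * Per-segment work item used to remember the full segments visited while
 * scanning for the latest super root; see nilfs_segment_list_add() and
 * nilfs_dispose_segment_list() below.
 */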
struct nilfs_segment_entry {
        struct list_head        list;
        __u64                   segnum;
};

static int nilfs_segment_list_add(struct list_head *head, __u64 segnum)
{
        struct nilfs_segment_entry *ent = kmalloc(sizeof(*ent), GFP_NOFS);

        if (unlikely(!ent))
                return -ENOMEM;

        ent->segnum = segnum;
        INIT_LIST_HEAD(&ent->list);
        list_add_tail(&ent->list, head);
        return 0;
}

void nilfs_dispose_segment_list(struct list_head *head)
{
        while (!list_empty(head)) {
                struct nilfs_segment_entry *ent;

                ent = list_first_entry(head, struct nilfs_segment_entry, list);
                list_del(&ent->list);
                kfree(ent);
        }
}

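/*
 * nilfs_prepare_segment_for_recovery - set up segment usage for the
 * recovery write
 *
 * Frees the segment that followed the latest super root, marks the
 * segments consumed by the roll forward as dirty so they are not
 * reallocated by the next write, and allocates a fresh segment from
 * which the recovered data will be written out.
 */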
static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs,
                                              struct super_block *sb,
                                              struct nilfs_recovery_info *ri)
{
        struct list_head *head = &ri->ri_used_segments;
        struct nilfs_segment_entry *ent, *n;
        struct inode *sufile = nilfs->ns_sufile;
        __u64 segnum[4];
        int err;
        int i;

        segnum[0] = nilfs->ns_segnum;
        segnum[1] = nilfs->ns_nextnum;
        segnum[2] = ri->ri_segnum;
        segnum[3] = ri->ri_nextnum;

        /*
         * Releasing the next segment of the latest super root.
         * The next segment is invalidated by this recovery.
         */
        err = nilfs_sufile_free(sufile, segnum[1]);
        if (unlikely(err))
                goto failed;

        for (i = 1; i < 4; i++) {
                err = nilfs_segment_list_add(head, segnum[i]);
                if (unlikely(err))
                        goto failed;
        }

        /*
         * Collecting segments written after the latest super root.
         * These are marked dirty to avoid being reallocated in the next write.
         */
        list_for_each_entry_safe(ent, n, head, list) {
                if (ent->segnum != segnum[0]) {
                        err = nilfs_sufile_scrap(sufile, ent->segnum);
                        if (unlikely(err))
                                goto failed;
                }
                list_del(&ent->list);
                kfree(ent);
        }

        /* Allocate new segments for recovery */
        err = nilfs_sufile_alloc(sufile, &segnum[0]);
        if (unlikely(err))
                goto failed;

        nilfs->ns_pseg_offset = 0;
        nilfs->ns_seg_seq = ri->ri_seq + 2;
        nilfs->ns_nextnum = nilfs->ns_segnum = segnum[0];

 failed:
        /* No need to recover sufile because it will be destroyed on error */
        return err;
}

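/*
 * Read the on-disk block described by @rb and copy its contents into the
 * corresponding position of @page in the page cache of the file being
 * recovered.
 */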
static int nilfs_recovery_copy_block(struct the_nilfs *nilfs,
                                     struct nilfs_recovery_block *rb,
                                     struct page *page)
{
        struct buffer_head *bh_org;
        void *kaddr;

        bh_org = __bread(nilfs->ns_bdev, rb->blocknr, nilfs->ns_blocksize);
        if (unlikely(!bh_org))
                return -EIO;

        kaddr = kmap_atomic(page);
        memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size);
        kunmap_atomic(kaddr);
        brelse(bh_org);
        return 0;
}

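/*
 * nilfs_recover_dsync_blocks - write collected data blocks back to files
 *
 * For each block queued by nilfs_scan_dsync_log(), look up the owning
 * inode, copy the block into its page cache and mark the file dirty so
 * that a later segment construction writes the data under the recovered
 * checkpoint.  Per-block failures are reported but do not stop the loop;
 * the first error code seen is returned after all entries are consumed.
 */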
static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
                                      struct super_block *sb,
                                      struct nilfs_root *root,
                                      struct list_head *head,
                                      unsigned long *nr_salvaged_blocks)
{
        struct inode *inode;
        struct nilfs_recovery_block *rb, *n;
        unsigned int blocksize = nilfs->ns_blocksize;
        struct page *page;
        loff_t pos;
        int err = 0, err2 = 0;

        list_for_each_entry_safe(rb, n, head, list) {
                inode = nilfs_iget(sb, root, rb->ino);
                if (IS_ERR(inode)) {
                        err = PTR_ERR(inode);
                        inode = NULL;
                        goto failed_inode;
                }

                pos = rb->blkoff << inode->i_blkbits;
                err = block_write_begin(inode->i_mapping, pos, blocksize,
                                        0, &page, nilfs_get_block);
                if (unlikely(err)) {
                        loff_t isize = inode->i_size;

                        if (pos + blocksize > isize)
                                nilfs_write_failed(inode->i_mapping,
                                                        pos + blocksize);
                        goto failed_inode;
                }

                err = nilfs_recovery_copy_block(nilfs, rb, page);
                if (unlikely(err))
                        goto failed_page;

                err = nilfs_set_file_dirty(inode, 1);
                if (unlikely(err))
                        goto failed_page;

                block_write_end(NULL, inode->i_mapping, pos, blocksize,
                                blocksize, page, NULL);

                unlock_page(page);
                put_page(page);

                (*nr_salvaged_blocks)++;
                goto next;

 failed_page:
                unlock_page(page);
                put_page(page);

 failed_inode:
                nilfs_msg(sb, KERN_WARNING,
                          "error %d recovering data block (ino=%lu, block-offset=%llu)",
                          err, (unsigned long)rb->ino,
                          (unsigned long long)rb->blkoff);
                if (!err2)
                        err2 = err;
 next:
                iput(inode); /* iput(NULL) is just ignored */
                list_del_init(&rb->list);
                kfree(rb);
        }
        return err2;
}

/**
 * nilfs_do_roll_forward - salvage logical segments newer than the latest
 * checkpoint
 * @nilfs: nilfs object
 * @sb: super block instance
 * @root: NILFS root instance of the recovered checkpoint
 * @ri: pointer to a nilfs_recovery_info
 */
static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
                                 struct super_block *sb,
                                 struct nilfs_root *root,
                                 struct nilfs_recovery_info *ri)
{
        struct buffer_head *bh_sum = NULL;
        struct nilfs_segment_summary *sum = NULL;
        sector_t pseg_start;
        sector_t seg_start, seg_end;  /* Starting/ending DBN of full segment */
        unsigned long nsalvaged_blocks = 0;
        unsigned int flags;
        u64 seg_seq;
        __u64 segnum, nextnum = 0;
        int empty_seg = 0;
        int err = 0, ret;
        LIST_HEAD(dsync_blocks);  /* list of data blocks to be recovered */
        enum {
                RF_INIT_ST,
                RF_DSYNC_ST,   /* scanning data-sync segments */
        };
        int state = RF_INIT_ST;

        pseg_start = ri->ri_lsegs_start;
        seg_seq = ri->ri_lsegs_start_seq;
        segnum = nilfs_get_segnum_of_block(nilfs, pseg_start);
        nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);

        while (segnum != ri->ri_segnum || pseg_start <= ri->ri_pseg_start) {
                brelse(bh_sum);
                bh_sum = nilfs_read_log_header(nilfs, pseg_start, &sum);
                if (!bh_sum) {
                        err = -EIO;
                        goto failed;
                }

                ret = nilfs_validate_log(nilfs, seg_seq, bh_sum, sum);
                if (ret) {
                        if (ret == NILFS_SEG_FAIL_IO) {
                                err = -EIO;
                                goto failed;
                        }
                        goto strayed;
                }

                flags = le16_to_cpu(sum->ss_flags);
                if (flags & NILFS_SS_SR)
                        goto confused;

                /* Found a valid partial segment; do recovery actions */
                nextnum = nilfs_get_segnum_of_block(nilfs,
                                                    le64_to_cpu(sum->ss_next));
                empty_seg = 0;
                nilfs->ns_ctime = le64_to_cpu(sum->ss_create);
                if (!(flags & NILFS_SS_GC))
                        nilfs->ns_nongc_ctime = nilfs->ns_ctime;

                switch (state) {
                case RF_INIT_ST:
                        if (!(flags & NILFS_SS_LOGBGN) ||
                            !(flags & NILFS_SS_SYNDT))
                                goto try_next_pseg;
                        state = RF_DSYNC_ST;
                        /* Fall through */
                case RF_DSYNC_ST:
                        if (!(flags & NILFS_SS_SYNDT))
                                goto confused;

                        err = nilfs_scan_dsync_log(nilfs, pseg_start, sum,
                                                   &dsync_blocks);
                        if (unlikely(err))
                                goto failed;
                        if (flags & NILFS_SS_LOGEND) {
                                err = nilfs_recover_dsync_blocks(
                                        nilfs, sb, root, &dsync_blocks,
                                        &nsalvaged_blocks);
                                if (unlikely(err))
                                        goto failed;
                                state = RF_INIT_ST;
                        }
                        break; /* Fall through to try_next_pseg */
                }

 try_next_pseg:
                if (pseg_start == ri->ri_lsegs_end)
                        break;
                pseg_start += le32_to_cpu(sum->ss_nblocks);
                if (pseg_start < seg_end)
                        continue;
                goto feed_segment;

 strayed:
                if (pseg_start == ri->ri_lsegs_end)
                        break;

 feed_segment:
                /* Looking to the next full segment */
                if (empty_seg++)
                        break;
                seg_seq++;
                segnum = nextnum;
                nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);
                pseg_start = seg_start;
        }

        if (nsalvaged_blocks) {
                nilfs_msg(sb, KERN_INFO, "salvaged %lu blocks",
                          nsalvaged_blocks);
                ri->ri_need_recovery = NILFS_RECOVERY_ROLLFORWARD_DONE;
        }
 out:
        brelse(bh_sum);
        dispose_recovery_list(&dsync_blocks);
        return err;

 confused:
        err = -EINVAL;
 failed:
        nilfs_msg(sb, KERN_ERR,
                  "error %d roll-forwarding partial segment at blocknr = %llu",
                  err, (unsigned long long)pseg_start);
        goto out;
}

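/*
 * nilfs_finish_roll_forward - clean up after a successful roll forward
 *
 * If the salvaged logs begin in the same full segment as the latest super
 * root, that segment remains in use after recovery, so zero out the first
 * block of those logs to invalidate them and keep them from being replayed
 * by a future mount.
 */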
static void nilfs_finish_roll_forward(struct the_nilfs *nilfs,
                                      struct nilfs_recovery_info *ri)
{
        struct buffer_head *bh;
        int err;

        if (nilfs_get_segnum_of_block(nilfs, ri->ri_lsegs_start) !=
            nilfs_get_segnum_of_block(nilfs, ri->ri_super_root))
                return;

        bh = __getblk(nilfs->ns_bdev, ri->ri_lsegs_start, nilfs->ns_blocksize);
        BUG_ON(!bh);
        memset(bh->b_data, 0, bh->b_size);
        set_buffer_dirty(bh);
        err = sync_dirty_buffer(bh);
        if (unlikely(err))
                nilfs_msg(nilfs->ns_sb, KERN_WARNING,
                          "buffer sync write failed during post-cleaning of recovery.");
        brelse(bh);
}

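/*
 * The two entry points below are used by the mount path (see load_nilfs()
 * in the_nilfs.c): nilfs_search_super_root() locates the latest super root
 * and records what needs to be replayed, and nilfs_salvage_orphan_logs()
 * then performs the roll forward described above.
 */
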
/**
 * nilfs_salvage_orphan_logs - salvage logs written after the latest checkpoint
 * @nilfs: nilfs object
 * @sb: super block instance
 * @ri: pointer to a nilfs_recovery_info struct to store search results.
 *
 * Return Value: On success, 0 is returned.  On error, one of the following
 * negative error codes is returned.
 *
 * %-EINVAL - Inconsistent filesystem state.
 *
 * %-EIO - I/O error
 *
 * %-ENOSPC - No space left on device (only in a panic state).
 *
 * %-ERESTARTSYS - Interrupted.
 *
 * %-ENOMEM - Insufficient memory available.
 */
int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs,
                              struct super_block *sb,
                              struct nilfs_recovery_info *ri)
{
        struct nilfs_root *root;
        int err;

        if (ri->ri_lsegs_start == 0 || ri->ri_lsegs_end == 0)
                return 0;

        err = nilfs_attach_checkpoint(sb, ri->ri_cno, true, &root);
        if (unlikely(err)) {
                nilfs_msg(sb, KERN_ERR,
                          "error %d loading the latest checkpoint", err);
                return err;
        }

        err = nilfs_do_roll_forward(nilfs, sb, root, ri);
        if (unlikely(err))
                goto failed;

        if (ri->ri_need_recovery == NILFS_RECOVERY_ROLLFORWARD_DONE) {
                err = nilfs_prepare_segment_for_recovery(nilfs, sb, ri);
                if (unlikely(err)) {
                        nilfs_msg(sb, KERN_ERR,
                                  "error %d preparing segment for recovery",
                                  err);
                        goto failed;
                }

                err = nilfs_attach_log_writer(sb, root);
                if (unlikely(err))
                        goto failed;

                set_nilfs_discontinued(nilfs);
                err = nilfs_construct_segment(sb);
                nilfs_detach_log_writer(sb);

                if (unlikely(err)) {
                        nilfs_msg(sb, KERN_ERR,
                                  "error %d writing segment for recovery",
                                  err);
                        goto failed;
                }

                nilfs_finish_roll_forward(nilfs, ri);
        }

 failed:
        nilfs_put_root(root);
        return err;
}

/**
 * nilfs_search_super_root - search the latest valid super root
 * @nilfs: the_nilfs
 * @ri: pointer to a nilfs_recovery_info struct to store search results.
 *
 * nilfs_search_super_root() looks for the latest super-root from a partial
 * segment pointed by the superblock.  It sets up struct the_nilfs through
 * this search.  It fills nilfs_recovery_info (ri) required for recovery.
 *
 * Return Value: On success, 0 is returned.  On error, one of the following
 * negative error codes is returned.
 *
 * %-EINVAL - No valid segment found
 *
 * %-EIO - I/O error
 *
 * %-ENOMEM - Insufficient memory available.
 */
int nilfs_search_super_root(struct the_nilfs *nilfs,
                            struct nilfs_recovery_info *ri)
{
        struct buffer_head *bh_sum = NULL;
        struct nilfs_segment_summary *sum = NULL;
        sector_t pseg_start, pseg_end, sr_pseg_start = 0;
        sector_t seg_start, seg_end; /* range of full segment (block number) */
        sector_t b, end;
        unsigned long nblocks;
        unsigned int flags;
        u64 seg_seq;
        __u64 segnum, nextnum = 0;
        __u64 cno;
        LIST_HEAD(segments);
        int empty_seg = 0, scan_newer = 0;
        int ret;

        pseg_start = nilfs->ns_last_pseg;
        seg_seq = nilfs->ns_last_seq;
        cno = nilfs->ns_last_cno;
        segnum = nilfs_get_segnum_of_block(nilfs, pseg_start);

        /* Calculate range of segment */
        nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);

        /* Read ahead segment */
        b = seg_start;
        while (b <= seg_end)
                __breadahead(nilfs->ns_bdev, b++, nilfs->ns_blocksize);

        for (;;) {
                brelse(bh_sum);
                ret = NILFS_SEG_FAIL_IO;
                bh_sum = nilfs_read_log_header(nilfs, pseg_start, &sum);
                if (!bh_sum)
                        goto failed;

                ret = nilfs_validate_log(nilfs, seg_seq, bh_sum, sum);
                if (ret) {
                        if (ret == NILFS_SEG_FAIL_IO)
                                goto failed;
                        goto strayed;
                }

                nblocks = le32_to_cpu(sum->ss_nblocks);
                pseg_end = pseg_start + nblocks - 1;
                if (unlikely(pseg_end > seg_end)) {
                        ret = NILFS_SEG_FAIL_CONSISTENCY;
                        goto strayed;
                }

                /* A valid partial segment */
                ri->ri_pseg_start = pseg_start;
                ri->ri_seq = seg_seq;
                ri->ri_segnum = segnum;
                nextnum = nilfs_get_segnum_of_block(nilfs,
                                                    le64_to_cpu(sum->ss_next));
                ri->ri_nextnum = nextnum;
                empty_seg = 0;

                flags = le16_to_cpu(sum->ss_flags);
                if (!(flags & NILFS_SS_SR) && !scan_newer) {
                        /*
                         * This will never happen because a superblock
                         * (last_segment) always points to a pseg with
                         * a super root.
                         */
                        ret = NILFS_SEG_FAIL_CONSISTENCY;
                        goto failed;
                }

                if (pseg_start == seg_start) {
                        nilfs_get_segment_range(nilfs, nextnum, &b, &end);
                        while (b <= end)
                                __breadahead(nilfs->ns_bdev, b++,
                                             nilfs->ns_blocksize);
                }
                if (!(flags & NILFS_SS_SR)) {
                        if (!ri->ri_lsegs_start && (flags & NILFS_SS_LOGBGN)) {
                                ri->ri_lsegs_start = pseg_start;
                                ri->ri_lsegs_start_seq = seg_seq;
                        }
                        if (flags & NILFS_SS_LOGEND)
                                ri->ri_lsegs_end = pseg_start;
                        goto try_next_pseg;
                }

                /* A valid super root was found. */
                ri->ri_cno = cno++;
                ri->ri_super_root = pseg_end;
                ri->ri_lsegs_start = ri->ri_lsegs_end = 0;

                nilfs_dispose_segment_list(&segments);
                sr_pseg_start = pseg_start;
                nilfs->ns_pseg_offset = pseg_start + nblocks - seg_start;
                nilfs->ns_seg_seq = seg_seq;
                nilfs->ns_segnum = segnum;
                nilfs->ns_cno = cno;  /* nilfs->ns_cno = ri->ri_cno + 1 */
                nilfs->ns_ctime = le64_to_cpu(sum->ss_create);
                nilfs->ns_nextnum = nextnum;

                if (scan_newer)
                        ri->ri_need_recovery = NILFS_RECOVERY_SR_UPDATED;
                else {
                        if (nilfs->ns_mount_state & NILFS_VALID_FS)
                                goto super_root_found;
                        scan_newer = 1;
                }

 try_next_pseg:
                /* Standing on a course, or met an inconsistent state */
                pseg_start += nblocks;
                if (pseg_start < seg_end)
                        continue;
                goto feed_segment;

 strayed:
                /* Off the trail */
                if (!scan_newer)
                        /*
                         * This can happen if a checkpoint was written without
                         * barriers, or as a result of an I/O failure.
                         */
                        goto failed;

 feed_segment:
                /* Looking to the next full segment */
                if (empty_seg++)
                        goto super_root_found; /* found a valid super root */

                ret = nilfs_segment_list_add(&segments, segnum);
                if (unlikely(ret))
                        goto failed;

                seg_seq++;
                segnum = nextnum;
                nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);
                pseg_start = seg_start;
        }

 super_root_found:
        /* Updating pointers relating to the latest checkpoint */
        brelse(bh_sum);
        list_splice_tail(&segments, &ri->ri_used_segments);
        nilfs->ns_last_pseg = sr_pseg_start;
        nilfs->ns_last_seq = nilfs->ns_seg_seq;
        nilfs->ns_last_cno = ri->ri_cno;
        return 0;

 failed:
        brelse(bh_sum);
        nilfs_dispose_segment_list(&segments);
        return ret < 0 ? ret : nilfs_warn_segment_error(nilfs->ns_sb, ret);
}
 964