linux/fs/f2fs/recovery.c
<<
>>
Prefs
   1/*
   2 * fs/f2fs/recovery.c
   3 *
   4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
   5 *             http://www.samsung.com/
   6 *
   7 * This program is free software; you can redistribute it and/or modify
   8 * it under the terms of the GNU General Public License version 2 as
   9 * published by the Free Software Foundation.
  10 */
  11#include <linux/fs.h>
  12#include <linux/f2fs_fs.h>
  13#include "f2fs.h"
  14#include "node.h"
  15#include "segment.h"
  16
  17/*
  18 * Roll forward recovery scenarios.
  19 *
  20 * [Term] F: fsync_mark, D: dentry_mark
  21 *
  22 * 1. inode(x) | CP | inode(x) | dnode(F)
  23 * -> Update the latest inode(x).
  24 *
  25 * 2. inode(x) | CP | inode(F) | dnode(F)
  26 * -> No problem.
  27 *
  28 * 3. inode(x) | CP | dnode(F) | inode(x)
  29 * -> Recover to the latest dnode(F), and drop the last inode(x)
  30 *
  31 * 4. inode(x) | CP | dnode(F) | inode(F)
  32 * -> No problem.
  33 *
  34 * 5. CP | inode(x) | dnode(F)
  35 * -> The inode(DF) was missing. Should drop this dnode(F).
  36 *
  37 * 6. CP | inode(DF) | dnode(F)
  38 * -> No problem.
  39 *
  40 * 7. CP | dnode(F) | inode(DF)
  41 * -> If f2fs_iget fails, then goto next to find inode(DF).
  42 *
  43 * 8. CP | dnode(F) | inode(x)
  44 * -> If f2fs_iget fails, then goto next to find inode(DF).
  45 *    But it will fail due to no inode(DF).
  46 */
  47
  /* Slab cache that the struct fsync_inode_entry objects in this file are allocated from. */
  48static struct kmem_cache *fsync_entry_slab;
  49
  50bool space_for_roll_forward(struct f2fs_sb_info *sbi)
  51{
  52        if (sbi->last_valid_block_count + sbi->alloc_valid_block_count
  53                        > sbi->user_block_count)
  54                return false;
  55        return true;
  56}
  57
  58static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
  59                                                                nid_t ino)
  60{
  61        struct fsync_inode_entry *entry;
  62
  63        list_for_each_entry(entry, head, list)
  64                if (entry->inode->i_ino == ino)
  65                        return entry;
  66
  67        return NULL;
  68}
  69
  70static int recover_dentry(struct inode *inode, struct page *ipage)
  71{
  72        struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
  73        nid_t pino = le32_to_cpu(raw_inode->i_pino);
  74        struct f2fs_dir_entry *de;
  75        struct qstr name;
  76        struct page *page;
  77        struct inode *dir, *einode;
  78        int err = 0;
  79
  80        dir = f2fs_iget(inode->i_sb, pino);
  81        if (IS_ERR(dir)) {
  82                err = PTR_ERR(dir);
  83                goto out;
  84        }
  85
  86        if (file_enc_name(inode)) {
  87                iput(dir);
  88                return 0;
  89        }
  90
  91        name.len = le32_to_cpu(raw_inode->i_namelen);
  92        name.name = raw_inode->i_name;
  93
  94        if (unlikely(name.len > F2FS_NAME_LEN)) {
  95                WARN_ON(1);
  96                err = -ENAMETOOLONG;
  97                goto out_err;
  98        }
  99retry:
 100        de = f2fs_find_entry(dir, &name, &page);
 101        if (de && inode->i_ino == le32_to_cpu(de->ino))
 102                goto out_unmap_put;
 103
 104        if (de) {
 105                einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino));
 106                if (IS_ERR(einode)) {
 107                        WARN_ON(1);
 108                        err = PTR_ERR(einode);
 109                        if (err == -ENOENT)
 110                                err = -EEXIST;
 111                        goto out_unmap_put;
 112                }
 113                err = acquire_orphan_inode(F2FS_I_SB(inode));
 114                if (err) {
 115                        iput(einode);
 116                        goto out_unmap_put;
 117                }
 118                f2fs_delete_entry(de, page, dir, einode);
 119                iput(einode);
 120                goto retry;
 121        }
 122        err = __f2fs_add_link(dir, &name, inode, inode->i_ino, inode->i_mode);
 123        if (err)
 124                goto out_err;
 125
 126        if (is_inode_flag_set(F2FS_I(dir), FI_DELAY_IPUT)) {
 127                iput(dir);
 128        } else {
 129                add_dirty_dir_inode(dir);
 130                set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT);
 131        }
 132
 133        goto out;
 134
 135out_unmap_put:
 136        f2fs_dentry_kunmap(dir, page);
 137        f2fs_put_page(page, 0);
 138out_err:
 139        iput(dir);
 140out:
 141        f2fs_msg(inode->i_sb, KERN_NOTICE,
 142                        "%s: ino = %x, name = %s, dir = %lx, err = %d",
 143                        __func__, ino_of_node(ipage), raw_inode->i_name,
 144                        IS_ERR(dir) ? 0 : dir->i_ino, err);
 145        return err;
 146}
 147
 148static void recover_inode(struct inode *inode, struct page *page)
 149{
 150        struct f2fs_inode *raw = F2FS_INODE(page);
 151        char *name;
 152
 153        inode->i_mode = le16_to_cpu(raw->i_mode);
 154        i_size_write(inode, le64_to_cpu(raw->i_size));
 155        inode->i_atime.tv_sec = le64_to_cpu(raw->i_mtime);
 156        inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
 157        inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
 158        inode->i_atime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
 159        inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
 160        inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
 161
 162        if (file_enc_name(inode))
 163                name = "<encrypted>";
 164        else
 165                name = F2FS_INODE(page)->i_name;
 166
 167        f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s",
 168                        ino_of_node(page), name);
 169}
 170
 171static bool is_same_inode(struct inode *inode, struct page *ipage)
 172{
 173        struct f2fs_inode *ri = F2FS_INODE(ipage);
 174        struct timespec disk;
 175
 176        if (!IS_INODE(ipage))
 177                return true;
 178
 179        disk.tv_sec = le64_to_cpu(ri->i_ctime);
 180        disk.tv_nsec = le32_to_cpu(ri->i_ctime_nsec);
 181        if (timespec_compare(&inode->i_ctime, &disk) > 0)
 182                return false;
 183
 184        disk.tv_sec = le64_to_cpu(ri->i_atime);
 185        disk.tv_nsec = le32_to_cpu(ri->i_atime_nsec);
 186        if (timespec_compare(&inode->i_atime, &disk) > 0)
 187                return false;
 188
 189        disk.tv_sec = le64_to_cpu(ri->i_mtime);
 190        disk.tv_nsec = le32_to_cpu(ri->i_mtime_nsec);
 191        if (timespec_compare(&inode->i_mtime, &disk) > 0)
 192                return false;
 193
 194        return true;
 195}
 196
 197static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
 198{
 199        unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
 200        struct curseg_info *curseg;
 201        struct page *page = NULL;
 202        block_t blkaddr;
 203        int err = 0;
 204
 205        /* get node pages in the current segment */
 206        curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
 207        blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
 208
 209        ra_meta_pages(sbi, blkaddr, 1, META_POR, true);
 210
 211        while (1) {
 212                struct fsync_inode_entry *entry;
 213
 214                if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
 215                        return 0;
 216
 217                page = get_tmp_page(sbi, blkaddr);
 218
 219                if (cp_ver != cpver_of_node(page))
 220                        break;
 221
 222                if (!is_fsync_dnode(page))
 223                        goto next;
 224
 225                entry = get_fsync_inode(head, ino_of_node(page));
 226                if (entry) {
 227                        if (!is_same_inode(entry->inode, page))
 228                                goto next;
 229                } else {
 230                        if (IS_INODE(page) && is_dent_dnode(page)) {
 231                                err = recover_inode_page(sbi, page);
 232                                if (err)
 233                                        break;
 234                        }
 235
 236                        /* add this fsync inode to the list */
 237                        entry = kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO);
 238                        if (!entry) {
 239                                err = -ENOMEM;
 240                                break;
 241                        }
 242                        /*
 243                         * CP | dnode(F) | inode(DF)
 244                         * For this case, we should not give up now.
 245                         */
 246                        entry->inode = f2fs_iget(sbi->sb, ino_of_node(page));
 247                        if (IS_ERR(entry->inode)) {
 248                                err = PTR_ERR(entry->inode);
 249                                kmem_cache_free(fsync_entry_slab, entry);
 250                                if (err == -ENOENT) {
 251                                        err = 0;
 252                                        goto next;
 253                                }
 254                                break;
 255                        }
 256                        list_add_tail(&entry->list, head);
 257                }
 258                entry->blkaddr = blkaddr;
 259
 260                if (IS_INODE(page)) {
 261                        entry->last_inode = blkaddr;
 262                        if (is_dent_dnode(page))
 263                                entry->last_dentry = blkaddr;
 264                }
 265next:
 266                /* check next segment */
 267                blkaddr = next_blkaddr_of_node(page);
 268                f2fs_put_page(page, 1);
 269
 270                ra_meta_pages_cond(sbi, blkaddr);
 271        }
 272        f2fs_put_page(page, 1);
 273        return err;
 274}
 275
 276static void destroy_fsync_dnodes(struct list_head *head)
 277{
 278        struct fsync_inode_entry *entry, *tmp;
 279
 280        list_for_each_entry_safe(entry, tmp, head, list) {
 281                iput(entry->inode);
 282                list_del(&entry->list);
 283                kmem_cache_free(fsync_entry_slab, entry);
 284        }
 285}
 286
 287static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
 288                        block_t blkaddr, struct dnode_of_data *dn)
 289{
 290        struct seg_entry *sentry;
 291        unsigned int segno = GET_SEGNO(sbi, blkaddr);
 292        unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
 293        struct f2fs_summary_block *sum_node;
 294        struct f2fs_summary sum;
 295        struct page *sum_page, *node_page;
 296        struct dnode_of_data tdn = *dn;
 297        nid_t ino, nid;
 298        struct inode *inode;
 299        unsigned int offset;
 300        block_t bidx;
 301        int i;
 302
 303        sentry = get_seg_entry(sbi, segno);
 304        if (!f2fs_test_bit(blkoff, sentry->cur_valid_map))
 305                return 0;
 306
 307        /* Get the previous summary */
 308        for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) {
 309                struct curseg_info *curseg = CURSEG_I(sbi, i);
 310                if (curseg->segno == segno) {
 311                        sum = curseg->sum_blk->entries[blkoff];
 312                        goto got_it;
 313                }
 314        }
 315
 316        sum_page = get_sum_page(sbi, segno);
 317        sum_node = (struct f2fs_summary_block *)page_address(sum_page);
 318        sum = sum_node->entries[blkoff];
 319        f2fs_put_page(sum_page, 1);
 320got_it:
 321        /* Use the locked dnode page and inode */
 322        nid = le32_to_cpu(sum.nid);
 323        if (dn->inode->i_ino == nid) {
 324                tdn.nid = nid;
 325                if (!dn->inode_page_locked)
 326                        lock_page(dn->inode_page);
 327                tdn.node_page = dn->inode_page;
 328                tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
 329                goto truncate_out;
 330        } else if (dn->nid == nid) {
 331                tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
 332                goto truncate_out;
 333        }
 334
 335        /* Get the node page */
 336        node_page = get_node_page(sbi, nid);
 337        if (IS_ERR(node_page))
 338                return PTR_ERR(node_page);
 339
 340        offset = ofs_of_node(node_page);
 341        ino = ino_of_node(node_page);
 342        f2fs_put_page(node_page, 1);
 343
 344        if (ino != dn->inode->i_ino) {
 345                /* Deallocate previous index in the node page */
 346                inode = f2fs_iget(sbi->sb, ino);
 347                if (IS_ERR(inode))
 348                        return PTR_ERR(inode);
 349        } else {
 350                inode = dn->inode;
 351        }
 352
 353        bidx = start_bidx_of_node(offset, inode) + le16_to_cpu(sum.ofs_in_node);
 354
 355        /*
 356         * if inode page is locked, unlock temporarily, but its reference
 357         * count keeps alive.
 358         */
 359        if (ino == dn->inode->i_ino && dn->inode_page_locked)
 360                unlock_page(dn->inode_page);
 361
 362        set_new_dnode(&tdn, inode, NULL, NULL, 0);
 363        if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE))
 364                goto out;
 365
 366        if (tdn.data_blkaddr == blkaddr)
 367                truncate_data_blocks_range(&tdn, 1);
 368
 369        f2fs_put_dnode(&tdn);
 370out:
 371        if (ino != dn->inode->i_ino)
 372                iput(inode);
 373        else if (dn->inode_page_locked)
 374                lock_page(dn->inode_page);
 375        return 0;
 376
 377truncate_out:
 378        if (datablock_addr(tdn.node_page, tdn.ofs_in_node) == blkaddr)
 379                truncate_data_blocks_range(&tdn, 1);
 380        if (dn->inode->i_ino == nid && !dn->inode_page_locked)
 381                unlock_page(dn->inode_page);
 382        return 0;
 383}
 384
 385static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
 386                                        struct page *page, block_t blkaddr)
 387{
 388        struct dnode_of_data dn;
 389        struct node_info ni;
 390        unsigned int start, end;
 391        int err = 0, recovered = 0;
 392
 393        /* step 1: recover xattr */
 394        if (IS_INODE(page)) {
 395                recover_inline_xattr(inode, page);
 396        } else if (f2fs_has_xattr_block(ofs_of_node(page))) {
 397                /*
 398                 * Deprecated; xattr blocks should be found from cold log.
 399                 * But, we should remain this for backward compatibility.
 400                 */
 401                recover_xattr_data(inode, page, blkaddr);
 402                goto out;
 403        }
 404
 405        /* step 2: recover inline data */
 406        if (recover_inline_data(inode, page))
 407                goto out;
 408
 409        /* step 3: recover data indices */
 410        start = start_bidx_of_node(ofs_of_node(page), inode);
 411        end = start + ADDRS_PER_PAGE(page, inode);
 412
 413        set_new_dnode(&dn, inode, NULL, NULL, 0);
 414
 415        err = get_dnode_of_data(&dn, start, ALLOC_NODE);
 416        if (err)
 417                goto out;
 418
 419        f2fs_wait_on_page_writeback(dn.node_page, NODE, true);
 420
 421        get_node_info(sbi, dn.nid, &ni);
 422        f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
 423        f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page));
 424
 425        for (; start < end; start++, dn.ofs_in_node++) {
 426                block_t src, dest;
 427
 428                src = datablock_addr(dn.node_page, dn.ofs_in_node);
 429                dest = datablock_addr(page, dn.ofs_in_node);
 430
 431                /* skip recovering if dest is the same as src */
 432                if (src == dest)
 433                        continue;
 434
 435                /* dest is invalid, just invalidate src block */
 436                if (dest == NULL_ADDR) {
 437                        truncate_data_blocks_range(&dn, 1);
 438                        continue;
 439                }
 440
 441                /*
 442                 * dest is reserved block, invalidate src block
 443                 * and then reserve one new block in dnode page.
 444                 */
 445                if (dest == NEW_ADDR) {
 446                        truncate_data_blocks_range(&dn, 1);
 447                        err = reserve_new_block(&dn);
 448                        f2fs_bug_on(sbi, err);
 449                        continue;
 450                }
 451
 452                /* dest is valid block, try to recover from src to dest */
 453                if (is_valid_blkaddr(sbi, dest, META_POR)) {
 454
 455                        if (src == NULL_ADDR) {
 456                                err = reserve_new_block(&dn);
 457                                /* We should not get -ENOSPC */
 458                                f2fs_bug_on(sbi, err);
 459                        }
 460
 461                        /* Check the previous node page having this index */
 462                        err = check_index_in_prev_nodes(sbi, dest, &dn);
 463                        if (err)
 464                                goto err;
 465
 466                        /* write dummy data page */
 467                        f2fs_replace_block(sbi, &dn, src, dest,
 468                                                ni.version, false, false);
 469                        recovered++;
 470                }
 471        }
 472
 473        if (IS_INODE(dn.node_page))
 474                sync_inode_page(&dn);
 475
 476        copy_node_footer(dn.node_page, page);
 477        fill_node_footer(dn.node_page, dn.nid, ni.ino,
 478                                        ofs_of_node(page), false);
 479        set_page_dirty(dn.node_page);
 480err:
 481        f2fs_put_dnode(&dn);
 482out:
 483        f2fs_msg(sbi->sb, KERN_NOTICE,
 484                "recover_data: ino = %lx, recovered = %d blocks, err = %d",
 485                inode->i_ino, recovered, err);
 486        return err;
 487}
 488
 489static int recover_data(struct f2fs_sb_info *sbi, struct list_head *head)
 490{
 491        unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
 492        struct curseg_info *curseg;
 493        struct page *page = NULL;
 494        int err = 0;
 495        block_t blkaddr;
 496
 497        /* get node pages in the current segment */
 498        curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
 499        blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
 500
 501        while (1) {
 502                struct fsync_inode_entry *entry;
 503
 504                if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
 505                        break;
 506
 507                ra_meta_pages_cond(sbi, blkaddr);
 508
 509                page = get_tmp_page(sbi, blkaddr);
 510
 511                if (cp_ver != cpver_of_node(page)) {
 512                        f2fs_put_page(page, 1);
 513                        break;
 514                }
 515
 516                entry = get_fsync_inode(head, ino_of_node(page));
 517                if (!entry)
 518                        goto next;
 519                /*
 520                 * inode(x) | CP | inode(x) | dnode(F)
 521                 * In this case, we can lose the latest inode(x).
 522                 * So, call recover_inode for the inode update.
 523                 */
 524                if (entry->last_inode == blkaddr)
 525                        recover_inode(entry->inode, page);
 526                if (entry->last_dentry == blkaddr) {
 527                        err = recover_dentry(entry->inode, page);
 528                        if (err) {
 529                                f2fs_put_page(page, 1);
 530                                break;
 531                        }
 532                }
 533                err = do_recover_data(sbi, entry->inode, page, blkaddr);
 534                if (err) {
 535                        f2fs_put_page(page, 1);
 536                        break;
 537                }
 538
 539                if (entry->blkaddr == blkaddr) {
 540                        iput(entry->inode);
 541                        list_del(&entry->list);
 542                        kmem_cache_free(fsync_entry_slab, entry);
 543                }
 544next:
 545                /* check next segment */
 546                blkaddr = next_blkaddr_of_node(page);
 547                f2fs_put_page(page, 1);
 548        }
 549        if (!err)
 550                allocate_new_segments(sbi);
 551        return err;
 552}
 553
 554int recover_fsync_data(struct f2fs_sb_info *sbi)
 555{
 556        struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
 557        struct list_head inode_list;
 558        block_t blkaddr;
 559        int err;
 560        bool need_writecp = false;
 561
 562        fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
 563                        sizeof(struct fsync_inode_entry));
 564        if (!fsync_entry_slab)
 565                return -ENOMEM;
 566
 567        INIT_LIST_HEAD(&inode_list);
 568
 569        /* prevent checkpoint */
 570        mutex_lock(&sbi->cp_mutex);
 571
 572        blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
 573
 574        /* step #1: find fsynced inode numbers */
 575        err = find_fsync_dnodes(sbi, &inode_list);
 576        if (err)
 577                goto out;
 578
 579        if (list_empty(&inode_list))
 580                goto out;
 581
 582        need_writecp = true;
 583
 584        /* step #2: recover data */
 585        err = recover_data(sbi, &inode_list);
 586        if (!err)
 587                f2fs_bug_on(sbi, !list_empty(&inode_list));
 588out:
 589        destroy_fsync_dnodes(&inode_list);
 590        kmem_cache_destroy(fsync_entry_slab);
 591
 592        /* truncate meta pages to be used by the recovery */
 593        truncate_inode_pages_range(META_MAPPING(sbi),
 594                        (loff_t)MAIN_BLKADDR(sbi) << PAGE_SHIFT, -1);
 595
 596        if (err) {
 597                truncate_inode_pages_final(NODE_MAPPING(sbi));
 598                truncate_inode_pages_final(META_MAPPING(sbi));
 599        }
 600
 601        clear_sbi_flag(sbi, SBI_POR_DOING);
 602        if (err) {
 603                bool invalidate = false;
 604
 605                if (discard_next_dnode(sbi, blkaddr))
 606                        invalidate = true;
 607
 608                /* Flush all the NAT/SIT pages */
 609                while (get_pages(sbi, F2FS_DIRTY_META))
 610                        sync_meta_pages(sbi, META, LONG_MAX);
 611
 612                /* invalidate temporary meta page */
 613                if (invalidate)
 614                        invalidate_mapping_pages(META_MAPPING(sbi),
 615                                                        blkaddr, blkaddr);
 616
 617                set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
 618                mutex_unlock(&sbi->cp_mutex);
 619        } else if (need_writecp) {
 620                struct cp_control cpc = {
 621                        .reason = CP_RECOVERY,
 622                };
 623                mutex_unlock(&sbi->cp_mutex);
 624                err = write_checkpoint(sbi, &cpc);
 625        } else {
 626                mutex_unlock(&sbi->cp_mutex);
 627        }
 628        return err;
 629}
 630