linux/fs/f2fs/recovery.c
<<
>>
Prefs
   1/*
   2 * fs/f2fs/recovery.c
   3 *
   4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
   5 *             http://www.samsung.com/
   6 *
   7 * This program is free software; you can redistribute it and/or modify
   8 * it under the terms of the GNU General Public License version 2 as
   9 * published by the Free Software Foundation.
  10 */
  11#include <linux/fs.h>
  12#include <linux/f2fs_fs.h>
  13#include "f2fs.h"
  14#include "node.h"
  15#include "segment.h"
  16
  17/*
  18 * Roll forward recovery scenarios.
  19 *
  20 * [Term] F: fsync_mark, D: dentry_mark
  21 *
  22 * 1. inode(x) | CP | inode(x) | dnode(F)
  23 * -> Update the latest inode(x).
  24 *
  25 * 2. inode(x) | CP | inode(F) | dnode(F)
  26 * -> No problem.
  27 *
  28 * 3. inode(x) | CP | dnode(F) | inode(x)
  29 * -> Recover to the latest dnode(F), and drop the last inode(x)
  30 *
  31 * 4. inode(x) | CP | dnode(F) | inode(F)
  32 * -> No problem.
  33 *
  34 * 5. CP | inode(x) | dnode(F)
  35 * -> The inode(DF) was missing. Should drop this dnode(F).
  36 *
  37 * 6. CP | inode(DF) | dnode(F)
  38 * -> No problem.
  39 *
  40 * 7. CP | dnode(F) | inode(DF)
  41 * -> If f2fs_iget fails, then goto next to find inode(DF).
  42 *
  43 * 8. CP | dnode(F) | inode(x)
  44 * -> If f2fs_iget fails, then goto next to find inode(DF).
  45 *    But it will fail due to no inode(DF).
  46 */
  47
   /*
    * Slab cache used for struct fsync_inode_entry objects.  It is created in
    * recover_fsync_data() and destroyed there once recovery has finished;
    * add_fsync_inode() allocates from it and del_fsync_inode() frees to it.
    */
   48static struct kmem_cache *fsync_entry_slab;
  49
  50bool space_for_roll_forward(struct f2fs_sb_info *sbi)
  51{
  52        s64 nalloc = percpu_counter_sum_positive(&sbi->alloc_valid_block_count);
  53
  54        if (sbi->last_valid_block_count + nalloc > sbi->user_block_count)
  55                return false;
  56        return true;
  57}
  58
  59static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
  60                                                                nid_t ino)
  61{
  62        struct fsync_inode_entry *entry;
  63
  64        list_for_each_entry(entry, head, list)
  65                if (entry->inode->i_ino == ino)
  66                        return entry;
  67
  68        return NULL;
  69}
  70
  71static struct fsync_inode_entry *add_fsync_inode(struct f2fs_sb_info *sbi,
  72                        struct list_head *head, nid_t ino, bool quota_inode)
  73{
  74        struct inode *inode;
  75        struct fsync_inode_entry *entry;
  76        int err;
  77
  78        inode = f2fs_iget_retry(sbi->sb, ino);
  79        if (IS_ERR(inode))
  80                return ERR_CAST(inode);
  81
  82        err = dquot_initialize(inode);
  83        if (err)
  84                goto err_out;
  85
  86        if (quota_inode) {
  87                err = dquot_alloc_inode(inode);
  88                if (err)
  89                        goto err_out;
  90        }
  91
  92        entry = f2fs_kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO);
  93        entry->inode = inode;
  94        list_add_tail(&entry->list, head);
  95
  96        return entry;
  97err_out:
  98        iput(inode);
  99        return ERR_PTR(err);
 100}
 101
 102static void del_fsync_inode(struct fsync_inode_entry *entry)
 103{
 104        iput(entry->inode);
 105        list_del(&entry->list);
 106        kmem_cache_free(fsync_entry_slab, entry);
 107}
 108
 109static int recover_dentry(struct inode *inode, struct page *ipage,
 110                                                struct list_head *dir_list)
 111{
 112        struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
 113        nid_t pino = le32_to_cpu(raw_inode->i_pino);
 114        struct f2fs_dir_entry *de;
 115        struct fscrypt_name fname;
 116        struct page *page;
 117        struct inode *dir, *einode;
 118        struct fsync_inode_entry *entry;
 119        int err = 0;
 120        char *name;
 121
 122        entry = get_fsync_inode(dir_list, pino);
 123        if (!entry) {
 124                entry = add_fsync_inode(F2FS_I_SB(inode), dir_list,
 125                                                        pino, false);
 126                if (IS_ERR(entry)) {
 127                        dir = ERR_CAST(entry);
 128                        err = PTR_ERR(entry);
 129                        goto out;
 130                }
 131        }
 132
 133        dir = entry->inode;
 134
 135        memset(&fname, 0, sizeof(struct fscrypt_name));
 136        fname.disk_name.len = le32_to_cpu(raw_inode->i_namelen);
 137        fname.disk_name.name = raw_inode->i_name;
 138
 139        if (unlikely(fname.disk_name.len > F2FS_NAME_LEN)) {
 140                WARN_ON(1);
 141                err = -ENAMETOOLONG;
 142                goto out;
 143        }
 144retry:
 145        de = __f2fs_find_entry(dir, &fname, &page);
 146        if (de && inode->i_ino == le32_to_cpu(de->ino))
 147                goto out_unmap_put;
 148
 149        if (de) {
 150                einode = f2fs_iget_retry(inode->i_sb, le32_to_cpu(de->ino));
 151                if (IS_ERR(einode)) {
 152                        WARN_ON(1);
 153                        err = PTR_ERR(einode);
 154                        if (err == -ENOENT)
 155                                err = -EEXIST;
 156                        goto out_unmap_put;
 157                }
 158
 159                err = dquot_initialize(einode);
 160                if (err) {
 161                        iput(einode);
 162                        goto out_unmap_put;
 163                }
 164
 165                err = acquire_orphan_inode(F2FS_I_SB(inode));
 166                if (err) {
 167                        iput(einode);
 168                        goto out_unmap_put;
 169                }
 170                f2fs_delete_entry(de, page, dir, einode);
 171                iput(einode);
 172                goto retry;
 173        } else if (IS_ERR(page)) {
 174                err = PTR_ERR(page);
 175        } else {
 176                err = __f2fs_do_add_link(dir, &fname, inode,
 177                                        inode->i_ino, inode->i_mode);
 178        }
 179        if (err == -ENOMEM)
 180                goto retry;
 181        goto out;
 182
 183out_unmap_put:
 184        f2fs_dentry_kunmap(dir, page);
 185        f2fs_put_page(page, 0);
 186out:
 187        if (file_enc_name(inode))
 188                name = "<encrypted>";
 189        else
 190                name = raw_inode->i_name;
 191        f2fs_msg(inode->i_sb, KERN_NOTICE,
 192                        "%s: ino = %x, name = %s, dir = %lx, err = %d",
 193                        __func__, ino_of_node(ipage), name,
 194                        IS_ERR(dir) ? 0 : dir->i_ino, err);
 195        return err;
 196}
 197
 198static void recover_inode(struct inode *inode, struct page *page)
 199{
 200        struct f2fs_inode *raw = F2FS_INODE(page);
 201        char *name;
 202
 203        inode->i_mode = le16_to_cpu(raw->i_mode);
 204        f2fs_i_size_write(inode, le64_to_cpu(raw->i_size));
 205        inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime);
 206        inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
 207        inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
 208        inode->i_atime.tv_nsec = le32_to_cpu(raw->i_atime_nsec);
 209        inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
 210        inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
 211
 212        F2FS_I(inode)->i_advise = raw->i_advise;
 213
 214        if (file_enc_name(inode))
 215                name = "<encrypted>";
 216        else
 217                name = F2FS_INODE(page)->i_name;
 218
 219        f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s",
 220                        ino_of_node(page), name);
 221}
 222
 223static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
 224                                bool check_only)
 225{
 226        struct curseg_info *curseg;
 227        struct page *page = NULL;
 228        block_t blkaddr;
 229        int err = 0;
 230
 231        /* get node pages in the current segment */
 232        curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
 233        blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
 234
 235        while (1) {
 236                struct fsync_inode_entry *entry;
 237
 238                if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
 239                        return 0;
 240
 241                page = get_tmp_page(sbi, blkaddr);
 242
 243                if (!is_recoverable_dnode(page))
 244                        break;
 245
 246                if (!is_fsync_dnode(page))
 247                        goto next;
 248
 249                entry = get_fsync_inode(head, ino_of_node(page));
 250                if (!entry) {
 251                        bool quota_inode = false;
 252
 253                        if (!check_only &&
 254                                        IS_INODE(page) && is_dent_dnode(page)) {
 255                                err = recover_inode_page(sbi, page);
 256                                if (err)
 257                                        break;
 258                                quota_inode = true;
 259                        }
 260
 261                        /*
 262                         * CP | dnode(F) | inode(DF)
 263                         * For this case, we should not give up now.
 264                         */
 265                        entry = add_fsync_inode(sbi, head, ino_of_node(page),
 266                                                                quota_inode);
 267                        if (IS_ERR(entry)) {
 268                                err = PTR_ERR(entry);
 269                                if (err == -ENOENT) {
 270                                        err = 0;
 271                                        goto next;
 272                                }
 273                                break;
 274                        }
 275                }
 276                entry->blkaddr = blkaddr;
 277
 278                if (IS_INODE(page) && is_dent_dnode(page))
 279                        entry->last_dentry = blkaddr;
 280next:
 281                /* check next segment */
 282                blkaddr = next_blkaddr_of_node(page);
 283                f2fs_put_page(page, 1);
 284
 285                ra_meta_pages_cond(sbi, blkaddr);
 286        }
 287        f2fs_put_page(page, 1);
 288        return err;
 289}
 290
 291static void destroy_fsync_dnodes(struct list_head *head)
 292{
 293        struct fsync_inode_entry *entry, *tmp;
 294
 295        list_for_each_entry_safe(entry, tmp, head, list)
 296                del_fsync_inode(entry);
 297}
 298
 299static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
 300                        block_t blkaddr, struct dnode_of_data *dn)
 301{
 302        struct seg_entry *sentry;
 303        unsigned int segno = GET_SEGNO(sbi, blkaddr);
 304        unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
 305        struct f2fs_summary_block *sum_node;
 306        struct f2fs_summary sum;
 307        struct page *sum_page, *node_page;
 308        struct dnode_of_data tdn = *dn;
 309        nid_t ino, nid;
 310        struct inode *inode;
 311        unsigned int offset;
 312        block_t bidx;
 313        int i;
 314
 315        sentry = get_seg_entry(sbi, segno);
 316        if (!f2fs_test_bit(blkoff, sentry->cur_valid_map))
 317                return 0;
 318
 319        /* Get the previous summary */
 320        for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
 321                struct curseg_info *curseg = CURSEG_I(sbi, i);
 322                if (curseg->segno == segno) {
 323                        sum = curseg->sum_blk->entries[blkoff];
 324                        goto got_it;
 325                }
 326        }
 327
 328        sum_page = get_sum_page(sbi, segno);
 329        sum_node = (struct f2fs_summary_block *)page_address(sum_page);
 330        sum = sum_node->entries[blkoff];
 331        f2fs_put_page(sum_page, 1);
 332got_it:
 333        /* Use the locked dnode page and inode */
 334        nid = le32_to_cpu(sum.nid);
 335        if (dn->inode->i_ino == nid) {
 336                tdn.nid = nid;
 337                if (!dn->inode_page_locked)
 338                        lock_page(dn->inode_page);
 339                tdn.node_page = dn->inode_page;
 340                tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
 341                goto truncate_out;
 342        } else if (dn->nid == nid) {
 343                tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
 344                goto truncate_out;
 345        }
 346
 347        /* Get the node page */
 348        node_page = get_node_page(sbi, nid);
 349        if (IS_ERR(node_page))
 350                return PTR_ERR(node_page);
 351
 352        offset = ofs_of_node(node_page);
 353        ino = ino_of_node(node_page);
 354        f2fs_put_page(node_page, 1);
 355
 356        if (ino != dn->inode->i_ino) {
 357                int ret;
 358
 359                /* Deallocate previous index in the node page */
 360                inode = f2fs_iget_retry(sbi->sb, ino);
 361                if (IS_ERR(inode))
 362                        return PTR_ERR(inode);
 363
 364                ret = dquot_initialize(inode);
 365                if (ret) {
 366                        iput(inode);
 367                        return ret;
 368                }
 369        } else {
 370                inode = dn->inode;
 371        }
 372
 373        bidx = start_bidx_of_node(offset, inode) + le16_to_cpu(sum.ofs_in_node);
 374
 375        /*
 376         * if inode page is locked, unlock temporarily, but its reference
 377         * count keeps alive.
 378         */
 379        if (ino == dn->inode->i_ino && dn->inode_page_locked)
 380                unlock_page(dn->inode_page);
 381
 382        set_new_dnode(&tdn, inode, NULL, NULL, 0);
 383        if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE))
 384                goto out;
 385
 386        if (tdn.data_blkaddr == blkaddr)
 387                truncate_data_blocks_range(&tdn, 1);
 388
 389        f2fs_put_dnode(&tdn);
 390out:
 391        if (ino != dn->inode->i_ino)
 392                iput(inode);
 393        else if (dn->inode_page_locked)
 394                lock_page(dn->inode_page);
 395        return 0;
 396
 397truncate_out:
 398        if (datablock_addr(tdn.inode, tdn.node_page,
 399                                        tdn.ofs_in_node) == blkaddr)
 400                truncate_data_blocks_range(&tdn, 1);
 401        if (dn->inode->i_ino == nid && !dn->inode_page_locked)
 402                unlock_page(dn->inode_page);
 403        return 0;
 404}
 405
 406static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
 407                                        struct page *page, block_t blkaddr)
 408{
 409        struct dnode_of_data dn;
 410        struct node_info ni;
 411        unsigned int start, end;
 412        int err = 0, recovered = 0;
 413
 414        /* step 1: recover xattr */
 415        if (IS_INODE(page)) {
 416                recover_inline_xattr(inode, page);
 417        } else if (f2fs_has_xattr_block(ofs_of_node(page))) {
 418                err = recover_xattr_data(inode, page, blkaddr);
 419                if (!err)
 420                        recovered++;
 421                goto out;
 422        }
 423
 424        /* step 2: recover inline data */
 425        if (recover_inline_data(inode, page))
 426                goto out;
 427
 428        /* step 3: recover data indices */
 429        start = start_bidx_of_node(ofs_of_node(page), inode);
 430        end = start + ADDRS_PER_PAGE(page, inode);
 431
 432        set_new_dnode(&dn, inode, NULL, NULL, 0);
 433retry_dn:
 434        err = get_dnode_of_data(&dn, start, ALLOC_NODE);
 435        if (err) {
 436                if (err == -ENOMEM) {
 437                        congestion_wait(BLK_RW_ASYNC, HZ/50);
 438                        goto retry_dn;
 439                }
 440                goto out;
 441        }
 442
 443        f2fs_wait_on_page_writeback(dn.node_page, NODE, true);
 444
 445        get_node_info(sbi, dn.nid, &ni);
 446        f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
 447        f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page));
 448
 449        for (; start < end; start++, dn.ofs_in_node++) {
 450                block_t src, dest;
 451
 452                src = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node);
 453                dest = datablock_addr(dn.inode, page, dn.ofs_in_node);
 454
 455                /* skip recovering if dest is the same as src */
 456                if (src == dest)
 457                        continue;
 458
 459                /* dest is invalid, just invalidate src block */
 460                if (dest == NULL_ADDR) {
 461                        truncate_data_blocks_range(&dn, 1);
 462                        continue;
 463                }
 464
 465                if (!file_keep_isize(inode) &&
 466                        (i_size_read(inode) <= ((loff_t)start << PAGE_SHIFT)))
 467                        f2fs_i_size_write(inode,
 468                                (loff_t)(start + 1) << PAGE_SHIFT);
 469
 470                /*
 471                 * dest is reserved block, invalidate src block
 472                 * and then reserve one new block in dnode page.
 473                 */
 474                if (dest == NEW_ADDR) {
 475                        truncate_data_blocks_range(&dn, 1);
 476                        reserve_new_block(&dn);
 477                        continue;
 478                }
 479
 480                /* dest is valid block, try to recover from src to dest */
 481                if (is_valid_blkaddr(sbi, dest, META_POR)) {
 482
 483                        if (src == NULL_ADDR) {
 484                                err = reserve_new_block(&dn);
 485#ifdef CONFIG_F2FS_FAULT_INJECTION
 486                                while (err)
 487                                        err = reserve_new_block(&dn);
 488#endif
 489                                /* We should not get -ENOSPC */
 490                                f2fs_bug_on(sbi, err);
 491                                if (err)
 492                                        goto err;
 493                        }
 494retry_prev:
 495                        /* Check the previous node page having this index */
 496                        err = check_index_in_prev_nodes(sbi, dest, &dn);
 497                        if (err) {
 498                                if (err == -ENOMEM) {
 499                                        congestion_wait(BLK_RW_ASYNC, HZ/50);
 500                                        goto retry_prev;
 501                                }
 502                                goto err;
 503                        }
 504
 505                        /* write dummy data page */
 506                        f2fs_replace_block(sbi, &dn, src, dest,
 507                                                ni.version, false, false);
 508                        recovered++;
 509                }
 510        }
 511
 512        copy_node_footer(dn.node_page, page);
 513        fill_node_footer(dn.node_page, dn.nid, ni.ino,
 514                                        ofs_of_node(page), false);
 515        set_page_dirty(dn.node_page);
 516err:
 517        f2fs_put_dnode(&dn);
 518out:
 519        f2fs_msg(sbi->sb, KERN_NOTICE,
 520                "recover_data: ino = %lx (i_size: %s) recovered = %d, err = %d",
 521                inode->i_ino,
 522                file_keep_isize(inode) ? "keep" : "recover",
 523                recovered, err);
 524        return err;
 525}
 526
 527static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
 528                                                struct list_head *dir_list)
 529{
 530        struct curseg_info *curseg;
 531        struct page *page = NULL;
 532        int err = 0;
 533        block_t blkaddr;
 534
 535        /* get node pages in the current segment */
 536        curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
 537        blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
 538
 539        while (1) {
 540                struct fsync_inode_entry *entry;
 541
 542                if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
 543                        break;
 544
 545                ra_meta_pages_cond(sbi, blkaddr);
 546
 547                page = get_tmp_page(sbi, blkaddr);
 548
 549                if (!is_recoverable_dnode(page)) {
 550                        f2fs_put_page(page, 1);
 551                        break;
 552                }
 553
 554                entry = get_fsync_inode(inode_list, ino_of_node(page));
 555                if (!entry)
 556                        goto next;
 557                /*
 558                 * inode(x) | CP | inode(x) | dnode(F)
 559                 * In this case, we can lose the latest inode(x).
 560                 * So, call recover_inode for the inode update.
 561                 */
 562                if (IS_INODE(page))
 563                        recover_inode(entry->inode, page);
 564                if (entry->last_dentry == blkaddr) {
 565                        err = recover_dentry(entry->inode, page, dir_list);
 566                        if (err) {
 567                                f2fs_put_page(page, 1);
 568                                break;
 569                        }
 570                }
 571                err = do_recover_data(sbi, entry->inode, page, blkaddr);
 572                if (err) {
 573                        f2fs_put_page(page, 1);
 574                        break;
 575                }
 576
 577                if (entry->blkaddr == blkaddr)
 578                        del_fsync_inode(entry);
 579next:
 580                /* check next segment */
 581                blkaddr = next_blkaddr_of_node(page);
 582                f2fs_put_page(page, 1);
 583        }
 584        if (!err)
 585                allocate_new_segments(sbi);
 586        return err;
 587}
 588
 589int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
 590{
 591        struct list_head inode_list;
 592        struct list_head dir_list;
 593        int err;
 594        int ret = 0;
 595        unsigned long s_flags = sbi->sb->s_flags;
 596        bool need_writecp = false;
 597
 598        if (s_flags & MS_RDONLY) {
 599                f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs");
 600                sbi->sb->s_flags &= ~MS_RDONLY;
 601        }
 602
 603#ifdef CONFIG_QUOTA
 604        /* Needed for iput() to work correctly and not trash data */
 605        sbi->sb->s_flags |= MS_ACTIVE;
 606        /* Turn on quotas so that they are updated correctly */
 607        f2fs_enable_quota_files(sbi);
 608#endif
 609
 610        fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
 611                        sizeof(struct fsync_inode_entry));
 612        if (!fsync_entry_slab) {
 613                err = -ENOMEM;
 614                goto out;
 615        }
 616
 617        INIT_LIST_HEAD(&inode_list);
 618        INIT_LIST_HEAD(&dir_list);
 619
 620        /* prevent checkpoint */
 621        mutex_lock(&sbi->cp_mutex);
 622
 623        /* step #1: find fsynced inode numbers */
 624        err = find_fsync_dnodes(sbi, &inode_list, check_only);
 625        if (err || list_empty(&inode_list))
 626                goto skip;
 627
 628        if (check_only) {
 629                ret = 1;
 630                goto skip;
 631        }
 632
 633        need_writecp = true;
 634
 635        /* step #2: recover data */
 636        err = recover_data(sbi, &inode_list, &dir_list);
 637        if (!err)
 638                f2fs_bug_on(sbi, !list_empty(&inode_list));
 639skip:
 640        destroy_fsync_dnodes(&inode_list);
 641
 642        /* truncate meta pages to be used by the recovery */
 643        truncate_inode_pages_range(META_MAPPING(sbi),
 644                        (loff_t)MAIN_BLKADDR(sbi) << PAGE_SHIFT, -1);
 645
 646        if (err) {
 647                truncate_inode_pages_final(NODE_MAPPING(sbi));
 648                truncate_inode_pages_final(META_MAPPING(sbi));
 649        }
 650
 651        clear_sbi_flag(sbi, SBI_POR_DOING);
 652        mutex_unlock(&sbi->cp_mutex);
 653
 654        /* let's drop all the directory inodes for clean checkpoint */
 655        destroy_fsync_dnodes(&dir_list);
 656
 657        if (!err && need_writecp) {
 658                struct cp_control cpc = {
 659                        .reason = CP_RECOVERY,
 660                };
 661                err = write_checkpoint(sbi, &cpc);
 662        }
 663
 664        kmem_cache_destroy(fsync_entry_slab);
 665out:
 666#ifdef CONFIG_QUOTA
 667        /* Turn quotas off */
 668        f2fs_quota_off_umount(sbi->sb);
 669#endif
 670        sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */
 671
 672        return ret ? ret: err;
 673}
 674