linux/fs/f2fs/checkpoint.c
   1/*
   2 * fs/f2fs/checkpoint.c
   3 *
   4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
   5 *             http://www.samsung.com/
   6 *
   7 * This program is free software; you can redistribute it and/or modify
   8 * it under the terms of the GNU General Public License version 2 as
   9 * published by the Free Software Foundation.
  10 */
  11#include <linux/fs.h>
  12#include <linux/bio.h>
  13#include <linux/mpage.h>
  14#include <linux/writeback.h>
  15#include <linux/blkdev.h>
  16#include <linux/f2fs_fs.h>
  17#include <linux/pagevec.h>
  18#include <linux/swap.h>
  19
  20#include "f2fs.h"
  21#include "node.h"
  22#include "segment.h"
  23#include "trace.h"
  24#include <trace/events/f2fs.h>
  25
  26static struct kmem_cache *ino_entry_slab;
  27struct kmem_cache *inode_entry_slab;
  28
  29/*
  30 * We guarantee no failure on the returned page.
  31 */
  32struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
  33{
  34        struct address_space *mapping = META_MAPPING(sbi);
  35        struct page *page = NULL;
  36repeat:
  37        page = grab_cache_page(mapping, index);
  38        if (!page) {
  39                cond_resched();
  40                goto repeat;
  41        }
  42        f2fs_wait_on_page_writeback(page, META);
  43        SetPageUptodate(page);
  44        return page;
  45}
  46
  47/*
  48 * We guarantee no failure on the returned page.
  49 */
  50static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
  51                                                        bool is_meta)
  52{
  53        struct address_space *mapping = META_MAPPING(sbi);
  54        struct page *page;
  55        struct f2fs_io_info fio = {
  56                .sbi = sbi,
  57                .type = META,
  58                .rw = READ_SYNC | REQ_META | REQ_PRIO,
  59                .blk_addr = index,
  60                .encrypted_page = NULL,
  61        };
  62
  63        if (unlikely(!is_meta))
  64                fio.rw &= ~REQ_META;
  65repeat:
  66        page = grab_cache_page(mapping, index);
  67        if (!page) {
  68                cond_resched();
  69                goto repeat;
  70        }
  71        if (PageUptodate(page))
  72                goto out;
  73
  74        fio.page = page;
  75
  76        if (f2fs_submit_page_bio(&fio)) {
  77                f2fs_put_page(page, 1);
  78                goto repeat;
  79        }
  80
  81        lock_page(page);
  82        if (unlikely(page->mapping != mapping)) {
  83                f2fs_put_page(page, 1);
  84                goto repeat;
  85        }
  86
  87        /*
  88         * if there is any IO error while accessing the device, make the
  89         * filesystem read-only and make sure we do not write a checkpoint
  90         * with a non-uptodate meta page.
  91         */
  92        if (unlikely(!PageUptodate(page)))
  93                f2fs_stop_checkpoint(sbi);
  94out:
  95        return page;
  96}
  97
  98struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
  99{
 100        return __get_meta_page(sbi, index, true);
 101}
 102
 103/* for POR (power-off recovery) only */
 104struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index)
 105{
 106        return __get_meta_page(sbi, index, false);
 107}
 108
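    /*
     * Check that @blkaddr lies within the on-disk area that is valid for the
     * given meta area @type, before it is used for readahead.
     */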
 109bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type)
 110{
 111        switch (type) {
 112        case META_NAT:
 113                break;
 114        case META_SIT:
 115                if (unlikely(blkaddr >= SIT_BLK_CNT(sbi)))
 116                        return false;
 117                break;
 118        case META_SSA:
 119                if (unlikely(blkaddr >= MAIN_BLKADDR(sbi) ||
 120                        blkaddr < SM_I(sbi)->ssa_blkaddr))
 121                        return false;
 122                break;
 123        case META_CP:
 124                if (unlikely(blkaddr >= SIT_I(sbi)->sit_base_addr ||
 125                        blkaddr < __start_cp_addr(sbi)))
 126                        return false;
 127                break;
 128        case META_POR:
 129                if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
 130                        blkaddr < MAIN_BLKADDR(sbi)))
 131                        return false;
 132                break;
 133        default:
 134                BUG();
 135        }
 136
 137        return true;
 138}
 139
 140/*
 141 * Readahead CP/NAT/SIT/SSA pages
 142 */
 143int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
 144                                                        int type, bool sync)
 145{
 146        block_t prev_blk_addr = 0;
 147        struct page *page;
 148        block_t blkno = start;
 149        struct f2fs_io_info fio = {
 150                .sbi = sbi,
 151                .type = META,
 152                .rw = sync ? (READ_SYNC | REQ_META | REQ_PRIO) : READA,
 153                .encrypted_page = NULL,
 154        };
 155
 156        if (unlikely(type == META_POR))
 157                fio.rw &= ~REQ_META;
 158
 159        for (; nrpages-- > 0; blkno++) {
 160
 161                if (!is_valid_blkaddr(sbi, blkno, type))
 162                        goto out;
 163
 164                switch (type) {
 165                case META_NAT:
 166                        if (unlikely(blkno >=
 167                                        NAT_BLOCK_OFFSET(NM_I(sbi)->max_nid)))
 168                                blkno = 0;
 169                        /* get nat block addr */
 170                        fio.blk_addr = current_nat_addr(sbi,
 171                                        blkno * NAT_ENTRY_PER_BLOCK);
 172                        break;
 173                case META_SIT:
 174                        /* get sit block addr */
 175                        fio.blk_addr = current_sit_addr(sbi,
 176                                        blkno * SIT_ENTRY_PER_BLOCK);
 177                        if (blkno != start && prev_blk_addr + 1 != fio.blk_addr)
 178                                goto out;
 179                        prev_blk_addr = fio.blk_addr;
 180                        break;
 181                case META_SSA:
 182                case META_CP:
 183                case META_POR:
 184                        fio.blk_addr = blkno;
 185                        break;
 186                default:
 187                        BUG();
 188                }
 189
 190                page = grab_cache_page(META_MAPPING(sbi), fio.blk_addr);
 191                if (!page)
 192                        continue;
 193                if (PageUptodate(page)) {
 194                        f2fs_put_page(page, 1);
 195                        continue;
 196                }
 197
 198                fio.page = page;
 199                f2fs_submit_page_mbio(&fio);
 200                f2fs_put_page(page, 0);
 201        }
 202out:
 203        f2fs_submit_merged_bio(sbi, META, READ);
 204        return blkno - start;
 205}
 206
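    /*
     * Readahead META_POR pages around @index during recovery, but only if the
     * page at @index is not already cached and uptodate.
     */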
 207void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index)
 208{
 209        struct page *page;
 210        bool readahead = false;
 211
 212        page = find_get_page(META_MAPPING(sbi), index);
 213        if (!page || !PageUptodate(page))
 214                readahead = true;
 215        f2fs_put_page(page, 0);
 216
 217        if (readahead)
 218                ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR, true);
 219}
 220
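    /*
     * Write back a single dirty meta page. The page is redirtied instead of
     * written while recovery is in progress, when reclaim asks for a page
     * below the SSA area, or after a checkpoint error.
     */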
 221static int f2fs_write_meta_page(struct page *page,
 222                                struct writeback_control *wbc)
 223{
 224        struct f2fs_sb_info *sbi = F2FS_P_SB(page);
 225
 226        trace_f2fs_writepage(page, META);
 227
 228        if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
 229                goto redirty_out;
 230        if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0))
 231                goto redirty_out;
 232        if (unlikely(f2fs_cp_error(sbi)))
 233                goto redirty_out;
 234
 235        f2fs_wait_on_page_writeback(page, META);
 236        write_meta_page(sbi, page);
 237        dec_page_count(sbi, F2FS_DIRTY_META);
 238        unlock_page(page);
 239
 240        if (wbc->for_reclaim)
 241                f2fs_submit_merged_bio(sbi, META, WRITE);
 242        return 0;
 243
 244redirty_out:
 245        redirty_page_for_writepage(wbc, page);
 246        return AOP_WRITEPAGE_ACTIVATE;
 247}
 248
 249static int f2fs_write_meta_pages(struct address_space *mapping,
 250                                struct writeback_control *wbc)
 251{
 252        struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
 253        long diff, written;
 254
 255        trace_f2fs_writepages(mapping->host, wbc, META);
 256
 257        /* collect a number of dirty meta pages and write them out together */
 258        if (wbc->for_kupdate ||
 259                get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
 260                goto skip_write;
 261
 262        /* take cp_mutex so that meta writeback does not race with checkpoint */
 263        mutex_lock(&sbi->cp_mutex);
 264        diff = nr_pages_to_write(sbi, META, wbc);
 265        written = sync_meta_pages(sbi, META, wbc->nr_to_write);
 266        mutex_unlock(&sbi->cp_mutex);
 267        wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
 268        return 0;
 269
 270skip_write:
 271        wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_META);
 272        return 0;
 273}
 274
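    /*
     * Write back up to @nr_to_write dirty meta pages of @type. When the count
     * is bounded, writing stops at the first non-contiguous page so that a
     * checkpoint pack is submitted as one contiguous run of blocks.
     */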
 275long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
 276                                                long nr_to_write)
 277{
 278        struct address_space *mapping = META_MAPPING(sbi);
 279        pgoff_t index = 0, end = LONG_MAX, prev = LONG_MAX;
 280        struct pagevec pvec;
 281        long nwritten = 0;
 282        struct writeback_control wbc = {
 283                .for_reclaim = 0,
 284        };
 285
 286        pagevec_init(&pvec, 0);
 287
 288        while (index <= end) {
 289                int i, nr_pages;
 290                nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
 291                                PAGECACHE_TAG_DIRTY,
 292                                min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
 293                if (unlikely(nr_pages == 0))
 294                        break;
 295
 296                for (i = 0; i < nr_pages; i++) {
 297                        struct page *page = pvec.pages[i];
 298
 299                        if (prev == LONG_MAX)
 300                                prev = page->index - 1;
 301                        if (nr_to_write != LONG_MAX && page->index != prev + 1) {
 302                                pagevec_release(&pvec);
 303                                goto stop;
 304                        }
 305
 306                        lock_page(page);
 307
 308                        if (unlikely(page->mapping != mapping)) {
 309continue_unlock:
 310                                unlock_page(page);
 311                                continue;
 312                        }
 313                        if (!PageDirty(page)) {
 314                                /* someone wrote it for us */
 315                                goto continue_unlock;
 316                        }
 317
 318                        if (!clear_page_dirty_for_io(page))
 319                                goto continue_unlock;
 320
 321                        if (mapping->a_ops->writepage(page, &wbc)) {
 322                                unlock_page(page);
 323                                break;
 324                        }
 325                        nwritten++;
 326                        prev = page->index;
 327                        if (unlikely(nwritten >= nr_to_write))
 328                                break;
 329                }
 330                pagevec_release(&pvec);
 331                cond_resched();
 332        }
 333stop:
 334        if (nwritten)
 335                f2fs_submit_merged_bio(sbi, type, WRITE);
 336
 337        return nwritten;
 338}
 339
 340static int f2fs_set_meta_page_dirty(struct page *page)
 341{
 342        trace_f2fs_set_page_dirty(page, META);
 343
 344        SetPageUptodate(page);
 345        if (!PageDirty(page)) {
 346                __set_page_dirty_nobuffers(page);
 347                inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META);
 348                SetPagePrivate(page);
 349                f2fs_trace_pid(page);
 350                return 1;
 351        }
 352        return 0;
 353}
 354
 355const struct address_space_operations f2fs_meta_aops = {
 356        .writepage      = f2fs_write_meta_page,
 357        .writepages     = f2fs_write_meta_pages,
 358        .set_page_dirty = f2fs_set_meta_page_dirty,
 359        .invalidatepage = f2fs_invalidate_page,
 360        .releasepage    = f2fs_release_page,
 361};
 362
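    /*
     * Track @ino in the per-type radix tree and list (ORPHAN_INO, APPEND_INO
     * or UPDATE_INO). The entry is preallocated and freed again if another
     * thread inserted the same ino first.
     */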
 363static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
 364{
 365        struct inode_management *im = &sbi->im[type];
 366        struct ino_entry *e, *tmp;
 367
 368        tmp = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_NOFS);
 369retry:
 370        radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);
 371
 372        spin_lock(&im->ino_lock);
 373        e = radix_tree_lookup(&im->ino_root, ino);
 374        if (!e) {
 375                e = tmp;
 376                if (radix_tree_insert(&im->ino_root, ino, e)) {
 377                        spin_unlock(&im->ino_lock);
 378                        radix_tree_preload_end();
 379                        goto retry;
 380                }
 381                memset(e, 0, sizeof(struct ino_entry));
 382                e->ino = ino;
 383
 384                list_add_tail(&e->list, &im->ino_list);
 385                if (type != ORPHAN_INO)
 386                        im->ino_num++;
 387        }
 388        spin_unlock(&im->ino_lock);
 389        radix_tree_preload_end();
 390
 391        if (e != tmp)
 392                kmem_cache_free(ino_entry_slab, tmp);
 393}
 394
 395static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
 396{
 397        struct inode_management *im = &sbi->im[type];
 398        struct ino_entry *e;
 399
 400        spin_lock(&im->ino_lock);
 401        e = radix_tree_lookup(&im->ino_root, ino);
 402        if (e) {
 403                list_del(&e->list);
 404                radix_tree_delete(&im->ino_root, ino);
 405                im->ino_num--;
 406                spin_unlock(&im->ino_lock);
 407                kmem_cache_free(ino_entry_slab, e);
 408                return;
 409        }
 410        spin_unlock(&im->ino_lock);
 411}
 412
 413void add_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
 414{
 415        /* add new dirty ino entry into list */
 416        __add_ino_entry(sbi, ino, type);
 417}
 418
 419void remove_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
 420{
 421        /* remove dirty ino entry from list */
 422        __remove_ino_entry(sbi, ino, type);
 423}
 424
 425/* mode should be APPEND_INO or UPDATE_INO */
 426bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
 427{
 428        struct inode_management *im = &sbi->im[mode];
 429        struct ino_entry *e;
 430
 431        spin_lock(&im->ino_lock);
 432        e = radix_tree_lookup(&im->ino_root, ino);
 433        spin_unlock(&im->ino_lock);
 434        return e ? true : false;
 435}
 436
 437void release_dirty_inode(struct f2fs_sb_info *sbi)
 438{
 439        struct ino_entry *e, *tmp;
 440        int i;
 441
 442        for (i = APPEND_INO; i <= UPDATE_INO; i++) {
 443                struct inode_management *im = &sbi->im[i];
 444
 445                spin_lock(&im->ino_lock);
 446                list_for_each_entry_safe(e, tmp, &im->ino_list, list) {
 447                        list_del(&e->list);
 448                        radix_tree_delete(&im->ino_root, e->ino);
 449                        kmem_cache_free(ino_entry_slab, e);
 450                        im->ino_num--;
 451                }
 452                spin_unlock(&im->ino_lock);
 453        }
 454}
 455
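    /* reserve room for one more orphan inode; -ENOSPC once max_orphans is reached */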
 456int acquire_orphan_inode(struct f2fs_sb_info *sbi)
 457{
 458        struct inode_management *im = &sbi->im[ORPHAN_INO];
 459        int err = 0;
 460
 461        spin_lock(&im->ino_lock);
 462        if (unlikely(im->ino_num >= sbi->max_orphans))
 463                err = -ENOSPC;
 464        else
 465                im->ino_num++;
 466        spin_unlock(&im->ino_lock);
 467
 468        return err;
 469}
 470
 471void release_orphan_inode(struct f2fs_sb_info *sbi)
 472{
 473        struct inode_management *im = &sbi->im[ORPHAN_INO];
 474
 475        spin_lock(&im->ino_lock);
 476        f2fs_bug_on(sbi, im->ino_num == 0);
 477        im->ino_num--;
 478        spin_unlock(&im->ino_lock);
 479}
 480
 481void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
 482{
 483        /* add new orphan ino entry into list */
 484        __add_ino_entry(sbi, ino, ORPHAN_INO);
 485}
 486
 487void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
 488{
 489        /* remove orphan entry from orphan list */
 490        __remove_ino_entry(sbi, ino, ORPHAN_INO);
 491}
 492
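    /*
     * Read the orphan inode back and drop its link count, so that the final
     * iput() truncates its data and frees it.
     */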
 493static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
 494{
 495        struct inode *inode;
 496
 497        inode = f2fs_iget(sbi->sb, ino);
 498        if (IS_ERR(inode)) {
 499                /*
 500                 * it is a bug if we cannot find the inode that an
 501                 * orphan entry refers to.
 502                 */
 503                f2fs_bug_on(sbi, PTR_ERR(inode) == -ENOENT);
 504                return PTR_ERR(inode);
 505        }
 506
 507        clear_nlink(inode);
 508
 509        /* truncate all the data during iput */
 510        iput(inode);
 511        return 0;
 512}
 513
 514int recover_orphan_inodes(struct f2fs_sb_info *sbi)
 515{
 516        block_t start_blk, orphan_blocks, i, j;
 517        int err;
 518
 519        if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
 520                return 0;
 521
 522        start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
 523        orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi);
 524
 525        ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP, true);
 526
 527        for (i = 0; i < orphan_blocks; i++) {
 528                struct page *page = get_meta_page(sbi, start_blk + i);
 529                struct f2fs_orphan_block *orphan_blk;
 530
 531                orphan_blk = (struct f2fs_orphan_block *)page_address(page);
 532                for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) {
 533                        nid_t ino = le32_to_cpu(orphan_blk->ino[j]);
 534                        err = recover_orphan_inode(sbi, ino);
 535                        if (err) {
 536                                f2fs_put_page(page, 1);
 537                                return err;
 538                        }
 539                }
 540                f2fs_put_page(page, 1);
 541        }
 542        /* clear Orphan Flag */
 543        clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG);
 544        return 0;
 545}
 546
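    /*
     * Flush the in-memory orphan ino list into the orphan blocks of the
     * checkpoint pack, starting at @start_blk.
     */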
 547static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
 548{
 549        struct list_head *head;
 550        struct f2fs_orphan_block *orphan_blk = NULL;
 551        unsigned int nentries = 0;
 552        unsigned short index = 1;
 553        unsigned short orphan_blocks;
 554        struct page *page = NULL;
 555        struct ino_entry *orphan = NULL;
 556        struct inode_management *im = &sbi->im[ORPHAN_INO];
 557
 558        orphan_blocks = GET_ORPHAN_BLOCKS(im->ino_num);
 559
 560        /*
 561         * we don't need to take spin_lock(&im->ino_lock) here, since all
 562         * orphan inode operations are covered by f2fs_lock_op().
 563         * Besides, the spin_lock must not be held across the page operations below.
 564         */
 565        head = &im->ino_list;
 566
 567        /* loop over each orphan inode entry and write it into an orphan block */
 568        list_for_each_entry(orphan, head, list) {
 569                if (!page) {
 570                        page = grab_meta_page(sbi, start_blk++);
 571                        orphan_blk =
 572                                (struct f2fs_orphan_block *)page_address(page);
 573                        memset(orphan_blk, 0, sizeof(*orphan_blk));
 574                }
 575
 576                orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);
 577
 578                if (nentries == F2FS_ORPHANS_PER_BLOCK) {
 579                        /*
 580                         * the current orphan block is full
 581                         * (1020 entries), so flush it and
 582                         * bring another one into memory
 583                         */
 584                        orphan_blk->blk_addr = cpu_to_le16(index);
 585                        orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
 586                        orphan_blk->entry_count = cpu_to_le32(nentries);
 587                        set_page_dirty(page);
 588                        f2fs_put_page(page, 1);
 589                        index++;
 590                        nentries = 0;
 591                        page = NULL;
 592                }
 593        }
 594
 595        if (page) {
 596                orphan_blk->blk_addr = cpu_to_le16(index);
 597                orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
 598                orphan_blk->entry_count = cpu_to_le32(nentries);
 599                set_page_dirty(page);
 600                f2fs_put_page(page, 1);
 601        }
 602}
 603
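    /*
     * Read the first and last blocks of one CP pack, check both CRCs and that
     * the two version numbers match; return the first cp page on success,
     * or NULL if the pack is invalid.
     */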
 604static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
 605                                block_t cp_addr, unsigned long long *version)
 606{
 607        struct page *cp_page_1, *cp_page_2 = NULL;
 608        unsigned long blk_size = sbi->blocksize;
 609        struct f2fs_checkpoint *cp_block;
 610        unsigned long long cur_version = 0, pre_version = 0;
 611        size_t crc_offset;
 612        __u32 crc = 0;
 613
 614        /* Read the 1st cp block in this CP pack */
 615        cp_page_1 = get_meta_page(sbi, cp_addr);
 616
 617        /* get the version number */
 618        cp_block = (struct f2fs_checkpoint *)page_address(cp_page_1);
 619        crc_offset = le32_to_cpu(cp_block->checksum_offset);
 620        if (crc_offset >= blk_size)
 621                goto invalid_cp1;
 622
 623        crc = le32_to_cpu(*((__le32 *)((unsigned char *)cp_block + crc_offset)));
 624        if (!f2fs_crc_valid(crc, cp_block, crc_offset))
 625                goto invalid_cp1;
 626
 627        pre_version = cur_cp_version(cp_block);
 628
 629        /* Read the 2nd cp block in this CP pack */
 630        cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
 631        cp_page_2 = get_meta_page(sbi, cp_addr);
 632
 633        cp_block = (struct f2fs_checkpoint *)page_address(cp_page_2);
 634        crc_offset = le32_to_cpu(cp_block->checksum_offset);
 635        if (crc_offset >= blk_size)
 636                goto invalid_cp2;
 637
 638        crc = le32_to_cpu(*((__le32 *)((unsigned char *)cp_block + crc_offset)));
 639        if (!f2fs_crc_valid(crc, cp_block, crc_offset))
 640                goto invalid_cp2;
 641
 642        cur_version = cur_cp_version(cp_block);
 643
 644        if (cur_version == pre_version) {
 645                *version = cur_version;
 646                f2fs_put_page(cp_page_2, 1);
 647                return cp_page_1;
 648        }
 649invalid_cp2:
 650        f2fs_put_page(cp_page_2, 1);
 651invalid_cp1:
 652        f2fs_put_page(cp_page_1, 1);
 653        return NULL;
 654}
 655
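    /*
     * Validate both CP packs, pick the one with the newer version and cache
     * its checkpoint block (plus cp payload blocks) in sbi->ckpt.
     */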
 656int get_valid_checkpoint(struct f2fs_sb_info *sbi)
 657{
 658        struct f2fs_checkpoint *cp_block;
 659        struct f2fs_super_block *fsb = sbi->raw_super;
 660        struct page *cp1, *cp2, *cur_page;
 661        unsigned long blk_size = sbi->blocksize;
 662        unsigned long long cp1_version = 0, cp2_version = 0;
 663        unsigned long long cp_start_blk_no;
 664        unsigned int cp_blks = 1 + __cp_payload(sbi);
 665        block_t cp_blk_no;
 666        int i;
 667
 668        sbi->ckpt = kzalloc(cp_blks * blk_size, GFP_KERNEL);
 669        if (!sbi->ckpt)
 670                return -ENOMEM;
 671        /*
 672         * Finding the valid cp block involves reading both
 673         * sets (cp pack 1 and cp pack 2)
 674         */
 675        cp_start_blk_no = le32_to_cpu(fsb->cp_blkaddr);
 676        cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version);
 677
 678        /* The second checkpoint pack should start at the next segment */
 679        cp_start_blk_no += ((unsigned long long)1) <<
 680                                le32_to_cpu(fsb->log_blocks_per_seg);
 681        cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version);
 682
 683        if (cp1 && cp2) {
 684                if (ver_after(cp2_version, cp1_version))
 685                        cur_page = cp2;
 686                else
 687                        cur_page = cp1;
 688        } else if (cp1) {
 689                cur_page = cp1;
 690        } else if (cp2) {
 691                cur_page = cp2;
 692        } else {
 693                goto fail_no_cp;
 694        }
 695
 696        cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
 697        memcpy(sbi->ckpt, cp_block, blk_size);
 698
 699        if (cp_blks <= 1)
 700                goto done;
 701
 702        cp_blk_no = le32_to_cpu(fsb->cp_blkaddr);
 703        if (cur_page == cp2)
 704                cp_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg);
 705
 706        for (i = 1; i < cp_blks; i++) {
 707                void *sit_bitmap_ptr;
 708                unsigned char *ckpt = (unsigned char *)sbi->ckpt;
 709
 710                cur_page = get_meta_page(sbi, cp_blk_no + i);
 711                sit_bitmap_ptr = page_address(cur_page);
 712                memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size);
 713                f2fs_put_page(cur_page, 1);
 714        }
 715done:
 716        f2fs_put_page(cp1, 1);
 717        f2fs_put_page(cp2, 1);
 718        return 0;
 719
 720fail_no_cp:
 721        kfree(sbi->ckpt);
 722        return -EINVAL;
 723}
 724
 725static int __add_dirty_inode(struct inode *inode, struct inode_entry *new)
 726{
 727        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 728
 729        if (is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR))
 730                return -EEXIST;
 731
 732        set_inode_flag(F2FS_I(inode), FI_DIRTY_DIR);
 733        F2FS_I(inode)->dirty_dir = new;
 734        list_add_tail(&new->list, &sbi->dir_inode_list);
 735        stat_inc_dirty_dir(sbi);
 736        return 0;
 737}
 738
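    /*
     * Account a newly dirtied data page; for directories, also register the
     * inode on the dirty dir list so its dentry pages get flushed before a
     * checkpoint.
     */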
 739void update_dirty_page(struct inode *inode, struct page *page)
 740{
 741        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 742        struct inode_entry *new;
 743        int ret = 0;
 744
 745        if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
 746                        !S_ISLNK(inode->i_mode))
 747                return;
 748
 749        if (!S_ISDIR(inode->i_mode)) {
 750                inode_inc_dirty_pages(inode);
 751                goto out;
 752        }
 753
 754        new = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
 755        new->inode = inode;
 756        INIT_LIST_HEAD(&new->list);
 757
 758        spin_lock(&sbi->dir_inode_lock);
 759        ret = __add_dirty_inode(inode, new);
 760        inode_inc_dirty_pages(inode);
 761        spin_unlock(&sbi->dir_inode_lock);
 762
 763        if (ret)
 764                kmem_cache_free(inode_entry_slab, new);
 765out:
 766        SetPagePrivate(page);
 767        f2fs_trace_pid(page);
 768}
 769
 770void add_dirty_dir_inode(struct inode *inode)
 771{
 772        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 773        struct inode_entry *new =
 774                        f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
 775        int ret = 0;
 776
 777        new->inode = inode;
 778        INIT_LIST_HEAD(&new->list);
 779
 780        spin_lock(&sbi->dir_inode_lock);
 781        ret = __add_dirty_inode(inode, new);
 782        spin_unlock(&sbi->dir_inode_lock);
 783
 784        if (ret)
 785                kmem_cache_free(inode_entry_slab, new);
 786}
 787
 788void remove_dirty_dir_inode(struct inode *inode)
 789{
 790        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 791        struct inode_entry *entry;
 792
 793        if (!S_ISDIR(inode->i_mode))
 794                return;
 795
 796        spin_lock(&sbi->dir_inode_lock);
 797        if (get_dirty_pages(inode) ||
 798                        !is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) {
 799                spin_unlock(&sbi->dir_inode_lock);
 800                return;
 801        }
 802
 803        entry = F2FS_I(inode)->dirty_dir;
 804        list_del(&entry->list);
 805        F2FS_I(inode)->dirty_dir = NULL;
 806        clear_inode_flag(F2FS_I(inode), FI_DIRTY_DIR);
 807        stat_dec_dirty_dir(sbi);
 808        spin_unlock(&sbi->dir_inode_lock);
 809        kmem_cache_free(inode_entry_slab, entry);
 810
 811        /* Only from the recovery routine */
 812        if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) {
 813                clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT);
 814                iput(inode);
 815        }
 816}
 817
 818void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi)
 819{
 820        struct list_head *head;
 821        struct inode_entry *entry;
 822        struct inode *inode;
 823retry:
 824        if (unlikely(f2fs_cp_error(sbi)))
 825                return;
 826
 827        spin_lock(&sbi->dir_inode_lock);
 828
 829        head = &sbi->dir_inode_list;
 830        if (list_empty(head)) {
 831                spin_unlock(&sbi->dir_inode_lock);
 832                return;
 833        }
 834        entry = list_entry(head->next, struct inode_entry, list);
 835        inode = igrab(entry->inode);
 836        spin_unlock(&sbi->dir_inode_lock);
 837        if (inode) {
 838                filemap_fdatawrite(inode->i_mapping);
 839                iput(inode);
 840        } else {
 841                /*
 842                 * We should submit the bio, since several dentry pages
 843                 * of the inode being freed may still be under writeback.
 844                 */
 845                f2fs_submit_merged_bio(sbi, DATA, WRITE);
 846                cond_resched();
 847        }
 848        goto retry;
 849}
 850
 851/*
 852 * Freeze all the FS-operations for checkpoint.
 853 */
 854static int block_operations(struct f2fs_sb_info *sbi)
 855{
 856        struct writeback_control wbc = {
 857                .sync_mode = WB_SYNC_ALL,
 858                .nr_to_write = LONG_MAX,
 859                .for_reclaim = 0,
 860        };
 861        struct blk_plug plug;
 862        int err = 0;
 863
 864        blk_start_plug(&plug);
 865
 866retry_flush_dents:
 867        f2fs_lock_all(sbi);
 868        /* write all the dirty dentry pages */
 869        if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
 870                f2fs_unlock_all(sbi);
 871                sync_dirty_dir_inodes(sbi);
 872                if (unlikely(f2fs_cp_error(sbi))) {
 873                        err = -EIO;
 874                        goto out;
 875                }
 876                goto retry_flush_dents;
 877        }
 878
 879        /*
 880         * POR: we should ensure that no dirty node pages remain
 881         * until the nat/sit flush is finished.
 882         */
 883retry_flush_nodes:
 884        down_write(&sbi->node_write);
 885
 886        if (get_pages(sbi, F2FS_DIRTY_NODES)) {
 887                up_write(&sbi->node_write);
 888                sync_node_pages(sbi, 0, &wbc);
 889                if (unlikely(f2fs_cp_error(sbi))) {
 890                        f2fs_unlock_all(sbi);
 891                        err = -EIO;
 892                        goto out;
 893                }
 894                goto retry_flush_nodes;
 895        }
 896out:
 897        blk_finish_plug(&plug);
 898        return err;
 899}
 900
 901static void unblock_operations(struct f2fs_sb_info *sbi)
 902{
 903        up_write(&sbi->node_write);
 904        f2fs_unlock_all(sbi);
 905}
 906
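    /*
     * Sleep on cp_wait until every page counted in F2FS_WRITEBACK has
     * finished writeback; the write end_io path wakes us up.
     */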
 907static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
 908{
 909        DEFINE_WAIT(wait);
 910
 911        for (;;) {
 912                prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE);
 913
 914                if (!get_pages(sbi, F2FS_WRITEBACK))
 915                        break;
 916
 917                io_schedule();
 918        }
 919        finish_wait(&sbi->cp_wait, &wait);
 920}
 921
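    /*
     * Write one checkpoint pack: flush the remaining dirty meta pages, fill
     * in the checkpoint block, write the cp block, cp payload, orphan blocks,
     * summaries and the trailing cp block, then wait for everything to reach
     * the disk before clearing the prefree segments.
     */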
 922static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 923{
 924        struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
 925        struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
 926        struct f2fs_nm_info *nm_i = NM_I(sbi);
 927        unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
 928        nid_t last_nid = nm_i->next_scan_nid;
 929        block_t start_blk;
 930        unsigned int data_sum_blocks, orphan_blocks;
 931        __u32 crc32 = 0;
 932        int i;
 933        int cp_payload_blks = __cp_payload(sbi);
 934        block_t discard_blk = NEXT_FREE_BLKADDR(sbi, curseg);
 935        bool invalidate = false;
 936
 937        /*
 938         * This avoids conducting wrong roll-forward operations and it uses
 939         * meta pages, so it should be called before sync_meta_pages below.
 940         */
 941        if (discard_next_dnode(sbi, discard_blk))
 942                invalidate = true;
 943
 944        /* Flush all the NAT/SIT pages */
 945        while (get_pages(sbi, F2FS_DIRTY_META)) {
 946                sync_meta_pages(sbi, META, LONG_MAX);
 947                if (unlikely(f2fs_cp_error(sbi)))
 948                        return;
 949        }
 950
 951        next_free_nid(sbi, &last_nid);
 952
 953        /*
 954         * modify the checkpoint;
 955         * the version number has already been updated
 956         */
 957        ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi));
 958        ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
 959        ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
 960        for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
 961                ckpt->cur_node_segno[i] =
 962                        cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE));
 963                ckpt->cur_node_blkoff[i] =
 964                        cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_NODE));
 965                ckpt->alloc_type[i + CURSEG_HOT_NODE] =
 966                                curseg_alloc_type(sbi, i + CURSEG_HOT_NODE);
 967        }
 968        for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) {
 969                ckpt->cur_data_segno[i] =
 970                        cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA));
 971                ckpt->cur_data_blkoff[i] =
 972                        cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_DATA));
 973                ckpt->alloc_type[i + CURSEG_HOT_DATA] =
 974                                curseg_alloc_type(sbi, i + CURSEG_HOT_DATA);
 975        }
 976
 977        ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
 978        ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
 979        ckpt->next_free_nid = cpu_to_le32(last_nid);
 980
 981        /* 2 cp blocks + n data segment summary blocks + orphan inode blocks */
 982        data_sum_blocks = npages_for_summary_flush(sbi, false);
 983        if (data_sum_blocks < NR_CURSEG_DATA_TYPE)
 984                set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
 985        else
 986                clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
 987
 988        orphan_blocks = GET_ORPHAN_BLOCKS(orphan_num);
 989        ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
 990                        orphan_blocks);
 991
 992        if (__remain_node_summaries(cpc->reason))
 993                ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+
 994                                cp_payload_blks + data_sum_blocks +
 995                                orphan_blocks + NR_CURSEG_NODE_TYPE);
 996        else
 997                ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS +
 998                                cp_payload_blks + data_sum_blocks +
 999                                orphan_blocks);
1000
1001        if (cpc->reason == CP_UMOUNT)
1002                set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
1003        else
1004                clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
1005
1006        if (cpc->reason == CP_FASTBOOT)
1007                set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
1008        else
1009                clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
1010
1011        if (orphan_num)
1012                set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
1013        else
1014                clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
1015
1016        if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
1017                set_ckpt_flags(ckpt, CP_FSCK_FLAG);
1018
1019        /* update SIT/NAT bitmap */
1020        get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
1021        get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));
1022
1023        crc32 = f2fs_crc32(ckpt, le32_to_cpu(ckpt->checksum_offset));
1024        *((__le32 *)((unsigned char *)ckpt +
1025                                le32_to_cpu(ckpt->checksum_offset)))
1026                                = cpu_to_le32(crc32);
1027
1028        start_blk = __start_cp_addr(sbi);
1029
1030        /* need to wait for end_io results */
1031        wait_on_all_pages_writeback(sbi);
1032        if (unlikely(f2fs_cp_error(sbi)))
1033                return;
1034
1035        /* write out checkpoint buffer at block 0 */
1036        update_meta_page(sbi, ckpt, start_blk++);
1037
1038        for (i = 1; i < 1 + cp_payload_blks; i++)
1039                update_meta_page(sbi, (char *)ckpt + i * F2FS_BLKSIZE,
1040                                                        start_blk++);
1041
1042        if (orphan_num) {
1043                write_orphan_inodes(sbi, start_blk);
1044                start_blk += orphan_blocks;
1045        }
1046
1047        write_data_summaries(sbi, start_blk);
1048        start_blk += data_sum_blocks;
1049        if (__remain_node_summaries(cpc->reason)) {
1050                write_node_summaries(sbi, start_blk);
1051                start_blk += NR_CURSEG_NODE_TYPE;
1052        }
1053
1054        /* write out the trailing checkpoint block */
1055        update_meta_page(sbi, ckpt, start_blk);
1056
1057        /* wait for previously submitted node/meta page writeback */
1058        wait_on_all_pages_writeback(sbi);
1059
1060        if (unlikely(f2fs_cp_error(sbi)))
1061                return;
1062
1063        filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX);
1064        filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX);
1065
1066        /* update user_block_counts */
1067        sbi->last_valid_block_count = sbi->total_valid_block_count;
1068        sbi->alloc_valid_block_count = 0;
1069
1070        /* Here, we have only one bio containing the CP pack */
1071        sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
1072
1073        /* wait for previously submitted meta page writeback */
1074        wait_on_all_pages_writeback(sbi);
1075
1076        /*
1077         * invalidate the meta page which was used temporarily for zeroing
1078         * out the block at the end of the warm node chain.
1079         */
1080        if (invalidate)
1081                invalidate_mapping_pages(META_MAPPING(sbi), discard_blk,
1082                                                                discard_blk);
1083
1084        release_dirty_inode(sbi);
1085
1086        if (unlikely(f2fs_cp_error(sbi)))
1087                return;
1088
1089        clear_prefree_segments(sbi, cpc);
1090        clear_sbi_flag(sbi, SBI_IS_DIRTY);
1091}
1092
1093/*
1094 * We guarantee that this checkpoint procedure will not fail.
1095 */
1096void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1097{
1098        struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1099        unsigned long long ckpt_ver;
1100
1101        mutex_lock(&sbi->cp_mutex);
1102
1103        if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
1104                (cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC ||
1105                (cpc->reason == CP_DISCARD && !sbi->discard_blks)))
1106                goto out;
1107        if (unlikely(f2fs_cp_error(sbi)))
1108                goto out;
1109        if (f2fs_readonly(sbi->sb))
1110                goto out;
1111
1112        trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");
1113
1114        if (block_operations(sbi))
1115                goto out;
1116
1117        trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops");
1118
1119        f2fs_submit_merged_bio(sbi, DATA, WRITE);
1120        f2fs_submit_merged_bio(sbi, NODE, WRITE);
1121        f2fs_submit_merged_bio(sbi, META, WRITE);
1122
1123        /*
1124         * update checkpoint pack index
1125         * Increase the version number so that
1126         * SIT entries and seg summaries are written to the correct place
1127         */
1128        ckpt_ver = cur_cp_version(ckpt);
1129        ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);
1130
1131        /* write cached NAT/SIT entries to NAT/SIT area */
1132        flush_nat_entries(sbi);
1133        flush_sit_entries(sbi, cpc);
1134
1135        /* write out the checkpoint pack; the locks are dropped in unblock_operations() below */
1136        do_checkpoint(sbi, cpc);
1137
1138        unblock_operations(sbi);
1139        stat_inc_cp_count(sbi->stat_info);
1140
1141        if (cpc->reason == CP_RECOVERY)
1142                f2fs_msg(sbi->sb, KERN_NOTICE,
1143                        "checkpoint: version = %llx", ckpt_ver);
1144
1145        /* do checkpoint periodically */
1146        sbi->cp_expires = round_jiffies_up(jiffies + HZ * sbi->cp_interval);
1147out:
1148        mutex_unlock(&sbi->cp_mutex);
1149        trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
1150}
1151
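    /*
     * Initialize the per-type ino management structures and derive
     * max_orphans from the blocks of a CP pack segment that are not used by
     * cp blocks, cp payload or current segment summaries.
     */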
1152void init_ino_entry_info(struct f2fs_sb_info *sbi)
1153{
1154        int i;
1155
1156        for (i = 0; i < MAX_INO_ENTRY; i++) {
1157                struct inode_management *im = &sbi->im[i];
1158
1159                INIT_RADIX_TREE(&im->ino_root, GFP_ATOMIC);
1160                spin_lock_init(&im->ino_lock);
1161                INIT_LIST_HEAD(&im->ino_list);
1162                im->ino_num = 0;
1163        }
1164
1165        sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
1166                        NR_CURSEG_TYPE - __cp_payload(sbi)) *
1167                                F2FS_ORPHANS_PER_BLOCK;
1168}
1169
1170int __init create_checkpoint_caches(void)
1171{
1172        ino_entry_slab = f2fs_kmem_cache_create("f2fs_ino_entry",
1173                        sizeof(struct ino_entry));
1174        if (!ino_entry_slab)
1175                return -ENOMEM;
1176        inode_entry_slab = f2fs_kmem_cache_create("f2fs_inode_entry",
1177                        sizeof(struct inode_entry));
1178        if (!inode_entry_slab) {
1179                kmem_cache_destroy(ino_entry_slab);
1180                return -ENOMEM;
1181        }
1182        return 0;
1183}
1184
1185void destroy_checkpoint_caches(void)
1186{
1187        kmem_cache_destroy(ino_entry_slab);
1188        kmem_cache_destroy(inode_entry_slab);
1189}
1190