/*
 * fs/f2fs/checkpoint.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/bio.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/f2fs_fs.h>
#include <linux/pagevec.h>
#include <linux/swap.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include <trace/events/f2fs.h>

static struct kmem_cache *orphan_entry_slab;
static struct kmem_cache *inode_entry_slab;

/*
 * We guarantee no failure on the returned page.
 */
struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
        struct address_space *mapping = sbi->meta_inode->i_mapping;
        struct page *page = NULL;
repeat:
        page = grab_cache_page(mapping, index);
        if (!page) {
                cond_resched();
                goto repeat;
        }

        /* We wait for writeback only inside grab_meta_page() */
        wait_on_page_writeback(page);
        SetPageUptodate(page);
        return page;
}

/*
 * We guarantee no failure on the returned page.
 */
struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
        struct address_space *mapping = sbi->meta_inode->i_mapping;
        struct page *page;
repeat:
        page = grab_cache_page(mapping, index);
        if (!page) {
                cond_resched();
                goto repeat;
        }
        if (PageUptodate(page))
                goto out;

        if (f2fs_readpage(sbi, page, index, READ_SYNC))
                goto repeat;

        lock_page(page);
        if (page->mapping != mapping) {
                f2fs_put_page(page, 1);
                goto repeat;
        }
out:
        mark_page_accessed(page);
        return page;
}

static int f2fs_write_meta_page(struct page *page,
                                struct writeback_control *wbc)
{
        struct inode *inode = page->mapping->host;
        struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);

        /* Should not write any meta pages if an IO error has occurred */
        if (wbc->for_reclaim ||
                        is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)) {
                dec_page_count(sbi, F2FS_DIRTY_META);
                wbc->pages_skipped++;
                set_page_dirty(page);
                return AOP_WRITEPAGE_ACTIVATE;
        }

        wait_on_page_writeback(page);

        write_meta_page(sbi, page);
        dec_page_count(sbi, F2FS_DIRTY_META);
        unlock_page(page);
        return 0;
}

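/*
 * Write back dirty meta pages in one go: kupdate-style writeback is
 * skipped, and at most one bio's worth of pages (bio_get_nr_vecs())
 * is written per call.
 */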
static int f2fs_write_meta_pages(struct address_space *mapping,
                                struct writeback_control *wbc)
{
        struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
        struct block_device *bdev = sbi->sb->s_bdev;
        long written;

        if (wbc->for_kupdate)
                return 0;

        if (get_pages(sbi, F2FS_DIRTY_META) == 0)
                return 0;

        /* hold cp_mutex so meta writeback does not race with a checkpoint */
        mutex_lock(&sbi->cp_mutex);
        written = sync_meta_pages(sbi, META, bio_get_nr_vecs(bdev));
        mutex_unlock(&sbi->cp_mutex);
        wbc->nr_to_write -= written;
        return 0;
}

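/*
 * Collect dirty meta pages with a pagevec lookup, write up to nr_to_write
 * of them, and finally submit the pending bio for the given page type.
 */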
long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
                                                long nr_to_write)
{
        struct address_space *mapping = sbi->meta_inode->i_mapping;
        pgoff_t index = 0, end = LONG_MAX;
        struct pagevec pvec;
        long nwritten = 0;
        struct writeback_control wbc = {
                .for_reclaim = 0,
        };

        pagevec_init(&pvec, 0);

        while (index <= end) {
                int i, nr_pages;
                nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
                                PAGECACHE_TAG_DIRTY,
                                min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
                if (nr_pages == 0)
                        break;

                for (i = 0; i < nr_pages; i++) {
                        struct page *page = pvec.pages[i];
                        lock_page(page);
                        BUG_ON(page->mapping != mapping);
                        BUG_ON(!PageDirty(page));
                        clear_page_dirty_for_io(page);
                        if (f2fs_write_meta_page(page, &wbc)) {
                                unlock_page(page);
                                break;
                        }
                        if (nwritten++ >= nr_to_write)
                                break;
                }
                pagevec_release(&pvec);
                cond_resched();
        }

        if (nwritten)
                f2fs_submit_bio(sbi, type, nr_to_write == LONG_MAX);

        return nwritten;
}

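/*
 * Mark a meta page dirty and account for it in the F2FS_DIRTY_META counter
 * so the writeback paths know how much meta data is pending.
 */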
static int f2fs_set_meta_page_dirty(struct page *page)
{
        struct address_space *mapping = page->mapping;
        struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);

        SetPageUptodate(page);
        if (!PageDirty(page)) {
                __set_page_dirty_nobuffers(page);
                inc_page_count(sbi, F2FS_DIRTY_META);
                return 1;
        }
        return 0;
}

const struct address_space_operations f2fs_meta_aops = {
        .writepage      = f2fs_write_meta_page,
        .writepages     = f2fs_write_meta_pages,
        .set_page_dirty = f2fs_set_meta_page_dirty,
};

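/*
 * Reserve room for one more orphan inode entry. Fails with -ENOSPC when
 * the current cp pack cannot hold any more orphan entries.
 */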
int acquire_orphan_inode(struct f2fs_sb_info *sbi)
{
        unsigned int max_orphans;
        int err = 0;

        /*
         * Considering 512 blocks in a segment, 5 blocks are needed for the
         * cp and the log segment summaries. The remaining blocks are used
         * to keep orphan entries. With one reserved segment for the cp
         * pack, we can have at most 1020 * 507 orphan entries.
         */
        max_orphans = (sbi->blocks_per_seg - 5) * F2FS_ORPHANS_PER_BLOCK;
        mutex_lock(&sbi->orphan_inode_mutex);
        if (sbi->n_orphans >= max_orphans)
                err = -ENOSPC;
        else
                sbi->n_orphans++;
        mutex_unlock(&sbi->orphan_inode_mutex);
        return err;
}

void release_orphan_inode(struct f2fs_sb_info *sbi)
{
        mutex_lock(&sbi->orphan_inode_mutex);
        sbi->n_orphans--;
        mutex_unlock(&sbi->orphan_inode_mutex);
}

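/*
 * Record ino in the in-memory orphan list. The list is kept sorted by
 * inode number and duplicate entries are silently ignored.
 */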
void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
        struct list_head *head, *this;
        struct orphan_inode_entry *new = NULL, *orphan = NULL;

        mutex_lock(&sbi->orphan_inode_mutex);
        head = &sbi->orphan_inode_list;
        list_for_each(this, head) {
                orphan = list_entry(this, struct orphan_inode_entry, list);
                if (orphan->ino == ino)
                        goto out;
                if (orphan->ino > ino)
                        break;
                orphan = NULL;
        }
retry:
        new = kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
        if (!new) {
                cond_resched();
                goto retry;
        }
        new->ino = ino;

        /* add the new entry into the list, which is sorted by inode number */
        if (orphan)
                list_add(&new->list, this->prev);
        else
                list_add_tail(&new->list, head);
out:
        mutex_unlock(&sbi->orphan_inode_mutex);
}

void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
        struct list_head *head;
        struct orphan_inode_entry *orphan;

        mutex_lock(&sbi->orphan_inode_mutex);
        head = &sbi->orphan_inode_list;
        list_for_each_entry(orphan, head, list) {
                if (orphan->ino == ino) {
                        list_del(&orphan->list);
                        kmem_cache_free(orphan_entry_slab, orphan);
                        sbi->n_orphans--;
                        break;
                }
        }
        mutex_unlock(&sbi->orphan_inode_mutex);
}

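/*
 * Re-read an orphan inode and drop its link count so that the final
 * iput() truncates and releases it.
 */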
static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
        struct inode *inode = f2fs_iget(sbi->sb, ino);
        BUG_ON(IS_ERR(inode));
        clear_nlink(inode);

        /* truncate all the data during iput */
        iput(inode);
}

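/*
 * During power-on recovery, walk the orphan blocks recorded in the current
 * cp pack and recover every inode listed there.
 */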
int recover_orphan_inodes(struct f2fs_sb_info *sbi)
{
        block_t start_blk, orphan_blkaddr, i, j;

        if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
                return 0;

        sbi->por_doing = 1;
        start_blk = __start_cp_addr(sbi) + 1;
        orphan_blkaddr = __start_sum_addr(sbi) - 1;

        for (i = 0; i < orphan_blkaddr; i++) {
                struct page *page = get_meta_page(sbi, start_blk + i);
                struct f2fs_orphan_block *orphan_blk;

                orphan_blk = (struct f2fs_orphan_block *)page_address(page);
                for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) {
                        nid_t ino = le32_to_cpu(orphan_blk->ino[j]);
                        recover_orphan_inode(sbi, ino);
                }
                f2fs_put_page(page, 1);
        }
        /* clear Orphan Flag */
        clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG);
        sbi->por_doing = 0;
        return 0;
}

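/*
 * Flush the in-memory orphan list into orphan blocks of the cp pack,
 * starting at start_blk. Each block records its index and the total number
 * of orphan blocks besides the inode numbers themselves.
 */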
static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
{
        struct list_head *head, *this, *next;
        struct f2fs_orphan_block *orphan_blk = NULL;
        struct page *page = NULL;
        unsigned int nentries = 0;
        unsigned short index = 1;
        unsigned short orphan_blocks;

        orphan_blocks = (unsigned short)((sbi->n_orphans +
                (F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK);

        mutex_lock(&sbi->orphan_inode_mutex);
        head = &sbi->orphan_inode_list;

        /* loop over each orphan inode entry and write it into an orphan block */
        list_for_each_safe(this, next, head) {
                struct orphan_inode_entry *orphan;

                orphan = list_entry(this, struct orphan_inode_entry, list);

                if (nentries == F2FS_ORPHANS_PER_BLOCK) {
                        /*
                         * The current orphan block is full of 1020 entries,
                         * so flush it and bring another one into memory.
                         */
                        orphan_blk->blk_addr = cpu_to_le16(index);
                        orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
                        orphan_blk->entry_count = cpu_to_le32(nentries);
                        set_page_dirty(page);
                        f2fs_put_page(page, 1);
                        index++;
                        start_blk++;
                        nentries = 0;
                        page = NULL;
                }
                if (page)
                        goto page_exist;

                page = grab_meta_page(sbi, start_blk);
                orphan_blk = (struct f2fs_orphan_block *)page_address(page);
                memset(orphan_blk, 0, sizeof(*orphan_blk));
page_exist:
                orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);
        }
        if (!page)
                goto end;

        orphan_blk->blk_addr = cpu_to_le16(index);
        orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
        orphan_blk->entry_count = cpu_to_le32(nentries);
        set_page_dirty(page);
        f2fs_put_page(page, 1);
end:
        mutex_unlock(&sbi->orphan_inode_mutex);
}

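/*
 * Read the first and the last cp blocks of a cp pack, check their CRCs,
 * and return the head page only if both blocks carry the same version.
 */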
static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
                                block_t cp_addr, unsigned long long *version)
{
        struct page *cp_page_1, *cp_page_2 = NULL;
        unsigned long blk_size = sbi->blocksize;
        struct f2fs_checkpoint *cp_block;
        unsigned long long cur_version = 0, pre_version = 0;
        size_t crc_offset;
        __u32 crc = 0;

        /* Read the 1st cp block in this CP pack */
        cp_page_1 = get_meta_page(sbi, cp_addr);

        /* get the version number */
        cp_block = (struct f2fs_checkpoint *)page_address(cp_page_1);
        crc_offset = le32_to_cpu(cp_block->checksum_offset);
        if (crc_offset >= blk_size)
                goto invalid_cp1;

        crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset)));
        if (!f2fs_crc_valid(crc, cp_block, crc_offset))
                goto invalid_cp1;

        pre_version = cur_cp_version(cp_block);

        /* Read the 2nd cp block in this CP pack */
        cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
        cp_page_2 = get_meta_page(sbi, cp_addr);

        cp_block = (struct f2fs_checkpoint *)page_address(cp_page_2);
        crc_offset = le32_to_cpu(cp_block->checksum_offset);
        if (crc_offset >= blk_size)
                goto invalid_cp2;

        crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset)));
        if (!f2fs_crc_valid(crc, cp_block, crc_offset))
                goto invalid_cp2;

        cur_version = cur_cp_version(cp_block);

        if (cur_version == pre_version) {
                *version = cur_version;
                f2fs_put_page(cp_page_2, 1);
                return cp_page_1;
        }
invalid_cp2:
        f2fs_put_page(cp_page_2, 1);
invalid_cp1:
        f2fs_put_page(cp_page_1, 1);
        return NULL;
}

int get_valid_checkpoint(struct f2fs_sb_info *sbi)
{
        struct f2fs_checkpoint *cp_block;
        struct f2fs_super_block *fsb = sbi->raw_super;
        struct page *cp1, *cp2, *cur_page;
        unsigned long blk_size = sbi->blocksize;
        unsigned long long cp1_version = 0, cp2_version = 0;
        unsigned long long cp_start_blk_no;

        sbi->ckpt = kzalloc(blk_size, GFP_KERNEL);
        if (!sbi->ckpt)
                return -ENOMEM;
        /*
         * Finding the valid cp block involves reading both
         * sets (cp pack 1 and cp pack 2).
         */
        cp_start_blk_no = le32_to_cpu(fsb->cp_blkaddr);
        cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version);

        /* The second checkpoint pack should start at the next segment */
        cp_start_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg);
        cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version);

        if (cp1 && cp2) {
                if (ver_after(cp2_version, cp1_version))
                        cur_page = cp2;
                else
                        cur_page = cp1;
        } else if (cp1) {
                cur_page = cp1;
        } else if (cp2) {
                cur_page = cp2;
        } else {
                goto fail_no_cp;
        }

        cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
        memcpy(sbi->ckpt, cp_block, blk_size);

        f2fs_put_page(cp1, 1);
        f2fs_put_page(cp2, 1);
        return 0;

fail_no_cp:
        kfree(sbi->ckpt);
        return -EINVAL;
}

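/*
 * Add a dirty directory inode to dir_inode_list unless it is already there.
 * The caller must hold dir_inode_lock.
 */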
static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new)
{
        struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
        struct list_head *head = &sbi->dir_inode_list;
        struct list_head *this;

        list_for_each(this, head) {
                struct dir_inode_entry *entry;
                entry = list_entry(this, struct dir_inode_entry, list);
                if (entry->inode == inode)
                        return -EEXIST;
        }
        list_add_tail(&new->list, head);
#ifdef CONFIG_F2FS_STAT_FS
        sbi->n_dirty_dirs++;
#endif
        return 0;
}

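/*
 * Called when a dentry page of a directory becomes dirty: register the
 * directory in dir_inode_list and account the dirty dentry page.
 */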
void set_dirty_dir_page(struct inode *inode, struct page *page)
{
        struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
        struct dir_inode_entry *new;

        if (!S_ISDIR(inode->i_mode))
                return;
retry:
        new = kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
        if (!new) {
                cond_resched();
                goto retry;
        }
        new->inode = inode;
        INIT_LIST_HEAD(&new->list);

        spin_lock(&sbi->dir_inode_lock);
        if (__add_dirty_inode(inode, new))
                kmem_cache_free(inode_entry_slab, new);

        inc_page_count(sbi, F2FS_DIRTY_DENTS);
        inode_inc_dirty_dents(inode);
        SetPagePrivate(page);
        spin_unlock(&sbi->dir_inode_lock);
}

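/*
 * Register a directory inode in dir_inode_list without accounting any
 * dentry page.
 */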
void add_dirty_dir_inode(struct inode *inode)
{
        struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
        struct dir_inode_entry *new;
retry:
        new = kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
        if (!new) {
                cond_resched();
                goto retry;
        }
        new->inode = inode;
        INIT_LIST_HEAD(&new->list);

        spin_lock(&sbi->dir_inode_lock);
        if (__add_dirty_inode(inode, new))
                kmem_cache_free(inode_entry_slab, new);
        spin_unlock(&sbi->dir_inode_lock);
}

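/*
 * Drop a directory inode from dir_inode_list once it has no dirty dentry
 * pages left; also release the recovery-time reference if FI_DELAY_IPUT
 * was set.
 */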
void remove_dirty_dir_inode(struct inode *inode)
{
        struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
        struct list_head *head = &sbi->dir_inode_list;
        struct list_head *this;

        if (!S_ISDIR(inode->i_mode))
                return;

        spin_lock(&sbi->dir_inode_lock);
        if (atomic_read(&F2FS_I(inode)->dirty_dents)) {
                spin_unlock(&sbi->dir_inode_lock);
                return;
        }

        list_for_each(this, head) {
                struct dir_inode_entry *entry;
                entry = list_entry(this, struct dir_inode_entry, list);
                if (entry->inode == inode) {
                        list_del(&entry->list);
                        kmem_cache_free(inode_entry_slab, entry);
#ifdef CONFIG_F2FS_STAT_FS
                        sbi->n_dirty_dirs--;
#endif
                        break;
                }
        }
        spin_unlock(&sbi->dir_inode_lock);

        /* Only from the recovery routine */
        if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) {
                clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT);
                iput(inode);
        }
}

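/*
 * Look up a dirty directory inode in dir_inode_list by inode number;
 * returns NULL if it is not being tracked.
 */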
struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
        struct list_head *head = &sbi->dir_inode_list;
        struct list_head *this;
        struct inode *inode = NULL;

        spin_lock(&sbi->dir_inode_lock);
        list_for_each(this, head) {
                struct dir_inode_entry *entry;
                entry = list_entry(this, struct dir_inode_entry, list);
                if (entry->inode->i_ino == ino) {
                        inode = entry->inode;
                        break;
                }
        }
        spin_unlock(&sbi->dir_inode_lock);
        return inode;
}

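/*
 * Flush the dentry pages of every directory in dir_inode_list and keep
 * going until the list becomes empty.
 */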
void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi)
{
        struct list_head *head = &sbi->dir_inode_list;
        struct dir_inode_entry *entry;
        struct inode *inode;
retry:
        spin_lock(&sbi->dir_inode_lock);
        if (list_empty(head)) {
                spin_unlock(&sbi->dir_inode_lock);
                return;
        }
        entry = list_entry(head->next, struct dir_inode_entry, list);
        inode = igrab(entry->inode);
        spin_unlock(&sbi->dir_inode_lock);
        if (inode) {
                filemap_flush(inode->i_mapping);
                iput(inode);
        } else {
                /*
                 * We should submit the bio, since several dentry pages
                 * of the inode being freed may still be under writeback.
                 */
                f2fs_submit_bio(sbi, DATA, true);
        }
        goto retry;
}

/*
 * Freeze all the FS-operations for checkpoint.
 */
static void block_operations(struct f2fs_sb_info *sbi)
{
        struct writeback_control wbc = {
                .sync_mode = WB_SYNC_ALL,
                .nr_to_write = LONG_MAX,
                .for_reclaim = 0,
        };
        struct blk_plug plug;

        blk_start_plug(&plug);

retry_flush_dents:
        mutex_lock_all(sbi);

        /* write all the dirty dentry pages */
        if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
                mutex_unlock_all(sbi);
                sync_dirty_dir_inodes(sbi);
                goto retry_flush_dents;
        }

        /*
         * POR: we should ensure that there are no dirty node pages
         * until the nat/sit flush is finished.
         */
retry_flush_nodes:
        mutex_lock(&sbi->node_write);

        if (get_pages(sbi, F2FS_DIRTY_NODES)) {
                mutex_unlock(&sbi->node_write);
                sync_node_pages(sbi, 0, &wbc);
                goto retry_flush_nodes;
        }
        blk_finish_plug(&plug);
}

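/*
 * Release the locks taken by block_operations() so that normal FS
 * operations can resume.
 */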
static void unblock_operations(struct f2fs_sb_info *sbi)
{
        mutex_unlock(&sbi->node_write);
        mutex_unlock_all(sbi);
}

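/*
 * Fill the checkpoint block from the current in-memory state and write out
 * the whole cp pack: the cp block, orphan blocks, data (and, on umount,
 * node) summaries, and a second copy of the cp block at the end, then wait
 * for all of it to reach the disk.
 */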
static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
{
        struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
        nid_t last_nid = 0;
        block_t start_blk;
        struct page *cp_page;
        unsigned int data_sum_blocks, orphan_blocks;
        __u32 crc32 = 0;
        void *kaddr;
        int i;

        /* Flush all the NAT/SIT pages */
        while (get_pages(sbi, F2FS_DIRTY_META))
                sync_meta_pages(sbi, META, LONG_MAX);

        next_free_nid(sbi, &last_nid);

        /*
         * modify checkpoint
         * version number is already updated
         */
        ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi));
        ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
        ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
        for (i = 0; i < 3; i++) {
                ckpt->cur_node_segno[i] =
                        cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE));
                ckpt->cur_node_blkoff[i] =
                        cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_NODE));
                ckpt->alloc_type[i + CURSEG_HOT_NODE] =
                                curseg_alloc_type(sbi, i + CURSEG_HOT_NODE);
        }
        for (i = 0; i < 3; i++) {
                ckpt->cur_data_segno[i] =
                        cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA));
                ckpt->cur_data_blkoff[i] =
                        cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_DATA));
                ckpt->alloc_type[i + CURSEG_HOT_DATA] =
                                curseg_alloc_type(sbi, i + CURSEG_HOT_DATA);
        }

        ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
        ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
        ckpt->next_free_nid = cpu_to_le32(last_nid);

        /* 2 cp blocks + n data seg summary blocks + orphan inode blocks */
        data_sum_blocks = npages_for_summary_flush(sbi);
        if (data_sum_blocks < 3)
                set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
        else
                clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);

        orphan_blocks = (sbi->n_orphans + F2FS_ORPHANS_PER_BLOCK - 1)
                                        / F2FS_ORPHANS_PER_BLOCK;
        ckpt->cp_pack_start_sum = cpu_to_le32(1 + orphan_blocks);

        if (is_umount) {
                set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
                ckpt->cp_pack_total_block_count = cpu_to_le32(2 +
                        data_sum_blocks + orphan_blocks + NR_CURSEG_NODE_TYPE);
        } else {
                clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
                ckpt->cp_pack_total_block_count = cpu_to_le32(2 +
                        data_sum_blocks + orphan_blocks);
        }

        if (sbi->n_orphans)
                set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
        else
                clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);

        /* update SIT/NAT bitmap */
        get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
        get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));

        crc32 = f2fs_crc32(ckpt, le32_to_cpu(ckpt->checksum_offset));
        *((__le32 *)((unsigned char *)ckpt +
                                le32_to_cpu(ckpt->checksum_offset)))
                                = cpu_to_le32(crc32);

        start_blk = __start_cp_addr(sbi);

        /* write out checkpoint buffer at block 0 */
        cp_page = grab_meta_page(sbi, start_blk++);
        kaddr = page_address(cp_page);
        memcpy(kaddr, ckpt, (1 << sbi->log_blocksize));
        set_page_dirty(cp_page);
        f2fs_put_page(cp_page, 1);

        if (sbi->n_orphans) {
                write_orphan_inodes(sbi, start_blk);
                start_blk += orphan_blocks;
        }

        write_data_summaries(sbi, start_blk);
        start_blk += data_sum_blocks;
        if (is_umount) {
                write_node_summaries(sbi, start_blk);
                start_blk += NR_CURSEG_NODE_TYPE;
        }

        /* write out the checkpoint block that closes the cp pack */
        cp_page = grab_meta_page(sbi, start_blk);
        kaddr = page_address(cp_page);
        memcpy(kaddr, ckpt, (1 << sbi->log_blocksize));
        set_page_dirty(cp_page);
        f2fs_put_page(cp_page, 1);

        /* wait for writeback of previously submitted node/meta pages */
        while (get_pages(sbi, F2FS_WRITEBACK))
                congestion_wait(BLK_RW_ASYNC, HZ / 50);

        filemap_fdatawait_range(sbi->node_inode->i_mapping, 0, LONG_MAX);
        filemap_fdatawait_range(sbi->meta_inode->i_mapping, 0, LONG_MAX);

        /* update user_block_counts */
        sbi->last_valid_block_count = sbi->total_valid_block_count;
        sbi->alloc_valid_block_count = 0;

        /* Here, we have only one bio that carries the CP pack */
        sync_meta_pages(sbi, META_FLUSH, LONG_MAX);

        if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) {
                clear_prefree_segments(sbi);
                F2FS_RESET_SB_DIRT(sbi);
        }
}

/*
 * We guarantee that this checkpoint procedure should not fail.
 */
void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
{
        struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
        unsigned long long ckpt_ver;

        trace_f2fs_write_checkpoint(sbi->sb, is_umount, "start block_ops");

        mutex_lock(&sbi->cp_mutex);
        block_operations(sbi);

        trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops");

        f2fs_submit_bio(sbi, DATA, true);
        f2fs_submit_bio(sbi, NODE, true);
        f2fs_submit_bio(sbi, META, true);

        /*
         * update checkpoint pack index
         * Increase the version number so that
         * SIT entries and seg summaries are written at the correct place
         */
        ckpt_ver = cur_cp_version(ckpt);
        ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);

        /* write cached NAT/SIT entries to NAT/SIT area */
        flush_nat_entries(sbi);
        flush_sit_entries(sbi);

        /* unlock all the fs_lock[] in do_checkpoint() */
        do_checkpoint(sbi, is_umount);

        unblock_operations(sbi);
        mutex_unlock(&sbi->cp_mutex);

        trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint");
}

void init_orphan_info(struct f2fs_sb_info *sbi)
{
        mutex_init(&sbi->orphan_inode_mutex);
        INIT_LIST_HEAD(&sbi->orphan_inode_list);
        sbi->n_orphans = 0;
}

int __init create_checkpoint_caches(void)
{
        orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry",
                        sizeof(struct orphan_inode_entry), NULL);
        if (unlikely(!orphan_entry_slab))
                return -ENOMEM;
        inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry",
                        sizeof(struct dir_inode_entry), NULL);
        if (unlikely(!inode_entry_slab)) {
                kmem_cache_destroy(orphan_entry_slab);
                return -ENOMEM;
        }
        return 0;
}

void destroy_checkpoint_caches(void)
{
        kmem_cache_destroy(orphan_entry_slab);
        kmem_cache_destroy(inode_entry_slab);
}