linux/fs/f2fs/checkpoint.c
/*
 * fs/f2fs/checkpoint.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/bio.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/f2fs_fs.h>
#include <linux/pagevec.h>
#include <linux/swap.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include <trace/events/f2fs.h>

static struct kmem_cache *orphan_entry_slab;
static struct kmem_cache *inode_entry_slab;

/*
 * We guarantee no failure on the returned page.
 */
struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
        struct address_space *mapping = sbi->meta_inode->i_mapping;
        struct page *page = NULL;
repeat:
        page = grab_cache_page(mapping, index);
        if (!page) {
                cond_resched();
                goto repeat;
        }

        /* We wait for writeback only inside grab_meta_page() */
        wait_on_page_writeback(page);
        SetPageUptodate(page);
        return page;
}

/*
 * We guarantee no failure on the returned page.
 */
struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
        struct address_space *mapping = sbi->meta_inode->i_mapping;
        struct page *page;
repeat:
        page = grab_cache_page(mapping, index);
        if (!page) {
                cond_resched();
                goto repeat;
        }
        if (PageUptodate(page))
                goto out;

        if (f2fs_readpage(sbi, page, index, READ_SYNC))
                goto repeat;

        lock_page(page);
        if (page->mapping != mapping) {
                f2fs_put_page(page, 1);
                goto repeat;
        }
out:
        mark_page_accessed(page);
        return page;
}

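/*
 * Write back a single dirty meta page.  Writeback is skipped and the page
 * is redirtied when called for reclaim or after a checkpoint error.
 */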
static int f2fs_write_meta_page(struct page *page,
                                struct writeback_control *wbc)
{
        struct inode *inode = page->mapping->host;
        struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);

        /* Should not write any meta pages, if any IO error occurred */
        if (wbc->for_reclaim ||
                        is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)) {
                dec_page_count(sbi, F2FS_DIRTY_META);
                wbc->pages_skipped++;
                set_page_dirty(page);
                return AOP_WRITEPAGE_ACTIVATE;
        }

        wait_on_page_writeback(page);

        write_meta_page(sbi, page);
        dec_page_count(sbi, F2FS_DIRTY_META);
        unlock_page(page);
        return 0;
}

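/*
 * Write back dirty meta pages in one batch under cp_mutex,
 * skipping kupdate-style writeback.
 */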
static int f2fs_write_meta_pages(struct address_space *mapping,
                                struct writeback_control *wbc)
{
        struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
        struct block_device *bdev = sbi->sb->s_bdev;
        long written;

        if (wbc->for_kupdate)
                return 0;

        if (get_pages(sbi, F2FS_DIRTY_META) == 0)
                return 0;

        /* if mounting failed, skip writing meta pages */
        mutex_lock(&sbi->cp_mutex);
        written = sync_meta_pages(sbi, META, bio_get_nr_vecs(bdev));
        mutex_unlock(&sbi->cp_mutex);
        wbc->nr_to_write -= written;
        return 0;
}

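/*
 * Collect dirty meta pages with a pagevec, write up to nr_to_write of them,
 * and submit the pending bio if anything was written.  Returns the number
 * of pages written.
 */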
long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
                                                long nr_to_write)
{
        struct address_space *mapping = sbi->meta_inode->i_mapping;
        pgoff_t index = 0, end = LONG_MAX;
        struct pagevec pvec;
        long nwritten = 0;
        struct writeback_control wbc = {
                .for_reclaim = 0,
        };

        pagevec_init(&pvec, 0);

        while (index <= end) {
                int i, nr_pages;
                nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
                                PAGECACHE_TAG_DIRTY,
                                min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
                if (nr_pages == 0)
                        break;

                for (i = 0; i < nr_pages; i++) {
                        struct page *page = pvec.pages[i];
                        lock_page(page);
                        BUG_ON(page->mapping != mapping);
                        BUG_ON(!PageDirty(page));
                        clear_page_dirty_for_io(page);
                        if (f2fs_write_meta_page(page, &wbc)) {
                                unlock_page(page);
                                break;
                        }
                        if (nwritten++ >= nr_to_write)
                                break;
                }
                pagevec_release(&pvec);
                cond_resched();
        }

        if (nwritten)
                f2fs_submit_bio(sbi, type, nr_to_write == LONG_MAX);

        return nwritten;
}

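/* Mark a meta page dirty and account it in the dirty meta page count. */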
static int f2fs_set_meta_page_dirty(struct page *page)
{
        struct address_space *mapping = page->mapping;
        struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);

        SetPageUptodate(page);
        if (!PageDirty(page)) {
                __set_page_dirty_nobuffers(page);
                inc_page_count(sbi, F2FS_DIRTY_META);
                return 1;
        }
        return 0;
}

const struct address_space_operations f2fs_meta_aops = {
        .writepage      = f2fs_write_meta_page,
        .writepages     = f2fs_write_meta_pages,
        .set_page_dirty = f2fs_set_meta_page_dirty,
};

int check_orphan_space(struct f2fs_sb_info *sbi)
{
        unsigned int max_orphans;
        int err = 0;

        /*
         * Considering 512 blocks in a segment, 5 blocks are needed for cp
         * and log segment summaries.  The remaining blocks are used to keep
         * orphan entries.  With the limitation of one reserved segment
         * for the cp pack, we can have at most 1020 * 507 orphan entries.
         */
        max_orphans = (sbi->blocks_per_seg - 5) * F2FS_ORPHANS_PER_BLOCK;
        mutex_lock(&sbi->orphan_inode_mutex);
        if (sbi->n_orphans >= max_orphans)
                err = -ENOSPC;
        mutex_unlock(&sbi->orphan_inode_mutex);
        return err;
}

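/*
 * Insert an orphan inode entry, keeping the orphan list sorted by inode
 * number.  A duplicate entry is silently ignored.
 */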
void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
        struct list_head *head, *this;
        struct orphan_inode_entry *new = NULL, *orphan = NULL;

        mutex_lock(&sbi->orphan_inode_mutex);
        head = &sbi->orphan_inode_list;
        list_for_each(this, head) {
                orphan = list_entry(this, struct orphan_inode_entry, list);
                if (orphan->ino == ino)
                        goto out;
                if (orphan->ino > ino)
                        break;
                orphan = NULL;
        }
retry:
        new = kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
        if (!new) {
                cond_resched();
                goto retry;
        }
        new->ino = ino;

        /* add the new entry into the list, which is sorted by inode number */
        if (orphan)
                list_add(&new->list, this->prev);
        else
                list_add_tail(&new->list, head);

        sbi->n_orphans++;
out:
        mutex_unlock(&sbi->orphan_inode_mutex);
}

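/* Remove the given inode number from the in-memory orphan list. */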
void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
        struct list_head *this, *next, *head;
        struct orphan_inode_entry *orphan;

        mutex_lock(&sbi->orphan_inode_mutex);
        head = &sbi->orphan_inode_list;
        list_for_each_safe(this, next, head) {
                orphan = list_entry(this, struct orphan_inode_entry, list);
                if (orphan->ino == ino) {
                        list_del(&orphan->list);
                        kmem_cache_free(orphan_entry_slab, orphan);
                        sbi->n_orphans--;
                        break;
                }
        }
        mutex_unlock(&sbi->orphan_inode_mutex);
}

static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
        struct inode *inode = f2fs_iget(sbi->sb, ino);
        BUG_ON(IS_ERR(inode));
        clear_nlink(inode);

        /* truncate all the data during iput */
        iput(inode);
}

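/*
 * Read the orphan blocks recorded in the current CP pack and drop each
 * listed inode, so its data is truncated on the final iput.
 */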
int recover_orphan_inodes(struct f2fs_sb_info *sbi)
{
        block_t start_blk, orphan_blkaddr, i, j;

        if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
                return 0;

        sbi->por_doing = 1;
        start_blk = __start_cp_addr(sbi) + 1;
        orphan_blkaddr = __start_sum_addr(sbi) - 1;

        for (i = 0; i < orphan_blkaddr; i++) {
                struct page *page = get_meta_page(sbi, start_blk + i);
                struct f2fs_orphan_block *orphan_blk;

                orphan_blk = (struct f2fs_orphan_block *)page_address(page);
                for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) {
                        nid_t ino = le32_to_cpu(orphan_blk->ino[j]);
                        recover_orphan_inode(sbi, ino);
                }
                f2fs_put_page(page, 1);
        }
        /* clear Orphan Flag */
        clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG);
        sbi->por_doing = 0;
        return 0;
}

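/*
 * Write the in-memory orphan list into the orphan blocks of the CP pack,
 * packing up to F2FS_ORPHANS_PER_BLOCK entries per block.
 */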
static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
{
        struct list_head *head, *this, *next;
        struct f2fs_orphan_block *orphan_blk = NULL;
        struct page *page = NULL;
        unsigned int nentries = 0;
        unsigned short index = 1;
        unsigned short orphan_blocks;

        orphan_blocks = (unsigned short)((sbi->n_orphans +
                (F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK);

        mutex_lock(&sbi->orphan_inode_mutex);
        head = &sbi->orphan_inode_list;

        /* loop over each orphan inode entry and write them into orphan blocks */
        list_for_each_safe(this, next, head) {
                struct orphan_inode_entry *orphan;

                orphan = list_entry(this, struct orphan_inode_entry, list);

                if (nentries == F2FS_ORPHANS_PER_BLOCK) {
                        /*
                         * this orphan block is full of 1020 entries, so we
                         * need to flush the current orphan block and bring
                         * another one in memory
                         */
                        orphan_blk->blk_addr = cpu_to_le16(index);
                        orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
                        orphan_blk->entry_count = cpu_to_le32(nentries);
                        set_page_dirty(page);
                        f2fs_put_page(page, 1);
                        index++;
                        start_blk++;
                        nentries = 0;
                        page = NULL;
                }
                if (page)
                        goto page_exist;

                page = grab_meta_page(sbi, start_blk);
                orphan_blk = (struct f2fs_orphan_block *)page_address(page);
                memset(orphan_blk, 0, sizeof(*orphan_blk));
page_exist:
                orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);
        }
        if (!page)
                goto end;

        orphan_blk->blk_addr = cpu_to_le16(index);
        orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
        orphan_blk->entry_count = cpu_to_le32(nentries);
        set_page_dirty(page);
        f2fs_put_page(page, 1);
end:
        mutex_unlock(&sbi->orphan_inode_mutex);
}

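/*
 * Validate one CP pack: check the CRCs of its first and last checkpoint
 * blocks and make sure both carry the same version number.  Returns the
 * first checkpoint page on success, NULL otherwise.
 */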
static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
                                block_t cp_addr, unsigned long long *version)
{
        struct page *cp_page_1, *cp_page_2 = NULL;
        unsigned long blk_size = sbi->blocksize;
        struct f2fs_checkpoint *cp_block;
        unsigned long long cur_version = 0, pre_version = 0;
        size_t crc_offset;
        __u32 crc = 0;

        /* Read the 1st cp block in this CP pack */
        cp_page_1 = get_meta_page(sbi, cp_addr);

        /* get the version number */
        cp_block = (struct f2fs_checkpoint *)page_address(cp_page_1);
        crc_offset = le32_to_cpu(cp_block->checksum_offset);
        if (crc_offset >= blk_size)
                goto invalid_cp1;

        crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset)));
        if (!f2fs_crc_valid(crc, cp_block, crc_offset))
                goto invalid_cp1;

        pre_version = le64_to_cpu(cp_block->checkpoint_ver);

        /* Read the 2nd cp block in this CP pack */
        cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
        cp_page_2 = get_meta_page(sbi, cp_addr);

        cp_block = (struct f2fs_checkpoint *)page_address(cp_page_2);
        crc_offset = le32_to_cpu(cp_block->checksum_offset);
        if (crc_offset >= blk_size)
                goto invalid_cp2;

        crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset)));
        if (!f2fs_crc_valid(crc, cp_block, crc_offset))
                goto invalid_cp2;

        cur_version = le64_to_cpu(cp_block->checkpoint_ver);

        if (cur_version == pre_version) {
                *version = cur_version;
                f2fs_put_page(cp_page_2, 1);
                return cp_page_1;
        }
invalid_cp2:
        f2fs_put_page(cp_page_2, 1);
invalid_cp1:
        f2fs_put_page(cp_page_1, 1);
        return NULL;
}

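/* Load the newer of the two valid checkpoint packs into sbi->ckpt. */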
int get_valid_checkpoint(struct f2fs_sb_info *sbi)
{
        struct f2fs_checkpoint *cp_block;
        struct f2fs_super_block *fsb = sbi->raw_super;
        struct page *cp1, *cp2, *cur_page;
        unsigned long blk_size = sbi->blocksize;
        unsigned long long cp1_version = 0, cp2_version = 0;
        unsigned long long cp_start_blk_no;

        sbi->ckpt = kzalloc(blk_size, GFP_KERNEL);
        if (!sbi->ckpt)
                return -ENOMEM;
        /*
         * Finding the valid cp block involves reading both
         * sets (cp pack 1 and cp pack 2).
         */
        cp_start_blk_no = le32_to_cpu(fsb->cp_blkaddr);
        cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version);

        /* The second checkpoint pack should start at the next segment */
        cp_start_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg);
        cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version);

        if (cp1 && cp2) {
                if (ver_after(cp2_version, cp1_version))
                        cur_page = cp2;
                else
                        cur_page = cp1;
        } else if (cp1) {
                cur_page = cp1;
        } else if (cp2) {
                cur_page = cp2;
        } else {
                goto fail_no_cp;
        }

        cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
        memcpy(sbi->ckpt, cp_block, blk_size);

        f2fs_put_page(cp1, 1);
        f2fs_put_page(cp2, 1);
        return 0;

fail_no_cp:
        kfree(sbi->ckpt);
        return -EINVAL;
}

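/*
 * Add a dirty directory inode entry to sbi->dir_inode_list unless it is
 * already present.  The caller must hold dir_inode_lock.
 */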
static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new)
{
        struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
        struct list_head *head = &sbi->dir_inode_list;
        struct list_head *this;

        list_for_each(this, head) {
                struct dir_inode_entry *entry;
                entry = list_entry(this, struct dir_inode_entry, list);
                if (entry->inode == inode)
                        return -EEXIST;
        }
        list_add_tail(&new->list, head);
#ifdef CONFIG_F2FS_STAT_FS
        sbi->n_dirty_dirs++;
#endif
        return 0;
}

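/*
 * Track a dirty dentry page: register its directory inode in the dirty
 * directory list and account the page as a dirty dent.
 */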
void set_dirty_dir_page(struct inode *inode, struct page *page)
{
        struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
        struct dir_inode_entry *new;

        if (!S_ISDIR(inode->i_mode))
                return;
retry:
        new = kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
        if (!new) {
                cond_resched();
                goto retry;
        }
        new->inode = inode;
        INIT_LIST_HEAD(&new->list);

        spin_lock(&sbi->dir_inode_lock);
        if (__add_dirty_inode(inode, new))
                kmem_cache_free(inode_entry_slab, new);

        inc_page_count(sbi, F2FS_DIRTY_DENTS);
        inode_inc_dirty_dents(inode);
        SetPagePrivate(page);
        spin_unlock(&sbi->dir_inode_lock);
}

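/* Register a directory inode in the dirty directory list. */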
void add_dirty_dir_inode(struct inode *inode)
{
        struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
        struct dir_inode_entry *new;
retry:
        new = kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
        if (!new) {
                cond_resched();
                goto retry;
        }
        new->inode = inode;
        INIT_LIST_HEAD(&new->list);

        spin_lock(&sbi->dir_inode_lock);
        if (__add_dirty_inode(inode, new))
                kmem_cache_free(inode_entry_slab, new);
        spin_unlock(&sbi->dir_inode_lock);
}

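/*
 * Drop a directory inode from the dirty directory list once it has no
 * dirty dentry pages left.
 */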
void remove_dirty_dir_inode(struct inode *inode)
{
        struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
        struct list_head *head = &sbi->dir_inode_list;
        struct list_head *this;

        if (!S_ISDIR(inode->i_mode))
                return;

        spin_lock(&sbi->dir_inode_lock);
        if (atomic_read(&F2FS_I(inode)->dirty_dents)) {
                spin_unlock(&sbi->dir_inode_lock);
                return;
        }

        list_for_each(this, head) {
                struct dir_inode_entry *entry;
                entry = list_entry(this, struct dir_inode_entry, list);
                if (entry->inode == inode) {
                        list_del(&entry->list);
                        kmem_cache_free(inode_entry_slab, entry);
#ifdef CONFIG_F2FS_STAT_FS
                        sbi->n_dirty_dirs--;
#endif
                        break;
                }
        }
        spin_unlock(&sbi->dir_inode_lock);

        /* Only from the recovery routine */
        if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) {
                clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT);
                iput(inode);
        }
}

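/* Look up a dirty directory inode by inode number. */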
struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
        struct list_head *head = &sbi->dir_inode_list;
        struct list_head *this;
        struct inode *inode = NULL;

        spin_lock(&sbi->dir_inode_lock);
        list_for_each(this, head) {
                struct dir_inode_entry *entry;
                entry = list_entry(this, struct dir_inode_entry, list);
                if (entry->inode->i_ino == ino) {
                        inode = entry->inode;
                        break;
                }
        }
        spin_unlock(&sbi->dir_inode_lock);
        return inode;
}

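/*
 * Flush the dentry pages of every inode on the dirty directory list until
 * the list becomes empty.
 */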
void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi)
{
        struct list_head *head = &sbi->dir_inode_list;
        struct dir_inode_entry *entry;
        struct inode *inode;
retry:
        spin_lock(&sbi->dir_inode_lock);
        if (list_empty(head)) {
                spin_unlock(&sbi->dir_inode_lock);
                return;
        }
        entry = list_entry(head->next, struct dir_inode_entry, list);
        inode = igrab(entry->inode);
        spin_unlock(&sbi->dir_inode_lock);
        if (inode) {
                filemap_flush(inode->i_mapping);
                iput(inode);
        } else {
                /*
                 * We should submit the bio, since there exist several
                 * dentry pages under writeback in the inode being freed.
                 */
                f2fs_submit_bio(sbi, DATA, true);
        }
        goto retry;
}

/*
 * Freeze all the FS-operations for checkpoint.
 */
static void block_operations(struct f2fs_sb_info *sbi)
{
        struct writeback_control wbc = {
                .sync_mode = WB_SYNC_ALL,
                .nr_to_write = LONG_MAX,
                .for_reclaim = 0,
        };
        struct blk_plug plug;

        blk_start_plug(&plug);

retry_flush_dents:
        mutex_lock_all(sbi);

        /* write all the dirty dentry pages */
        if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
                mutex_unlock_all(sbi);
                sync_dirty_dir_inodes(sbi);
                goto retry_flush_dents;
        }

        /*
         * POR: we should ensure that there are no dirty node pages
         * until finishing the nat/sit flush.
         */
retry_flush_nodes:
        mutex_lock(&sbi->node_write);

        if (get_pages(sbi, F2FS_DIRTY_NODES)) {
                mutex_unlock(&sbi->node_write);
                sync_node_pages(sbi, 0, &wbc);
                goto retry_flush_nodes;
        }
        blk_finish_plug(&plug);
}

static void unblock_operations(struct f2fs_sb_info *sbi)
{
        mutex_unlock(&sbi->node_write);
        mutex_unlock_all(sbi);
}

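/*
 * Build and write the CP pack: the checkpoint blocks, orphan blocks and
 * segment summaries, then wait until everything reaches the disk.
 */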
static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
{
        struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
        nid_t last_nid = 0;
        block_t start_blk;
        struct page *cp_page;
        unsigned int data_sum_blocks, orphan_blocks;
        __u32 crc32 = 0;
        void *kaddr;
        int i;

        /* Flush all the NAT/SIT pages */
        while (get_pages(sbi, F2FS_DIRTY_META))
                sync_meta_pages(sbi, META, LONG_MAX);

        next_free_nid(sbi, &last_nid);

        /*
         * modify checkpoint
         * version number is already updated
         */
        ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi));
        ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
        ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
        for (i = 0; i < 3; i++) {
                ckpt->cur_node_segno[i] =
                        cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE));
                ckpt->cur_node_blkoff[i] =
                        cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_NODE));
                ckpt->alloc_type[i + CURSEG_HOT_NODE] =
                                curseg_alloc_type(sbi, i + CURSEG_HOT_NODE);
        }
        for (i = 0; i < 3; i++) {
                ckpt->cur_data_segno[i] =
                        cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA));
                ckpt->cur_data_blkoff[i] =
                        cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_DATA));
                ckpt->alloc_type[i + CURSEG_HOT_DATA] =
                                curseg_alloc_type(sbi, i + CURSEG_HOT_DATA);
        }

        ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
        ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
        ckpt->next_free_nid = cpu_to_le32(last_nid);

        /* 2 cp + n data seg summary + orphan inode blocks */
        data_sum_blocks = npages_for_summary_flush(sbi);
        if (data_sum_blocks < 3)
                set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
        else
                clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);

        orphan_blocks = (sbi->n_orphans + F2FS_ORPHANS_PER_BLOCK - 1)
                                        / F2FS_ORPHANS_PER_BLOCK;
        ckpt->cp_pack_start_sum = cpu_to_le32(1 + orphan_blocks);

        if (is_umount) {
                set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
                ckpt->cp_pack_total_block_count = cpu_to_le32(2 +
                        data_sum_blocks + orphan_blocks + NR_CURSEG_NODE_TYPE);
        } else {
                clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
                ckpt->cp_pack_total_block_count = cpu_to_le32(2 +
                        data_sum_blocks + orphan_blocks);
        }

        if (sbi->n_orphans)
                set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
        else
                clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);

        /* update SIT/NAT bitmap */
        get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
        get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));

        crc32 = f2fs_crc32(ckpt, le32_to_cpu(ckpt->checksum_offset));
        *((__le32 *)((unsigned char *)ckpt +
                                le32_to_cpu(ckpt->checksum_offset)))
                                = cpu_to_le32(crc32);

        start_blk = __start_cp_addr(sbi);

        /* write out checkpoint buffer at block 0 */
        cp_page = grab_meta_page(sbi, start_blk++);
        kaddr = page_address(cp_page);
        memcpy(kaddr, ckpt, (1 << sbi->log_blocksize));
        set_page_dirty(cp_page);
        f2fs_put_page(cp_page, 1);

        if (sbi->n_orphans) {
                write_orphan_inodes(sbi, start_blk);
                start_blk += orphan_blocks;
        }

        write_data_summaries(sbi, start_blk);
        start_blk += data_sum_blocks;
        if (is_umount) {
                write_node_summaries(sbi, start_blk);
                start_blk += NR_CURSEG_NODE_TYPE;
        }

        /* write out checkpoint block */
        cp_page = grab_meta_page(sbi, start_blk);
        kaddr = page_address(cp_page);
        memcpy(kaddr, ckpt, (1 << sbi->log_blocksize));
        set_page_dirty(cp_page);
        f2fs_put_page(cp_page, 1);

        /* wait for previously submitted node/meta page writeback */
        while (get_pages(sbi, F2FS_WRITEBACK))
                congestion_wait(BLK_RW_ASYNC, HZ / 50);

        filemap_fdatawait_range(sbi->node_inode->i_mapping, 0, LONG_MAX);
        filemap_fdatawait_range(sbi->meta_inode->i_mapping, 0, LONG_MAX);

        /* update user_block_counts */
        sbi->last_valid_block_count = sbi->total_valid_block_count;
        sbi->alloc_valid_block_count = 0;

        /* Here, we have only one bio containing the CP pack */
        sync_meta_pages(sbi, META_FLUSH, LONG_MAX);

        if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) {
                clear_prefree_segments(sbi);
                F2FS_RESET_SB_DIRT(sbi);
        }
}

/*
 * We guarantee that this checkpoint procedure will not fail.
 */
void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
{
        struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
        unsigned long long ckpt_ver;

        trace_f2fs_write_checkpoint(sbi->sb, is_umount, "start block_ops");

        mutex_lock(&sbi->cp_mutex);
        block_operations(sbi);

        trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops");

        f2fs_submit_bio(sbi, DATA, true);
        f2fs_submit_bio(sbi, NODE, true);
        f2fs_submit_bio(sbi, META, true);

        /*
         * update checkpoint pack index
         * Increase the version number so that
         * SIT entries and seg summaries are written at the correct place
         */
        ckpt_ver = le64_to_cpu(ckpt->checkpoint_ver);
        ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);

        /* write cached NAT/SIT entries to NAT/SIT area */
        flush_nat_entries(sbi);
        flush_sit_entries(sbi);

        /* unlock all the fs_lock[] in do_checkpoint() */
        do_checkpoint(sbi, is_umount);

        unblock_operations(sbi);
        mutex_unlock(&sbi->cp_mutex);

        trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint");
}

void init_orphan_info(struct f2fs_sb_info *sbi)
{
        mutex_init(&sbi->orphan_inode_mutex);
        INIT_LIST_HEAD(&sbi->orphan_inode_list);
        sbi->n_orphans = 0;
}

int __init create_checkpoint_caches(void)
{
        orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry",
                        sizeof(struct orphan_inode_entry), NULL);
        if (unlikely(!orphan_entry_slab))
                return -ENOMEM;
        inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry",
                        sizeof(struct dir_inode_entry), NULL);
        if (unlikely(!inode_entry_slab)) {
                kmem_cache_destroy(orphan_entry_slab);
                return -ENOMEM;
        }
        return 0;
}

void destroy_checkpoint_caches(void)
{
        kmem_cache_destroy(orphan_entry_slab);
        kmem_cache_destroy(inode_entry_slab);
}