/*
 * fs/f2fs/gc.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
#include <linux/init.h>
#include <linux/f2fs_fs.h>
#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/freezer.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "gc.h"
#include <trace/events/f2fs.h>

static int gc_thread_func(void *data)
{
        struct f2fs_sb_info *sbi = data;
        struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
        wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
        long wait_ms;

        wait_ms = gc_th->min_sleep_time;

        do {
                if (try_to_freeze())
                        continue;
                else
                        wait_event_interruptible_timeout(*wq,
                                                kthread_should_stop(),
                                                msecs_to_jiffies(wait_ms));
                if (kthread_should_stop())
                        break;

                if (sbi->sb->s_writers.frozen >= SB_FREEZE_WRITE) {
                        increase_sleep_time(gc_th, &wait_ms);
                        continue;
                }

                /*
                 * [GC triggering conditions]
                 * 0. GC is not being conducted currently.
                 * 1. There are enough dirty segments.
                 * 2. The IO subsystem is idle, judged by the # of
                 *    writeback pages.
                 * 3. The IO subsystem is idle, judged by the # of
                 *    requests in the bdev's request list.
                 *
                 * Note: we must avoid triggering GC too frequently,
                 * because some segments may be invalidated soon
                 * afterwards by user updates or deletions, so we wait
                 * a while to let dirty segments accumulate.
                 */
                if (!mutex_trylock(&sbi->gc_mutex))
                        continue;

                if (!is_idle(sbi)) {
                        increase_sleep_time(gc_th, &wait_ms);
                        mutex_unlock(&sbi->gc_mutex);
                        continue;
                }

                if (has_enough_invalid_blocks(sbi))
                        decrease_sleep_time(gc_th, &wait_ms);
                else
                        increase_sleep_time(gc_th, &wait_ms);

                stat_inc_bggc_count(sbi);

                /* if return value is not zero, no victim was selected */
                if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC)))
                        wait_ms = gc_th->no_gc_sleep_time;

                trace_f2fs_background_gc(sbi->sb, wait_ms,
                                prefree_segments(sbi), free_segments(sbi));

                /* balance f2fs's metadata periodically */
                f2fs_balance_fs_bg(sbi);

        } while (!kthread_should_stop());
        return 0;
}
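
/*
 * A rough sketch of the resulting back-off behavior, assuming the default
 * sleep times from gc.h (the concrete values here are illustrative, not
 * normative):
 *
 *      wait_ms starts at min_sleep_time (e.g. 30s);
 *      - busy IO or a frozen fs   -> increase_sleep_time() nudges it
 *        toward max_sleep_time (e.g. 60s);
 *      - plenty of invalid blocks -> decrease_sleep_time() nudges it
 *        back toward min_sleep_time;
 *      - no victim found at all   -> jump straight to no_gc_sleep_time
 *        (e.g. 300s), since retrying sooner would just burn CPU.
 */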

int start_gc_thread(struct f2fs_sb_info *sbi)
{
        struct f2fs_gc_kthread *gc_th;
        dev_t dev = sbi->sb->s_bdev->bd_dev;
        int err = 0;

        gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
        if (!gc_th) {
                err = -ENOMEM;
                goto out;
        }

        gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME;
        gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME;
        gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME;

        gc_th->gc_idle = 0;

        sbi->gc_thread = gc_th;
        init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
        sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
                        "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
        if (IS_ERR(gc_th->f2fs_gc_task)) {
                err = PTR_ERR(gc_th->f2fs_gc_task);
                kfree(gc_th);
                sbi->gc_thread = NULL;
        }
out:
        return err;
}

void stop_gc_thread(struct f2fs_sb_info *sbi)
{
        struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
        if (!gc_th)
                return;
        kthread_stop(gc_th->f2fs_gc_task);
        kfree(gc_th);
        sbi->gc_thread = NULL;
}

static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
{
        int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY;

        if (gc_th && gc_th->gc_idle) {
                if (gc_th->gc_idle == 1)
                        gc_mode = GC_CB;
                else if (gc_th->gc_idle == 2)
                        gc_mode = GC_GREEDY;
        }
        return gc_mode;
}
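
/*
 * gc_idle is the sysfs tunable exported at /sys/fs/f2fs/<dev>/gc_idle.
 * With the default value 0, background GC uses the cost-benefit policy
 * and foreground GC uses greedy; gc_idle == 1 forces cost-benefit and
 * gc_idle == 2 forces greedy regardless of gc_type. For example,
 * select_gc_type(NULL, FG_GC) yields GC_GREEDY, while the same call
 * with gc_th->gc_idle == 1 yields GC_CB.
 */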

static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
                        int type, struct victim_sel_policy *p)
{
        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

        if (p->alloc_mode == SSR) {
                p->gc_mode = GC_GREEDY;
                p->dirty_segmap = dirty_i->dirty_segmap[type];
                p->max_search = dirty_i->nr_dirty[type];
                p->ofs_unit = 1;
        } else {
                p->gc_mode = select_gc_type(sbi->gc_thread, gc_type);
                p->dirty_segmap = dirty_i->dirty_segmap[DIRTY];
                p->max_search = dirty_i->nr_dirty[DIRTY];
                p->ofs_unit = sbi->segs_per_sec;
        }

        if (p->max_search > sbi->max_victim_search)
                p->max_search = sbi->max_victim_search;

        p->offset = sbi->last_victim[p->gc_mode];
}

static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
                                struct victim_sel_policy *p)
{
        /* SSR allocates in a segment unit */
        if (p->alloc_mode == SSR)
                return sbi->blocks_per_seg;
        if (p->gc_mode == GC_GREEDY)
                return sbi->blocks_per_seg * p->ofs_unit;
        else if (p->gc_mode == GC_CB)
                return UINT_MAX;
        else /* No other gc_mode */
                return 0;
}
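
/*
 * get_max_cost() returns the worst acceptable cost, used to seed
 * p->min_cost before the victim scan. As an illustrative example (the
 * numbers assume a common 512-blocks-per-segment layout, not a
 * requirement): greedy mode on a 4-segment section caps the cost at
 * 512 * 4 == 2048 valid blocks, so a fully valid section can never win
 * the strict "min_cost > cost" comparison, while cost-benefit mode
 * starts from UINT_MAX because its cost is encoded as UINT_MAX minus
 * the computed benefit.
 */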

static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
{
        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
        unsigned int secno;

        /*
         * If the gc_type is FG_GC, we can pick victim sections that were
         * selected by background GC before. Those sections are guaranteed
         * to have few valid blocks.
         */
        for_each_set_bit(secno, dirty_i->victim_secmap, MAIN_SECS(sbi)) {
                if (sec_usage_check(sbi, secno))
                        continue;
                clear_bit(secno, dirty_i->victim_secmap);
                return secno * sbi->segs_per_sec;
        }
        return NULL_SEGNO;
}

static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
{
        struct sit_info *sit_i = SIT_I(sbi);
        unsigned int secno = GET_SECNO(sbi, segno);
        unsigned int start = secno * sbi->segs_per_sec;
        unsigned long long mtime = 0;
        unsigned int vblocks;
        unsigned char age = 0;
        unsigned char u;
        unsigned int i;

        for (i = 0; i < sbi->segs_per_sec; i++)
                mtime += get_seg_entry(sbi, start + i)->mtime;
        vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec);

        mtime = div_u64(mtime, sbi->segs_per_sec);
        vblocks = div_u64(vblocks, sbi->segs_per_sec);

        u = (vblocks * 100) >> sbi->log_blocks_per_seg;

        /* Handle the case where the system time was changed by the user */
        if (mtime < sit_i->min_mtime)
                sit_i->min_mtime = mtime;
        if (mtime > sit_i->max_mtime)
                sit_i->max_mtime = mtime;
        if (sit_i->max_mtime != sit_i->min_mtime)
                age = 100 - div64_u64(100 * (mtime - sit_i->min_mtime),
                                sit_i->max_mtime - sit_i->min_mtime);

        return UINT_MAX - ((100 * (100 - u) * age) / (100 + u));
}
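
/*
 * Worked example of the cost-benefit formula above (the numbers are
 * illustrative and assume 512 blocks per segment, log_blocks_per_seg == 9):
 *
 *      vblocks = 128  ->  u = (128 * 100) >> 9 = 25   (25% utilized)
 *      mtime halfway between min_mtime and max_mtime -> age = 50
 *      cost = UINT_MAX - (100 * (100 - 25) * 50) / (100 + 25)
 *           = UINT_MAX - 3000
 *
 * A colder (older) and emptier section yields a larger subtrahend and
 * therefore a smaller cost, so get_victim_by_default() prefers it. This
 * mirrors the classic cost-benefit heuristic: benefit is free space
 * weighted by age, cost is the work of copying the still-valid blocks.
 */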

static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
                        unsigned int segno, struct victim_sel_policy *p)
{
        if (p->alloc_mode == SSR)
                return get_seg_entry(sbi, segno)->ckpt_valid_blocks;

        /* alloc_mode == LFS */
        if (p->gc_mode == GC_GREEDY)
                return get_valid_blocks(sbi, segno, sbi->segs_per_sec);
        else
                return get_cb_cost(sbi, segno);
}

static unsigned int count_bits(const unsigned long *addr,
                                unsigned int offset, unsigned int len)
{
        unsigned int end = offset + len, sum = 0;

        while (offset < end) {
                if (test_bit(offset++, addr))
                        ++sum;
        }
        return sum;
}

/*
 * This function is called from two paths.
 * One is garbage collection and the other is SSR segment selection.
 * When it is called during GC, it just gets a victim segment
 * and does not remove it from the dirty seglist.
 * When it is called from SSR segment selection, it finds a segment
 * which has the minimum number of valid blocks and removes it from
 * the dirty seglist.
 */
static int get_victim_by_default(struct f2fs_sb_info *sbi,
                unsigned int *result, int gc_type, int type, char alloc_mode)
{
        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
        struct victim_sel_policy p;
        unsigned int secno, max_cost, last_victim;
        unsigned int last_segment = MAIN_SEGS(sbi);
        unsigned int nsearched = 0;

        mutex_lock(&dirty_i->seglist_lock);

        p.alloc_mode = alloc_mode;
        select_policy(sbi, gc_type, type, &p);

        p.min_segno = NULL_SEGNO;
        p.min_cost = max_cost = get_max_cost(sbi, &p);

        if (p.max_search == 0)
                goto out;

        last_victim = sbi->last_victim[p.gc_mode];
        if (p.alloc_mode == LFS && gc_type == FG_GC) {
                p.min_segno = check_bg_victims(sbi);
                if (p.min_segno != NULL_SEGNO)
                        goto got_it;
        }

        while (1) {
                unsigned long cost;
                unsigned int segno;

                segno = find_next_bit(p.dirty_segmap, last_segment, p.offset);
                if (segno >= last_segment) {
                        if (sbi->last_victim[p.gc_mode]) {
                                last_segment = sbi->last_victim[p.gc_mode];
                                sbi->last_victim[p.gc_mode] = 0;
                                p.offset = 0;
                                continue;
                        }
                        break;
                }

                p.offset = segno + p.ofs_unit;
                if (p.ofs_unit > 1) {
                        p.offset -= segno % p.ofs_unit;
                        nsearched += count_bits(p.dirty_segmap,
                                                p.offset - p.ofs_unit,
                                                p.ofs_unit);
                } else {
                        nsearched++;
                }

                secno = GET_SECNO(sbi, segno);

                if (sec_usage_check(sbi, secno))
                        goto next;
                if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
                        goto next;

                cost = get_gc_cost(sbi, segno, &p);

                if (p.min_cost > cost) {
                        p.min_segno = segno;
                        p.min_cost = cost;
                }
next:
                if (nsearched >= p.max_search) {
                        if (!sbi->last_victim[p.gc_mode] && segno <= last_victim)
                                sbi->last_victim[p.gc_mode] = last_victim + 1;
                        else
                                sbi->last_victim[p.gc_mode] = segno + 1;
                        break;
                }
        }
        if (p.min_segno != NULL_SEGNO) {
got_it:
                if (p.alloc_mode == LFS) {
                        secno = GET_SECNO(sbi, p.min_segno);
                        if (gc_type == FG_GC)
                                sbi->cur_victim_sec = secno;
                        else
                                set_bit(secno, dirty_i->victim_secmap);
                }
                *result = (p.min_segno / p.ofs_unit) * p.ofs_unit;

                trace_f2fs_get_victim(sbi->sb, type, gc_type, &p,
                                sbi->cur_victim_sec,
                                prefree_segments(sbi), free_segments(sbi));
        }
out:
        mutex_unlock(&dirty_i->seglist_lock);

        return (p.min_segno == NULL_SEGNO) ? 0 : 1;
}
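
/*
 * The scan above is circular: it starts from last_victim[gc_mode] and,
 * on reaching MAIN_SEGS(), wraps to segment 0 and continues up to the
 * old starting point. Illustratively, with 1000 main segments and
 * last_victim == 600, up to max_victim_search segments are examined in
 * the order 600..999 then 0..599, and last_victim is advanced past the
 * final segment looked at, so successive calls spread the search (and
 * the wear) across the whole device instead of hammering the low
 * segment numbers.
 */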

static const struct victim_selection default_v_ops = {
        .get_victim = get_victim_by_default,
};

static struct inode *find_gc_inode(struct gc_inode_list *gc_list, nid_t ino)
{
        struct inode_entry *ie;

        ie = radix_tree_lookup(&gc_list->iroot, ino);
        if (ie)
                return ie->inode;
        return NULL;
}

static void add_gc_inode(struct gc_inode_list *gc_list, struct inode *inode)
{
        struct inode_entry *new_ie;

        if (inode == find_gc_inode(gc_list, inode->i_ino)) {
                iput(inode);
                return;
        }
        new_ie = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
        new_ie->inode = inode;

        f2fs_radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie);
        list_add_tail(&new_ie->list, &gc_list->ilist);
}

static void put_gc_inode(struct gc_inode_list *gc_list)
{
        struct inode_entry *ie, *next_ie;
        list_for_each_entry_safe(ie, next_ie, &gc_list->ilist, list) {
                radix_tree_delete(&gc_list->iroot, ie->inode->i_ino);
                iput(ie->inode);
                list_del(&ie->list);
                kmem_cache_free(inode_entry_slab, ie);
        }
}
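
/*
 * gc_inode_list pairs a radix tree (fast ino -> inode lookup while a
 * segment's blocks are being walked) with a plain list (cheap teardown).
 * Each inode reference taken via f2fs_iget() in gc_data_segment() is
 * stashed here exactly once; add_gc_inode() drops duplicate references
 * immediately, and put_gc_inode() releases everything after f2fs_gc()
 * finishes, so victim inodes stay pinned for the whole GC pass.
 */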

static int check_valid_map(struct f2fs_sb_info *sbi,
                                unsigned int segno, int offset)
{
        struct sit_info *sit_i = SIT_I(sbi);
        struct seg_entry *sentry;
        int ret;

        mutex_lock(&sit_i->sentry_lock);
        sentry = get_seg_entry(sbi, segno);
        ret = f2fs_test_bit(offset, sentry->cur_valid_map);
        mutex_unlock(&sit_i->sentry_lock);
        return ret;
}

/*
 * This function compares the node address recorded in the summary with
 * the one in the NAT. If they match, the node is still live and is
 * migrated with cold status; otherwise the stale node is ignored.
 */
static void gc_node_segment(struct f2fs_sb_info *sbi,
                struct f2fs_summary *sum, unsigned int segno, int gc_type)
{
        bool initial = true;
        struct f2fs_summary *entry;
        block_t start_addr;
        int off;

        start_addr = START_BLOCK(sbi, segno);

next_step:
        entry = sum;

        for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
                nid_t nid = le32_to_cpu(entry->nid);
                struct page *node_page;
                struct node_info ni;

                /* stop BG_GC if there are not enough free sections */
                if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0))
                        return;

                if (check_valid_map(sbi, segno, off) == 0)
                        continue;

                if (initial) {
                        ra_node_page(sbi, nid);
                        continue;
                }
                node_page = get_node_page(sbi, nid);
                if (IS_ERR(node_page))
                        continue;

                /* block may become invalid during get_node_page() */
                if (check_valid_map(sbi, segno, off) == 0) {
                        f2fs_put_page(node_page, 1);
                        continue;
                }

                get_node_info(sbi, nid, &ni);
                if (ni.blk_addr != start_addr + off) {
                        f2fs_put_page(node_page, 1);
                        continue;
                }

                /* set page dirty and write it */
                if (gc_type == FG_GC) {
                        f2fs_wait_on_page_writeback(node_page, NODE, true);
                        set_page_dirty(node_page);
                } else {
                        if (!PageWriteback(node_page))
                                set_page_dirty(node_page);
                }
                f2fs_put_page(node_page, 1);
                stat_inc_node_blk_count(sbi, 1, gc_type);
        }

        if (initial) {
                initial = false;
                goto next_step;
        }
}
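
/*
 * Note the two-pass shape of gc_node_segment(): the first pass only
 * issues ra_node_page() readahead for every live nid in the segment, so
 * the second pass, which actually locks and dirties each node page,
 * mostly hits the page cache instead of blocking on one synchronous
 * read per block. gc_data_segment() below stretches the same idea over
 * four phases.
 */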

/*
 * Calculate the start block index of the file range covered by the given
 * node offset. Be careful: callers must pass node offsets that refer only
 * to direct node blocks (or the inode itself). Passing an offset that
 * points to another node type, such as an indirect or double indirect
 * node block, is a caller's bug.
 */
block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode)
{
        unsigned int indirect_blks = 2 * NIDS_PER_BLOCK + 4;
        unsigned int bidx;

        if (node_ofs == 0)
                return 0;

        if (node_ofs <= 2) {
                bidx = node_ofs - 1;
        } else if (node_ofs <= indirect_blks) {
                int dec = (node_ofs - 4) / (NIDS_PER_BLOCK + 1);
                bidx = node_ofs - 2 - dec;
        } else {
                int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1);
                bidx = node_ofs - 5 - dec;
        }
        return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE(inode);
}
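
/*
 * A worked example, assuming 4KB blocks so that NIDS_PER_BLOCK == 1018,
 * ADDRS_PER_BLOCK == 1018 and ADDRS_PER_INODE(inode) == 923 (no inline
 * xattr stealing inode slots):
 *
 *      node_ofs 0 (inode)       -> returns 0
 *      node_ofs 1 (1st direct)  -> bidx 0, start block 923
 *      node_ofs 2 (2nd direct)  -> bidx 1, start block 1941
 *      node_ofs 4 (1st child of
 *                  1st indirect)-> dec 0, bidx 2, start block 2959
 *
 * i.e. blocks 0..922 live in the inode, 923..1940 in the first direct
 * node, and so on; node_ofs 3 is the indirect node itself and must not
 * be passed in.
 */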

static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
                struct node_info *dni, block_t blkaddr, unsigned int *nofs)
{
        struct page *node_page;
        nid_t nid;
        unsigned int ofs_in_node;
        block_t source_blkaddr;

        nid = le32_to_cpu(sum->nid);
        ofs_in_node = le16_to_cpu(sum->ofs_in_node);

        node_page = get_node_page(sbi, nid);
        if (IS_ERR(node_page))
                return false;

        get_node_info(sbi, nid, dni);

        if (sum->version != dni->version) {
                f2fs_put_page(node_page, 1);
                return false;
        }

        *nofs = ofs_of_node(node_page);
        source_blkaddr = datablock_addr(node_page, ofs_in_node);
        f2fs_put_page(node_page, 1);

        if (source_blkaddr != blkaddr)
                return false;
        return true;
}
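
/*
 * is_alive() applies two staleness filters: the version byte stored in
 * the summary must match the current NAT version (otherwise the node
 * slot was reused since this summary was written), and the block
 * address recorded at ofs_in_node in the live node page must still be
 * the victim block. Only blocks that pass both checks are worth moving.
 */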

static void move_encrypted_block(struct inode *inode, block_t bidx)
{
        struct f2fs_io_info fio = {
                .sbi = F2FS_I_SB(inode),
                .type = DATA,
                .rw = READ_SYNC,
                .encrypted_page = NULL,
        };
        struct dnode_of_data dn;
        struct f2fs_summary sum;
        struct node_info ni;
        struct page *page;
        block_t newaddr;
        int err;

        /* do not read out */
        page = f2fs_grab_cache_page(inode->i_mapping, bidx, false);
        if (!page)
                return;

        set_new_dnode(&dn, inode, NULL, NULL, 0);
        err = get_dnode_of_data(&dn, bidx, LOOKUP_NODE);
        if (err)
                goto out;

        if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
                ClearPageUptodate(page);
                goto put_out;
        }

        /*
         * don't cache encrypted data in the meta inode until previous
         * dirty data has been written back, to avoid racing between GC
         * and flush.
         */
        f2fs_wait_on_page_writeback(page, DATA, true);

        get_node_info(fio.sbi, dn.nid, &ni);
        set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);

        /* read page */
        fio.page = page;
        fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;

        allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr,
                                                        &sum, CURSEG_COLD_DATA);

        fio.encrypted_page = pagecache_get_page(META_MAPPING(fio.sbi), newaddr,
                                        FGP_LOCK | FGP_CREAT, GFP_NOFS);
        if (!fio.encrypted_page) {
                err = -ENOMEM;
                goto recover_block;
        }

        err = f2fs_submit_page_bio(&fio);
        if (err)
                goto put_page_out;

        /* write page */
        lock_page(fio.encrypted_page);

        if (unlikely(!PageUptodate(fio.encrypted_page))) {
                err = -EIO;
                goto put_page_out;
        }
        if (unlikely(fio.encrypted_page->mapping != META_MAPPING(fio.sbi))) {
                err = -EIO;
                goto put_page_out;
        }

        set_page_dirty(fio.encrypted_page);
        f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true);
        if (clear_page_dirty_for_io(fio.encrypted_page))
                dec_page_count(fio.sbi, F2FS_DIRTY_META);

        set_page_writeback(fio.encrypted_page);

        /* allocate block address */
        f2fs_wait_on_page_writeback(dn.node_page, NODE, true);

        fio.rw = WRITE_SYNC;
        fio.new_blkaddr = newaddr;
        f2fs_submit_page_mbio(&fio);

        f2fs_update_data_blkaddr(&dn, newaddr);
        set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
        if (page->index == 0)
                set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN);
put_page_out:
        f2fs_put_page(fio.encrypted_page, 1);
recover_block:
        if (err)
                __f2fs_replace_block(fio.sbi, &sum, newaddr, fio.old_blkaddr,
                                                                true, true);
put_out:
        f2fs_put_dnode(&dn);
out:
        f2fs_put_page(page, 1);
}
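
/*
 * Why the detour through META_MAPPING(): GC may run without the file's
 * encryption key loaded, so the data page cannot be decrypted into the
 * inode's own mapping. Instead, the raw ciphertext is read into a page
 * of the meta inode keyed by the new block address and written back
 * out verbatim; the move is a byte-for-byte copy that never touches
 * plaintext. If anything fails after allocate_data_block(), the
 * recover_block path restores the on-disk state via
 * __f2fs_replace_block().
 */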

static void move_data_page(struct inode *inode, block_t bidx, int gc_type)
{
        struct page *page;

        page = get_lock_data_page(inode, bidx, true);
        if (IS_ERR(page))
                return;

        if (gc_type == BG_GC) {
                if (PageWriteback(page))
                        goto out;
                set_page_dirty(page);
                set_cold_data(page);
        } else {
                struct f2fs_io_info fio = {
                        .sbi = F2FS_I_SB(inode),
                        .type = DATA,
                        .rw = WRITE_SYNC,
                        .page = page,
                        .encrypted_page = NULL,
                };
                set_page_dirty(page);
                f2fs_wait_on_page_writeback(page, DATA, true);
                if (clear_page_dirty_for_io(page))
                        inode_dec_dirty_pages(inode);
                set_cold_data(page);
                do_write_data_page(&fio);
                clear_cold_data(page);
        }
out:
        f2fs_put_page(page, 1);
}

/*
 * This function tries to get the parent node of a victim data block and
 * checks the block's validity. If the block is valid, it is copied with
 * cold status and the parent node is updated.
 * If the parent node is not valid or the data block address differs,
 * the victim data block is ignored.
 */
static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
                struct gc_inode_list *gc_list, unsigned int segno, int gc_type)
{
        struct super_block *sb = sbi->sb;
        struct f2fs_summary *entry;
        block_t start_addr;
        int off;
        int phase = 0;

        start_addr = START_BLOCK(sbi, segno);

next_step:
        entry = sum;

        for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
                struct page *data_page;
                struct inode *inode;
                struct node_info dni; /* dnode info for the data */
                unsigned int ofs_in_node, nofs;
                block_t start_bidx;

                /* stop BG_GC if there are not enough free sections */
                if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0))
                        return;

                if (check_valid_map(sbi, segno, off) == 0)
                        continue;

                if (phase == 0) {
                        ra_node_page(sbi, le32_to_cpu(entry->nid));
                        continue;
                }

                /* Get an inode by ino while checking validity */
                if (!is_alive(sbi, entry, &dni, start_addr + off, &nofs))
                        continue;

                if (phase == 1) {
                        ra_node_page(sbi, dni.ino);
                        continue;
                }

                ofs_in_node = le16_to_cpu(entry->ofs_in_node);

                if (phase == 2) {
                        inode = f2fs_iget(sb, dni.ino);
                        if (IS_ERR(inode) || is_bad_inode(inode))
                                continue;

                        /* if it is an encrypted inode, defer it to phase 3 */
                        if (f2fs_encrypted_inode(inode) &&
                                                S_ISREG(inode->i_mode)) {
                                add_gc_inode(gc_list, inode);
                                continue;
                        }

                        start_bidx = start_bidx_of_node(nofs, inode);
                        data_page = get_read_data_page(inode,
                                        start_bidx + ofs_in_node, READA, true);
                        if (IS_ERR(data_page)) {
                                iput(inode);
                                continue;
                        }

                        f2fs_put_page(data_page, 0);
                        add_gc_inode(gc_list, inode);
                        continue;
                }

                /* phase 3 */
                inode = find_gc_inode(gc_list, dni.ino);
                if (inode) {
                        start_bidx = start_bidx_of_node(nofs, inode)
                                                                + ofs_in_node;
                        if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
                                move_encrypted_block(inode, start_bidx);
                        else
                                move_data_page(inode, start_bidx, gc_type);
                        stat_inc_data_blk_count(sbi, 1, gc_type);
                }
        }

        if (++phase < 4)
                goto next_step;
}
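
/*
 * Summary of the four phases walked above, each a full sweep over the
 * segment's summary entries:
 *
 *      phase 0: readahead the dnode page of every valid block;
 *      phase 1: verify liveness via is_alive() and readahead the
 *               owning inode's node page;
 *      phase 2: f2fs_iget() the inode, readahead the data page
 *               (plain files only) and park the inode in gc_list;
 *      phase 3: actually migrate each block with move_data_page()
 *               or move_encrypted_block().
 *
 * Front-loading the IO this way keeps the expensive phase 3 mostly
 * cache-hot.
 */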

static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
                        int gc_type)
{
        struct sit_info *sit_i = SIT_I(sbi);
        int ret;

        mutex_lock(&sit_i->sentry_lock);
        ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type,
                                              NO_CHECK_TYPE, LFS);
        mutex_unlock(&sit_i->sentry_lock);
        return ret;
}

static int do_garbage_collect(struct f2fs_sb_info *sbi,
                                unsigned int start_segno,
                                struct gc_inode_list *gc_list, int gc_type)
{
        struct page *sum_page;
        struct f2fs_summary_block *sum;
        struct blk_plug plug;
        unsigned int segno = start_segno;
        unsigned int end_segno = start_segno + sbi->segs_per_sec;
        int seg_freed = 0;
        unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ?
                                                SUM_TYPE_DATA : SUM_TYPE_NODE;

        /* read ahead multiple SSA blocks that have contiguous addresses */
        if (sbi->segs_per_sec > 1)
                ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno),
                                        sbi->segs_per_sec, META_SSA, true);

        /* grab a reference to every summary page in the section */
        while (segno < end_segno) {
                sum_page = get_sum_page(sbi, segno++);
                unlock_page(sum_page);
        }

        blk_start_plug(&plug);

        for (segno = start_segno; segno < end_segno; segno++) {
                /* find the segment summary of the victim */
                sum_page = find_get_page(META_MAPPING(sbi),
                                        GET_SUM_BLOCK(sbi, segno));
                f2fs_bug_on(sbi, !PageUptodate(sum_page));
                /*
                 * drop the reference taken by find_get_page(); the one
                 * taken by get_sum_page() above still pins the page.
                 */
                f2fs_put_page(sum_page, 0);

                sum = page_address(sum_page);
                f2fs_bug_on(sbi, type != GET_SUM_TYPE((&sum->footer)));

                /*
                 * this is to avoid deadlock:
                 * - lock_page(sum_page)         - f2fs_replace_block
                 *  - check_valid_map()            - mutex_lock(sentry_lock)
                 *   - mutex_lock(sentry_lock)     - change_curseg()
                 *                                  - lock_page(sum_page)
                 */

                if (type == SUM_TYPE_NODE)
                        gc_node_segment(sbi, sum->entries, segno, gc_type);
                else
                        gc_data_segment(sbi, sum->entries, gc_list, segno,
                                                                gc_type);

                stat_inc_seg_count(sbi, type, gc_type);

                f2fs_put_page(sum_page, 0);
        }

        if (gc_type == FG_GC) {
                if (type == SUM_TYPE_NODE) {
                        struct writeback_control wbc = {
                                .sync_mode = WB_SYNC_ALL,
                                .nr_to_write = LONG_MAX,
                                .for_reclaim = 0,
                        };
                        sync_node_pages(sbi, 0, &wbc);
                } else {
                        f2fs_submit_merged_bio(sbi, DATA, WRITE);
                }
        }

        blk_finish_plug(&plug);

        if (gc_type == FG_GC) {
                while (start_segno < end_segno)
                        if (get_valid_blocks(sbi, start_segno++, 1) == 0)
                                seg_freed++;
        }

        stat_inc_call_count(sbi->stat_info);

        return seg_freed;
}

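/*
 * Locking contract worth noting: f2fs_gc() below is entered with
 * sbi->gc_mutex already held (gc_thread_func() takes it with
 * mutex_trylock(), other callers with mutex_lock()) and releases it at
 * the stop: label before returning. With sync == true it behaves as a
 * foreground GC and reports -EAGAIN when no section could be freed.
 */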
int f2fs_gc(struct f2fs_sb_info *sbi, bool sync)
{
        unsigned int segno;
        int gc_type = sync ? FG_GC : BG_GC;
        int sec_freed = 0, seg_freed;
        int ret = -EINVAL;
        struct cp_control cpc;
        struct gc_inode_list gc_list = {
                .ilist = LIST_HEAD_INIT(gc_list.ilist),
                .iroot = RADIX_TREE_INIT(GFP_NOFS),
        };

        cpc.reason = __get_cp_reason(sbi);
gc_more:
        segno = NULL_SEGNO;

        if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
                goto stop;
        if (unlikely(f2fs_cp_error(sbi))) {
                ret = -EIO;
                goto stop;
        }

        if (gc_type == BG_GC && has_not_enough_free_secs(sbi, sec_freed)) {
                gc_type = FG_GC;
                /*
                 * If there is no victim and no prefree segment but we
                 * still do not have enough free sections, we should
                 * flush dent/node blocks and do garbage collection.
                 */
                if (__get_victim(sbi, &segno, gc_type) || prefree_segments(sbi))
                        write_checkpoint(sbi, &cpc);
                else if (has_not_enough_free_secs(sbi, 0))
                        write_checkpoint(sbi, &cpc);
        }

        if (segno == NULL_SEGNO && !__get_victim(sbi, &segno, gc_type))
                goto stop;
        ret = 0;

        seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type);

        if (gc_type == FG_GC && seg_freed == sbi->segs_per_sec)
                sec_freed++;

        if (gc_type == FG_GC)
                sbi->cur_victim_sec = NULL_SEGNO;

        if (!sync) {
                if (has_not_enough_free_secs(sbi, sec_freed))
                        goto gc_more;

                if (gc_type == FG_GC)
                        write_checkpoint(sbi, &cpc);
        }
stop:
        mutex_unlock(&sbi->gc_mutex);

        put_gc_inode(&gc_list);

        if (sync)
                ret = sec_freed ? 0 : -EAGAIN;
        return ret;
}

void build_gc_manager(struct f2fs_sb_info *sbi)
{
        DIRTY_I(sbi)->v_ops = &default_v_ops;
}