/*
 * fs/f2fs/segment.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/prefetch.h>
#include <linux/kthread.h>
#include <linux/swap.h>
#include <linux/timer.h>
#include <linux/freezer.h>
#include <linux/sched/signal.h>

#include "f2fs.h"
#include "segment.h"
#include "node.h"
#include "gc.h"
#include "trace.h"
#include <trace/events/f2fs.h>

#define __reverse_ffz(x) __reverse_ffs(~(x))

static struct kmem_cache *discard_entry_slab;
static struct kmem_cache *discard_cmd_slab;
static struct kmem_cache *sit_entry_set_slab;
static struct kmem_cache *inmem_entry_slab;

static unsigned long __reverse_ulong(unsigned char *str)
{
        unsigned long tmp = 0;
        int shift = 24, idx = 0;

#if BITS_PER_LONG == 64
        shift = 56;
#endif
        while (shift >= 0) {
                tmp |= (unsigned long)str[idx++] << shift;
                shift -= BITS_PER_BYTE;
        }
        return tmp;
}

/*
 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
 * MSB and LSB are reversed in a byte by f2fs_set_bit.
 */
static inline unsigned long __reverse_ffs(unsigned long word)
{
        int num = 0;

#if BITS_PER_LONG == 64
        if ((word & 0xffffffff00000000UL) == 0)
                num += 32;
        else
                word >>= 32;
#endif
        if ((word & 0xffff0000) == 0)
                num += 16;
        else
                word >>= 16;

        if ((word & 0xff00) == 0)
                num += 8;
        else
                word >>= 8;

        if ((word & 0xf0) == 0)
                num += 4;
        else
                word >>= 4;

        if ((word & 0xc) == 0)
                num += 2;
        else
                word >>= 2;

        if ((word & 0x2) == 0)
                num += 1;
        return num;
}
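
/*
 * Example (illustrative values, 64-bit): f2fs_set_bit(0, bitmap) sets the
 * MSB of byte 0, so after __reverse_ulong() the word reads 0x8000... and
 * __reverse_ffs() maps it back to the f2fs bit index:
 *
 *      __reverse_ffs(0x8000000000000000UL) => 0
 *      __reverse_ffs(0x0100000000000000UL) => 7
 */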

/*
 * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
 * f2fs_set_bit makes MSB and LSB reversed in a byte.
 * @size must be a multiple of BITS_PER_LONG.
 * Example:
 *                             MSB <--> LSB
 *   f2fs_set_bit(0, bitmap) => 1000 0000
 *   f2fs_set_bit(7, bitmap) => 0000 0001
 */
static unsigned long __find_rev_next_bit(const unsigned long *addr,
                        unsigned long size, unsigned long offset)
{
        const unsigned long *p = addr + BIT_WORD(offset);
        unsigned long result = size;
        unsigned long tmp;

        if (offset >= size)
                return size;

        size -= (offset & ~(BITS_PER_LONG - 1));
        offset %= BITS_PER_LONG;

        while (1) {
                if (*p == 0)
                        goto pass;

                tmp = __reverse_ulong((unsigned char *)p);

                tmp &= ~0UL >> offset;
                if (size < BITS_PER_LONG)
                        tmp &= (~0UL << (BITS_PER_LONG - size));
                if (tmp)
                        goto found;
pass:
                if (size <= BITS_PER_LONG)
                        break;
                size -= BITS_PER_LONG;
                offset = 0;
                p++;
        }
        return result;
found:
        return result - size + __reverse_ffs(tmp);
}

static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
                        unsigned long size, unsigned long offset)
{
        const unsigned long *p = addr + BIT_WORD(offset);
        unsigned long result = size;
        unsigned long tmp;

        if (offset >= size)
                return size;

        size -= (offset & ~(BITS_PER_LONG - 1));
        offset %= BITS_PER_LONG;

        while (1) {
                if (*p == ~0UL)
                        goto pass;

                tmp = __reverse_ulong((unsigned char *)p);

                if (offset)
                        tmp |= ~0UL << (BITS_PER_LONG - offset);
                if (size < BITS_PER_LONG)
                        tmp |= ~0UL >> size;
                if (tmp != ~0UL)
                        goto found;
pass:
                if (size <= BITS_PER_LONG)
                        break;
                size -= BITS_PER_LONG;
                offset = 0;
                p++;
        }
        return result;
found:
        return result - size + __reverse_ffz(tmp);
}
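
/*
 * Usage sketch (hypothetical 64-bit map filled via f2fs_set_bit()): scan
 * for runs of set bits in f2fs bit order, the same pattern used by
 * add_discard_addrs() below:
 *
 *      unsigned int start = __find_rev_next_bit(map, 64, 0);
 *
 *      if (start < 64) {
 *              unsigned int end =
 *                      __find_rev_next_zero_bit(map, 64, start + 1);
 *              // [start, end) is a contiguous run of set bits
 *      }
 */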

bool need_SSR(struct f2fs_sb_info *sbi)
{
        int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
        int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
        int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);

        if (test_opt(sbi, LFS))
                return false;
        if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
                return true;

        return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
                        SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
}
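
/*
 * Illustration (hypothetical numbers): with 2 dirty node sections, 1 dirty
 * dentry section, no dirty imeta sections, min_ssr_sections = 0 and
 * reserved_sections() = 5, SSR is chosen once free_sections(sbi) drops to
 * 2 + 2 * 1 + 0 + 0 + 5 = 9 or fewer.
 */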

void register_inmem_page(struct inode *inode, struct page *page)
{
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct f2fs_inode_info *fi = F2FS_I(inode);
        struct inmem_pages *new;

        f2fs_trace_pid(page);

        set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
        SetPagePrivate(page);

        new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);

        /* add atomic page indices to the list */
        new->page = page;
        INIT_LIST_HEAD(&new->list);

        /* increase reference count with clean state */
        mutex_lock(&fi->inmem_lock);
        get_page(page);
        list_add_tail(&new->list, &fi->inmem_pages);
        spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
        if (list_empty(&fi->inmem_ilist))
                list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]);
        spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
        inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
        mutex_unlock(&fi->inmem_lock);

        trace_f2fs_register_inmem_page(page, INMEM);
}

static int __revoke_inmem_pages(struct inode *inode,
                                struct list_head *head, bool drop, bool recover)
{
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct inmem_pages *cur, *tmp;
        int err = 0;

        list_for_each_entry_safe(cur, tmp, head, list) {
                struct page *page = cur->page;

                if (drop)
                        trace_f2fs_commit_inmem_page(page, INMEM_DROP);

                lock_page(page);

                if (recover) {
                        struct dnode_of_data dn;
                        struct node_info ni;

                        trace_f2fs_commit_inmem_page(page, INMEM_REVOKE);
retry:
                        set_new_dnode(&dn, inode, NULL, NULL, 0);
                        err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
                        if (err) {
                                if (err == -ENOMEM) {
                                        congestion_wait(BLK_RW_ASYNC, HZ/50);
                                        cond_resched();
                                        goto retry;
                                }
                                err = -EAGAIN;
                                goto next;
                        }
                        get_node_info(sbi, dn.nid, &ni);
                        if (cur->old_addr == NEW_ADDR) {
                                invalidate_blocks(sbi, dn.data_blkaddr);
                                f2fs_update_data_blkaddr(&dn, NEW_ADDR);
                        } else
                                f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
                                        cur->old_addr, ni.version, true, true);
                        f2fs_put_dnode(&dn);
                }
next:
                /* we don't need to invalidate this in the successful case */
                if (drop || recover)
                        ClearPageUptodate(page);
                set_page_private(page, 0);
                ClearPagePrivate(page);
                f2fs_put_page(page, 1);

                list_del(&cur->list);
                kmem_cache_free(inmem_entry_slab, cur);
                dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
        }
        return err;
}

void drop_inmem_pages_all(struct f2fs_sb_info *sbi)
{
        struct list_head *head = &sbi->inode_list[ATOMIC_FILE];
        struct inode *inode;
        struct f2fs_inode_info *fi;
next:
        spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
        if (list_empty(head)) {
                spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
                return;
        }
        fi = list_first_entry(head, struct f2fs_inode_info, inmem_ilist);
        inode = igrab(&fi->vfs_inode);
        spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);

        if (inode) {
                drop_inmem_pages(inode);
                iput(inode);
        }
        congestion_wait(BLK_RW_ASYNC, HZ/50);
        cond_resched();
        goto next;
}

void drop_inmem_pages(struct inode *inode)
{
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct f2fs_inode_info *fi = F2FS_I(inode);

        mutex_lock(&fi->inmem_lock);
        __revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
        spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
        if (!list_empty(&fi->inmem_ilist))
                list_del_init(&fi->inmem_ilist);
        spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
        mutex_unlock(&fi->inmem_lock);

        clear_inode_flag(inode, FI_ATOMIC_FILE);
        clear_inode_flag(inode, FI_HOT_DATA);
        stat_dec_atomic_write(inode);
}

void drop_inmem_page(struct inode *inode, struct page *page)
{
        struct f2fs_inode_info *fi = F2FS_I(inode);
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct list_head *head = &fi->inmem_pages;
        struct inmem_pages *cur = NULL;

        f2fs_bug_on(sbi, !IS_ATOMIC_WRITTEN_PAGE(page));

        mutex_lock(&fi->inmem_lock);
        list_for_each_entry(cur, head, list) {
                if (cur->page == page)
                        break;
        }

        f2fs_bug_on(sbi, !cur || cur->page != page);
        list_del(&cur->list);
        mutex_unlock(&fi->inmem_lock);

        dec_page_count(sbi, F2FS_INMEM_PAGES);
        kmem_cache_free(inmem_entry_slab, cur);

        ClearPageUptodate(page);
        set_page_private(page, 0);
        ClearPagePrivate(page);
        f2fs_put_page(page, 0);

        trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE);
}

static int __commit_inmem_pages(struct inode *inode,
                                        struct list_head *revoke_list)
{
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct f2fs_inode_info *fi = F2FS_I(inode);
        struct inmem_pages *cur, *tmp;
        struct f2fs_io_info fio = {
                .sbi = sbi,
                .ino = inode->i_ino,
                .type = DATA,
                .op = REQ_OP_WRITE,
                .op_flags = REQ_SYNC | REQ_PRIO,
                .io_type = FS_DATA_IO,
        };
        pgoff_t last_idx = ULONG_MAX;
        int err = 0;

        list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
                struct page *page = cur->page;

                lock_page(page);
                if (page->mapping == inode->i_mapping) {
                        trace_f2fs_commit_inmem_page(page, INMEM);

                        set_page_dirty(page);
                        f2fs_wait_on_page_writeback(page, DATA, true);
                        if (clear_page_dirty_for_io(page)) {
                                inode_dec_dirty_pages(inode);
                                remove_dirty_inode(inode);
                        }
retry:
                        fio.page = page;
                        fio.old_blkaddr = NULL_ADDR;
                        fio.encrypted_page = NULL;
                        fio.need_lock = LOCK_DONE;
                        err = do_write_data_page(&fio);
                        if (err) {
                                if (err == -ENOMEM) {
                                        congestion_wait(BLK_RW_ASYNC, HZ/50);
                                        cond_resched();
                                        goto retry;
                                }
                                unlock_page(page);
                                break;
                        }
                        /* record old blkaddr for revoking */
                        cur->old_addr = fio.old_blkaddr;
                        last_idx = page->index;
                }
                unlock_page(page);
                list_move_tail(&cur->list, revoke_list);
        }

        if (last_idx != ULONG_MAX)
                f2fs_submit_merged_write_cond(sbi, inode, 0, last_idx, DATA);

        if (!err)
                __revoke_inmem_pages(inode, revoke_list, false, false);

        return err;
}

int commit_inmem_pages(struct inode *inode)
{
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct f2fs_inode_info *fi = F2FS_I(inode);
        struct list_head revoke_list;
        int err;

        INIT_LIST_HEAD(&revoke_list);
        f2fs_balance_fs(sbi, true);
        f2fs_lock_op(sbi);

        set_inode_flag(inode, FI_ATOMIC_COMMIT);

        mutex_lock(&fi->inmem_lock);
        err = __commit_inmem_pages(inode, &revoke_list);
        if (err) {
                int ret;
                /*
                 * Try to revoke all committed pages. This can still fail
                 * due to lack of memory or some other reason; if that
                 * happens, -EAGAIN is returned, meaning the transaction
                 * has lost its integrity and the caller should use a
                 * journal to recover, or rewrite and commit the last
                 * transaction. For any other error number, the filesystem
                 * itself has already done the revoking.
                 */
                ret = __revoke_inmem_pages(inode, &revoke_list, false, true);
                if (ret)
                        err = ret;

                /* drop all uncommitted pages */
                __revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
        }
        spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
        if (!list_empty(&fi->inmem_ilist))
                list_del_init(&fi->inmem_ilist);
        spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
        mutex_unlock(&fi->inmem_lock);

        clear_inode_flag(inode, FI_ATOMIC_COMMIT);

        f2fs_unlock_op(sbi);
        return err;
}
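
/*
 * Atomic-write life cycle, in brief: register_inmem_page() pins each dirty
 * page on fi->inmem_pages; commit_inmem_pages() writes them out under
 * f2fs_lock_op(), recording every old block address so that, on failure,
 * __revoke_inmem_pages() can restore the pre-commit addresses; and
 * drop_inmem_pages() throws the whole set away on abort.
 */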

/*
 * This function balances dirty node and dentry pages.
 * In addition, it controls garbage collection.
 */
void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
{
#ifdef CONFIG_F2FS_FAULT_INJECTION
        if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
                f2fs_show_injection_info(FAULT_CHECKPOINT);
                f2fs_stop_checkpoint(sbi, false);
        }
#endif

        /* balance_fs_bg work may still be pending */
        if (need && excess_cached_nats(sbi))
                f2fs_balance_fs_bg(sbi);

        /*
         * If there are too many dirty dir/node pages without enough free
         * segments, we should do GC, which may end with a checkpoint.
         */
        if (has_not_enough_free_secs(sbi, 0, 0)) {
                mutex_lock(&sbi->gc_mutex);
                f2fs_gc(sbi, false, false, NULL_SEGNO);
        }
}

void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
{
        /* try to shrink extent cache when there is not enough memory */
        if (!available_free_memory(sbi, EXTENT_CACHE))
                f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER);

        /* check the # of cached NAT entries */
        if (!available_free_memory(sbi, NAT_ENTRIES))
                try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);

        if (!available_free_memory(sbi, FREE_NIDS))
                try_to_free_nids(sbi, MAX_FREE_NIDS);
        else
                build_free_nids(sbi, false, false);

        if (!is_idle(sbi) && !excess_dirty_nats(sbi))
                return;

        /* checkpoint is the only way to shrink partial cached entries */
        if (!available_free_memory(sbi, NAT_ENTRIES) ||
                        !available_free_memory(sbi, INO_ENTRIES) ||
                        excess_prefree_segs(sbi) ||
                        excess_dirty_nats(sbi) ||
                        f2fs_time_over(sbi, CP_TIME)) {
                if (test_opt(sbi, DATA_FLUSH)) {
                        struct blk_plug plug;

                        blk_start_plug(&plug);
                        sync_dirty_inodes(sbi, FILE_INODE);
                        blk_finish_plug(&plug);
                }
                f2fs_sync_fs(sbi->sb, true);
                stat_inc_bg_cp_count(sbi->stat_info);
        }
}

static int __submit_flush_wait(struct f2fs_sb_info *sbi,
                                struct block_device *bdev)
{
        struct bio *bio = f2fs_bio_alloc(sbi, 0, true);
        int ret;

        bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH;
        bio_set_dev(bio, bdev);
        ret = submit_bio_wait(bio);
        bio_put(bio);

        trace_f2fs_issue_flush(bdev, test_opt(sbi, NOBARRIER),
                                test_opt(sbi, FLUSH_MERGE), ret);
        return ret;
}

static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino)
{
        int ret = 0;
        int i;

        if (!sbi->s_ndevs)
                return __submit_flush_wait(sbi, sbi->sb->s_bdev);

        for (i = 0; i < sbi->s_ndevs; i++) {
                if (!is_dirty_device(sbi, ino, i, FLUSH_INO))
                        continue;
                ret = __submit_flush_wait(sbi, FDEV(i).bdev);
                if (ret)
                        break;
        }
        return ret;
}

static int issue_flush_thread(void *data)
{
        struct f2fs_sb_info *sbi = data;
        struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
        wait_queue_head_t *q = &fcc->flush_wait_queue;
repeat:
        if (kthread_should_stop())
                return 0;

        sb_start_intwrite(sbi->sb);

        if (!llist_empty(&fcc->issue_list)) {
                struct flush_cmd *cmd, *next;
                int ret;

                fcc->dispatch_list = llist_del_all(&fcc->issue_list);
                fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);

                cmd = llist_entry(fcc->dispatch_list, struct flush_cmd, llnode);

                ret = submit_flush_wait(sbi, cmd->ino);
                atomic_inc(&fcc->issued_flush);

                llist_for_each_entry_safe(cmd, next,
                                          fcc->dispatch_list, llnode) {
                        cmd->ret = ret;
                        complete(&cmd->wait);
                }
                fcc->dispatch_list = NULL;
        }

        sb_end_intwrite(sbi->sb);

        wait_event_interruptible(*q,
                kthread_should_stop() || !llist_empty(&fcc->issue_list));
        goto repeat;
}

int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
{
        struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
        struct flush_cmd cmd;
        int ret;

        if (test_opt(sbi, NOBARRIER))
                return 0;

        if (!test_opt(sbi, FLUSH_MERGE)) {
                ret = submit_flush_wait(sbi, ino);
                atomic_inc(&fcc->issued_flush);
                return ret;
        }

        if (atomic_inc_return(&fcc->issing_flush) == 1 || sbi->s_ndevs > 1) {
                ret = submit_flush_wait(sbi, ino);
                atomic_dec(&fcc->issing_flush);

                atomic_inc(&fcc->issued_flush);
                return ret;
        }

        cmd.ino = ino;
        init_completion(&cmd.wait);

        llist_add(&cmd.llnode, &fcc->issue_list);

        /* update issue_list before we wake up issue_flush thread */
        smp_mb();

        if (waitqueue_active(&fcc->flush_wait_queue))
                wake_up(&fcc->flush_wait_queue);

        if (fcc->f2fs_issue_flush) {
                wait_for_completion(&cmd.wait);
                atomic_dec(&fcc->issing_flush);
        } else {
                struct llist_node *list;

                list = llist_del_all(&fcc->issue_list);
                if (!list) {
                        wait_for_completion(&cmd.wait);
                        atomic_dec(&fcc->issing_flush);
                } else {
                        struct flush_cmd *tmp, *next;

                        ret = submit_flush_wait(sbi, ino);

                        llist_for_each_entry_safe(tmp, next, list, llnode) {
                                if (tmp == &cmd) {
                                        cmd.ret = ret;
                                        atomic_dec(&fcc->issing_flush);
                                        continue;
                                }
                                tmp->ret = ret;
                                complete(&tmp->wait);
                        }
                }
        }

        return cmd.ret;
}
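
/*
 * Flush merging, in brief: each caller queues a flush_cmd on the lock-less
 * fcc->issue_list and sleeps on its completion; issue_flush_thread() drains
 * the list, issues one PREFLUSH bio on behalf of all waiters, and completes
 * each of them with the shared result. A typical caller just does:
 *
 *      ret = f2fs_issue_flush(sbi, inode->i_ino);
 */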

int create_flush_cmd_control(struct f2fs_sb_info *sbi)
{
        dev_t dev = sbi->sb->s_bdev->bd_dev;
        struct flush_cmd_control *fcc;
        int err = 0;

        if (SM_I(sbi)->fcc_info) {
                fcc = SM_I(sbi)->fcc_info;
                if (fcc->f2fs_issue_flush)
                        return err;
                goto init_thread;
        }

        fcc = f2fs_kzalloc(sbi, sizeof(struct flush_cmd_control), GFP_KERNEL);
        if (!fcc)
                return -ENOMEM;
        atomic_set(&fcc->issued_flush, 0);
        atomic_set(&fcc->issing_flush, 0);
        init_waitqueue_head(&fcc->flush_wait_queue);
        init_llist_head(&fcc->issue_list);
        SM_I(sbi)->fcc_info = fcc;
        if (!test_opt(sbi, FLUSH_MERGE))
                return err;

init_thread:
        fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
                                "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
        if (IS_ERR(fcc->f2fs_issue_flush)) {
                err = PTR_ERR(fcc->f2fs_issue_flush);
                kfree(fcc);
                SM_I(sbi)->fcc_info = NULL;
                return err;
        }

        return err;
}

void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
{
        struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;

        if (fcc && fcc->f2fs_issue_flush) {
                struct task_struct *flush_thread = fcc->f2fs_issue_flush;

                fcc->f2fs_issue_flush = NULL;
                kthread_stop(flush_thread);
        }
        if (free) {
                kfree(fcc);
                SM_I(sbi)->fcc_info = NULL;
        }
}

int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
{
        int ret = 0, i;

        if (!sbi->s_ndevs)
                return 0;

        for (i = 1; i < sbi->s_ndevs; i++) {
                if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
                        continue;
                ret = __submit_flush_wait(sbi, FDEV(i).bdev);
                if (ret)
                        break;

                spin_lock(&sbi->dev_lock);
                f2fs_clear_bit(i, (char *)&sbi->dirty_device);
                spin_unlock(&sbi->dev_lock);
        }

        return ret;
}

static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
                enum dirty_type dirty_type)
{
        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

        /* need not be added */
        if (IS_CURSEG(sbi, segno))
                return;

        if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
                dirty_i->nr_dirty[dirty_type]++;

        if (dirty_type == DIRTY) {
                struct seg_entry *sentry = get_seg_entry(sbi, segno);
                enum dirty_type t = sentry->type;

                if (unlikely(t >= DIRTY)) {
                        f2fs_bug_on(sbi, 1);
                        return;
                }
                if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
                        dirty_i->nr_dirty[t]++;
        }
}

static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
                enum dirty_type dirty_type)
{
        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

        if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
                dirty_i->nr_dirty[dirty_type]--;

        if (dirty_type == DIRTY) {
                struct seg_entry *sentry = get_seg_entry(sbi, segno);
                enum dirty_type t = sentry->type;

                if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
                        dirty_i->nr_dirty[t]--;

                if (get_valid_blocks(sbi, segno, true) == 0)
                        clear_bit(GET_SEC_FROM_SEG(sbi, segno),
                                                dirty_i->victim_secmap);
        }
}

/*
 * This must not fail with an error such as -ENOMEM, since adding a dirty
 * entry to the seglist is not a critical operation.
 * If a given segment is one of the current working segments, it is not added.
 */
static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
{
        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
        unsigned short valid_blocks;

        if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
                return;

        mutex_lock(&dirty_i->seglist_lock);

        valid_blocks = get_valid_blocks(sbi, segno, false);

        if (valid_blocks == 0) {
                __locate_dirty_segment(sbi, segno, PRE);
                __remove_dirty_segment(sbi, segno, DIRTY);
        } else if (valid_blocks < sbi->blocks_per_seg) {
                __locate_dirty_segment(sbi, segno, DIRTY);
        } else {
                /* Recovery routine with SSR needs this */
                __remove_dirty_segment(sbi, segno, DIRTY);
        }

        mutex_unlock(&dirty_i->seglist_lock);
}
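
/*
 * Summary of the transitions above, keyed on a segment's valid_blocks:
 *
 *      == 0                    -> PRE list (can become a free segment)
 *      0 < n < blocks_per_seg  -> DIRTY list (GC/SSR candidate)
 *      == blocks_per_seg       -> dropped from DIRTY (fully valid)
 */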

static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
                struct block_device *bdev, block_t lstart,
                block_t start, block_t len)
{
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        struct list_head *pend_list;
        struct discard_cmd *dc;

        f2fs_bug_on(sbi, !len);

        pend_list = &dcc->pend_list[plist_idx(len)];

        dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS);
        INIT_LIST_HEAD(&dc->list);
        dc->bdev = bdev;
        dc->lstart = lstart;
        dc->start = start;
        dc->len = len;
        dc->ref = 0;
        dc->state = D_PREP;
        dc->error = 0;
        init_completion(&dc->wait);
        list_add_tail(&dc->list, pend_list);
        atomic_inc(&dcc->discard_cmd_cnt);
        dcc->undiscard_blks += len;

        return dc;
}

static struct discard_cmd *__attach_discard_cmd(struct f2fs_sb_info *sbi,
                                struct block_device *bdev, block_t lstart,
                                block_t start, block_t len,
                                struct rb_node *parent, struct rb_node **p)
{
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        struct discard_cmd *dc;

        dc = __create_discard_cmd(sbi, bdev, lstart, start, len);

        rb_link_node(&dc->rb_node, parent, p);
        rb_insert_color(&dc->rb_node, &dcc->root);

        return dc;
}

static void __detach_discard_cmd(struct discard_cmd_control *dcc,
                                                        struct discard_cmd *dc)
{
        if (dc->state == D_DONE)
                atomic_dec(&dcc->issing_discard);

        list_del(&dc->list);
        rb_erase(&dc->rb_node, &dcc->root);
        dcc->undiscard_blks -= dc->len;

        kmem_cache_free(discard_cmd_slab, dc);

        atomic_dec(&dcc->discard_cmd_cnt);
}

static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
                                                        struct discard_cmd *dc)
{
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;

        trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len);

        f2fs_bug_on(sbi, dc->ref);

        if (dc->error == -EOPNOTSUPP)
                dc->error = 0;

        if (dc->error)
                f2fs_msg(sbi->sb, KERN_INFO,
                        "Issue discard(%u, %u, %u) failed, ret: %d",
                        dc->lstart, dc->start, dc->len, dc->error);
        __detach_discard_cmd(dcc, dc);
}

static void f2fs_submit_discard_endio(struct bio *bio)
{
        struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;

        dc->error = blk_status_to_errno(bio->bi_status);
        dc->state = D_DONE;
        complete_all(&dc->wait);
        bio_put(bio);
}

static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
                                block_t start, block_t end)
{
#ifdef CONFIG_F2FS_CHECK_FS
        struct seg_entry *sentry;
        unsigned int segno;
        block_t blk = start;
        unsigned long offset, size, max_blocks = sbi->blocks_per_seg;
        unsigned long *map;

        while (blk < end) {
                segno = GET_SEGNO(sbi, blk);
                sentry = get_seg_entry(sbi, segno);
                offset = GET_BLKOFF_FROM_SEG0(sbi, blk);

                if (end < START_BLOCK(sbi, segno + 1))
                        size = GET_BLKOFF_FROM_SEG0(sbi, end);
                else
                        size = max_blocks;
                map = (unsigned long *)(sentry->cur_valid_map);
                offset = __find_rev_next_bit(map, size, offset);
                f2fs_bug_on(sbi, offset != size);
                blk = START_BLOCK(sbi, segno + 1);
        }
#endif
}

/* this function is copied from blkdev_issue_discard() in block/blk-lib.c */
static void __submit_discard_cmd(struct f2fs_sb_info *sbi,
                                                struct discard_policy *dpolicy,
                                                struct discard_cmd *dc)
{
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
                                        &(dcc->fstrim_list) : &(dcc->wait_list);
        struct bio *bio = NULL;
        int flag = dpolicy->sync ? REQ_SYNC : 0;

        if (dc->state != D_PREP)
                return;

        trace_f2fs_issue_discard(dc->bdev, dc->start, dc->len);

        dc->error = __blkdev_issue_discard(dc->bdev,
                                SECTOR_FROM_BLOCK(dc->start),
                                SECTOR_FROM_BLOCK(dc->len),
                                GFP_NOFS, 0, &bio);
        if (!dc->error) {
                /* must be set before submission to avoid D_DONE right away */
                dc->state = D_SUBMIT;
                atomic_inc(&dcc->issued_discard);
                atomic_inc(&dcc->issing_discard);
                if (bio) {
                        bio->bi_private = dc;
                        bio->bi_end_io = f2fs_submit_discard_endio;
                        bio->bi_opf |= flag;
                        submit_bio(bio);
                        list_move_tail(&dc->list, wait_list);
                        __check_sit_bitmap(sbi, dc->start, dc->start + dc->len);

                        f2fs_update_iostat(sbi, FS_DISCARD, 1);
                }
        } else {
                __remove_discard_cmd(sbi, dc);
        }
}
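
/*
 * Note: __blkdev_issue_discard() only prepares a bio chain; it is submitted
 * here so that f2fs_submit_discard_endio() can record the result, mark the
 * command D_DONE and wake any waiter sleeping on dc->wait.
 */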

static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
                                struct block_device *bdev, block_t lstart,
                                block_t start, block_t len,
                                struct rb_node **insert_p,
                                struct rb_node *insert_parent)
{
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        struct rb_node **p;
        struct rb_node *parent = NULL;
        struct discard_cmd *dc = NULL;

        if (insert_p && insert_parent) {
                parent = insert_parent;
                p = insert_p;
                goto do_insert;
        }

        p = __lookup_rb_tree_for_insert(sbi, &dcc->root, &parent, lstart);
do_insert:
        dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent, p);
        if (!dc)
                return NULL;

        return dc;
}

static void __relocate_discard_cmd(struct discard_cmd_control *dcc,
                                                struct discard_cmd *dc)
{
        list_move_tail(&dc->list, &dcc->pend_list[plist_idx(dc->len)]);
}

static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
                                struct discard_cmd *dc, block_t blkaddr)
{
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        struct discard_info di = dc->di;
        bool modified = false;

        if (dc->state == D_DONE || dc->len == 1) {
                __remove_discard_cmd(sbi, dc);
                return;
        }

        dcc->undiscard_blks -= di.len;

        if (blkaddr > di.lstart) {
                dc->len = blkaddr - dc->lstart;
                dcc->undiscard_blks += dc->len;
                __relocate_discard_cmd(dcc, dc);
                modified = true;
        }

        if (blkaddr < di.lstart + di.len - 1) {
                if (modified) {
                        __insert_discard_tree(sbi, dc->bdev, blkaddr + 1,
                                        di.start + blkaddr + 1 - di.lstart,
                                        di.lstart + di.len - 1 - blkaddr,
                                        NULL, NULL);
                } else {
                        dc->lstart++;
                        dc->len--;
                        dc->start++;
                        dcc->undiscard_blks += dc->len;
                        __relocate_discard_cmd(dcc, dc);
                }
        }
}
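
/*
 * Example (hypothetical command): punching blkaddr 104 out of a pending
 * discard with lstart = 100 and len = 10 trims the command to [100, 104)
 * and inserts a new one covering [105, 110), leaving block 104 untouched.
 */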

static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
                                struct block_device *bdev, block_t lstart,
                                block_t start, block_t len)
{
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
        struct discard_cmd *dc;
        struct discard_info di = {0};
        struct rb_node **insert_p = NULL, *insert_parent = NULL;
        block_t end = lstart + len;

        mutex_lock(&dcc->cmd_lock);

        dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root,
                                        NULL, lstart,
                                        (struct rb_entry **)&prev_dc,
                                        (struct rb_entry **)&next_dc,
                                        &insert_p, &insert_parent, true);
        if (dc)
                prev_dc = dc;

        if (!prev_dc) {
                di.lstart = lstart;
                di.len = next_dc ? next_dc->lstart - lstart : len;
                di.len = min(di.len, len);
                di.start = start;
        }

        while (1) {
                struct rb_node *node;
                bool merged = false;
                struct discard_cmd *tdc = NULL;

                if (prev_dc) {
                        di.lstart = prev_dc->lstart + prev_dc->len;
                        if (di.lstart < lstart)
                                di.lstart = lstart;
                        if (di.lstart >= end)
                                break;

                        if (!next_dc || next_dc->lstart > end)
                                di.len = end - di.lstart;
                        else
                                di.len = next_dc->lstart - di.lstart;
                        di.start = start + di.lstart - lstart;
                }

                if (!di.len)
                        goto next;

                if (prev_dc && prev_dc->state == D_PREP &&
                        prev_dc->bdev == bdev &&
                        __is_discard_back_mergeable(&di, &prev_dc->di)) {
                        prev_dc->di.len += di.len;
                        dcc->undiscard_blks += di.len;
                        __relocate_discard_cmd(dcc, prev_dc);
                        di = prev_dc->di;
                        tdc = prev_dc;
                        merged = true;
                }

                if (next_dc && next_dc->state == D_PREP &&
                        next_dc->bdev == bdev &&
                        __is_discard_front_mergeable(&di, &next_dc->di)) {
                        next_dc->di.lstart = di.lstart;
                        next_dc->di.len += di.len;
                        next_dc->di.start = di.start;
                        dcc->undiscard_blks += di.len;
                        __relocate_discard_cmd(dcc, next_dc);
                        if (tdc)
                                __remove_discard_cmd(sbi, tdc);
                        merged = true;
                }

                if (!merged) {
                        __insert_discard_tree(sbi, bdev, di.lstart, di.start,
                                                        di.len, NULL, NULL);
                }
 next:
                prev_dc = next_dc;
                if (!prev_dc)
                        break;

                node = rb_next(&prev_dc->rb_node);
                next_dc = rb_entry_safe(node, struct discard_cmd, rb_node);
        }

        mutex_unlock(&dcc->cmd_lock);
}
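
/*
 * Example (hypothetical commands): with pending discards [100, 110) and
 * [120, 130), queueing [110, 120) first back-merges into the former, then
 * front-merges the result into the latter, leaving one [100, 130) command.
 */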

static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
                struct block_device *bdev, block_t blkstart, block_t blklen)
{
        block_t lblkstart = blkstart;

        trace_f2fs_queue_discard(bdev, blkstart, blklen);

        if (sbi->s_ndevs) {
                int devi = f2fs_target_device_index(sbi, blkstart);

                blkstart -= FDEV(devi).start_blk;
        }
        __update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
        return 0;
}

static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
                                        struct discard_policy *dpolicy,
                                        unsigned int start, unsigned int end)
{
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
        struct rb_node **insert_p = NULL, *insert_parent = NULL;
        struct discard_cmd *dc;
        struct blk_plug plug;
        int issued;

next:
        issued = 0;

        mutex_lock(&dcc->cmd_lock);
        f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root));

        dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root,
                                        NULL, start,
                                        (struct rb_entry **)&prev_dc,
                                        (struct rb_entry **)&next_dc,
                                        &insert_p, &insert_parent, true);
        if (!dc)
                dc = next_dc;

        blk_start_plug(&plug);

        while (dc && dc->lstart <= end) {
                struct rb_node *node;

                if (dc->len < dpolicy->granularity)
                        goto skip;

                if (dc->state != D_PREP) {
                        list_move_tail(&dc->list, &dcc->fstrim_list);
                        goto skip;
                }

                __submit_discard_cmd(sbi, dpolicy, dc);

                if (++issued >= dpolicy->max_requests) {
                        start = dc->lstart + dc->len;

                        blk_finish_plug(&plug);
                        mutex_unlock(&dcc->cmd_lock);

                        schedule();

                        goto next;
                }
skip:
                node = rb_next(&dc->rb_node);
                dc = rb_entry_safe(node, struct discard_cmd, rb_node);

                if (fatal_signal_pending(current))
                        break;
        }

        blk_finish_plug(&plug);
        mutex_unlock(&dcc->cmd_lock);
}

static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
                                        struct discard_policy *dpolicy)
{
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        struct list_head *pend_list;
        struct discard_cmd *dc, *tmp;
        struct blk_plug plug;
        int i, iter = 0, issued = 0;
        bool io_interrupted = false;

        for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
                if (i + 1 < dpolicy->granularity)
                        break;
                pend_list = &dcc->pend_list[i];

                mutex_lock(&dcc->cmd_lock);
                if (list_empty(pend_list))
                        goto next;
                f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root));
                blk_start_plug(&plug);
                list_for_each_entry_safe(dc, tmp, pend_list, list) {
                        f2fs_bug_on(sbi, dc->state != D_PREP);

                        if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
                                                                !is_idle(sbi)) {
                                io_interrupted = true;
                                goto skip;
                        }

                        __submit_discard_cmd(sbi, dpolicy, dc);
                        issued++;
skip:
                        if (++iter >= dpolicy->max_requests)
                                break;
                }
                blk_finish_plug(&plug);
next:
                mutex_unlock(&dcc->cmd_lock);

                if (iter >= dpolicy->max_requests)
                        break;
        }

        if (!issued && io_interrupted)
                issued = -1;

        return issued;
}

static bool __drop_discard_cmd(struct f2fs_sb_info *sbi)
{
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        struct list_head *pend_list;
        struct discard_cmd *dc, *tmp;
        int i;
        bool dropped = false;

        mutex_lock(&dcc->cmd_lock);
        for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
                pend_list = &dcc->pend_list[i];
                list_for_each_entry_safe(dc, tmp, pend_list, list) {
                        f2fs_bug_on(sbi, dc->state != D_PREP);
                        __remove_discard_cmd(sbi, dc);
                        dropped = true;
                }
        }
        mutex_unlock(&dcc->cmd_lock);

        return dropped;
}

void drop_discard_cmd(struct f2fs_sb_info *sbi)
{
        __drop_discard_cmd(sbi);
}

static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi,
                                                        struct discard_cmd *dc)
{
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        unsigned int len = 0;

        wait_for_completion_io(&dc->wait);
        mutex_lock(&dcc->cmd_lock);
        f2fs_bug_on(sbi, dc->state != D_DONE);
        dc->ref--;
        if (!dc->ref) {
                if (!dc->error)
                        len = dc->len;
                __remove_discard_cmd(sbi, dc);
        }
        mutex_unlock(&dcc->cmd_lock);

        return len;
}

static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi,
                                                struct discard_policy *dpolicy,
                                                block_t start, block_t end)
{
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
                                        &(dcc->fstrim_list) : &(dcc->wait_list);
        struct discard_cmd *dc, *tmp;
        bool need_wait;
        unsigned int trimmed = 0;

next:
        need_wait = false;

        mutex_lock(&dcc->cmd_lock);
        list_for_each_entry_safe(dc, tmp, wait_list, list) {
                if (dc->lstart + dc->len <= start || end <= dc->lstart)
                        continue;
                if (dc->len < dpolicy->granularity)
                        continue;
                if (dc->state == D_DONE && !dc->ref) {
                        wait_for_completion_io(&dc->wait);
                        if (!dc->error)
                                trimmed += dc->len;
                        __remove_discard_cmd(sbi, dc);
                } else {
                        dc->ref++;
                        need_wait = true;
                        break;
                }
        }
        mutex_unlock(&dcc->cmd_lock);

        if (need_wait) {
                trimmed += __wait_one_discard_bio(sbi, dc);
                goto next;
        }

        return trimmed;
}

static void __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
                                                struct discard_policy *dpolicy)
{
        __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
}

/* The caller should hold the global mutex, &sit_i->sentry_lock */
static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
{
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        struct discard_cmd *dc;
        bool need_wait = false;

        mutex_lock(&dcc->cmd_lock);
        dc = (struct discard_cmd *)__lookup_rb_tree(&dcc->root, NULL, blkaddr);
        if (dc) {
                if (dc->state == D_PREP) {
                        __punch_discard_cmd(sbi, dc, blkaddr);
                } else {
                        dc->ref++;
                        need_wait = true;
                }
        }
        mutex_unlock(&dcc->cmd_lock);

        if (need_wait)
                __wait_one_discard_bio(sbi, dc);
}

void stop_discard_thread(struct f2fs_sb_info *sbi)
{
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;

        if (dcc && dcc->f2fs_issue_discard) {
                struct task_struct *discard_thread = dcc->f2fs_issue_discard;

                dcc->f2fs_issue_discard = NULL;
                kthread_stop(discard_thread);
        }
}

/* Called from f2fs_put_super() */
bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
{
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        struct discard_policy dpolicy;
        bool dropped;

        init_discard_policy(&dpolicy, DPOLICY_UMOUNT, dcc->discard_granularity);
        __issue_discard_cmd(sbi, &dpolicy);
        dropped = __drop_discard_cmd(sbi);
        __wait_all_discard_cmd(sbi, &dpolicy);

        return dropped;
}

static int issue_discard_thread(void *data)
{
        struct f2fs_sb_info *sbi = data;
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        wait_queue_head_t *q = &dcc->discard_wait_queue;
        struct discard_policy dpolicy;
        unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
        int issued;

        set_freezable();

        do {
                init_discard_policy(&dpolicy, DPOLICY_BG,
                                        dcc->discard_granularity);

                wait_event_interruptible_timeout(*q,
                                kthread_should_stop() || freezing(current) ||
                                dcc->discard_wake,
                                msecs_to_jiffies(wait_ms));
                if (try_to_freeze())
                        continue;
                if (f2fs_readonly(sbi->sb))
                        continue;
                if (kthread_should_stop())
                        return 0;

                if (dcc->discard_wake) {
                        dcc->discard_wake = 0;
                        if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
                                init_discard_policy(&dpolicy,
                                                        DPOLICY_FORCE, 1);
                }

                sb_start_intwrite(sbi->sb);

                issued = __issue_discard_cmd(sbi, &dpolicy);
                if (issued) {
                        __wait_all_discard_cmd(sbi, &dpolicy);
                        wait_ms = dpolicy.min_interval;
                } else {
                        wait_ms = dpolicy.max_interval;
                }

                sb_end_intwrite(sbi->sb);

        } while (!kthread_should_stop());
        return 0;
}

#ifdef CONFIG_BLK_DEV_ZONED
static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
                struct block_device *bdev, block_t blkstart, block_t blklen)
{
        sector_t sector, nr_sects;
        block_t lblkstart = blkstart;
        int devi = 0;

        if (sbi->s_ndevs) {
                devi = f2fs_target_device_index(sbi, blkstart);
                blkstart -= FDEV(devi).start_blk;
        }

        /*
         * We need to know the type of the zone: for conventional zones,
         * use regular discard if the drive supports it. For sequential
         * zones, reset the zone write pointer.
         */
        switch (get_blkz_type(sbi, bdev, blkstart)) {

        case BLK_ZONE_TYPE_CONVENTIONAL:
                if (!blk_queue_discard(bdev_get_queue(bdev)))
                        return 0;
                return __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
        case BLK_ZONE_TYPE_SEQWRITE_REQ:
        case BLK_ZONE_TYPE_SEQWRITE_PREF:
                sector = SECTOR_FROM_BLOCK(blkstart);
                nr_sects = SECTOR_FROM_BLOCK(blklen);

                if (sector & (bdev_zone_sectors(bdev) - 1) ||
                                nr_sects != bdev_zone_sectors(bdev)) {
                        f2fs_msg(sbi->sb, KERN_INFO,
                                "(%d) %s: Unaligned discard attempted (block %x + %x)",
                                devi, sbi->s_ndevs ? FDEV(devi).path: "",
                                blkstart, blklen);
                        return -EIO;
                }
                trace_f2fs_issue_reset_zone(bdev, blkstart);
                return blkdev_reset_zones(bdev, sector,
                                          nr_sects, GFP_NOFS);
        default:
                /* Unknown zone type: broken device ? */
                return -EIO;
        }
}
#endif
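
/*
 * Note on the zoned path above: f2fs only resets whole zones, so a discard
 * that is not aligned to bdev_zone_sectors() on a sequential zone indicates
 * a mismatch between segment layout and zone geometry and is reported as
 * -EIO rather than being silently shrunk.
 */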
1483
1484static int __issue_discard_async(struct f2fs_sb_info *sbi,
1485                struct block_device *bdev, block_t blkstart, block_t blklen)
1486{
1487#ifdef CONFIG_BLK_DEV_ZONED
1488        if (f2fs_sb_mounted_blkzoned(sbi->sb) &&
1489                                bdev_zoned_model(bdev) != BLK_ZONED_NONE)
1490                return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
1491#endif
1492        return __queue_discard_cmd(sbi, bdev, blkstart, blklen);
1493}
1494
1495static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
1496                                block_t blkstart, block_t blklen)
1497{
1498        sector_t start = blkstart, len = 0;
1499        struct block_device *bdev;
1500        struct seg_entry *se;
1501        unsigned int offset;
1502        block_t i;
1503        int err = 0;
1504
1505        bdev = f2fs_target_device(sbi, blkstart, NULL);
1506
1507        for (i = blkstart; i < blkstart + blklen; i++, len++) {
1508                if (i != start) {
1509                        struct block_device *bdev2 =
1510                                f2fs_target_device(sbi, i, NULL);
1511
1512                        if (bdev2 != bdev) {
1513                                err = __issue_discard_async(sbi, bdev,
1514                                                start, len);
1515                                if (err)
1516                                        return err;
1517                                bdev = bdev2;
1518                                start = i;
1519                                len = 0;
1520                        }
1521                }
1522
1523                se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
1524                offset = GET_BLKOFF_FROM_SEG0(sbi, i);
1525
1526                if (!f2fs_test_and_set_bit(offset, se->discard_map))
1527                        sbi->discard_blks--;
1528        }
1529
1530        if (len)
1531                err = __issue_discard_async(sbi, bdev, start, len);
1532        return err;
1533}
1534
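/*
 * Example of the device-boundary splitting above (hypothetical layout).
 * With dev0 mapping logical blocks [0, 1000) and dev1 mapping
 * [1000, 2000), f2fs_issue_discard(sbi, 990, 20) ends up issuing:
 *
 *	__issue_discard_async(sbi, bdev0, 990, 10);	// tail of dev0
 *	__issue_discard_async(sbi, bdev1, 1000, 10);	// head of dev1
 *
 * because f2fs_target_device() starts returning a different bdev once
 * the scan crosses the boundary; the discard bitmap update still runs
 * once per block either way.
 */
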
1535static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
1536                                                        bool check_only)
1537{
1538        int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
1539        int max_blocks = sbi->blocks_per_seg;
1540        struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
1541        unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
1542        unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
1543        unsigned long *discard_map = (unsigned long *)se->discard_map;
1544        unsigned long *dmap = SIT_I(sbi)->tmp_map;
1545        unsigned int start = 0, end = -1;
1546        bool force = (cpc->reason & CP_DISCARD);
1547        struct discard_entry *de = NULL;
1548        struct list_head *head = &SM_I(sbi)->dcc_info->entry_list;
1549        int i;
1550
1551        if (se->valid_blocks == max_blocks || !f2fs_discard_en(sbi))
1552                return false;
1553
1554        if (!force) {
1555                if (!test_opt(sbi, DISCARD) || !se->valid_blocks ||
1556                        SM_I(sbi)->dcc_info->nr_discards >=
1557                                SM_I(sbi)->dcc_info->max_discards)
1558                        return false;
1559        }
1560
1561        /* SIT_VBLOCK_MAP_SIZE should be a multiple of sizeof(unsigned long) */
1562        for (i = 0; i < entries; i++)
1563                dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
1564                                (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
1565
1566        while (force || SM_I(sbi)->dcc_info->nr_discards <=
1567                                SM_I(sbi)->dcc_info->max_discards) {
1568                start = __find_rev_next_bit(dmap, max_blocks, end + 1);
1569                if (start >= max_blocks)
1570                        break;
1571
1572                end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
1573                if (force && start && end != max_blocks
1574                                        && (end - start) < cpc->trim_minlen)
1575                        continue;
1576
1577                if (check_only)
1578                        return true;
1579
1580                if (!de) {
1581                        de = f2fs_kmem_cache_alloc(discard_entry_slab,
1582                                                                GFP_F2FS_ZERO);
1583                        de->start_blkaddr = START_BLOCK(sbi, cpc->trim_start);
1584                        list_add_tail(&de->list, head);
1585                }
1586
1587                for (i = start; i < end; i++)
1588                        __set_bit_le(i, (void *)de->discard_map);
1589
1590                SM_I(sbi)->dcc_info->nr_discards += end - start;
1591        }
1592        return false;
1593}
1594
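/*
 * A worked example of the dmap computation above. In the normal
 * (non-force) case a block is a discard candidate when it was valid at
 * the last checkpoint but has been freed since:
 *
 *	ckpt_map: 1 1 0 1	// valid at last checkpoint
 *	cur_map:  1 0 0 1	// valid now
 *	dmap:     0 1 0 0	// (cur ^ ckpt) & ckpt: block 1 is discardable
 *
 * In the force (FITRIM) case, ~ckpt_map & ~discard_map instead selects
 * blocks that are invalid on disk and have not been discarded yet.
 */
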
1595void release_discard_addrs(struct f2fs_sb_info *sbi)
1596{
1597        struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list);
1598        struct discard_entry *entry, *this;
1599
1600        /* drop caches */
1601        list_for_each_entry_safe(entry, this, head, list) {
1602                list_del(&entry->list);
1603                kmem_cache_free(discard_entry_slab, entry);
1604        }
1605}
1606
1607/*
1608 * clear_prefree_segments should be called after the checkpoint is done.
1609 */
1610static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
1611{
1612        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1613        unsigned int segno;
1614
1615        mutex_lock(&dirty_i->seglist_lock);
1616        for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
1617                __set_test_and_free(sbi, segno);
1618        mutex_unlock(&dirty_i->seglist_lock);
1619}
1620
1621void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1622{
1623        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1624        struct list_head *head = &dcc->entry_list;
1625        struct discard_entry *entry, *this;
1626        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1627        unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
1628        unsigned int start = 0, end = -1;
1629        unsigned int secno, start_segno;
1630        bool force = (cpc->reason & CP_DISCARD);
1631
1632        mutex_lock(&dirty_i->seglist_lock);
1633
1634        while (1) {
1635                int i;
1636                start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
1637                if (start >= MAIN_SEGS(sbi))
1638                        break;
1639                end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
1640                                                                start + 1);
1641
1642                for (i = start; i < end; i++)
1643                        clear_bit(i, prefree_map);
1644
1645                dirty_i->nr_dirty[PRE] -= end - start;
1646
1647                if (!test_opt(sbi, DISCARD))
1648                        continue;
1649
1650                if (force && start >= cpc->trim_start &&
1651                                        (end - 1) <= cpc->trim_end)
1652                                continue;
1653
1654                if (!test_opt(sbi, LFS) || sbi->segs_per_sec == 1) {
1655                        f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
1656                                (end - start) << sbi->log_blocks_per_seg);
1657                        continue;
1658                }
1659next:
1660                secno = GET_SEC_FROM_SEG(sbi, start);
1661                start_segno = GET_SEG_FROM_SEC(sbi, secno);
1662                if (!IS_CURSEC(sbi, secno) &&
1663                        !get_valid_blocks(sbi, start, true))
1664                        f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno),
1665                                sbi->segs_per_sec << sbi->log_blocks_per_seg);
1666
1667                start = start_segno + sbi->segs_per_sec;
1668                if (start < end)
1669                        goto next;
1670                else
1671                        end = start - 1;
1672        }
1673        mutex_unlock(&dirty_i->seglist_lock);
1674
1675        /* send small discards */
1676        list_for_each_entry_safe(entry, this, head, list) {
1677                unsigned int cur_pos = 0, next_pos, len, total_len = 0;
1678                bool is_valid = test_bit_le(0, entry->discard_map);
1679
1680find_next:
1681                if (is_valid) {
1682                        next_pos = find_next_zero_bit_le(entry->discard_map,
1683                                        sbi->blocks_per_seg, cur_pos);
1684                        len = next_pos - cur_pos;
1685
1686                        if (f2fs_sb_mounted_blkzoned(sbi->sb) ||
1687                            (force && len < cpc->trim_minlen))
1688                                goto skip;
1689
1690                        f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos,
1691                                                                        len);
1692                        total_len += len;
1693                } else {
1694                        next_pos = find_next_bit_le(entry->discard_map,
1695                                        sbi->blocks_per_seg, cur_pos);
1696                }
1697skip:
1698                cur_pos = next_pos;
1699                is_valid = !is_valid;
1700
1701                if (cur_pos < sbi->blocks_per_seg)
1702                        goto find_next;
1703
1704                list_del(&entry->list);
1705                dcc->nr_discards -= total_len;
1706                kmem_cache_free(discard_entry_slab, entry);
1707        }
1708
1709        wake_up_discard_thread(sbi, false);
1710}
1711
1712void init_discard_policy(struct discard_policy *dpolicy,
1713                                int discard_type, unsigned int granularity)
1714{
1715        /* common policy */
1716        dpolicy->type = discard_type;
1717        dpolicy->sync = true;
1718        dpolicy->granularity = granularity;
1719
1720        dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
1721        dpolicy->io_aware_gran = MAX_PLIST_NUM;
1722
1723        if (discard_type == DPOLICY_BG) {
1724                dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
1725                dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
1726                dpolicy->io_aware = true;
1727        } else if (discard_type == DPOLICY_FORCE) {
1728                dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
1729                dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
1730                dpolicy->io_aware = true;
1731        } else if (discard_type == DPOLICY_FSTRIM) {
1732                dpolicy->io_aware = false;
1733        } else if (discard_type == DPOLICY_UMOUNT) {
1734                dpolicy->io_aware = false;
1735        }
1736}
1737
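/*
 * Typical usage, mirroring the call in f2fs_trim_fs() below:
 *
 *	struct discard_policy dpolicy;
 *
 *	init_discard_policy(&dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
 *	__issue_discard_cmd_range(sbi, &dpolicy, start_block, end_block);
 *
 * Only DPOLICY_BG and DPOLICY_FORCE set io_aware, so background discard
 * backs off when user I/O is pending, while fstrim and umount do not.
 */
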
1738static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
1739{
1740        dev_t dev = sbi->sb->s_bdev->bd_dev;
1741        struct discard_cmd_control *dcc;
1742        int err = 0, i;
1743
1744        if (SM_I(sbi)->dcc_info) {
1745                dcc = SM_I(sbi)->dcc_info;
1746                goto init_thread;
1747        }
1748
1749        dcc = f2fs_kzalloc(sbi, sizeof(struct discard_cmd_control), GFP_KERNEL);
1750        if (!dcc)
1751                return -ENOMEM;
1752
1753        dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
1754        INIT_LIST_HEAD(&dcc->entry_list);
1755        for (i = 0; i < MAX_PLIST_NUM; i++)
1756                INIT_LIST_HEAD(&dcc->pend_list[i]);
1757        INIT_LIST_HEAD(&dcc->wait_list);
1758        INIT_LIST_HEAD(&dcc->fstrim_list);
1759        mutex_init(&dcc->cmd_lock);
1760        atomic_set(&dcc->issued_discard, 0);
1761        atomic_set(&dcc->issing_discard, 0);
1762        atomic_set(&dcc->discard_cmd_cnt, 0);
1763        dcc->nr_discards = 0;
1764        dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
1765        dcc->undiscard_blks = 0;
1766        dcc->root = RB_ROOT;
1767
1768        init_waitqueue_head(&dcc->discard_wait_queue);
1769        SM_I(sbi)->dcc_info = dcc;
1770init_thread:
1771        dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
1772                                "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
1773        if (IS_ERR(dcc->f2fs_issue_discard)) {
1774                err = PTR_ERR(dcc->f2fs_issue_discard);
1775                kfree(dcc);
1776                SM_I(sbi)->dcc_info = NULL;
1777                return err;
1778        }
1779
1780        return err;
1781}
1782
1783static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
1784{
1785        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1786
1787        if (!dcc)
1788                return;
1789
1790        stop_discard_thread(sbi);
1791
1792        kfree(dcc);
1793        SM_I(sbi)->dcc_info = NULL;
1794}
1795
1796static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
1797{
1798        struct sit_info *sit_i = SIT_I(sbi);
1799
1800        if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
1801                sit_i->dirty_sentries++;
1802                return false;
1803        }
1804
1805        return true;
1806}
1807
1808static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
1809                                        unsigned int segno, int modified)
1810{
1811        struct seg_entry *se = get_seg_entry(sbi, segno);
1812        se->type = type;
1813        if (modified)
1814                __mark_sit_entry_dirty(sbi, segno);
1815}
1816
1817static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
1818{
1819        struct seg_entry *se;
1820        unsigned int segno, offset;
1821        long int new_vblocks;
1822        bool exist;
1823#ifdef CONFIG_F2FS_CHECK_FS
1824        bool mir_exist;
1825#endif
1826
1827        segno = GET_SEGNO(sbi, blkaddr);
1828
1829        se = get_seg_entry(sbi, segno);
1830        new_vblocks = se->valid_blocks + del;
1831        offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
1832
1833        f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) ||
1834                                (new_vblocks > sbi->blocks_per_seg)));
1835
1836        se->valid_blocks = new_vblocks;
1837        se->mtime = get_mtime(sbi);
1838        SIT_I(sbi)->max_mtime = se->mtime;
1839
1840        /* Update valid block bitmap */
1841        if (del > 0) {
1842                exist = f2fs_test_and_set_bit(offset, se->cur_valid_map);
1843#ifdef CONFIG_F2FS_CHECK_FS
1844                mir_exist = f2fs_test_and_set_bit(offset,
1845                                                se->cur_valid_map_mir);
1846                if (unlikely(exist != mir_exist)) {
1847                        f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error "
1848                                "when setting bitmap, blk:%u, old bit:%d",
1849                                blkaddr, exist);
1850                        f2fs_bug_on(sbi, 1);
1851                }
1852#endif
1853                if (unlikely(exist)) {
1854                        f2fs_msg(sbi->sb, KERN_ERR,
1855                                "Bitmap was wrongly set, blk:%u", blkaddr);
1856                        f2fs_bug_on(sbi, 1);
1857                        se->valid_blocks--;
1858                        del = 0;
1859                }
1860
1861                if (f2fs_discard_en(sbi) &&
1862                        !f2fs_test_and_set_bit(offset, se->discard_map))
1863                        sbi->discard_blks--;
1864
1865                /* don't overwrite by SSR to keep node chain */
1866                if (se->type == CURSEG_WARM_NODE) {
1867                        if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
1868                                se->ckpt_valid_blocks++;
1869                }
1870        } else {
1871                exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map);
1872#ifdef CONFIG_F2FS_CHECK_FS
1873                mir_exist = f2fs_test_and_clear_bit(offset,
1874                                                se->cur_valid_map_mir);
1875                if (unlikely(exist != mir_exist)) {
1876                        f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error "
1877                                "when clearing bitmap, blk:%u, old bit:%d",
1878                                blkaddr, exist);
1879                        f2fs_bug_on(sbi, 1);
1880                }
1881#endif
1882                if (unlikely(!exist)) {
1883                        f2fs_msg(sbi->sb, KERN_ERR,
1884                                "Bitmap was wrongly cleared, blk:%u", blkaddr);
1885                        f2fs_bug_on(sbi, 1);
1886                        se->valid_blocks++;
1887                        del = 0;
1888                }
1889
1890                if (f2fs_discard_en(sbi) &&
1891                        f2fs_test_and_clear_bit(offset, se->discard_map))
1892                        sbi->discard_blks++;
1893        }
1894        if (!f2fs_test_bit(offset, se->ckpt_valid_map))
1895                se->ckpt_valid_blocks += del;
1896
1897        __mark_sit_entry_dirty(sbi, segno);
1898
1899        /* update total number of valid blocks to be written in ckpt area */
1900        SIT_I(sbi)->written_valid_blocks += del;
1901
1902        if (sbi->segs_per_sec > 1)
1903                get_sec_entry(sbi, segno)->valid_blocks += del;
1904}
1905
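/*
 * The 'del' argument above is +1 when a block is allocated and -1 when
 * it is invalidated, e.g. (see invalidate_blocks() just below and
 * allocate_data_block() later in this file):
 *
 *	update_sit_entry(sbi, *new_blkaddr, 1);	// allocate_data_block()
 *	update_sit_entry(sbi, addr, -1);	// invalidate_blocks()
 *
 * so valid_blocks, the valid bitmaps and sbi->discard_blks all move in
 * lockstep under sentry_lock.
 */
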
1906void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
1907{
1908        unsigned int segno = GET_SEGNO(sbi, addr);
1909        struct sit_info *sit_i = SIT_I(sbi);
1910
1911        f2fs_bug_on(sbi, addr == NULL_ADDR);
1912        if (addr == NEW_ADDR)
1913                return;
1914
1915        /* add it into sit main buffer */
1916        down_write(&sit_i->sentry_lock);
1917
1918        update_sit_entry(sbi, addr, -1);
1919
1920        /* add it into dirty seglist */
1921        locate_dirty_segment(sbi, segno);
1922
1923        up_write(&sit_i->sentry_lock);
1924}
1925
1926bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
1927{
1928        struct sit_info *sit_i = SIT_I(sbi);
1929        unsigned int segno, offset;
1930        struct seg_entry *se;
1931        bool is_cp = false;
1932
1933        if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
1934                return true;
1935
1936        down_read(&sit_i->sentry_lock);
1937
1938        segno = GET_SEGNO(sbi, blkaddr);
1939        se = get_seg_entry(sbi, segno);
1940        offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
1941
1942        if (f2fs_test_bit(offset, se->ckpt_valid_map))
1943                is_cp = true;
1944
1945        up_read(&sit_i->sentry_lock);
1946
1947        return is_cp;
1948}
1949
1950/*
1951 * This function should be called with the curseg_mutex lock held
1952 */
1953static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
1954                                        struct f2fs_summary *sum)
1955{
1956        struct curseg_info *curseg = CURSEG_I(sbi, type);
1957        void *addr = curseg->sum_blk;
1958        addr += curseg->next_blkoff * sizeof(struct f2fs_summary);
1959        memcpy(addr, sum, sizeof(struct f2fs_summary));
1960}
1961
1962/*
1963 * Calculate the number of current summary pages for writing
1964 */
1965int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
1966{
1967        int valid_sum_count = 0;
1968        int i, sum_in_page;
1969
1970        for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
1971                if (sbi->ckpt->alloc_type[i] == SSR)
1972                        valid_sum_count += sbi->blocks_per_seg;
1973                else {
1974                        if (for_ra)
1975                                valid_sum_count += le16_to_cpu(
1976                                        F2FS_CKPT(sbi)->cur_data_blkoff[i]);
1977                        else
1978                                valid_sum_count += curseg_blkoff(sbi, i);
1979                }
1980        }
1981
1982        sum_in_page = (PAGE_SIZE - 2 * SUM_JOURNAL_SIZE -
1983                        SUM_FOOTER_SIZE) / SUMMARY_SIZE;
1984        if (valid_sum_count <= sum_in_page)
1985                return 1;
1986        else if ((valid_sum_count - sum_in_page) <=
1987                (PAGE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
1988                return 2;
1989        return 3;
1990}
1991
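/*
 * A worked instance of the math above, assuming 4KB blocks
 * (SUMMARY_SIZE = 7, SUM_FOOTER_SIZE = 5, SUM_JOURNAL_SIZE = 507):
 *
 *	sum_in_page = (4096 - 2 * 507 - 5) / 7 = 439
 *
 * so the first compacted page holds up to 439 summaries next to the two
 * journals, the second adds (4096 - 5) / 7 = 584 more, and since the
 * three data logs contribute at most 3 * 512 = 1536 entries, three
 * pages always suffice.
 */
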
1992/*
1993 * Caller should put this summary page
1994 */
1995struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
1996{
1997        return get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno));
1998}
1999
2000void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr)
2001{
2002        struct page *page = grab_meta_page(sbi, blk_addr);
2003
2004        memcpy(page_address(page), src, PAGE_SIZE);
2005        set_page_dirty(page);
2006        f2fs_put_page(page, 1);
2007}
2008
2009static void write_sum_page(struct f2fs_sb_info *sbi,
2010                        struct f2fs_summary_block *sum_blk, block_t blk_addr)
2011{
2012        update_meta_page(sbi, (void *)sum_blk, blk_addr);
2013}
2014
2015static void write_current_sum_page(struct f2fs_sb_info *sbi,
2016                                                int type, block_t blk_addr)
2017{
2018        struct curseg_info *curseg = CURSEG_I(sbi, type);
2019        struct page *page = grab_meta_page(sbi, blk_addr);
2020        struct f2fs_summary_block *src = curseg->sum_blk;
2021        struct f2fs_summary_block *dst;
2022
2023        dst = (struct f2fs_summary_block *)page_address(page);
2024
2025        mutex_lock(&curseg->curseg_mutex);
2026
2027        down_read(&curseg->journal_rwsem);
2028        memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
2029        up_read(&curseg->journal_rwsem);
2030
2031        memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
2032        memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);
2033
2034        mutex_unlock(&curseg->curseg_mutex);
2035
2036        set_page_dirty(page);
2037        f2fs_put_page(page, 1);
2038}
2039
2040static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
2041{
2042        struct curseg_info *curseg = CURSEG_I(sbi, type);
2043        unsigned int segno = curseg->segno + 1;
2044        struct free_segmap_info *free_i = FREE_I(sbi);
2045
2046        if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
2047                return !test_bit(segno, free_i->free_segmap);
2048        return 0;
2049}
2050
2051/*
2052 * Find a new segment in the free segment bitmap, in the right order.
2053 * This function must succeed; otherwise it is a BUG.
2054 */
2055static void get_new_segment(struct f2fs_sb_info *sbi,
2056                        unsigned int *newseg, bool new_sec, int dir)
2057{
2058        struct free_segmap_info *free_i = FREE_I(sbi);
2059        unsigned int segno, secno, zoneno;
2060        unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
2061        unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg);
2062        unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg);
2063        unsigned int left_start = hint;
2064        bool init = true;
2065        int go_left = 0;
2066        int i;
2067
2068        spin_lock(&free_i->segmap_lock);
2069
2070        if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
2071                segno = find_next_zero_bit(free_i->free_segmap,
2072                        GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1);
2073                if (segno < GET_SEG_FROM_SEC(sbi, hint + 1))
2074                        goto got_it;
2075        }
2076find_other_zone:
2077        secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
2078        if (secno >= MAIN_SECS(sbi)) {
2079                if (dir == ALLOC_RIGHT) {
2080                        secno = find_next_zero_bit(free_i->free_secmap,
2081                                                        MAIN_SECS(sbi), 0);
2082                        f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
2083                } else {
2084                        go_left = 1;
2085                        left_start = hint - 1;
2086                }
2087        }
2088        if (go_left == 0)
2089                goto skip_left;
2090
2091        while (test_bit(left_start, free_i->free_secmap)) {
2092                if (left_start > 0) {
2093                        left_start--;
2094                        continue;
2095                }
2096                left_start = find_next_zero_bit(free_i->free_secmap,
2097                                                        MAIN_SECS(sbi), 0);
2098                f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
2099                break;
2100        }
2101        secno = left_start;
2102skip_left:
2103        segno = GET_SEG_FROM_SEC(sbi, secno);
2104        zoneno = GET_ZONE_FROM_SEC(sbi, secno);
2105
2106        /* give up on finding another zone */
2107        if (!init)
2108                goto got_it;
2109        if (sbi->secs_per_zone == 1)
2110                goto got_it;
2111        if (zoneno == old_zoneno)
2112                goto got_it;
2113        if (dir == ALLOC_LEFT) {
2114                if (!go_left && zoneno + 1 >= total_zones)
2115                        goto got_it;
2116                if (go_left && zoneno == 0)
2117                        goto got_it;
2118        }
2119        for (i = 0; i < NR_CURSEG_TYPE; i++)
2120                if (CURSEG_I(sbi, i)->zone == zoneno)
2121                        break;
2122
2123        if (i < NR_CURSEG_TYPE) {
2124                /* zone is in use, try another */
2125                if (go_left)
2126                        hint = zoneno * sbi->secs_per_zone - 1;
2127                else if (zoneno + 1 >= total_zones)
2128                        hint = 0;
2129                else
2130                        hint = (zoneno + 1) * sbi->secs_per_zone;
2131                init = false;
2132                goto find_other_zone;
2133        }
2134got_it:
2135        /* set it as dirty segment in free segmap */
2136        f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
2137        __set_inuse(sbi, segno);
2138        *newseg = segno;
2139        spin_unlock(&free_i->segmap_lock);
2140}
2141
2142static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
2143{
2144        struct curseg_info *curseg = CURSEG_I(sbi, type);
2145        struct summary_footer *sum_footer;
2146
2147        curseg->segno = curseg->next_segno;
2148        curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
2149        curseg->next_blkoff = 0;
2150        curseg->next_segno = NULL_SEGNO;
2151
2152        sum_footer = &(curseg->sum_blk->footer);
2153        memset(sum_footer, 0, sizeof(struct summary_footer));
2154        if (IS_DATASEG(type))
2155                SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
2156        if (IS_NODESEG(type))
2157                SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
2158        __set_sit_entry_type(sbi, type, curseg->segno, modified);
2159}
2160
2161static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
2162{
2163        /* if segs_per_sec is larger than 1, we need to keep the original policy. */
2164        if (sbi->segs_per_sec != 1)
2165                return CURSEG_I(sbi, type)->segno;
2166
2167        if (type == CURSEG_HOT_DATA || IS_NODESEG(type))
2168                return 0;
2169
2170        if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
2171                return SIT_I(sbi)->last_victim[ALLOC_NEXT];
2172        return CURSEG_I(sbi, type)->segno;
2173}
2174
2175/*
2176 * Allocate a current working segment.
2177 * This function always allocates a free segment in LFS manner.
2178 */
2179static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
2180{
2181        struct curseg_info *curseg = CURSEG_I(sbi, type);
2182        unsigned int segno = curseg->segno;
2183        int dir = ALLOC_LEFT;
2184
2185        write_sum_page(sbi, curseg->sum_blk,
2186                                GET_SUM_BLOCK(sbi, segno));
2187        if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
2188                dir = ALLOC_RIGHT;
2189
2190        if (test_opt(sbi, NOHEAP))
2191                dir = ALLOC_RIGHT;
2192
2193        segno = __get_next_segno(sbi, type);
2194        get_new_segment(sbi, &segno, new_sec, dir);
2195        curseg->next_segno = segno;
2196        reset_curseg(sbi, type, 1);
2197        curseg->alloc_type = LFS;
2198}
2199
2200static void __next_free_blkoff(struct f2fs_sb_info *sbi,
2201                        struct curseg_info *seg, block_t start)
2202{
2203        struct seg_entry *se = get_seg_entry(sbi, seg->segno);
2204        int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
2205        unsigned long *target_map = SIT_I(sbi)->tmp_map;
2206        unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
2207        unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
2208        int i, pos;
2209
2210        for (i = 0; i < entries; i++)
2211                target_map[i] = ckpt_map[i] | cur_map[i];
2212
2213        pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
2214
2215        seg->next_blkoff = pos;
2216}
2217
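/*
 * Example of the SSR free-slot search above. A block may only be reused
 * if it is invalid both now and in the last checkpoint, so the two maps
 * are OR-ed before scanning:
 *
 *	cur_map:    1 0 1 0
 *	ckpt_map:   1 1 0 0
 *	target_map: 1 1 1 0	// ckpt | cur
 *
 * __find_rev_next_zero_bit() then returns offset 3, the first slot that
 * is free in both views.
 */
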
2218/*
2219 * If a segment is written in LFS manner, the next block offset is obtained by
2220 * increasing the current block offset. However, if a segment is written in
2221 * SSR manner, the next block offset is obtained by calling __next_free_blkoff.
2222 */
2223static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
2224                                struct curseg_info *seg)
2225{
2226        if (seg->alloc_type == SSR)
2227                __next_free_blkoff(sbi, seg, seg->next_blkoff + 1);
2228        else
2229                seg->next_blkoff++;
2230}
2231
2232/*
2233 * This function always allocates a used segment (from the dirty seglist) in
2234 * SSR manner, so it must recover the existing segment information of valid blocks.
2235 */
2236static void change_curseg(struct f2fs_sb_info *sbi, int type)
2237{
2238        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2239        struct curseg_info *curseg = CURSEG_I(sbi, type);
2240        unsigned int new_segno = curseg->next_segno;
2241        struct f2fs_summary_block *sum_node;
2242        struct page *sum_page;
2243
2244        write_sum_page(sbi, curseg->sum_blk,
2245                                GET_SUM_BLOCK(sbi, curseg->segno));
2246        __set_test_and_inuse(sbi, new_segno);
2247
2248        mutex_lock(&dirty_i->seglist_lock);
2249        __remove_dirty_segment(sbi, new_segno, PRE);
2250        __remove_dirty_segment(sbi, new_segno, DIRTY);
2251        mutex_unlock(&dirty_i->seglist_lock);
2252
2253        reset_curseg(sbi, type, 1);
2254        curseg->alloc_type = SSR;
2255        __next_free_blkoff(sbi, curseg, 0);
2256
2257        sum_page = get_sum_page(sbi, new_segno);
2258        sum_node = (struct f2fs_summary_block *)page_address(sum_page);
2259        memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
2260        f2fs_put_page(sum_page, 1);
2261}
2262
2263static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
2264{
2265        struct curseg_info *curseg = CURSEG_I(sbi, type);
2266        const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
2267        unsigned segno = NULL_SEGNO;
2268        int i, cnt;
2269        bool reversed = false;
2270
2271        /* need_SSR() already forces us to do this */
2272        if (v_ops->get_victim(sbi, &segno, BG_GC, type, SSR)) {
2273                curseg->next_segno = segno;
2274                return 1;
2275        }
2276
2277        /* For node segments, let's do SSR more intensively */
2278        if (IS_NODESEG(type)) {
2279                if (type >= CURSEG_WARM_NODE) {
2280                        reversed = true;
2281                        i = CURSEG_COLD_NODE;
2282                } else {
2283                        i = CURSEG_HOT_NODE;
2284                }
2285                cnt = NR_CURSEG_NODE_TYPE;
2286        } else {
2287                if (type >= CURSEG_WARM_DATA) {
2288                        reversed = true;
2289                        i = CURSEG_COLD_DATA;
2290                } else {
2291                        i = CURSEG_HOT_DATA;
2292                }
2293                cnt = NR_CURSEG_DATA_TYPE;
2294        }
2295
2296        for (; cnt-- > 0; reversed ? i-- : i++) {
2297                if (i == type)
2298                        continue;
2299                if (v_ops->get_victim(sbi, &segno, BG_GC, i, SSR)) {
2300                        curseg->next_segno = segno;
2301                        return 1;
2302                }
2303        }
2304        return 0;
2305}
2306
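/*
 * The fallback scan above tries the other logs of the same kind,
 * coldest-first for warm/cold types. For type == CURSEG_WARM_DATA the
 * victims are probed in the order:
 *
 *	CURSEG_COLD_DATA, CURSEG_HOT_DATA	// WARM itself is skipped
 *
 * while for type == CURSEG_HOT_DATA the scan runs upward through
 * CURSEG_WARM_DATA and CURSEG_COLD_DATA.
 */
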
2307/*
2308 * Flush out the current segment and replace it with a new segment.
2309 * This function must succeed; otherwise it is a BUG.
2310 */
2311static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
2312                                                int type, bool force)
2313{
2314        struct curseg_info *curseg = CURSEG_I(sbi, type);
2315
2316        if (force)
2317                new_curseg(sbi, type, true);
2318        else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
2319                                        type == CURSEG_WARM_NODE)
2320                new_curseg(sbi, type, false);
2321        else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type))
2322                new_curseg(sbi, type, false);
2323        else if (need_SSR(sbi) && get_ssr_segment(sbi, type))
2324                change_curseg(sbi, type);
2325        else
2326                new_curseg(sbi, type, false);
2327
2328        stat_inc_seg_type(sbi, curseg);
2329}
2330
2331void allocate_new_segments(struct f2fs_sb_info *sbi)
2332{
2333        struct curseg_info *curseg;
2334        unsigned int old_segno;
2335        int i;
2336
2337        down_write(&SIT_I(sbi)->sentry_lock);
2338
2339        for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
2340                curseg = CURSEG_I(sbi, i);
2341                old_segno = curseg->segno;
2342                SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
2343                locate_dirty_segment(sbi, old_segno);
2344        }
2345
2346        up_write(&SIT_I(sbi)->sentry_lock);
2347}
2348
2349static const struct segment_allocation default_salloc_ops = {
2350        .allocate_segment = allocate_segment_by_default,
2351};
2352
2353bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc)
2354{
2355        __u64 trim_start = cpc->trim_start;
2356        bool has_candidate = false;
2357
2358        down_write(&SIT_I(sbi)->sentry_lock);
2359        for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) {
2360                if (add_discard_addrs(sbi, cpc, true)) {
2361                        has_candidate = true;
2362                        break;
2363                }
2364        }
2365        up_write(&SIT_I(sbi)->sentry_lock);
2366
2367        cpc->trim_start = trim_start;
2368        return has_candidate;
2369}
2370
2371int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
2372{
2373        __u64 start = F2FS_BYTES_TO_BLK(range->start);
2374        __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
2375        unsigned int start_segno, end_segno, cur_segno;
2376        block_t start_block, end_block;
2377        struct cp_control cpc;
2378        struct discard_policy dpolicy;
2379        unsigned long long trimmed = 0;
2380        int err = 0;
2381
2382        if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
2383                return -EINVAL;
2384
2385        if (end <= MAIN_BLKADDR(sbi))
2386                goto out;
2387
2388        if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
2389                f2fs_msg(sbi->sb, KERN_WARNING,
2390                        "Found FS corruption, run fsck to fix.");
2391                goto out;
2392        }
2393
2394        /* start/end segment number in main_area */
2395        start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
2396        end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
2397                                                GET_SEGNO(sbi, end);
2398
2399        cpc.reason = CP_DISCARD;
2400        cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
2401
2402        /* do checkpoint to issue discard commands safely */
2403        for (cur_segno = start_segno; cur_segno <= end_segno;
2404                                        cur_segno = cpc.trim_end + 1) {
2405                cpc.trim_start = cur_segno;
2406
2407                if (sbi->discard_blks == 0)
2408                        break;
2409                else if (sbi->discard_blks < BATCHED_TRIM_BLOCKS(sbi))
2410                        cpc.trim_end = end_segno;
2411                else
2412                        cpc.trim_end = min_t(unsigned int,
2413                                rounddown(cur_segno +
2414                                BATCHED_TRIM_SEGMENTS(sbi),
2415                                sbi->segs_per_sec) - 1, end_segno);
2416
2417                mutex_lock(&sbi->gc_mutex);
2418                err = write_checkpoint(sbi, &cpc);
2419                mutex_unlock(&sbi->gc_mutex);
2420                if (err)
2421                        break;
2422
2423                schedule();
2424        }
2425
2426        start_block = START_BLOCK(sbi, start_segno);
2427        end_block = START_BLOCK(sbi, min(cur_segno, end_segno) + 1);
2428
2429        init_discard_policy(&dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
2430        __issue_discard_cmd_range(sbi, &dpolicy, start_block, end_block);
2431        trimmed = __wait_discard_cmd_range(sbi, &dpolicy,
2432                                        start_block, end_block);
2433out:
2434        range->len = F2FS_BLK_TO_BYTES(trimmed);
2435        return err;
2436}
2437
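/*
 * f2fs_trim_fs() is reached via the FITRIM ioctl. A minimal userspace
 * sketch (assuming an f2fs mount at /mnt/f2fs):
 *
 *	struct fstrim_range range = {
 *		.start = 0,
 *		.len = ULLONG_MAX,	// whole filesystem
 *		.minlen = 0,
 *	};
 *	int fd = open("/mnt/f2fs", O_RDONLY);
 *
 *	if (ioctl(fd, FITRIM, &range) == 0)
 *		printf("%llu bytes trimmed\n", range.len);
 *
 * On return, range.len carries F2FS_BLK_TO_BYTES(trimmed) as set above.
 */
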
2438static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
2439{
2440        struct curseg_info *curseg = CURSEG_I(sbi, type);
2441
2442        return curseg->next_blkoff < sbi->blocks_per_seg;
2444}
2445
2446int rw_hint_to_seg_type(enum rw_hint hint)
2447{
2448        switch (hint) {
2449        case WRITE_LIFE_SHORT:
2450                return CURSEG_HOT_DATA;
2451        case WRITE_LIFE_EXTREME:
2452                return CURSEG_COLD_DATA;
2453        default:
2454                return CURSEG_WARM_DATA;
2455        }
2456}
2457
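/*
 * The write hint consumed above can be set per file from userspace with
 * fcntl(2), e.g. (F_SET_RW_HINT and the RWH_* values come from
 * <linux/fcntl.h>):
 *
 *	uint64_t hint = RWH_WRITE_LIFE_SHORT;
 *
 *	fcntl(fd, F_SET_RW_HINT, &hint);	// steer data to CURSEG_HOT_DATA
 *
 * WRITE_LIFE_EXTREME maps to the cold data log; any other hint falls
 * through to warm data.
 */
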
2458static int __get_segment_type_2(struct f2fs_io_info *fio)
2459{
2460        if (fio->type == DATA)
2461                return CURSEG_HOT_DATA;
2462        else
2463                return CURSEG_HOT_NODE;
2464}
2465
2466static int __get_segment_type_4(struct f2fs_io_info *fio)
2467{
2468        if (fio->type == DATA) {
2469                struct inode *inode = fio->page->mapping->host;
2470
2471                if (S_ISDIR(inode->i_mode))
2472                        return CURSEG_HOT_DATA;
2473                else
2474                        return CURSEG_COLD_DATA;
2475        } else {
2476                if (IS_DNODE(fio->page) && is_cold_node(fio->page))
2477                        return CURSEG_WARM_NODE;
2478                else
2479                        return CURSEG_COLD_NODE;
2480        }
2481}
2482
2483static int __get_segment_type_6(struct f2fs_io_info *fio)
2484{
2485        if (fio->type == DATA) {
2486                struct inode *inode = fio->page->mapping->host;
2487
2488                if (is_cold_data(fio->page) || file_is_cold(inode))
2489                        return CURSEG_COLD_DATA;
2490                if (is_inode_flag_set(inode, FI_HOT_DATA))
2491                        return CURSEG_HOT_DATA;
2492                return rw_hint_to_seg_type(inode->i_write_hint);
2493        } else {
2494                if (IS_DNODE(fio->page))
2495                        return is_cold_node(fio->page) ? CURSEG_WARM_NODE :
2496                                                CURSEG_HOT_NODE;
2497                return CURSEG_COLD_NODE;
2498        }
2499}
2500
2501static int __get_segment_type(struct f2fs_io_info *fio)
2502{
2503        int type = 0;
2504
2505        switch (fio->sbi->active_logs) {
2506        case 2:
2507                type = __get_segment_type_2(fio);
2508                break;
2509        case 4:
2510                type = __get_segment_type_4(fio);
2511                break;
2512        case 6:
2513                type = __get_segment_type_6(fio);
2514                break;
2515        default:
2516                f2fs_bug_on(fio->sbi, true);
2517        }
2518
2519        if (IS_HOT(type))
2520                fio->temp = HOT;
2521        else if (IS_WARM(type))
2522                fio->temp = WARM;
2523        else
2524                fio->temp = COLD;
2525        return type;
2526}
2527
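/*
 * Summary of the six-log mapping above (f2fs marks dnodes of
 * non-directories cold, hence the node split):
 *
 *	HOT_DATA:  FI_HOT_DATA inodes, WRITE_LIFE_SHORT hint
 *	WARM_DATA: everything else (default hint)
 *	COLD_DATA: cold pages, cold files, WRITE_LIFE_EXTREME hint
 *	HOT_NODE:  direct node blocks of directories
 *	WARM_NODE: direct node blocks of regular files
 *	COLD_NODE: indirect node blocks
 *
 * IS_HOT/IS_WARM/IS_COLD then fold the log type into fio->temp, which
 * picks the write queue via sbi->write_io[fio->type] + fio->temp.
 */
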
2528void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
2529                block_t old_blkaddr, block_t *new_blkaddr,
2530                struct f2fs_summary *sum, int type,
2531                struct f2fs_io_info *fio, bool add_list)
2532{
2533        struct sit_info *sit_i = SIT_I(sbi);
2534        struct curseg_info *curseg = CURSEG_I(sbi, type);
2535
2536        down_read(&SM_I(sbi)->curseg_lock);
2537
2538        mutex_lock(&curseg->curseg_mutex);
2539        down_write(&sit_i->sentry_lock);
2540
2541        *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
2542
2543        f2fs_wait_discard_bio(sbi, *new_blkaddr);
2544
2545        /*
2546         * __add_sum_entry should be called with the curseg_mutex held
2547         * because this function updates a summary entry in the
2548         * current summary block.
2549         */
2550        __add_sum_entry(sbi, type, sum);
2551
2552        __refresh_next_blkoff(sbi, curseg);
2553
2554        stat_inc_block_count(sbi, curseg);
2555
2556        /*
2557         * SIT information should be updated before segment allocation,
2558         * since SSR needs latest valid block information.
2559         */
2560        update_sit_entry(sbi, *new_blkaddr, 1);
2561        if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
2562                update_sit_entry(sbi, old_blkaddr, -1);
2563
2564        if (!__has_curseg_space(sbi, type))
2565                sit_i->s_ops->allocate_segment(sbi, type, false);
2566
2567        /*
2568         * segment dirty status should be updated after segment allocation,
2569         * so we only need to update the status once, after the previous
2570         * segment has been closed.
2571         */
2572        locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
2573        locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr));
2574
2575        up_write(&sit_i->sentry_lock);
2576
2577        if (page && IS_NODESEG(type)) {
2578                fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
2579
2580                f2fs_inode_chksum_set(sbi, page);
2581        }
2582
2583        if (add_list) {
2584                struct f2fs_bio_info *io;
2585
2586                INIT_LIST_HEAD(&fio->list);
2587                fio->in_list = true;
2588                io = sbi->write_io[fio->type] + fio->temp;
2589                spin_lock(&io->io_lock);
2590                list_add_tail(&fio->list, &io->io_list);
2591                spin_unlock(&io->io_lock);
2592        }
2593
2594        mutex_unlock(&curseg->curseg_mutex);
2595
2596        up_read(&SM_I(sbi)->curseg_lock);
2597}
2598
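/*
 * Lock ordering in allocate_data_block() above, for reference:
 *
 *	SM_I(sbi)->curseg_lock (read)
 *	  -> curseg->curseg_mutex
 *	    -> sit_i->sentry_lock (write)
 *
 * __f2fs_replace_block() below takes the same three locks in the same
 * order (curseg_lock held for write), which keeps the two allocation
 * paths serialized against each other.
 */
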
2599static void update_device_state(struct f2fs_io_info *fio)
2600{
2601        struct f2fs_sb_info *sbi = fio->sbi;
2602        unsigned int devidx;
2603
2604        if (!sbi->s_ndevs)
2605                return;
2606
2607        devidx = f2fs_target_device_index(sbi, fio->new_blkaddr);
2608
2609        /* update device state for fsync */
2610        set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO);
2611
2612        /* update device state for checkpoint */
2613        if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
2614                spin_lock(&sbi->dev_lock);
2615                f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
2616                spin_unlock(&sbi->dev_lock);
2617        }
2618}
2619
2620static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
2621{
2622        int type = __get_segment_type(fio);
2623        int err;
2624
2625reallocate:
2626        allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
2627                        &fio->new_blkaddr, sum, type, fio, true);
2628
2629        /* write out the dirty page to the bdev */
2630        err = f2fs_submit_page_write(fio);
2631        if (err == -EAGAIN) {
2632                fio->old_blkaddr = fio->new_blkaddr;
2633                goto reallocate;
2634        } else if (!err) {
2635                update_device_state(fio);
2636        }
2637}
2638
2639void write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
2640                                        enum iostat_type io_type)
2641{
2642        struct f2fs_io_info fio = {
2643                .sbi = sbi,
2644                .type = META,
2645                .op = REQ_OP_WRITE,
2646                .op_flags = REQ_SYNC | REQ_META | REQ_PRIO,
2647                .old_blkaddr = page->index,
2648                .new_blkaddr = page->index,
2649                .page = page,
2650                .encrypted_page = NULL,
2651                .in_list = false,
2652        };
2653
2654        if (unlikely(page->index >= MAIN_BLKADDR(sbi)))
2655                fio.op_flags &= ~REQ_META;
2656
2657        set_page_writeback(page);
2658        f2fs_submit_page_write(&fio);
2659
2660        f2fs_update_iostat(sbi, io_type, F2FS_BLKSIZE);
2661}
2662
2663void write_node_page(unsigned int nid, struct f2fs_io_info *fio)
2664{
2665        struct f2fs_summary sum;
2666
2667        set_summary(&sum, nid, 0, 0);
2668        do_write_page(&sum, fio);
2669
2670        f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
2671}
2672
2673void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio)
2674{
2675        struct f2fs_sb_info *sbi = fio->sbi;
2676        struct f2fs_summary sum;
2677        struct node_info ni;
2678
2679        f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
2680        get_node_info(sbi, dn->nid, &ni);
2681        set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
2682        do_write_page(&sum, fio);
2683        f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
2684
2685        f2fs_update_iostat(sbi, fio->io_type, F2FS_BLKSIZE);
2686}
2687
2688int rewrite_data_page(struct f2fs_io_info *fio)
2689{
2690        int err;
2691
2692        fio->new_blkaddr = fio->old_blkaddr;
2693        stat_inc_inplace_blocks(fio->sbi);
2694
2695        err = f2fs_submit_page_bio(fio);
2696        if (!err)
2697                update_device_state(fio);
2698
2699        f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
2700
2701        return err;
2702}
2703
2704static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi,
2705                                                unsigned int segno)
2706{
2707        int i;
2708
2709        for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
2710                if (CURSEG_I(sbi, i)->segno == segno)
2711                        break;
2712        }
2713        return i;
2714}
2715
2716void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
2717                                block_t old_blkaddr, block_t new_blkaddr,
2718                                bool recover_curseg, bool recover_newaddr)
2719{
2720        struct sit_info *sit_i = SIT_I(sbi);
2721        struct curseg_info *curseg;
2722        unsigned int segno, old_cursegno;
2723        struct seg_entry *se;
2724        int type;
2725        unsigned short old_blkoff;
2726
2727        segno = GET_SEGNO(sbi, new_blkaddr);
2728        se = get_seg_entry(sbi, segno);
2729        type = se->type;
2730
2731        down_write(&SM_I(sbi)->curseg_lock);
2732
2733        if (!recover_curseg) {
2734                /* for recovery flow */
2735                if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
2736                        if (old_blkaddr == NULL_ADDR)
2737                                type = CURSEG_COLD_DATA;
2738                        else
2739                                type = CURSEG_WARM_DATA;
2740                }
2741        } else {
2742                if (IS_CURSEG(sbi, segno)) {
2743                        /* se->type is volatile due to SSR allocation */
2744                        type = __f2fs_get_curseg(sbi, segno);
2745                        f2fs_bug_on(sbi, type == NO_CHECK_TYPE);
2746                } else {
2747                        type = CURSEG_WARM_DATA;
2748                }
2749        }
2750
2751        f2fs_bug_on(sbi, !IS_DATASEG(type));
2752        curseg = CURSEG_I(sbi, type);
2753
2754        mutex_lock(&curseg->curseg_mutex);
2755        down_write(&sit_i->sentry_lock);
2756
2757        old_cursegno = curseg->segno;
2758        old_blkoff = curseg->next_blkoff;
2759
2760        /* change the current segment */
2761        if (segno != curseg->segno) {
2762                curseg->next_segno = segno;
2763                change_curseg(sbi, type);
2764        }
2765
2766        curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
2767        __add_sum_entry(sbi, type, sum);
2768
2769        if (!recover_curseg || recover_newaddr)
2770                update_sit_entry(sbi, new_blkaddr, 1);
2771        if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
2772                update_sit_entry(sbi, old_blkaddr, -1);
2773
2774        locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
2775        locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr));
2776
2777        locate_dirty_segment(sbi, old_cursegno);
2778
2779        if (recover_curseg) {
2780                if (old_cursegno != curseg->segno) {
2781                        curseg->next_segno = old_cursegno;
2782                        change_curseg(sbi, type);
2783                }
2784                curseg->next_blkoff = old_blkoff;
2785        }
2786
2787        up_write(&sit_i->sentry_lock);
2788        mutex_unlock(&curseg->curseg_mutex);
2789        up_write(&SM_I(sbi)->curseg_lock);
2790}
2791
2792void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
2793                                block_t old_addr, block_t new_addr,
2794                                unsigned char version, bool recover_curseg,
2795                                bool recover_newaddr)
2796{
2797        struct f2fs_summary sum;
2798
2799        set_summary(&sum, dn->nid, dn->ofs_in_node, version);
2800
2801        __f2fs_replace_block(sbi, &sum, old_addr, new_addr,
2802                                        recover_curseg, recover_newaddr);
2803
2804        f2fs_update_data_blkaddr(dn, new_addr);
2805}
2806
2807void f2fs_wait_on_page_writeback(struct page *page,
2808                                enum page_type type, bool ordered)
2809{
2810        if (PageWriteback(page)) {
2811                struct f2fs_sb_info *sbi = F2FS_P_SB(page);
2812
2813                f2fs_submit_merged_write_cond(sbi, page->mapping->host,
2814                                                0, page->index, type);
2815                if (ordered)
2816                        wait_on_page_writeback(page);
2817                else
2818                        wait_for_stable_page(page);
2819        }
2820}
2821
2822void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr)
2823{
2824        struct page *cpage;
2825
2826        if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
2827                return;
2828
2829        cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
2830        if (cpage) {
2831                f2fs_wait_on_page_writeback(cpage, DATA, true);
2832                f2fs_put_page(cpage, 1);
2833        }
2834}
2835
2836static void read_compacted_summaries(struct f2fs_sb_info *sbi)
2837{
2838        struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
2839        struct curseg_info *seg_i;
2840        unsigned char *kaddr;
2841        struct page *page;
2842        block_t start;
2843        int i, j, offset;
2844
2845        start = start_sum_block(sbi);
2846
2847        page = get_meta_page(sbi, start++);
2848        kaddr = (unsigned char *)page_address(page);
2849
2850        /* Step 1: restore nat cache */
2851        seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
2852        memcpy(seg_i->journal, kaddr, SUM_JOURNAL_SIZE);
2853
2854        /* Step 2: restore sit cache */
2855        seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
2856        memcpy(seg_i->journal, kaddr + SUM_JOURNAL_SIZE, SUM_JOURNAL_SIZE);
2857        offset = 2 * SUM_JOURNAL_SIZE;
2858
2859        /* Step 3: restore summary entries */
2860        for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
2861                unsigned short blk_off;
2862                unsigned int segno;
2863
2864                seg_i = CURSEG_I(sbi, i);
2865                segno = le32_to_cpu(ckpt->cur_data_segno[i]);
2866                blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
2867                seg_i->next_segno = segno;
2868                reset_curseg(sbi, i, 0);
2869                seg_i->alloc_type = ckpt->alloc_type[i];
2870                seg_i->next_blkoff = blk_off;
2871
2872                if (seg_i->alloc_type == SSR)
2873                        blk_off = sbi->blocks_per_seg;
2874
2875                for (j = 0; j < blk_off; j++) {
2876                        struct f2fs_summary *s;
2877                        s = (struct f2fs_summary *)(kaddr + offset);
2878                        seg_i->sum_blk->entries[j] = *s;
2879                        offset += SUMMARY_SIZE;
2880                        if (offset + SUMMARY_SIZE <= PAGE_SIZE -
2881                                                SUM_FOOTER_SIZE)
2882                                continue;
2883
2884                        f2fs_put_page(page, 1);
2885                        page = NULL;
2886
2887                        page = get_meta_page(sbi, start++);
2888                        kaddr = (unsigned char *)page_address(page);
2889                        offset = 0;
2890                }
2891        }
2892        f2fs_put_page(page, 1);
2893}
2894
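/*
 * Layout of the compacted summary area parsed above, assuming 4KB
 * blocks (SUM_JOURNAL_SIZE = 507):
 *
 *	page 0:    [nat journal 507B][sit journal 507B][packed summaries...]
 *	page 1..n: [packed summaries...]
 *
 * The three data logs' entries are packed back to back, SUMMARY_SIZE
 * bytes each; write_compacted_summaries() below produces exactly this
 * layout.
 */
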
2895static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
2896{
2897        struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
2898        struct f2fs_summary_block *sum;
2899        struct curseg_info *curseg;
2900        struct page *new;
2901        unsigned short blk_off;
2902        unsigned int segno = 0;
2903        block_t blk_addr = 0;
2904
2905        /* get segment number and block addr */
2906        if (IS_DATASEG(type)) {
2907                segno = le32_to_cpu(ckpt->cur_data_segno[type]);
2908                blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
2909                                                        CURSEG_HOT_DATA]);
2910                if (__exist_node_summaries(sbi))
2911                        blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
2912                else
2913                        blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
2914        } else {
2915                segno = le32_to_cpu(ckpt->cur_node_segno[type -
2916                                                        CURSEG_HOT_NODE]);
2917                blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
2918                                                        CURSEG_HOT_NODE]);
2919                if (__exist_node_summaries(sbi))
2920                        blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
2921                                                        type - CURSEG_HOT_NODE);
2922                else
2923                        blk_addr = GET_SUM_BLOCK(sbi, segno);
2924        }
2925
2926        new = get_meta_page(sbi, blk_addr);
2927        sum = (struct f2fs_summary_block *)page_address(new);
2928
2929        if (IS_NODESEG(type)) {
2930                if (__exist_node_summaries(sbi)) {
2931                        struct f2fs_summary *ns = &sum->entries[0];
2932                        int i;
2933                        for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
2934                                ns->version = 0;
2935                                ns->ofs_in_node = 0;
2936                        }
2937                } else {
2938                        restore_node_summary(sbi, segno, sum);
2939                }
2940        }
2941
2942        /* set uncompleted segment to curseg */
2943        curseg = CURSEG_I(sbi, type);
2944        mutex_lock(&curseg->curseg_mutex);
2945
2946        /* update journal info */
2947        down_write(&curseg->journal_rwsem);
2948        memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE);
2949        up_write(&curseg->journal_rwsem);
2950
2951        memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE);
2952        memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE);
2953        curseg->next_segno = segno;
2954        reset_curseg(sbi, type, 0);
2955        curseg->alloc_type = ckpt->alloc_type[type];
2956        curseg->next_blkoff = blk_off;
2957        mutex_unlock(&curseg->curseg_mutex);
2958        f2fs_put_page(new, 1);
2959        return 0;
2960}
2961
2962static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
2963{
2964        struct f2fs_journal *sit_j = CURSEG_I(sbi, CURSEG_COLD_DATA)->journal;
2965        struct f2fs_journal *nat_j = CURSEG_I(sbi, CURSEG_HOT_DATA)->journal;
2966        int type = CURSEG_HOT_DATA;
2967        int err;
2968
2969        if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) {
2970                int npages = npages_for_summary_flush(sbi, true);
2971
2972                if (npages >= 2)
2973                        ra_meta_pages(sbi, start_sum_block(sbi), npages,
2974                                                        META_CP, true);
2975
2976                /* restore the compacted data summaries */
2977                read_compacted_summaries(sbi);
2978                type = CURSEG_HOT_NODE;
2979        }
2980
2981        if (__exist_node_summaries(sbi))
2982                ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
2983                                        NR_CURSEG_TYPE - type, META_CP, true);
2984
2985        for (; type <= CURSEG_COLD_NODE; type++) {
2986                err = read_normal_summaries(sbi, type);
2987                if (err)
2988                        return err;
2989        }
2990
2991        /* sanity check for summary blocks */
2992        if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES ||
2993                        sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES)
2994                return -EINVAL;
2995
2996        return 0;
2997}
2998
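    /*
     * Pack both journals and the in-use data summary entries into as few
     * meta pages as possible.  Rough layout of the first page
     * (illustrative; the real boundaries are whatever the SUM_* size
     * macros evaluate to):
     *
     *   +-------------------+  byte 0
     *   | NAT journal       |  SUM_JOURNAL_SIZE
     *   +-------------------+
     *   | SIT journal       |  SUM_JOURNAL_SIZE
     *   +-------------------+
     *   | summary entries   |  SUMMARY_SIZE each, filled only up to
     *   +-------------------+  PAGE_SIZE - SUM_FOOTER_SIZE
     *
     * Continuation pages restart at byte 0 and hold entries only.
     */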
2999static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
3000{
3001        struct page *page;
3002        unsigned char *kaddr;
3003        struct f2fs_summary *summary;
3004        struct curseg_info *seg_i;
3005        int written_size = 0;
3006        int i, j;
3007
3008        page = grab_meta_page(sbi, blkaddr++);
3009        kaddr = (unsigned char *)page_address(page);
3010
3011        /* Step 1: write nat cache */
3012        seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
3013        memcpy(kaddr, seg_i->journal, SUM_JOURNAL_SIZE);
3014        written_size += SUM_JOURNAL_SIZE;
3015
3016        /* Step 2: write sit cache */
3017        seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
3018        memcpy(kaddr + written_size, seg_i->journal, SUM_JOURNAL_SIZE);
3019        written_size += SUM_JOURNAL_SIZE;
3020
3021        /* Step 3: write summary entries */
3022        for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
3023                unsigned short blkoff;
3024                seg_i = CURSEG_I(sbi, i);
3025                if (sbi->ckpt->alloc_type[i] == SSR)
3026                        blkoff = sbi->blocks_per_seg;
3027                else
3028                        blkoff = curseg_blkoff(sbi, i);
3029
3030                for (j = 0; j < blkoff; j++) {
3031                        if (!page) {
3032                                page = grab_meta_page(sbi, blkaddr++);
3033                                kaddr = (unsigned char *)page_address(page);
3034                                written_size = 0;
3035                        }
3036                        summary = (struct f2fs_summary *)(kaddr + written_size);
3037                        *summary = seg_i->sum_blk->entries[j];
3038                        written_size += SUMMARY_SIZE;
3039
3040                        if (written_size + SUMMARY_SIZE <= PAGE_SIZE -
3041                                                        SUM_FOOTER_SIZE)
3042                                continue;
3043
3044                        set_page_dirty(page);
3045                        f2fs_put_page(page, 1);
3046                        page = NULL;
3047                }
3048        }
3049        if (page) {
3050                set_page_dirty(page);
3051                f2fs_put_page(page, 1);
3052        }
3053}
3054
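    /*
     * Non-compacted summaries are one full block per current segment,
     * written contiguously from blkaddr: three consecutive blocks for
     * the data types or three for the node types, indexed by (i - type).
     */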
3055static void write_normal_summaries(struct f2fs_sb_info *sbi,
3056                                        block_t blkaddr, int type)
3057{
3058        int i, end;
3059        if (IS_DATASEG(type))
3060                end = type + NR_CURSEG_DATA_TYPE;
3061        else
3062                end = type + NR_CURSEG_NODE_TYPE;
3063
3064        for (i = type; i < end; i++)
3065                write_current_sum_page(sbi, i, blkaddr + (i - type));
3066}
3067
3068void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
3069{
3070        if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG))
3071                write_compacted_summaries(sbi, start_blk);
3072        else
3073                write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
3074}
3075
3076void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
3077{
3078        write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
3079}
3080
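    /*
     * Example (illustrative) of the lookup-or-reserve pattern used by
     * flush_sit_entries() below: find the SIT journal slot for segment
     * 100, reserving a fresh slot if the entry is absent and space
     * remains:
     *
     *   i = lookup_journal_in_cursum(journal, SIT_JOURNAL, 100, 1);
     *   if (i >= 0)
     *           segno_in_journal(journal, i) = cpu_to_le32(100);
     *
     * A negative return means the value was not found and, with alloc
     * set, no free slot could be reserved either.
     */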
3081int lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
3082                                        unsigned int val, int alloc)
3083{
3084        int i;
3085
3086        if (type == NAT_JOURNAL) {
3087                for (i = 0; i < nats_in_cursum(journal); i++) {
3088                        if (le32_to_cpu(nid_in_journal(journal, i)) == val)
3089                                return i;
3090                }
3091                if (alloc && __has_cursum_space(journal, 1, NAT_JOURNAL))
3092                        return update_nats_in_cursum(journal, 1);
3093        } else if (type == SIT_JOURNAL) {
3094                for (i = 0; i < sits_in_cursum(journal); i++)
3095                        if (le32_to_cpu(segno_in_journal(journal, i)) == val)
3096                                return i;
3097                if (alloc && __has_cursum_space(journal, 1, SIT_JOURNAL))
3098                        return update_sits_in_cursum(journal, 1);
3099        }
3100        return -1;
3101}
3102
3103static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
3104                                        unsigned int segno)
3105{
3106        return get_meta_page(sbi, current_sit_addr(sbi, segno));
3107}
3108
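    /*
     * SIT blocks are kept in two on-disk copies (note that sit_segs is
     * half the raw SIT segment count in build_sit_info()); the SIT
     * version bitmap selects the live copy per block.  This helper
     * writes the updated segment info into the inactive copy and flips
     * the bit, so a checkpoint in progress never overwrites the last
     * stable copy in place.
     */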
3109static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
3110                                        unsigned int start)
3111{
3112        struct sit_info *sit_i = SIT_I(sbi);
3113        struct page *page;
3114        pgoff_t src_off, dst_off;
3115
3116        src_off = current_sit_addr(sbi, start);
3117        dst_off = next_sit_addr(sbi, src_off);
3118
3119        page = grab_meta_page(sbi, dst_off);
3120        seg_info_to_sit_page(sbi, page, start);
3121
3122        set_page_dirty(page);
3123        set_to_next_sit(sit_i, start);
3124
3125        return page;
3126}
3127
3128static struct sit_entry_set *grab_sit_entry_set(void)
3129{
3130        struct sit_entry_set *ses =
3131                        f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_NOFS);
3132
3133        ses->entry_cnt = 0;
3134        INIT_LIST_HEAD(&ses->set_list);
3135        return ses;
3136}
3137
3138static void release_sit_entry_set(struct sit_entry_set *ses)
3139{
3140        list_del(&ses->set_list);
3141        kmem_cache_free(sit_entry_set_slab, ses);
3142}
3143
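    /*
     * Keep the set list sorted by entry_cnt in ascending order.  Small
     * sets are then visited first in flush_sit_entries() and have the
     * best chance of fitting into the journal before to_journal is
     * cleared.
     */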
3144static void adjust_sit_entry_set(struct sit_entry_set *ses,
3145                                                struct list_head *head)
3146{
3147        struct sit_entry_set *next = ses;
3148
3149        if (list_is_last(&ses->set_list, head))
3150                return;
3151
3152        list_for_each_entry_continue(next, head, set_list)
3153                if (ses->entry_cnt <= next->entry_cnt)
3154                        break;
3155
3156        list_move_tail(&ses->set_list, &next->set_list);
3157}
3158
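    /*
     * Sets are keyed by START_SEGNO(), the first segno covered by the
     * owning SIT block, so each set maps one-to-one onto an on-disk SIT
     * block and entry_cnt counts the dirty entries within it.
     */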
3159static void add_sit_entry(unsigned int segno, struct list_head *head)
3160{
3161        struct sit_entry_set *ses;
3162        unsigned int start_segno = START_SEGNO(segno);
3163
3164        list_for_each_entry(ses, head, set_list) {
3165                if (ses->start_segno == start_segno) {
3166                        ses->entry_cnt++;
3167                        adjust_sit_entry_set(ses, head);
3168                        return;
3169                }
3170        }
3171
3172        ses = grab_sit_entry_set();
3173
3174        ses->start_segno = start_segno;
3175        ses->entry_cnt++;
3176        list_add(&ses->set_list, head);
3177}
3178
3179static void add_sits_in_set(struct f2fs_sb_info *sbi)
3180{
3181        struct f2fs_sm_info *sm_info = SM_I(sbi);
3182        struct list_head *set_list = &sm_info->sit_entry_set;
3183        unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
3184        unsigned int segno;
3185
3186        for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
3187                add_sit_entry(segno, set_list);
3188}
3189
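    /*
     * Drain the SIT journal: each journaled segno is marked dirty in
     * the sentries bitmap and, if it was not dirty already, accounted
     * in a sit entry set; the in-journal count is then reset so the
     * space can be reused.
     */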
3190static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
3191{
3192        struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
3193        struct f2fs_journal *journal = curseg->journal;
3194        int i;
3195
3196        down_write(&curseg->journal_rwsem);
3197        for (i = 0; i < sits_in_cursum(journal); i++) {
3198                unsigned int segno;
3199                bool dirtied;
3200
3201                segno = le32_to_cpu(segno_in_journal(journal, i));
3202                dirtied = __mark_sit_entry_dirty(sbi, segno);
3203
3204                if (!dirtied)
3205                        add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
3206        }
3207        update_sits_in_cursum(journal, -i);
3208        up_write(&curseg->journal_rwsem);
3209}
3210
3211/*
3212 * The checkpoint path calls this function, which flushes SIT entries
3213 * including the sit_journal, and moves prefree segments to free segments.
3214 */
3215void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
3216{
3217        struct sit_info *sit_i = SIT_I(sbi);
3218        unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
3219        struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
3220        struct f2fs_journal *journal = curseg->journal;
3221        struct sit_entry_set *ses, *tmp;
3222        struct list_head *head = &SM_I(sbi)->sit_entry_set;
3223        bool to_journal = true;
3224        struct seg_entry *se;
3225
3226        down_write(&sit_i->sentry_lock);
3227
3228        if (!sit_i->dirty_sentries)
3229                goto out;
3230
3231        /*
3232         * temporarily add and account the sit entries from the dirty
3233         * bitmap in sit entry sets
3234         */
3235        add_sits_in_set(sbi);
3236
3237        /*
3238         * if there is not enough space in the journal to store dirty
3239         * sit entries, remove all of them from the journal and account
3240         * them in the sit entry sets instead.
3241         */
3242        if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL))
3243                remove_sits_in_journal(sbi);
3244
3245        /*
3246         * there are two steps to flush sit entries:
3247         * #1, flush sit entries to journal in current cold data summary block.
3248         * #2, flush sit entries to sit page.
3249         */
3250        list_for_each_entry_safe(ses, tmp, head, set_list) {
3251                struct page *page = NULL;
3252                struct f2fs_sit_block *raw_sit = NULL;
3253                unsigned int start_segno = ses->start_segno;
3254                unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
3255                                                (unsigned long)MAIN_SEGS(sbi));
3256                unsigned int segno = start_segno;
3257
3258                if (to_journal &&
3259                        !__has_cursum_space(journal, ses->entry_cnt, SIT_JOURNAL))
3260                        to_journal = false;
3261
3262                if (to_journal) {
3263                        down_write(&curseg->journal_rwsem);
3264                } else {
3265                        page = get_next_sit_page(sbi, start_segno);
3266                        raw_sit = page_address(page);
3267                }
3268
3269                /* flush dirty sit entries in region of current sit set */
3270                for_each_set_bit_from(segno, bitmap, end) {
3271                        int offset, sit_offset;
3272
3273                        se = get_seg_entry(sbi, segno);
3274
3275                        /* add discard candidates */
3276                        if (!(cpc->reason & CP_DISCARD)) {
3277                                cpc->trim_start = segno;
3278                                add_discard_addrs(sbi, cpc, false);
3279                        }
3280
3281                        if (to_journal) {
3282                                offset = lookup_journal_in_cursum(journal,
3283                                                        SIT_JOURNAL, segno, 1);
3284                                f2fs_bug_on(sbi, offset < 0);
3285                                segno_in_journal(journal, offset) =
3286                                                        cpu_to_le32(segno);
3287                                seg_info_to_raw_sit(se,
3288                                        &sit_in_journal(journal, offset));
3289                        } else {
3290                                sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
3291                                seg_info_to_raw_sit(se,
3292                                                &raw_sit->entries[sit_offset]);
3293                        }
3294
3295                        __clear_bit(segno, bitmap);
3296                        sit_i->dirty_sentries--;
3297                        ses->entry_cnt--;
3298                }
3299
3300                if (to_journal)
3301                        up_write(&curseg->journal_rwsem);
3302                else
3303                        f2fs_put_page(page, 1);
3304
3305                f2fs_bug_on(sbi, ses->entry_cnt);
3306                release_sit_entry_set(ses);
3307        }
3308
3309        f2fs_bug_on(sbi, !list_empty(head));
3310        f2fs_bug_on(sbi, sit_i->dirty_sentries);
3311out:
3312        if (cpc->reason & CP_DISCARD) {
3313                __u64 trim_start = cpc->trim_start;
3314
3315                for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
3316                        add_discard_addrs(sbi, cpc, false);
3317
3318                cpc->trim_start = trim_start;
3319        }
3320        up_write(&sit_i->sentry_lock);
3321
3322        set_prefree_as_free_segments(sbi);
3323}
3324
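    /*
     * Per-segment memory sketch (illustrative, matching the allocations
     * below): one struct seg_entry plus two validity bitmaps of
     * SIT_VBLOCK_MAP_SIZE each, an optional mirror under
     * CONFIG_F2FS_CHECK_FS, and an optional discard map; plus one dirty
     * bit per segment and a private copy of the on-disk SIT bitmap.
     */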
3325static int build_sit_info(struct f2fs_sb_info *sbi)
3326{
3327        struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
3328        struct sit_info *sit_i;
3329        unsigned int sit_segs, start;
3330        char *src_bitmap;
3331        unsigned int bitmap_size;
3332
3333        /* allocate memory for SIT information */
3334        sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL);
3335        if (!sit_i)
3336                return -ENOMEM;
3337
3338        SM_I(sbi)->sit_info = sit_i;
3339
3340        sit_i->sentries = f2fs_kvzalloc(sbi, MAIN_SEGS(sbi) *
3341                                        sizeof(struct seg_entry), GFP_KERNEL);
3342        if (!sit_i->sentries)
3343                return -ENOMEM;
3344
3345        bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
3346        sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, bitmap_size,
3347                                                                GFP_KERNEL);
3348        if (!sit_i->dirty_sentries_bitmap)
3349                return -ENOMEM;
3350
3351        for (start = 0; start < MAIN_SEGS(sbi); start++) {
3352                sit_i->sentries[start].cur_valid_map
3353                        = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
3354                sit_i->sentries[start].ckpt_valid_map
3355                        = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
3356                if (!sit_i->sentries[start].cur_valid_map ||
3357                                !sit_i->sentries[start].ckpt_valid_map)
3358                        return -ENOMEM;
3359
3360#ifdef CONFIG_F2FS_CHECK_FS
3361                sit_i->sentries[start].cur_valid_map_mir
3362                        = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
3363                if (!sit_i->sentries[start].cur_valid_map_mir)
3364                        return -ENOMEM;
3365#endif
3366
3367                if (f2fs_discard_en(sbi)) {
3368                        sit_i->sentries[start].discard_map
3369                                = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE,
3370                                                                GFP_KERNEL);
3371                        if (!sit_i->sentries[start].discard_map)
3372                                return -ENOMEM;
3373                }
3374        }
3375
3376        sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
3377        if (!sit_i->tmp_map)
3378                return -ENOMEM;
3379
3380        if (sbi->segs_per_sec > 1) {
3381                sit_i->sec_entries = f2fs_kvzalloc(sbi, MAIN_SECS(sbi) *
3382                                        sizeof(struct sec_entry), GFP_KERNEL);
3383                if (!sit_i->sec_entries)
3384                        return -ENOMEM;
3385        }
3386
3387        /* get information related to the SIT */
3388        sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
3389
3390        /* set up the SIT bitmap from the checkpoint pack */
3391        bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
3392        src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
3393
3394        sit_i->sit_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
3395        if (!sit_i->sit_bitmap)
3396                return -ENOMEM;
3397
3398#ifdef CONFIG_F2FS_CHECK_FS
3399        sit_i->sit_bitmap_mir = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
3400        if (!sit_i->sit_bitmap_mir)
3401                return -ENOMEM;
3402#endif
3403
3404        /* init SIT information */
3405        sit_i->s_ops = &default_salloc_ops;
3406
3407        sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
3408        sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
3409        sit_i->written_valid_blocks = 0;
3410        sit_i->bitmap_size = bitmap_size;
3411        sit_i->dirty_sentries = 0;
3412        sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
3413        sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
3414        sit_i->mounted_time = ktime_get_real_seconds();
3415        init_rwsem(&sit_i->sentry_lock);
3416        return 0;
3417}
3418
3419static int build_free_segmap(struct f2fs_sb_info *sbi)
3420{
3421        struct free_segmap_info *free_i;
3422        unsigned int bitmap_size, sec_bitmap_size;
3423
3424        /* allocate memory for free segmap information */
3425        free_i = f2fs_kzalloc(sbi, sizeof(struct free_segmap_info), GFP_KERNEL);
3426        if (!free_i)
3427                return -ENOMEM;
3428
3429        SM_I(sbi)->free_info = free_i;
3430
3431        bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
3432        free_i->free_segmap = f2fs_kvmalloc(sbi, bitmap_size, GFP_KERNEL);
3433        if (!free_i->free_segmap)
3434                return -ENOMEM;
3435
3436        sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
3437        free_i->free_secmap = f2fs_kvmalloc(sbi, sec_bitmap_size, GFP_KERNEL);
3438        if (!free_i->free_secmap)
3439                return -ENOMEM;
3440
3441        /* set all segments as dirty temporarily */
3442        memset(free_i->free_segmap, 0xff, bitmap_size);
3443        memset(free_i->free_secmap, 0xff, sec_bitmap_size);
3444
3445        /* init free segmap information */
3446        free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
3447        free_i->free_segments = 0;
3448        free_i->free_sections = 0;
3449        spin_lock_init(&free_i->segmap_lock);
3450        return 0;
3451}
3452
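    /*
     * One curseg_info per log type (3 data + 3 node).  Each gets a
     * page-sized summary block and a private journal; the contents are
     * then filled from the checkpoint by restore_curseg_summaries().
     */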
3453static int build_curseg(struct f2fs_sb_info *sbi)
3454{
3455        struct curseg_info *array;
3456        int i;
3457
3458        array = f2fs_kzalloc(sbi, sizeof(*array) * NR_CURSEG_TYPE, GFP_KERNEL);
3459        if (!array)
3460                return -ENOMEM;
3461
3462        SM_I(sbi)->curseg_array = array;
3463
3464        for (i = 0; i < NR_CURSEG_TYPE; i++) {
3465                mutex_init(&array[i].curseg_mutex);
3466                array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
3467                if (!array[i].sum_blk)
3468                        return -ENOMEM;
3469                init_rwsem(&array[i].journal_rwsem);
3470                array[i].journal = f2fs_kzalloc(sbi,
3471                                sizeof(struct f2fs_journal), GFP_KERNEL);
3472                if (!array[i].journal)
3473                        return -ENOMEM;
3474                array[i].segno = NULL_SEGNO;
3475                array[i].next_blkoff = 0;
3476        }
3477        return restore_curseg_summaries(sbi);
3478}
3479
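    /*
     * Two passes: first every on-disk SIT block is read (prefetched in
     * BIO_MAX_PAGES batches), then entries still cached in the SIT
     * journal override what the blocks said, since the journal always
     * holds the newer state.
     */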
3480static int build_sit_entries(struct f2fs_sb_info *sbi)
3481{
3482        struct sit_info *sit_i = SIT_I(sbi);
3483        struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
3484        struct f2fs_journal *journal = curseg->journal;
3485        struct seg_entry *se;
3486        struct f2fs_sit_entry sit;
3487        int sit_blk_cnt = SIT_BLK_CNT(sbi);
3488        unsigned int i, start, end;
3489        unsigned int readed, start_blk = 0;
3490        int err = 0;
3491
3492        do {
3493                readed = ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES,
3494                                                        META_SIT, true);
3495
3496                start = start_blk * sit_i->sents_per_block;
3497                end = (start_blk + readed) * sit_i->sents_per_block;
3498
3499                for (; start < end && start < MAIN_SEGS(sbi); start++) {
3500                        struct f2fs_sit_block *sit_blk;
3501                        struct page *page;
3502
3503                        se = &sit_i->sentries[start];
3504                        page = get_current_sit_page(sbi, start);
3505                        sit_blk = (struct f2fs_sit_block *)page_address(page);
3506                        sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
3507                        f2fs_put_page(page, 1);
3508
3509                        err = check_block_count(sbi, start, &sit);
3510                        if (err)
3511                                return err;
3512                        seg_info_from_raw_sit(se, &sit);
3513
3514                        /* build the discard map only once */
3515                        if (f2fs_discard_en(sbi)) {
3516                                if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
3517                                        memset(se->discard_map, 0xff,
3518                                                SIT_VBLOCK_MAP_SIZE);
3519                                } else {
3520                                        memcpy(se->discard_map,
3521                                                se->cur_valid_map,
3522                                                SIT_VBLOCK_MAP_SIZE);
3523                                        sbi->discard_blks +=
3524                                                sbi->blocks_per_seg -
3525                                                se->valid_blocks;
3526                                }
3527                        }
3528
3529                        if (sbi->segs_per_sec > 1)
3530                                get_sec_entry(sbi, start)->valid_blocks +=
3531                                                        se->valid_blocks;
3532                }
3533                start_blk += readed;
3534        } while (start_blk < sit_blk_cnt);
3535
3536        down_read(&curseg->journal_rwsem);
3537        for (i = 0; i < sits_in_cursum(journal); i++) {
3538                unsigned int old_valid_blocks;
3539
3540                start = le32_to_cpu(segno_in_journal(journal, i));
3541                se = &sit_i->sentries[start];
3542                sit = sit_in_journal(journal, i);
3543
3544                old_valid_blocks = se->valid_blocks;
3545
3546                err = check_block_count(sbi, start, &sit);
3547                if (err)
3548                        break;
3549                seg_info_from_raw_sit(se, &sit);
3550
3551                if (f2fs_discard_en(sbi)) {
3552                        if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
3553                                memset(se->discard_map, 0xff,
3554                                                        SIT_VBLOCK_MAP_SIZE);
3555                        } else {
3556                                memcpy(se->discard_map, se->cur_valid_map,
3557                                                        SIT_VBLOCK_MAP_SIZE);
3558                                sbi->discard_blks += old_valid_blocks -
3559                                                        se->valid_blocks;
3560                        }
3561                }
3562
3563                if (sbi->segs_per_sec > 1)
3564                        get_sec_entry(sbi, start)->valid_blocks +=
3565                                se->valid_blocks - old_valid_blocks;
3566        }
3567        up_read(&curseg->journal_rwsem);
3568        return err;
3569}
3570
3571static void init_free_segmap(struct f2fs_sb_info *sbi)
3572{
3573        unsigned int start;
3574        int type;
3575
3576        for (start = 0; start < MAIN_SEGS(sbi); start++) {
3577                struct seg_entry *sentry = get_seg_entry(sbi, start);
3578                if (!sentry->valid_blocks)
3579                        __set_free(sbi, start);
3580                else
3581                        SIT_I(sbi)->written_valid_blocks +=
3582                                                sentry->valid_blocks;
3583        }
3584
3585        /* mark the current segments as in use */
3586        for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
3587                struct curseg_info *curseg_t = CURSEG_I(sbi, type);
3588                __set_test_and_inuse(sbi, curseg_t->segno);
3589        }
3590}
3591
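    /*
     * A segment is "dirty" here when it is in use but only partially
     * valid; fully valid and fully free segments are skipped, and a
     * count above blocks_per_seg trips f2fs_bug_on().
     */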
3592static void init_dirty_segmap(struct f2fs_sb_info *sbi)
3593{
3594        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
3595        struct free_segmap_info *free_i = FREE_I(sbi);
3596        unsigned int segno = 0, offset = 0;
3597        unsigned short valid_blocks;
3598
3599        while (1) {
3600                /* find dirty segment based on free segmap */
3601                segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
3602                if (segno >= MAIN_SEGS(sbi))
3603                        break;
3604                offset = segno + 1;
3605                valid_blocks = get_valid_blocks(sbi, segno, false);
3606                if (valid_blocks == sbi->blocks_per_seg || !valid_blocks)
3607                        continue;
3608                if (valid_blocks > sbi->blocks_per_seg) {
3609                        f2fs_bug_on(sbi, 1);
3610                        continue;
3611                }
3612                mutex_lock(&dirty_i->seglist_lock);
3613                __locate_dirty_segment(sbi, segno, DIRTY);
3614                mutex_unlock(&dirty_i->seglist_lock);
3615        }
3616}
3617
3618static int init_victim_secmap(struct f2fs_sb_info *sbi)
3619{
3620        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
3621        unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
3622
3623        dirty_i->victim_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
3624        if (!dirty_i->victim_secmap)
3625                return -ENOMEM;
3626        return 0;
3627}
3628
3629static int build_dirty_segmap(struct f2fs_sb_info *sbi)
3630{
3631        struct dirty_seglist_info *dirty_i;
3632        unsigned int bitmap_size, i;
3633
3634        /* allocate memory for dirty segments list information */
3635        dirty_i = f2fs_kzalloc(sbi, sizeof(struct dirty_seglist_info),
3636                                                                GFP_KERNEL);
3637        if (!dirty_i)
3638                return -ENOMEM;
3639
3640        SM_I(sbi)->dirty_info = dirty_i;
3641        mutex_init(&dirty_i->seglist_lock);
3642
3643        bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
3644
3645        for (i = 0; i < NR_DIRTY_TYPE; i++) {
3646                dirty_i->dirty_segmap[i] = f2fs_kvzalloc(sbi, bitmap_size,
3647                                                                GFP_KERNEL);
3648                if (!dirty_i->dirty_segmap[i])
3649                        return -ENOMEM;
3650        }
3651
3652        init_dirty_segmap(sbi);
3653        return init_victim_secmap(sbi);
3654}
3655
3656/*
3657 * Update min, max modified time for cost-benefit GC algorithm
3658 */
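    /*
     * Example (illustrative): with segs_per_sec == 4 and segment mtimes
     * {10, 20, 30, 40}, the section contributes div_u64(100, 4) == 25
     * as a min_mtime candidate; max_mtime is simply get_mtime(sbi).
     */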
3659static void init_min_max_mtime(struct f2fs_sb_info *sbi)
3660{
3661        struct sit_info *sit_i = SIT_I(sbi);
3662        unsigned int segno;
3663
3664        down_write(&sit_i->sentry_lock);
3665
3666        sit_i->min_mtime = LLONG_MAX;
3667
3668        for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
3669                unsigned int i;
3670                unsigned long long mtime = 0;
3671
3672                for (i = 0; i < sbi->segs_per_sec; i++)
3673                        mtime += get_seg_entry(sbi, segno + i)->mtime;
3674
3675                mtime = div_u64(mtime, sbi->segs_per_sec);
3676
3677                if (sit_i->min_mtime > mtime)
3678                        sit_i->min_mtime = mtime;
3679        }
3680        sit_i->max_mtime = get_mtime(sbi);
3681        up_write(&sit_i->sentry_lock);
3682}
3683
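    /*
     * The build order below matters: SIT info and the free map must
     * exist before the cursegs are restored, SIT entries are read
     * before the free map is re-derived from them (see the "reinit"
     * comment), and the dirty map is computed from the free map last.
     */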
3684int build_segment_manager(struct f2fs_sb_info *sbi)
3685{
3686        struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
3687        struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
3688        struct f2fs_sm_info *sm_info;
3689        int err;
3690
3691        sm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_sm_info), GFP_KERNEL);
3692        if (!sm_info)
3693                return -ENOMEM;
3694
3695        /* init sm info */
3696        sbi->sm_info = sm_info;
3697        sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
3698        sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
3699        sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
3700        sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
3701        sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
3702        sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
3703        sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
3704        sm_info->rec_prefree_segments = sm_info->main_segments *
3705                                        DEF_RECLAIM_PREFREE_SEGMENTS / 100;
3706        if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS)
3707                sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS;
3708
3709        if (!test_opt(sbi, LFS))
3710                sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
3711        sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
3712        sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
3713        sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
3714        sm_info->min_ssr_sections = reserved_sections(sbi);
3715
3716        sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS;
3717
3718        INIT_LIST_HEAD(&sm_info->sit_entry_set);
3719
3720        init_rwsem(&sm_info->curseg_lock);
3721
3722        if (!f2fs_readonly(sbi->sb)) {
3723                err = create_flush_cmd_control(sbi);
3724                if (err)
3725                        return err;
3726        }
3727
3728        err = create_discard_cmd_control(sbi);
3729        if (err)
3730                return err;
3731
3732        err = build_sit_info(sbi);
3733        if (err)
3734                return err;
3735        err = build_free_segmap(sbi);
3736        if (err)
3737                return err;
3738        err = build_curseg(sbi);
3739        if (err)
3740                return err;
3741
3742        /* reinit free segmap based on SIT */
3743        err = build_sit_entries(sbi);
3744        if (err)
3745                return err;
3746
3747        init_free_segmap(sbi);
3748        err = build_dirty_segmap(sbi);
3749        if (err)
3750                return err;
3751
3752        init_min_max_mtime(sbi);
3753        return 0;
3754}
3755
3756static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
3757                enum dirty_type dirty_type)
3758{
3759        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
3760
3761        mutex_lock(&dirty_i->seglist_lock);
3762        kvfree(dirty_i->dirty_segmap[dirty_type]);
3763        dirty_i->nr_dirty[dirty_type] = 0;
3764        mutex_unlock(&dirty_i->seglist_lock);
3765}
3766
3767static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
3768{
3769        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
3770        kvfree(dirty_i->victim_secmap);
3771}
3772
3773static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
3774{
3775        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
3776        int i;
3777
3778        if (!dirty_i)
3779                return;
3780
3781        /* discard pre-free/dirty segments list */
3782        for (i = 0; i < NR_DIRTY_TYPE; i++)
3783                discard_dirty_segmap(sbi, i);
3784
3785        destroy_victim_secmap(sbi);
3786        SM_I(sbi)->dirty_info = NULL;
3787        kfree(dirty_i);
3788}
3789
3790static void destroy_curseg(struct f2fs_sb_info *sbi)
3791{
3792        struct curseg_info *array = SM_I(sbi)->curseg_array;
3793        int i;
3794
3795        if (!array)
3796                return;
3797        SM_I(sbi)->curseg_array = NULL;
3798        for (i = 0; i < NR_CURSEG_TYPE; i++) {
3799                kfree(array[i].sum_blk);
3800                kfree(array[i].journal);
3801        }
3802        kfree(array);
3803}
3804
3805static void destroy_free_segmap(struct f2fs_sb_info *sbi)
3806{
3807        struct free_segmap_info *free_i = SM_I(sbi)->free_info;
3808        if (!free_i)
3809                return;
3810        SM_I(sbi)->free_info = NULL;
3811        kvfree(free_i->free_segmap);
3812        kvfree(free_i->free_secmap);
3813        kfree(free_i);
3814}
3815
3816static void destroy_sit_info(struct f2fs_sb_info *sbi)
3817{
3818        struct sit_info *sit_i = SIT_I(sbi);
3819        unsigned int start;
3820
3821        if (!sit_i)
3822                return;
3823
3824        if (sit_i->sentries) {
3825                for (start = 0; start < MAIN_SEGS(sbi); start++) {
3826                        kfree(sit_i->sentries[start].cur_valid_map);
3827#ifdef CONFIG_F2FS_CHECK_FS
3828                        kfree(sit_i->sentries[start].cur_valid_map_mir);
3829#endif
3830                        kfree(sit_i->sentries[start].ckpt_valid_map);
3831                        kfree(sit_i->sentries[start].discard_map);
3832                }
3833        }
3834        kfree(sit_i->tmp_map);
3835
3836        kvfree(sit_i->sentries);
3837        kvfree(sit_i->sec_entries);
3838        kvfree(sit_i->dirty_sentries_bitmap);
3839
3840        SM_I(sbi)->sit_info = NULL;
3841        kfree(sit_i->sit_bitmap);
3842#ifdef CONFIG_F2FS_CHECK_FS
3843        kfree(sit_i->sit_bitmap_mir);
3844#endif
3845        kfree(sit_i);
3846}
3847
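    /*
     * Teardown runs roughly in reverse of build_segment_manager(): the
     * flush and discard controls (and their kthreads) are stopped first
     * so nothing touches segment state while the maps are being freed.
     */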
3848void destroy_segment_manager(struct f2fs_sb_info *sbi)
3849{
3850        struct f2fs_sm_info *sm_info = SM_I(sbi);
3851
3852        if (!sm_info)
3853                return;
3854        destroy_flush_cmd_control(sbi, true);
3855        destroy_discard_cmd_control(sbi);
3856        destroy_dirty_segmap(sbi);
3857        destroy_curseg(sbi);
3858        destroy_free_segmap(sbi);
3859        destroy_sit_info(sbi);
3860        sbi->sm_info = NULL;
3861        kfree(sm_info);
3862}
3863
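    /*
     * Caches are created in order and unwound in reverse through the
     * goto ladder below, so a failure at any step frees exactly what
     * was already created.  Minimal sketch of the same pattern
     * (hypothetical names, for illustration only):
     *
     *   a = create("A");  if (!a) goto fail;
     *   b = create("B");  if (!b) goto free_a;
     *   return 0;
     * free_a:  destroy(a);
     * fail:    return -ENOMEM;
     */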
3864int __init create_segment_manager_caches(void)
3865{
3866        discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
3867                        sizeof(struct discard_entry));
3868        if (!discard_entry_slab)
3869                goto fail;
3870
3871        discard_cmd_slab = f2fs_kmem_cache_create("discard_cmd",
3872                        sizeof(struct discard_cmd));
3873        if (!discard_cmd_slab)
3874                goto destroy_discard_entry;
3875
3876        sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set",
3877                        sizeof(struct sit_entry_set));
3878        if (!sit_entry_set_slab)
3879                goto destroy_discard_cmd;
3880
3881        inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry",
3882                        sizeof(struct inmem_pages));
3883        if (!inmem_entry_slab)
3884                goto destroy_sit_entry_set;
3885        return 0;
3886
3887destroy_sit_entry_set:
3888        kmem_cache_destroy(sit_entry_set_slab);
3889destroy_discard_cmd:
3890        kmem_cache_destroy(discard_cmd_slab);
3891destroy_discard_entry:
3892        kmem_cache_destroy(discard_entry_slab);
3893fail:
3894        return -ENOMEM;
3895}
3896
3897void destroy_segment_manager_caches(void)
3898{
3899        kmem_cache_destroy(sit_entry_set_slab);
3900        kmem_cache_destroy(discard_cmd_slab);
3901        kmem_cache_destroy(discard_entry_slab);
3902        kmem_cache_destroy(inmem_entry_slab);
3903}
3904