linux/fs/f2fs/segment.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * fs/f2fs/segment.c
   4 *
   5 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
   6 *             http://www.samsung.com/
   7 */
   8#include <linux/fs.h>
   9#include <linux/f2fs_fs.h>
  10#include <linux/bio.h>
  11#include <linux/blkdev.h>
  12#include <linux/prefetch.h>
  13#include <linux/kthread.h>
  14#include <linux/swap.h>
  15#include <linux/timer.h>
  16#include <linux/freezer.h>
  17#include <linux/sched/signal.h>
  18
  19#include "f2fs.h"
  20#include "segment.h"
  21#include "node.h"
  22#include "gc.h"
  23#include "trace.h"
  24#include <trace/events/f2fs.h>
  25
/* Find-first-zero in f2fs bit order: a first-set search on the complement. */
   26#define __reverse_ffz(x) __reverse_ffs(~(x))
   27
/*
 * Slab caches for objects handled in this file.  inmem_entry_slab backs
 * struct inmem_pages entries and discard_cmd_slab backs struct discard_cmd
 * (see their allocation sites below).
 */
   28static struct kmem_cache *discard_entry_slab;
   29static struct kmem_cache *discard_cmd_slab;
   30static struct kmem_cache *sit_entry_set_slab;
   31static struct kmem_cache *inmem_entry_slab;
  32
  33static unsigned long __reverse_ulong(unsigned char *str)
  34{
  35        unsigned long tmp = 0;
  36        int shift = 24, idx = 0;
  37
  38#if BITS_PER_LONG == 64
  39        shift = 56;
  40#endif
  41        while (shift >= 0) {
  42                tmp |= (unsigned long)str[idx++] << shift;
  43                shift -= BITS_PER_BYTE;
  44        }
  45        return tmp;
  46}
  47
  48/*
  49 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
  50 * MSB and LSB are reversed in a byte by f2fs_set_bit.
  51 */
  52static inline unsigned long __reverse_ffs(unsigned long word)
  53{
  54        int num = 0;
  55
  56#if BITS_PER_LONG == 64
  57        if ((word & 0xffffffff00000000UL) == 0)
  58                num += 32;
  59        else
  60                word >>= 32;
  61#endif
  62        if ((word & 0xffff0000) == 0)
  63                num += 16;
  64        else
  65                word >>= 16;
  66
  67        if ((word & 0xff00) == 0)
  68                num += 8;
  69        else
  70                word >>= 8;
  71
  72        if ((word & 0xf0) == 0)
  73                num += 4;
  74        else
  75                word >>= 4;
  76
  77        if ((word & 0xc) == 0)
  78                num += 2;
  79        else
  80                word >>= 2;
  81
  82        if ((word & 0x2) == 0)
  83                num += 1;
  84        return num;
  85}
  86
  87/*
  88 * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
  89 * f2fs_set_bit makes MSB and LSB reversed in a byte.
   90 * @size must be an integral multiple of BITS_PER_LONG.
  91 * Example:
  92 *                             MSB <--> LSB
  93 *   f2fs_set_bit(0, bitmap) => 1000 0000
  94 *   f2fs_set_bit(7, bitmap) => 0000 0001
  95 */
  96static unsigned long __find_rev_next_bit(const unsigned long *addr,
  97                        unsigned long size, unsigned long offset)
  98{
  99        const unsigned long *p = addr + BIT_WORD(offset);
 100        unsigned long result = size;
 101        unsigned long tmp;
 102
 103        if (offset >= size)
 104                return size;
 105
 106        size -= (offset & ~(BITS_PER_LONG - 1));
 107        offset %= BITS_PER_LONG;
 108
 109        while (1) {
 110                if (*p == 0)
 111                        goto pass;
 112
 113                tmp = __reverse_ulong((unsigned char *)p);
 114
 115                tmp &= ~0UL >> offset;
 116                if (size < BITS_PER_LONG)
 117                        tmp &= (~0UL << (BITS_PER_LONG - size));
 118                if (tmp)
 119                        goto found;
 120pass:
 121                if (size <= BITS_PER_LONG)
 122                        break;
 123                size -= BITS_PER_LONG;
 124                offset = 0;
 125                p++;
 126        }
 127        return result;
 128found:
 129        return result - size + __reverse_ffs(tmp);
 130}
 131
 132static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
 133                        unsigned long size, unsigned long offset)
 134{
 135        const unsigned long *p = addr + BIT_WORD(offset);
 136        unsigned long result = size;
 137        unsigned long tmp;
 138
 139        if (offset >= size)
 140                return size;
 141
 142        size -= (offset & ~(BITS_PER_LONG - 1));
 143        offset %= BITS_PER_LONG;
 144
 145        while (1) {
 146                if (*p == ~0UL)
 147                        goto pass;
 148
 149                tmp = __reverse_ulong((unsigned char *)p);
 150
 151                if (offset)
 152                        tmp |= ~0UL << (BITS_PER_LONG - offset);
 153                if (size < BITS_PER_LONG)
 154                        tmp |= ~0UL >> size;
 155                if (tmp != ~0UL)
 156                        goto found;
 157pass:
 158                if (size <= BITS_PER_LONG)
 159                        break;
 160                size -= BITS_PER_LONG;
 161                offset = 0;
 162                p++;
 163        }
 164        return result;
 165found:
 166        return result - size + __reverse_ffz(tmp);
 167}
 168
 169bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
 170{
 171        int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
 172        int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
 173        int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
 174
 175        if (test_opt(sbi, LFS))
 176                return false;
 177        if (sbi->gc_mode == GC_URGENT)
 178                return true;
 179        if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
 180                return true;
 181
 182        return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
 183                        SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
 184}
 185
 186void f2fs_register_inmem_page(struct inode *inode, struct page *page)
 187{
 188        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 189        struct f2fs_inode_info *fi = F2FS_I(inode);
 190        struct inmem_pages *new;
 191
 192        f2fs_trace_pid(page);
 193
 194        set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
 195        SetPagePrivate(page);
 196
 197        new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
 198
 199        /* add atomic page indices to the list */
 200        new->page = page;
 201        INIT_LIST_HEAD(&new->list);
 202
 203        /* increase reference count with clean state */
 204        mutex_lock(&fi->inmem_lock);
 205        get_page(page);
 206        list_add_tail(&new->list, &fi->inmem_pages);
 207        spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
 208        if (list_empty(&fi->inmem_ilist))
 209                list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]);
 210        spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
 211        inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
 212        mutex_unlock(&fi->inmem_lock);
 213
 214        trace_f2fs_register_inmem_page(page, INMEM);
 215}
 216
 217static int __revoke_inmem_pages(struct inode *inode,
 218                                struct list_head *head, bool drop, bool recover)
 219{
 220        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 221        struct inmem_pages *cur, *tmp;
 222        int err = 0;
 223
 224        list_for_each_entry_safe(cur, tmp, head, list) {
 225                struct page *page = cur->page;
 226
 227                if (drop)
 228                        trace_f2fs_commit_inmem_page(page, INMEM_DROP);
 229
 230                lock_page(page);
 231
 232                f2fs_wait_on_page_writeback(page, DATA, true, true);
 233
 234                if (recover) {
 235                        struct dnode_of_data dn;
 236                        struct node_info ni;
 237
 238                        trace_f2fs_commit_inmem_page(page, INMEM_REVOKE);
 239retry:
 240                        set_new_dnode(&dn, inode, NULL, NULL, 0);
 241                        err = f2fs_get_dnode_of_data(&dn, page->index,
 242                                                                LOOKUP_NODE);
 243                        if (err) {
 244                                if (err == -ENOMEM) {
 245                                        congestion_wait(BLK_RW_ASYNC, HZ/50);
 246                                        cond_resched();
 247                                        goto retry;
 248                                }
 249                                err = -EAGAIN;
 250                                goto next;
 251                        }
 252
 253                        err = f2fs_get_node_info(sbi, dn.nid, &ni);
 254                        if (err) {
 255                                f2fs_put_dnode(&dn);
 256                                return err;
 257                        }
 258
 259                        if (cur->old_addr == NEW_ADDR) {
 260                                f2fs_invalidate_blocks(sbi, dn.data_blkaddr);
 261                                f2fs_update_data_blkaddr(&dn, NEW_ADDR);
 262                        } else
 263                                f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
 264                                        cur->old_addr, ni.version, true, true);
 265                        f2fs_put_dnode(&dn);
 266                }
 267next:
 268                /* we don't need to invalidate this in the sccessful status */
 269                if (drop || recover) {
 270                        ClearPageUptodate(page);
 271                        clear_cold_data(page);
 272                }
 273                set_page_private(page, 0);
 274                ClearPagePrivate(page);
 275                f2fs_put_page(page, 1);
 276
 277                list_del(&cur->list);
 278                kmem_cache_free(inmem_entry_slab, cur);
 279                dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
 280        }
 281        return err;
 282}
 283
 284void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure)
 285{
 286        struct list_head *head = &sbi->inode_list[ATOMIC_FILE];
 287        struct inode *inode;
 288        struct f2fs_inode_info *fi;
 289next:
 290        spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
 291        if (list_empty(head)) {
 292                spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
 293                return;
 294        }
 295        fi = list_first_entry(head, struct f2fs_inode_info, inmem_ilist);
 296        inode = igrab(&fi->vfs_inode);
 297        spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
 298
 299        if (inode) {
 300                if (gc_failure) {
 301                        if (fi->i_gc_failures[GC_FAILURE_ATOMIC])
 302                                goto drop;
 303                        goto skip;
 304                }
 305drop:
 306                set_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
 307                f2fs_drop_inmem_pages(inode);
 308                iput(inode);
 309        }
 310skip:
 311        congestion_wait(BLK_RW_ASYNC, HZ/50);
 312        cond_resched();
 313        goto next;
 314}
 315
 316void f2fs_drop_inmem_pages(struct inode *inode)
 317{
 318        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 319        struct f2fs_inode_info *fi = F2FS_I(inode);
 320
 321        mutex_lock(&fi->inmem_lock);
 322        __revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
 323        spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
 324        if (!list_empty(&fi->inmem_ilist))
 325                list_del_init(&fi->inmem_ilist);
 326        spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
 327        mutex_unlock(&fi->inmem_lock);
 328
 329        clear_inode_flag(inode, FI_ATOMIC_FILE);
 330        fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
 331        stat_dec_atomic_write(inode);
 332}
 333
 334void f2fs_drop_inmem_page(struct inode *inode, struct page *page)
 335{
 336        struct f2fs_inode_info *fi = F2FS_I(inode);
 337        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 338        struct list_head *head = &fi->inmem_pages;
 339        struct inmem_pages *cur = NULL;
 340
 341        f2fs_bug_on(sbi, !IS_ATOMIC_WRITTEN_PAGE(page));
 342
 343        mutex_lock(&fi->inmem_lock);
 344        list_for_each_entry(cur, head, list) {
 345                if (cur->page == page)
 346                        break;
 347        }
 348
 349        f2fs_bug_on(sbi, list_empty(head) || cur->page != page);
 350        list_del(&cur->list);
 351        mutex_unlock(&fi->inmem_lock);
 352
 353        dec_page_count(sbi, F2FS_INMEM_PAGES);
 354        kmem_cache_free(inmem_entry_slab, cur);
 355
 356        ClearPageUptodate(page);
 357        set_page_private(page, 0);
 358        ClearPagePrivate(page);
 359        f2fs_put_page(page, 0);
 360
 361        trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE);
 362}
 363
 364static int __f2fs_commit_inmem_pages(struct inode *inode)
 365{
 366        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 367        struct f2fs_inode_info *fi = F2FS_I(inode);
 368        struct inmem_pages *cur, *tmp;
 369        struct f2fs_io_info fio = {
 370                .sbi = sbi,
 371                .ino = inode->i_ino,
 372                .type = DATA,
 373                .op = REQ_OP_WRITE,
 374                .op_flags = REQ_SYNC | REQ_PRIO,
 375                .io_type = FS_DATA_IO,
 376        };
 377        struct list_head revoke_list;
 378        bool submit_bio = false;
 379        int err = 0;
 380
 381        INIT_LIST_HEAD(&revoke_list);
 382
 383        list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
 384                struct page *page = cur->page;
 385
 386                lock_page(page);
 387                if (page->mapping == inode->i_mapping) {
 388                        trace_f2fs_commit_inmem_page(page, INMEM);
 389
 390                        f2fs_wait_on_page_writeback(page, DATA, true, true);
 391
 392                        set_page_dirty(page);
 393                        if (clear_page_dirty_for_io(page)) {
 394                                inode_dec_dirty_pages(inode);
 395                                f2fs_remove_dirty_inode(inode);
 396                        }
 397retry:
 398                        fio.page = page;
 399                        fio.old_blkaddr = NULL_ADDR;
 400                        fio.encrypted_page = NULL;
 401                        fio.need_lock = LOCK_DONE;
 402                        err = f2fs_do_write_data_page(&fio);
 403                        if (err) {
 404                                if (err == -ENOMEM) {
 405                                        congestion_wait(BLK_RW_ASYNC, HZ/50);
 406                                        cond_resched();
 407                                        goto retry;
 408                                }
 409                                unlock_page(page);
 410                                break;
 411                        }
 412                        /* record old blkaddr for revoking */
 413                        cur->old_addr = fio.old_blkaddr;
 414                        submit_bio = true;
 415                }
 416                unlock_page(page);
 417                list_move_tail(&cur->list, &revoke_list);
 418        }
 419
 420        if (submit_bio)
 421                f2fs_submit_merged_write_cond(sbi, inode, NULL, 0, DATA);
 422
 423        if (err) {
 424                /*
 425                 * try to revoke all committed pages, but still we could fail
 426                 * due to no memory or other reason, if that happened, EAGAIN
 427                 * will be returned, which means in such case, transaction is
 428                 * already not integrity, caller should use journal to do the
 429                 * recovery or rewrite & commit last transaction. For other
 430                 * error number, revoking was done by filesystem itself.
 431                 */
 432                err = __revoke_inmem_pages(inode, &revoke_list, false, true);
 433
 434                /* drop all uncommitted pages */
 435                __revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
 436        } else {
 437                __revoke_inmem_pages(inode, &revoke_list, false, false);
 438        }
 439
 440        return err;
 441}
 442
 443int f2fs_commit_inmem_pages(struct inode *inode)
 444{
 445        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 446        struct f2fs_inode_info *fi = F2FS_I(inode);
 447        int err;
 448
 449        f2fs_balance_fs(sbi, true);
 450
 451        down_write(&fi->i_gc_rwsem[WRITE]);
 452
 453        f2fs_lock_op(sbi);
 454        set_inode_flag(inode, FI_ATOMIC_COMMIT);
 455
 456        mutex_lock(&fi->inmem_lock);
 457        err = __f2fs_commit_inmem_pages(inode);
 458
 459        spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
 460        if (!list_empty(&fi->inmem_ilist))
 461                list_del_init(&fi->inmem_ilist);
 462        spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
 463        mutex_unlock(&fi->inmem_lock);
 464
 465        clear_inode_flag(inode, FI_ATOMIC_COMMIT);
 466
 467        f2fs_unlock_op(sbi);
 468        up_write(&fi->i_gc_rwsem[WRITE]);
 469
 470        return err;
 471}
 472
 473/*
 474 * This function balances dirty node and dentry pages.
 475 * In addition, it controls garbage collection.
 476 */
 477void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
 478{
 479        if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
 480                f2fs_show_injection_info(FAULT_CHECKPOINT);
 481                f2fs_stop_checkpoint(sbi, false);
 482        }
 483
 484        /* balance_fs_bg is able to be pending */
 485        if (need && excess_cached_nats(sbi))
 486                f2fs_balance_fs_bg(sbi);
 487
 488        if (f2fs_is_checkpoint_ready(sbi))
 489                return;
 490
 491        /*
 492         * We should do GC or end up with checkpoint, if there are so many dirty
 493         * dir/node pages without enough free segments.
 494         */
 495        if (has_not_enough_free_secs(sbi, 0, 0)) {
 496                mutex_lock(&sbi->gc_mutex);
 497                f2fs_gc(sbi, false, false, NULL_SEGNO);
 498        }
 499}
 500
 501void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
 502{
 503        if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
 504                return;
 505
 506        /* try to shrink extent cache when there is no enough memory */
 507        if (!f2fs_available_free_memory(sbi, EXTENT_CACHE))
 508                f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER);
 509
 510        /* check the # of cached NAT entries */
 511        if (!f2fs_available_free_memory(sbi, NAT_ENTRIES))
 512                f2fs_try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);
 513
 514        if (!f2fs_available_free_memory(sbi, FREE_NIDS))
 515                f2fs_try_to_free_nids(sbi, MAX_FREE_NIDS);
 516        else
 517                f2fs_build_free_nids(sbi, false, false);
 518
 519        if (!is_idle(sbi, REQ_TIME) &&
 520                (!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi)))
 521                return;
 522
 523        /* checkpoint is the only way to shrink partial cached entries */
 524        if (!f2fs_available_free_memory(sbi, NAT_ENTRIES) ||
 525                        !f2fs_available_free_memory(sbi, INO_ENTRIES) ||
 526                        excess_prefree_segs(sbi) ||
 527                        excess_dirty_nats(sbi) ||
 528                        excess_dirty_nodes(sbi) ||
 529                        f2fs_time_over(sbi, CP_TIME)) {
 530                if (test_opt(sbi, DATA_FLUSH)) {
 531                        struct blk_plug plug;
 532
 533                        blk_start_plug(&plug);
 534                        f2fs_sync_dirty_inodes(sbi, FILE_INODE);
 535                        blk_finish_plug(&plug);
 536                }
 537                f2fs_sync_fs(sbi->sb, true);
 538                stat_inc_bg_cp_count(sbi->stat_info);
 539        }
 540}
 541
 542static int __submit_flush_wait(struct f2fs_sb_info *sbi,
 543                                struct block_device *bdev)
 544{
 545        struct bio *bio = f2fs_bio_alloc(sbi, 0, true);
 546        int ret;
 547
 548        bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH;
 549        bio_set_dev(bio, bdev);
 550        ret = submit_bio_wait(bio);
 551        bio_put(bio);
 552
 553        trace_f2fs_issue_flush(bdev, test_opt(sbi, NOBARRIER),
 554                                test_opt(sbi, FLUSH_MERGE), ret);
 555        return ret;
 556}
 557
 558static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino)
 559{
 560        int ret = 0;
 561        int i;
 562
 563        if (!sbi->s_ndevs)
 564                return __submit_flush_wait(sbi, sbi->sb->s_bdev);
 565
 566        for (i = 0; i < sbi->s_ndevs; i++) {
 567                if (!f2fs_is_dirty_device(sbi, ino, i, FLUSH_INO))
 568                        continue;
 569                ret = __submit_flush_wait(sbi, FDEV(i).bdev);
 570                if (ret)
 571                        break;
 572        }
 573        return ret;
 574}
 575
 576static int issue_flush_thread(void *data)
 577{
 578        struct f2fs_sb_info *sbi = data;
 579        struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
 580        wait_queue_head_t *q = &fcc->flush_wait_queue;
 581repeat:
 582        if (kthread_should_stop())
 583                return 0;
 584
 585        sb_start_intwrite(sbi->sb);
 586
 587        if (!llist_empty(&fcc->issue_list)) {
 588                struct flush_cmd *cmd, *next;
 589                int ret;
 590
 591                fcc->dispatch_list = llist_del_all(&fcc->issue_list);
 592                fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
 593
 594                cmd = llist_entry(fcc->dispatch_list, struct flush_cmd, llnode);
 595
 596                ret = submit_flush_wait(sbi, cmd->ino);
 597                atomic_inc(&fcc->issued_flush);
 598
 599                llist_for_each_entry_safe(cmd, next,
 600                                          fcc->dispatch_list, llnode) {
 601                        cmd->ret = ret;
 602                        complete(&cmd->wait);
 603                }
 604                fcc->dispatch_list = NULL;
 605        }
 606
 607        sb_end_intwrite(sbi->sb);
 608
 609        wait_event_interruptible(*q,
 610                kthread_should_stop() || !llist_empty(&fcc->issue_list));
 611        goto repeat;
 612}
 613
 614int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
 615{
 616        struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
 617        struct flush_cmd cmd;
 618        int ret;
 619
 620        if (test_opt(sbi, NOBARRIER))
 621                return 0;
 622
 623        if (!test_opt(sbi, FLUSH_MERGE)) {
 624                atomic_inc(&fcc->queued_flush);
 625                ret = submit_flush_wait(sbi, ino);
 626                atomic_dec(&fcc->queued_flush);
 627                atomic_inc(&fcc->issued_flush);
 628                return ret;
 629        }
 630
 631        if (atomic_inc_return(&fcc->queued_flush) == 1 || sbi->s_ndevs > 1) {
 632                ret = submit_flush_wait(sbi, ino);
 633                atomic_dec(&fcc->queued_flush);
 634
 635                atomic_inc(&fcc->issued_flush);
 636                return ret;
 637        }
 638
 639        cmd.ino = ino;
 640        init_completion(&cmd.wait);
 641
 642        llist_add(&cmd.llnode, &fcc->issue_list);
 643
 644        /* update issue_list before we wake up issue_flush thread */
 645        smp_mb();
 646
 647        if (waitqueue_active(&fcc->flush_wait_queue))
 648                wake_up(&fcc->flush_wait_queue);
 649
 650        if (fcc->f2fs_issue_flush) {
 651                wait_for_completion(&cmd.wait);
 652                atomic_dec(&fcc->queued_flush);
 653        } else {
 654                struct llist_node *list;
 655
 656                list = llist_del_all(&fcc->issue_list);
 657                if (!list) {
 658                        wait_for_completion(&cmd.wait);
 659                        atomic_dec(&fcc->queued_flush);
 660                } else {
 661                        struct flush_cmd *tmp, *next;
 662
 663                        ret = submit_flush_wait(sbi, ino);
 664
 665                        llist_for_each_entry_safe(tmp, next, list, llnode) {
 666                                if (tmp == &cmd) {
 667                                        cmd.ret = ret;
 668                                        atomic_dec(&fcc->queued_flush);
 669                                        continue;
 670                                }
 671                                tmp->ret = ret;
 672                                complete(&tmp->wait);
 673                        }
 674                }
 675        }
 676
 677        return cmd.ret;
 678}
 679
 680int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi)
 681{
 682        dev_t dev = sbi->sb->s_bdev->bd_dev;
 683        struct flush_cmd_control *fcc;
 684        int err = 0;
 685
 686        if (SM_I(sbi)->fcc_info) {
 687                fcc = SM_I(sbi)->fcc_info;
 688                if (fcc->f2fs_issue_flush)
 689                        return err;
 690                goto init_thread;
 691        }
 692
 693        fcc = f2fs_kzalloc(sbi, sizeof(struct flush_cmd_control), GFP_KERNEL);
 694        if (!fcc)
 695                return -ENOMEM;
 696        atomic_set(&fcc->issued_flush, 0);
 697        atomic_set(&fcc->queued_flush, 0);
 698        init_waitqueue_head(&fcc->flush_wait_queue);
 699        init_llist_head(&fcc->issue_list);
 700        SM_I(sbi)->fcc_info = fcc;
 701        if (!test_opt(sbi, FLUSH_MERGE))
 702                return err;
 703
 704init_thread:
 705        fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
 706                                "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
 707        if (IS_ERR(fcc->f2fs_issue_flush)) {
 708                err = PTR_ERR(fcc->f2fs_issue_flush);
 709                kvfree(fcc);
 710                SM_I(sbi)->fcc_info = NULL;
 711                return err;
 712        }
 713
 714        return err;
 715}
 716
 717void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
 718{
 719        struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
 720
 721        if (fcc && fcc->f2fs_issue_flush) {
 722                struct task_struct *flush_thread = fcc->f2fs_issue_flush;
 723
 724                fcc->f2fs_issue_flush = NULL;
 725                kthread_stop(flush_thread);
 726        }
 727        if (free) {
 728                kvfree(fcc);
 729                SM_I(sbi)->fcc_info = NULL;
 730        }
 731}
 732
 733int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
 734{
 735        int ret = 0, i;
 736
 737        if (!sbi->s_ndevs)
 738                return 0;
 739
 740        for (i = 1; i < sbi->s_ndevs; i++) {
 741                if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
 742                        continue;
 743                ret = __submit_flush_wait(sbi, FDEV(i).bdev);
 744                if (ret)
 745                        break;
 746
 747                spin_lock(&sbi->dev_lock);
 748                f2fs_clear_bit(i, (char *)&sbi->dirty_device);
 749                spin_unlock(&sbi->dev_lock);
 750        }
 751
 752        return ret;
 753}
 754
 755static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
 756                enum dirty_type dirty_type)
 757{
 758        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
 759
 760        /* need not be added */
 761        if (IS_CURSEG(sbi, segno))
 762                return;
 763
 764        if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
 765                dirty_i->nr_dirty[dirty_type]++;
 766
 767        if (dirty_type == DIRTY) {
 768                struct seg_entry *sentry = get_seg_entry(sbi, segno);
 769                enum dirty_type t = sentry->type;
 770
 771                if (unlikely(t >= DIRTY)) {
 772                        f2fs_bug_on(sbi, 1);
 773                        return;
 774                }
 775                if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
 776                        dirty_i->nr_dirty[t]++;
 777        }
 778}
 779
 780static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
 781                enum dirty_type dirty_type)
 782{
 783        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
 784
 785        if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
 786                dirty_i->nr_dirty[dirty_type]--;
 787
 788        if (dirty_type == DIRTY) {
 789                struct seg_entry *sentry = get_seg_entry(sbi, segno);
 790                enum dirty_type t = sentry->type;
 791
 792                if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
 793                        dirty_i->nr_dirty[t]--;
 794
 795                if (get_valid_blocks(sbi, segno, true) == 0)
 796                        clear_bit(GET_SEC_FROM_SEG(sbi, segno),
 797                                                dirty_i->victim_secmap);
 798        }
 799}
 800
 801/*
 802 * Should not occur error such as -ENOMEM.
 803 * Adding dirty entry into seglist is not critical operation.
 804 * If a given segment is one of current working segments, it won't be added.
 805 */
 806static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
 807{
 808        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
 809        unsigned short valid_blocks, ckpt_valid_blocks;
 810
 811        if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
 812                return;
 813
 814        mutex_lock(&dirty_i->seglist_lock);
 815
 816        valid_blocks = get_valid_blocks(sbi, segno, false);
 817        ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno);
 818
 819        if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
 820                                ckpt_valid_blocks == sbi->blocks_per_seg)) {
 821                __locate_dirty_segment(sbi, segno, PRE);
 822                __remove_dirty_segment(sbi, segno, DIRTY);
 823        } else if (valid_blocks < sbi->blocks_per_seg) {
 824                __locate_dirty_segment(sbi, segno, DIRTY);
 825        } else {
 826                /* Recovery routine with SSR needs this */
 827                __remove_dirty_segment(sbi, segno, DIRTY);
 828        }
 829
 830        mutex_unlock(&dirty_i->seglist_lock);
 831}
 832
 833/* This moves currently empty dirty blocks to prefree. Must hold seglist_lock */
 834void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi)
 835{
 836        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
 837        unsigned int segno;
 838
 839        mutex_lock(&dirty_i->seglist_lock);
 840        for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
 841                if (get_valid_blocks(sbi, segno, false))
 842                        continue;
 843                if (IS_CURSEG(sbi, segno))
 844                        continue;
 845                __locate_dirty_segment(sbi, segno, PRE);
 846                __remove_dirty_segment(sbi, segno, DIRTY);
 847        }
 848        mutex_unlock(&dirty_i->seglist_lock);
 849}
 850
 851int f2fs_disable_cp_again(struct f2fs_sb_info *sbi)
 852{
 853        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
 854        block_t ovp = overprovision_segments(sbi) << sbi->log_blocks_per_seg;
 855        block_t holes[2] = {0, 0};      /* DATA and NODE */
 856        struct seg_entry *se;
 857        unsigned int segno;
 858
 859        mutex_lock(&dirty_i->seglist_lock);
 860        for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
 861                se = get_seg_entry(sbi, segno);
 862                if (IS_NODESEG(se->type))
 863                        holes[NODE] += sbi->blocks_per_seg - se->valid_blocks;
 864                else
 865                        holes[DATA] += sbi->blocks_per_seg - se->valid_blocks;
 866        }
 867        mutex_unlock(&dirty_i->seglist_lock);
 868
 869        if (holes[DATA] > ovp || holes[NODE] > ovp)
 870                return -EAGAIN;
 871        return 0;
 872}
 873
 874/* This is only used by SBI_CP_DISABLED */
 875static unsigned int get_free_segment(struct f2fs_sb_info *sbi)
 876{
 877        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
 878        unsigned int segno = 0;
 879
 880        mutex_lock(&dirty_i->seglist_lock);
 881        for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
 882                if (get_valid_blocks(sbi, segno, false))
 883                        continue;
 884                if (get_ckpt_valid_blocks(sbi, segno))
 885                        continue;
 886                mutex_unlock(&dirty_i->seglist_lock);
 887                return segno;
 888        }
 889        mutex_unlock(&dirty_i->seglist_lock);
 890        return NULL_SEGNO;
 891}
 892
 893static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
 894                struct block_device *bdev, block_t lstart,
 895                block_t start, block_t len)
 896{
 897        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
 898        struct list_head *pend_list;
 899        struct discard_cmd *dc;
 900
 901        f2fs_bug_on(sbi, !len);
 902
 903        pend_list = &dcc->pend_list[plist_idx(len)];
 904
 905        dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS);
 906        INIT_LIST_HEAD(&dc->list);
 907        dc->bdev = bdev;
 908        dc->lstart = lstart;
 909        dc->start = start;
 910        dc->len = len;
 911        dc->ref = 0;
 912        dc->state = D_PREP;
 913        dc->queued = 0;
 914        dc->error = 0;
 915        init_completion(&dc->wait);
 916        list_add_tail(&dc->list, pend_list);
 917        spin_lock_init(&dc->lock);
 918        dc->bio_ref = 0;
 919        atomic_inc(&dcc->discard_cmd_cnt);
 920        dcc->undiscard_blks += len;
 921
 922        return dc;
 923}
 924
 925static struct discard_cmd *__attach_discard_cmd(struct f2fs_sb_info *sbi,
 926                                struct block_device *bdev, block_t lstart,
 927                                block_t start, block_t len,
 928                                struct rb_node *parent, struct rb_node **p,
 929                                bool leftmost)
 930{
 931        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
 932        struct discard_cmd *dc;
 933
 934        dc = __create_discard_cmd(sbi, bdev, lstart, start, len);
 935
 936        rb_link_node(&dc->rb_node, parent, p);
 937        rb_insert_color_cached(&dc->rb_node, &dcc->root, leftmost);
 938
 939        return dc;
 940}
 941
 942static void __detach_discard_cmd(struct discard_cmd_control *dcc,
 943                                                        struct discard_cmd *dc)
 944{
 945        if (dc->state == D_DONE)
 946                atomic_sub(dc->queued, &dcc->queued_discard);
 947
 948        list_del(&dc->list);
 949        rb_erase_cached(&dc->rb_node, &dcc->root);
 950        dcc->undiscard_blks -= dc->len;
 951
 952        kmem_cache_free(discard_cmd_slab, dc);
 953
 954        atomic_dec(&dcc->discard_cmd_cnt);
 955}
 956
 957static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
 958                                                        struct discard_cmd *dc)
 959{
 960        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
 961        unsigned long flags;
 962
 963        trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len);
 964
 965        spin_lock_irqsave(&dc->lock, flags);
 966        if (dc->bio_ref) {
 967                spin_unlock_irqrestore(&dc->lock, flags);
 968                return;
 969        }
 970        spin_unlock_irqrestore(&dc->lock, flags);
 971
 972        f2fs_bug_on(sbi, dc->ref);
 973
 974        if (dc->error == -EOPNOTSUPP)
 975                dc->error = 0;
 976
 977        if (dc->error)
 978                printk_ratelimited(
 979                        "%sF2FS-fs: Issue discard(%u, %u, %u) failed, ret: %d",
 980                        KERN_INFO, dc->lstart, dc->start, dc->len, dc->error);
 981        __detach_discard_cmd(dcc, dc);
 982}
 983
 984static void f2fs_submit_discard_endio(struct bio *bio)
 985{
 986        struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
 987        unsigned long flags;
 988
 989        dc->error = blk_status_to_errno(bio->bi_status);
 990
 991        spin_lock_irqsave(&dc->lock, flags);
 992        dc->bio_ref--;
 993        if (!dc->bio_ref && dc->state == D_SUBMIT) {
 994                dc->state = D_DONE;
 995                complete_all(&dc->wait);
 996        }
 997        spin_unlock_irqrestore(&dc->lock, flags);
 998        bio_put(bio);
 999}
1000
1001static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
1002                                block_t start, block_t end)
1003{
1004#ifdef CONFIG_F2FS_CHECK_FS
1005        struct seg_entry *sentry;
1006        unsigned int segno;
1007        block_t blk = start;
1008        unsigned long offset, size, max_blocks = sbi->blocks_per_seg;
1009        unsigned long *map;
1010
1011        while (blk < end) {
1012                segno = GET_SEGNO(sbi, blk);
1013                sentry = get_seg_entry(sbi, segno);
1014                offset = GET_BLKOFF_FROM_SEG0(sbi, blk);
1015
1016                if (end < START_BLOCK(sbi, segno + 1))
1017                        size = GET_BLKOFF_FROM_SEG0(sbi, end);
1018                else
1019                        size = max_blocks;
1020                map = (unsigned long *)(sentry->cur_valid_map);
1021                offset = __find_rev_next_bit(map, size, offset);
1022                f2fs_bug_on(sbi, offset != size);
1023                blk = START_BLOCK(sbi, segno + 1);
1024        }
1025#endif
1026}
1027
1028static void __init_discard_policy(struct f2fs_sb_info *sbi,
1029                                struct discard_policy *dpolicy,
1030                                int discard_type, unsigned int granularity)
1031{
1032        /* common policy */
1033        dpolicy->type = discard_type;
1034        dpolicy->sync = true;
1035        dpolicy->ordered = false;
1036        dpolicy->granularity = granularity;
1037
1038        dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
1039        dpolicy->io_aware_gran = MAX_PLIST_NUM;
1040
1041        if (discard_type == DPOLICY_BG) {
1042                dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
1043                dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
1044                dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
1045                dpolicy->io_aware = true;
1046                dpolicy->sync = false;
1047                dpolicy->ordered = true;
1048                if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
1049                        dpolicy->granularity = 1;
1050                        dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
1051                }
1052        } else if (discard_type == DPOLICY_FORCE) {
1053                dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
1054                dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
1055                dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
1056                dpolicy->io_aware = false;
1057        } else if (discard_type == DPOLICY_FSTRIM) {
1058                dpolicy->io_aware = false;
1059        } else if (discard_type == DPOLICY_UMOUNT) {
1060                dpolicy->max_requests = UINT_MAX;
1061                dpolicy->io_aware = false;
1062        }
1063}
1064
1065static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
1066                                struct block_device *bdev, block_t lstart,
1067                                block_t start, block_t len);
1068/* this function is copied from blkdev_issue_discard from block/blk-lib.c */
1069static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
1070                                                struct discard_policy *dpolicy,
1071                                                struct discard_cmd *dc,
1072                                                unsigned int *issued)
1073{
1074        struct block_device *bdev = dc->bdev;
1075        struct request_queue *q = bdev_get_queue(bdev);
1076        unsigned int max_discard_blocks =
1077                        SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
1078        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1079        struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
1080                                        &(dcc->fstrim_list) : &(dcc->wait_list);
1081        int flag = dpolicy->sync ? REQ_SYNC : 0;
1082        block_t lstart, start, len, total_len;
1083        int err = 0;
1084
1085        if (dc->state != D_PREP)
1086                return 0;
1087
1088        if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
1089                return 0;
1090
1091        trace_f2fs_issue_discard(bdev, dc->start, dc->len);
1092
1093        lstart = dc->lstart;
1094        start = dc->start;
1095        len = dc->len;
1096        total_len = len;
1097
1098        dc->len = 0;
1099
1100        while (total_len && *issued < dpolicy->max_requests && !err) {
1101                struct bio *bio = NULL;
1102                unsigned long flags;
1103                bool last = true;
1104
1105                if (len > max_discard_blocks) {
1106                        len = max_discard_blocks;
1107                        last = false;
1108                }
1109
1110                (*issued)++;
1111                if (*issued == dpolicy->max_requests)
1112                        last = true;
1113
1114                dc->len += len;
1115
1116                if (time_to_inject(sbi, FAULT_DISCARD)) {
1117                        f2fs_show_injection_info(FAULT_DISCARD);
1118                        err = -EIO;
1119                        goto submit;
1120                }
1121                err = __blkdev_issue_discard(bdev,
1122                                        SECTOR_FROM_BLOCK(start),
1123                                        SECTOR_FROM_BLOCK(len),
1124                                        GFP_NOFS, 0, &bio);
1125submit:
1126                if (err) {
1127                        spin_lock_irqsave(&dc->lock, flags);
1128                        if (dc->state == D_PARTIAL)
1129                                dc->state = D_SUBMIT;
1130                        spin_unlock_irqrestore(&dc->lock, flags);
1131
1132                        break;
1133                }
1134
1135                f2fs_bug_on(sbi, !bio);
1136
1137                /*
1138                 * should keep before submission to avoid D_DONE
1139                 * right away
1140                 */
1141                spin_lock_irqsave(&dc->lock, flags);
1142                if (last)
1143                        dc->state = D_SUBMIT;
1144                else
1145                        dc->state = D_PARTIAL;
1146                dc->bio_ref++;
1147                spin_unlock_irqrestore(&dc->lock, flags);
1148
1149                atomic_inc(&dcc->queued_discard);
1150                dc->queued++;
1151                list_move_tail(&dc->list, wait_list);
1152
1153                /* sanity check on discard range */
1154                __check_sit_bitmap(sbi, lstart, lstart + len);
1155
1156                bio->bi_private = dc;
1157                bio->bi_end_io = f2fs_submit_discard_endio;
1158                bio->bi_opf |= flag;
1159                submit_bio(bio);
1160
1161                atomic_inc(&dcc->issued_discard);
1162
1163                f2fs_update_iostat(sbi, FS_DISCARD, 1);
1164
1165                lstart += len;
1166                start += len;
1167                total_len -= len;
1168                len = total_len;
1169        }
1170
1171        if (!err && len)
1172                __update_discard_tree_range(sbi, bdev, lstart, start, len);
1173        return err;
1174}
1175
1176static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
1177                                struct block_device *bdev, block_t lstart,
1178                                block_t start, block_t len,
1179                                struct rb_node **insert_p,
1180                                struct rb_node *insert_parent)
1181{
1182        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1183        struct rb_node **p;
1184        struct rb_node *parent = NULL;
1185        struct discard_cmd *dc = NULL;
1186        bool leftmost = true;
1187
1188        if (insert_p && insert_parent) {
1189                parent = insert_parent;
1190                p = insert_p;
1191                goto do_insert;
1192        }
1193
1194        p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent,
1195                                                        lstart, &leftmost);
1196do_insert:
1197        dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent,
1198                                                                p, leftmost);
1199        if (!dc)
1200                return NULL;
1201
1202        return dc;
1203}
1204
1205static void __relocate_discard_cmd(struct discard_cmd_control *dcc,
1206                                                struct discard_cmd *dc)
1207{
1208        list_move_tail(&dc->list, &dcc->pend_list[plist_idx(dc->len)]);
1209}
1210
1211static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
1212                                struct discard_cmd *dc, block_t blkaddr)
1213{
1214        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1215        struct discard_info di = dc->di;
1216        bool modified = false;
1217
1218        if (dc->state == D_DONE || dc->len == 1) {
1219                __remove_discard_cmd(sbi, dc);
1220                return;
1221        }
1222
1223        dcc->undiscard_blks -= di.len;
1224
1225        if (blkaddr > di.lstart) {
1226                dc->len = blkaddr - dc->lstart;
1227                dcc->undiscard_blks += dc->len;
1228                __relocate_discard_cmd(dcc, dc);
1229                modified = true;
1230        }
1231
1232        if (blkaddr < di.lstart + di.len - 1) {
1233                if (modified) {
1234                        __insert_discard_tree(sbi, dc->bdev, blkaddr + 1,
1235                                        di.start + blkaddr + 1 - di.lstart,
1236                                        di.lstart + di.len - 1 - blkaddr,
1237                                        NULL, NULL);
1238                } else {
1239                        dc->lstart++;
1240                        dc->len--;
1241                        dc->start++;
1242                        dcc->undiscard_blks += dc->len;
1243                        __relocate_discard_cmd(dcc, dc);
1244                }
1245        }
1246}
1247
1248static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
1249                                struct block_device *bdev, block_t lstart,
1250                                block_t start, block_t len)
1251{
1252        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1253        struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
1254        struct discard_cmd *dc;
1255        struct discard_info di = {0};
1256        struct rb_node **insert_p = NULL, *insert_parent = NULL;
1257        struct request_queue *q = bdev_get_queue(bdev);
1258        unsigned int max_discard_blocks =
1259                        SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
1260        block_t end = lstart + len;
1261
1262        dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
1263                                        NULL, lstart,
1264                                        (struct rb_entry **)&prev_dc,
1265                                        (struct rb_entry **)&next_dc,
1266                                        &insert_p, &insert_parent, true, NULL);
1267        if (dc)
1268                prev_dc = dc;
1269
1270        if (!prev_dc) {
1271                di.lstart = lstart;
1272                di.len = next_dc ? next_dc->lstart - lstart : len;
1273                di.len = min(di.len, len);
1274                di.start = start;
1275        }
1276
1277        while (1) {
1278                struct rb_node *node;
1279                bool merged = false;
1280                struct discard_cmd *tdc = NULL;
1281
1282                if (prev_dc) {
1283                        di.lstart = prev_dc->lstart + prev_dc->len;
1284                        if (di.lstart < lstart)
1285                                di.lstart = lstart;
1286                        if (di.lstart >= end)
1287                                break;
1288
1289                        if (!next_dc || next_dc->lstart > end)
1290                                di.len = end - di.lstart;
1291                        else
1292                                di.len = next_dc->lstart - di.lstart;
1293                        di.start = start + di.lstart - lstart;
1294                }
1295
1296                if (!di.len)
1297                        goto next;
1298
1299                if (prev_dc && prev_dc->state == D_PREP &&
1300                        prev_dc->bdev == bdev &&
1301                        __is_discard_back_mergeable(&di, &prev_dc->di,
1302                                                        max_discard_blocks)) {
1303                        prev_dc->di.len += di.len;
1304                        dcc->undiscard_blks += di.len;
1305                        __relocate_discard_cmd(dcc, prev_dc);
1306                        di = prev_dc->di;
1307                        tdc = prev_dc;
1308                        merged = true;
1309                }
1310
1311                if (next_dc && next_dc->state == D_PREP &&
1312                        next_dc->bdev == bdev &&
1313                        __is_discard_front_mergeable(&di, &next_dc->di,
1314                                                        max_discard_blocks)) {
1315                        next_dc->di.lstart = di.lstart;
1316                        next_dc->di.len += di.len;
1317                        next_dc->di.start = di.start;
1318                        dcc->undiscard_blks += di.len;
1319                        __relocate_discard_cmd(dcc, next_dc);
1320                        if (tdc)
1321                                __remove_discard_cmd(sbi, tdc);
1322                        merged = true;
1323                }
1324
1325                if (!merged) {
1326                        __insert_discard_tree(sbi, bdev, di.lstart, di.start,
1327                                                        di.len, NULL, NULL);
1328                }
1329 next:
1330                prev_dc = next_dc;
1331                if (!prev_dc)
1332                        break;
1333
1334                node = rb_next(&prev_dc->rb_node);
1335                next_dc = rb_entry_safe(node, struct discard_cmd, rb_node);
1336        }
1337}
1338
1339static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
1340                struct block_device *bdev, block_t blkstart, block_t blklen)
1341{
1342        block_t lblkstart = blkstart;
1343
1344        trace_f2fs_queue_discard(bdev, blkstart, blklen);
1345
1346        if (sbi->s_ndevs) {
1347                int devi = f2fs_target_device_index(sbi, blkstart);
1348
1349                blkstart -= FDEV(devi).start_blk;
1350        }
1351        mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
1352        __update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
1353        mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
1354        return 0;
1355}
1356
1357static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
1358                                        struct discard_policy *dpolicy)
1359{
1360        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1361        struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
1362        struct rb_node **insert_p = NULL, *insert_parent = NULL;
1363        struct discard_cmd *dc;
1364        struct blk_plug plug;
1365        unsigned int pos = dcc->next_pos;
1366        unsigned int issued = 0;
1367        bool io_interrupted = false;
1368
1369        mutex_lock(&dcc->cmd_lock);
1370        dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
1371                                        NULL, pos,
1372                                        (struct rb_entry **)&prev_dc,
1373                                        (struct rb_entry **)&next_dc,
1374                                        &insert_p, &insert_parent, true, NULL);
1375        if (!dc)
1376                dc = next_dc;
1377
1378        blk_start_plug(&plug);
1379
1380        while (dc) {
1381                struct rb_node *node;
1382                int err = 0;
1383
1384                if (dc->state != D_PREP)
1385                        goto next;
1386
1387                if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) {
1388                        io_interrupted = true;
1389                        break;
1390                }
1391
1392                dcc->next_pos = dc->lstart + dc->len;
1393                err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
1394
1395                if (issued >= dpolicy->max_requests)
1396                        break;
1397next:
1398                node = rb_next(&dc->rb_node);
1399                if (err)
1400                        __remove_discard_cmd(sbi, dc);
1401                dc = rb_entry_safe(node, struct discard_cmd, rb_node);
1402        }
1403
1404        blk_finish_plug(&plug);
1405
1406        if (!dc)
1407                dcc->next_pos = 0;
1408
1409        mutex_unlock(&dcc->cmd_lock);
1410
1411        if (!issued && io_interrupted)
1412                issued = -1;
1413
1414        return issued;
1415}
1416
1417static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
1418                                        struct discard_policy *dpolicy)
1419{
1420        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1421        struct list_head *pend_list;
1422        struct discard_cmd *dc, *tmp;
1423        struct blk_plug plug;
1424        int i, issued = 0;
1425        bool io_interrupted = false;
1426
1427        for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
1428                if (i + 1 < dpolicy->granularity)
1429                        break;
1430
1431                if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
1432                        return __issue_discard_cmd_orderly(sbi, dpolicy);
1433
1434                pend_list = &dcc->pend_list[i];
1435
1436                mutex_lock(&dcc->cmd_lock);
1437                if (list_empty(pend_list))
1438                        goto next;
1439                if (unlikely(dcc->rbtree_check))
1440                        f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
1441                                                                &dcc->root));
1442                blk_start_plug(&plug);
1443                list_for_each_entry_safe(dc, tmp, pend_list, list) {
1444                        f2fs_bug_on(sbi, dc->state != D_PREP);
1445
1446                        if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
1447                                                !is_idle(sbi, DISCARD_TIME)) {
1448                                io_interrupted = true;
1449                                break;
1450                        }
1451
1452                        __submit_discard_cmd(sbi, dpolicy, dc, &issued);
1453
1454                        if (issued >= dpolicy->max_requests)
1455                                break;
1456                }
1457                blk_finish_plug(&plug);
1458next:
1459                mutex_unlock(&dcc->cmd_lock);
1460
1461                if (issued >= dpolicy->max_requests || io_interrupted)
1462                        break;
1463        }
1464
1465        if (!issued && io_interrupted)
1466                issued = -1;
1467
1468        return issued;
1469}
1470
1471static bool __drop_discard_cmd(struct f2fs_sb_info *sbi)
1472{
1473        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1474        struct list_head *pend_list;
1475        struct discard_cmd *dc, *tmp;
1476        int i;
1477        bool dropped = false;
1478
1479        mutex_lock(&dcc->cmd_lock);
1480        for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
1481                pend_list = &dcc->pend_list[i];
1482                list_for_each_entry_safe(dc, tmp, pend_list, list) {
1483                        f2fs_bug_on(sbi, dc->state != D_PREP);
1484                        __remove_discard_cmd(sbi, dc);
1485                        dropped = true;
1486                }
1487        }
1488        mutex_unlock(&dcc->cmd_lock);
1489
1490        return dropped;
1491}
1492
/* Drop all pending discard commands; the "anything dropped" result is unused. */
void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi)
{
	(void)__drop_discard_cmd(sbi);
}
1497
1498static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi,
1499                                                        struct discard_cmd *dc)
1500{
1501        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1502        unsigned int len = 0;
1503
1504        wait_for_completion_io(&dc->wait);
1505        mutex_lock(&dcc->cmd_lock);
1506        f2fs_bug_on(sbi, dc->state != D_DONE);
1507        dc->ref--;
1508        if (!dc->ref) {
1509                if (!dc->error)
1510                        len = dc->len;
1511                __remove_discard_cmd(sbi, dc);
1512        }
1513        mutex_unlock(&dcc->cmd_lock);
1514
1515        return len;
1516}
1517
1518static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi,
1519                                                struct discard_policy *dpolicy,
1520                                                block_t start, block_t end)
1521{
1522        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1523        struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
1524                                        &(dcc->fstrim_list) : &(dcc->wait_list);
1525        struct discard_cmd *dc, *tmp;
1526        bool need_wait;
1527        unsigned int trimmed = 0;
1528
1529next:
1530        need_wait = false;
1531
1532        mutex_lock(&dcc->cmd_lock);
1533        list_for_each_entry_safe(dc, tmp, wait_list, list) {
1534                if (dc->lstart + dc->len <= start || end <= dc->lstart)
1535                        continue;
1536                if (dc->len < dpolicy->granularity)
1537                        continue;
1538                if (dc->state == D_DONE && !dc->ref) {
1539                        wait_for_completion_io(&dc->wait);
1540                        if (!dc->error)
1541                                trimmed += dc->len;
1542                        __remove_discard_cmd(sbi, dc);
1543                } else {
1544                        dc->ref++;
1545                        need_wait = true;
1546                        break;
1547                }
1548        }
1549        mutex_unlock(&dcc->cmd_lock);
1550
1551        if (need_wait) {
1552                trimmed += __wait_one_discard_bio(sbi, dc);
1553                goto next;
1554        }
1555
1556        return trimmed;
1557}
1558
1559static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
1560                                                struct discard_policy *dpolicy)
1561{
1562        struct discard_policy dp;
1563        unsigned int discard_blks;
1564
1565        if (dpolicy)
1566                return __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
1567
1568        /* wait all */
1569        __init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, 1);
1570        discard_blks = __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
1571        __init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, 1);
1572        discard_blks += __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
1573
1574        return discard_blks;
1575}
1576
1577/* This should be covered by global mutex, &sit_i->sentry_lock */
1578static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
1579{
1580        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1581        struct discard_cmd *dc;
1582        bool need_wait = false;
1583
1584        mutex_lock(&dcc->cmd_lock);
1585        dc = (struct discard_cmd *)f2fs_lookup_rb_tree(&dcc->root,
1586                                                        NULL, blkaddr);
1587        if (dc) {
1588                if (dc->state == D_PREP) {
1589                        __punch_discard_cmd(sbi, dc, blkaddr);
1590                } else {
1591                        dc->ref++;
1592                        need_wait = true;
1593                }
1594        }
1595        mutex_unlock(&dcc->cmd_lock);
1596
1597        if (need_wait)
1598                __wait_one_discard_bio(sbi, dc);
1599}
1600
1601void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi)
1602{
1603        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1604
1605        if (dcc && dcc->f2fs_issue_discard) {
1606                struct task_struct *discard_thread = dcc->f2fs_issue_discard;
1607
1608                dcc->f2fs_issue_discard = NULL;
1609                kthread_stop(discard_thread);
1610        }
1611}
1612
1613/* This comes from f2fs_put_super */
1614bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
1615{
1616        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1617        struct discard_policy dpolicy;
1618        bool dropped;
1619
1620        __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
1621                                        dcc->discard_granularity);
1622        __issue_discard_cmd(sbi, &dpolicy);
1623        dropped = __drop_discard_cmd(sbi);
1624
1625        /* just to make sure there is no pending discard commands */
1626        __wait_all_discard_cmd(sbi, NULL);
1627
1628        f2fs_bug_on(sbi, atomic_read(&dcc->discard_cmd_cnt));
1629        return dropped;
1630}
1631
1632static int issue_discard_thread(void *data)
1633{
1634        struct f2fs_sb_info *sbi = data;
1635        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1636        wait_queue_head_t *q = &dcc->discard_wait_queue;
1637        struct discard_policy dpolicy;
1638        unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
1639        int issued;
1640
1641        set_freezable();
1642
1643        do {
1644                __init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
1645                                        dcc->discard_granularity);
1646
1647                wait_event_interruptible_timeout(*q,
1648                                kthread_should_stop() || freezing(current) ||
1649                                dcc->discard_wake,
1650                                msecs_to_jiffies(wait_ms));
1651
1652                if (dcc->discard_wake)
1653                        dcc->discard_wake = 0;
1654
1655                /* clean up pending candidates before going to sleep */
1656                if (atomic_read(&dcc->queued_discard))
1657                        __wait_all_discard_cmd(sbi, NULL);
1658
1659                if (try_to_freeze())
1660                        continue;
1661                if (f2fs_readonly(sbi->sb))
1662                        continue;
1663                if (kthread_should_stop())
1664                        return 0;
1665                if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
1666                        wait_ms = dpolicy.max_interval;
1667                        continue;
1668                }
1669
1670                if (sbi->gc_mode == GC_URGENT)
1671                        __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1);
1672
1673                sb_start_intwrite(sbi->sb);
1674
1675                issued = __issue_discard_cmd(sbi, &dpolicy);
1676                if (issued > 0) {
1677                        __wait_all_discard_cmd(sbi, &dpolicy);
1678                        wait_ms = dpolicy.min_interval;
1679                } else if (issued == -1){
1680                        wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME);
1681                        if (!wait_ms)
1682                                wait_ms = dpolicy.mid_interval;
1683                } else {
1684                        wait_ms = dpolicy.max_interval;
1685                }
1686
1687                sb_end_intwrite(sbi->sb);
1688
1689        } while (!kthread_should_stop());
1690        return 0;
1691}
1692
#ifdef CONFIG_BLK_DEV_ZONED
/*
 * Discard a block range on a zoned block device.
 *
 * @blkstart is a filesystem-wide block address; on a multi-device setup it
 * is converted to a device-relative address before the zone type lookup.
 * Conventional zones get a regular queued discard (if the queue supports
 * discard at all), sequential zones get a zone reset, which requires the
 * range to cover exactly one aligned zone.
 *
 * Returns 0 or the result of the queued discard / zone reset, -EIO for an
 * unaligned range or an unknown zone type.
 */
static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
                struct block_device *bdev, block_t blkstart, block_t blklen)
{
        /* keep the filesystem-wide address for the regular discard path */
        const block_t fs_blkstart = blkstart;
        sector_t sector, nr_sects;
        int devi = 0;

        if (sbi->s_ndevs) {
                devi = f2fs_target_device_index(sbi, blkstart);
                blkstart -= FDEV(devi).start_blk;
        }

        switch (get_blkz_type(sbi, bdev, blkstart)) {
        case BLK_ZONE_TYPE_CONVENTIONAL:
                if (blk_queue_discard(bdev_get_queue(bdev)))
                        return __queue_discard_cmd(sbi, bdev,
                                                fs_blkstart, blklen);
                return 0;
        case BLK_ZONE_TYPE_SEQWRITE_REQ:
        case BLK_ZONE_TYPE_SEQWRITE_PREF:
                /* sequential zone: handled by the reset path below */
                break;
        default:
                /* Unknown zone type: broken device ? */
                return -EIO;
        }

        sector = SECTOR_FROM_BLOCK(blkstart);
        nr_sects = SECTOR_FROM_BLOCK(blklen);

        /* a zone reset only makes sense for one whole, aligned zone */
        if (sector & (bdev_zone_sectors(bdev) - 1) ||
                        nr_sects != bdev_zone_sectors(bdev)) {
                f2fs_msg(sbi->sb, KERN_INFO,
                        "(%d) %s: Unaligned discard attempted (block %x + %x)",
                        devi, sbi->s_ndevs ? FDEV(devi).path: "",
                        blkstart, blklen);
                return -EIO;
        }

        trace_f2fs_issue_reset_zone(bdev, blkstart);
        return blkdev_reset_zones(bdev, sector, nr_sects, GFP_NOFS);
}
#endif
1739
1740static int __issue_discard_async(struct f2fs_sb_info *sbi,
1741                struct block_device *bdev, block_t blkstart, block_t blklen)
1742{
1743#ifdef CONFIG_BLK_DEV_ZONED
1744        if (f2fs_sb_has_blkzoned(sbi) &&
1745                                bdev_zoned_model(bdev) != BLK_ZONED_NONE)
1746                return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
1747#endif
1748        return __queue_discard_cmd(sbi, bdev, blkstart, blklen);
1749}
1750
1751static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
1752                                block_t blkstart, block_t blklen)
1753{
1754        sector_t start = blkstart, len = 0;
1755        struct block_device *bdev;
1756        struct seg_entry *se;
1757        unsigned int offset;
1758        block_t i;
1759        int err = 0;
1760
1761        bdev = f2fs_target_device(sbi, blkstart, NULL);
1762
1763        for (i = blkstart; i < blkstart + blklen; i++, len++) {
1764                if (i != start) {
1765                        struct block_device *bdev2 =
1766                                f2fs_target_device(sbi, i, NULL);
1767
1768                        if (bdev2 != bdev) {
1769                                err = __issue_discard_async(sbi, bdev,
1770                                                start, len);
1771                                if (err)
1772                                        return err;
1773                                bdev = bdev2;
1774                                start = i;
1775                                len = 0;
1776                        }
1777                }
1778
1779                se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
1780                offset = GET_BLKOFF_FROM_SEG0(sbi, i);
1781
1782                if (!f2fs_test_and_set_bit(offset, se->discard_map))
1783                        sbi->discard_blks--;
1784        }
1785
1786        if (len)
1787                err = __issue_discard_async(sbi, bdev, start, len);
1788        return err;
1789}
1790
1791static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
1792                                                        bool check_only)
1793{
1794        int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
1795        int max_blocks = sbi->blocks_per_seg;
1796        struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
1797        unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
1798        unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
1799        unsigned long *discard_map = (unsigned long *)se->discard_map;
1800        unsigned long *dmap = SIT_I(sbi)->tmp_map;
1801        unsigned int start = 0, end = -1;
1802        bool force = (cpc->reason & CP_DISCARD);
1803        struct discard_entry *de = NULL;
1804        struct list_head *head = &SM_I(sbi)->dcc_info->entry_list;
1805        int i;
1806
1807        if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi))
1808                return false;
1809
1810        if (!force) {
1811                if (!f2fs_realtime_discard_enable(sbi) || !se->valid_blocks ||
1812                        SM_I(sbi)->dcc_info->nr_discards >=
1813                                SM_I(sbi)->dcc_info->max_discards)
1814                        return false;
1815        }
1816
1817        /* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
1818        for (i = 0; i < entries; i++)
1819                dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
1820                                (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
1821
1822        while (force || SM_I(sbi)->dcc_info->nr_discards <=
1823                                SM_I(sbi)->dcc_info->max_discards) {
1824                start = __find_rev_next_bit(dmap, max_blocks, end + 1);
1825                if (start >= max_blocks)
1826                        break;
1827
1828                end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
1829                if (force && start && end != max_blocks
1830                                        && (end - start) < cpc->trim_minlen)
1831                        continue;
1832
1833                if (check_only)
1834                        return true;
1835
1836                if (!de) {
1837                        de = f2fs_kmem_cache_alloc(discard_entry_slab,
1838                                                                GFP_F2FS_ZERO);
1839                        de->start_blkaddr = START_BLOCK(sbi, cpc->trim_start);
1840                        list_add_tail(&de->list, head);
1841                }
1842
1843                for (i = start; i < end; i++)
1844                        __set_bit_le(i, (void *)de->discard_map);
1845
1846                SM_I(sbi)->dcc_info->nr_discards += end - start;
1847        }
1848        return false;
1849}
1850
1851static void release_discard_addr(struct discard_entry *entry)
1852{
1853        list_del(&entry->list);
1854        kmem_cache_free(discard_entry_slab, entry);
1855}
1856
1857void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi)
1858{
1859        struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list);
1860        struct discard_entry *entry, *this;
1861
1862        /* drop caches */
1863        list_for_each_entry_safe(entry, this, head, list)
1864                release_discard_addr(entry);
1865}
1866
1867/*
1868 * Should call f2fs_clear_prefree_segments after checkpoint is done.
1869 */
1870static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
1871{
1872        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1873        unsigned int segno;
1874
1875        mutex_lock(&dirty_i->seglist_lock);
1876        for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
1877                __set_test_and_free(sbi, segno);
1878        mutex_unlock(&dirty_i->seglist_lock);
1879}
1880
1881void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
1882                                                struct cp_control *cpc)
1883{
1884        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1885        struct list_head *head = &dcc->entry_list;
1886        struct discard_entry *entry, *this;
1887        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1888        unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
1889        unsigned int start = 0, end = -1;
1890        unsigned int secno, start_segno;
1891        bool force = (cpc->reason & CP_DISCARD);
1892        bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi);
1893
1894        mutex_lock(&dirty_i->seglist_lock);
1895
1896        while (1) {
1897                int i;
1898
1899                if (need_align && end != -1)
1900                        end--;
1901                start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
1902                if (start >= MAIN_SEGS(sbi))
1903                        break;
1904                end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
1905                                                                start + 1);
1906
1907                if (need_align) {
1908                        start = rounddown(start, sbi->segs_per_sec);
1909                        end = roundup(end, sbi->segs_per_sec);
1910                }
1911
1912                for (i = start; i < end; i++) {
1913                        if (test_and_clear_bit(i, prefree_map))
1914                                dirty_i->nr_dirty[PRE]--;
1915                }
1916
1917                if (!f2fs_realtime_discard_enable(sbi))
1918                        continue;
1919
1920                if (force && start >= cpc->trim_start &&
1921                                        (end - 1) <= cpc->trim_end)
1922                                continue;
1923
1924                if (!test_opt(sbi, LFS) || !__is_large_section(sbi)) {
1925                        f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
1926                                (end - start) << sbi->log_blocks_per_seg);
1927                        continue;
1928                }
1929next:
1930                secno = GET_SEC_FROM_SEG(sbi, start);
1931                start_segno = GET_SEG_FROM_SEC(sbi, secno);
1932                if (!IS_CURSEC(sbi, secno) &&
1933                        !get_valid_blocks(sbi, start, true))
1934                        f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno),
1935                                sbi->segs_per_sec << sbi->log_blocks_per_seg);
1936
1937                start = start_segno + sbi->segs_per_sec;
1938                if (start < end)
1939                        goto next;
1940                else
1941                        end = start - 1;
1942        }
1943        mutex_unlock(&dirty_i->seglist_lock);
1944
1945        /* send small discards */
1946        list_for_each_entry_safe(entry, this, head, list) {
1947                unsigned int cur_pos = 0, next_pos, len, total_len = 0;
1948                bool is_valid = test_bit_le(0, entry->discard_map);
1949
1950find_next:
1951                if (is_valid) {
1952                        next_pos = find_next_zero_bit_le(entry->discard_map,
1953                                        sbi->blocks_per_seg, cur_pos);
1954                        len = next_pos - cur_pos;
1955
1956                        if (f2fs_sb_has_blkzoned(sbi) ||
1957                            (force && len < cpc->trim_minlen))
1958                                goto skip;
1959
1960                        f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos,
1961                                                                        len);
1962                        total_len += len;
1963                } else {
1964                        next_pos = find_next_bit_le(entry->discard_map,
1965                                        sbi->blocks_per_seg, cur_pos);
1966                }
1967skip:
1968                cur_pos = next_pos;
1969                is_valid = !is_valid;
1970
1971                if (cur_pos < sbi->blocks_per_seg)
1972                        goto find_next;
1973
1974                release_discard_addr(entry);
1975                dcc->nr_discards -= total_len;
1976        }
1977
1978        wake_up_discard_thread(sbi, false);
1979}
1980
1981static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
1982{
1983        dev_t dev = sbi->sb->s_bdev->bd_dev;
1984        struct discard_cmd_control *dcc;
1985        int err = 0, i;
1986
1987        if (SM_I(sbi)->dcc_info) {
1988                dcc = SM_I(sbi)->dcc_info;
1989                goto init_thread;
1990        }
1991
1992        dcc = f2fs_kzalloc(sbi, sizeof(struct discard_cmd_control), GFP_KERNEL);
1993        if (!dcc)
1994                return -ENOMEM;
1995
1996        dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
1997        INIT_LIST_HEAD(&dcc->entry_list);
1998        for (i = 0; i < MAX_PLIST_NUM; i++)
1999                INIT_LIST_HEAD(&dcc->pend_list[i]);
2000        INIT_LIST_HEAD(&dcc->wait_list);
2001        INIT_LIST_HEAD(&dcc->fstrim_list);
2002        mutex_init(&dcc->cmd_lock);
2003        atomic_set(&dcc->issued_discard, 0);
2004        atomic_set(&dcc->queued_discard, 0);
2005        atomic_set(&dcc->discard_cmd_cnt, 0);
2006        dcc->nr_discards = 0;
2007        dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
2008        dcc->undiscard_blks = 0;
2009        dcc->next_pos = 0;
2010        dcc->root = RB_ROOT_CACHED;
2011        dcc->rbtree_check = false;
2012
2013        init_waitqueue_head(&dcc->discard_wait_queue);
2014        SM_I(sbi)->dcc_info = dcc;
2015init_thread:
2016        dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
2017                                "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
2018        if (IS_ERR(dcc->f2fs_issue_discard)) {
2019                err = PTR_ERR(dcc->f2fs_issue_discard);
2020                kvfree(dcc);
2021                SM_I(sbi)->dcc_info = NULL;
2022                return err;
2023        }
2024
2025        return err;
2026}
2027
2028static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
2029{
2030        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2031
2032        if (!dcc)
2033                return;
2034
2035        f2fs_stop_discard_thread(sbi);
2036
2037        kvfree(dcc);
2038        SM_I(sbi)->dcc_info = NULL;
2039}
2040
2041static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
2042{
2043        struct sit_info *sit_i = SIT_I(sbi);
2044
2045        if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
2046                sit_i->dirty_sentries++;
2047                return false;
2048        }
2049
2050        return true;
2051}
2052
2053static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
2054                                        unsigned int segno, int modified)
2055{
2056        struct seg_entry *se = get_seg_entry(sbi, segno);
2057        se->type = type;
2058        if (modified)
2059                __mark_sit_entry_dirty(sbi, segno);
2060}
2061
/*
 * Account for one block becoming valid (@del > 0) or invalid (otherwise) in
 * the segment that contains @blkaddr.
 *
 * Updates the segment's valid block count, mtime, cur_valid_map,
 * discard_map and checkpoint counters, marks the SIT entry dirty and adjusts
 * the global written_valid_blocks (and the section entry when sections are
 * large).  If the bitmap already had the state being requested, the problem
 * is logged, f2fs_bug_on() fires, and the count change is rolled back
 * (@del is treated as 0 from then on).
 *
 * The only caller visible in this part of the file, f2fs_invalidate_blocks(),
 * holds sit_i->sentry_lock for writing and passes @del == -1.
 */
2062static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
2063{
2064        struct seg_entry *se;
2065        unsigned int segno, offset;
2066        long int new_vblocks;
2067        bool exist;
2068#ifdef CONFIG_F2FS_CHECK_FS
2069        bool mir_exist;
2070#endif
2071
2072        segno = GET_SEGNO(sbi, blkaddr);
2073
2074        se = get_seg_entry(sbi, segno);
2075        new_vblocks = se->valid_blocks + del;
2076        offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
2077
        /*
         * The new count must fit into 16 bits (this also catches a negative
         * result) and must not exceed the number of blocks in a segment.
         */
2078        f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) ||
2079                                (new_vblocks > sbi->blocks_per_seg)));
2080
2081        se->valid_blocks = new_vblocks;
2082        se->mtime = get_mtime(sbi, false);
2083        if (se->mtime > SIT_I(sbi)->max_mtime)
2084                SIT_I(sbi)->max_mtime = se->mtime;
2085
2086        /* Update valid block bitmap */
2087        if (del > 0) {
2088                exist = f2fs_test_and_set_bit(offset, se->cur_valid_map);
2089#ifdef CONFIG_F2FS_CHECK_FS
                /* the mirror bitmap must always agree with the primary one */
2090                mir_exist = f2fs_test_and_set_bit(offset,
2091                                                se->cur_valid_map_mir);
2092                if (unlikely(exist != mir_exist)) {
2093                        f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error "
2094                                "when setting bitmap, blk:%u, old bit:%d",
2095                                blkaddr, exist);
2096                        f2fs_bug_on(sbi, 1);
2097                }
2098#endif
                /* bit was already set: undo the count change made above */
2099                if (unlikely(exist)) {
2100                        f2fs_msg(sbi->sb, KERN_ERR,
2101                                "Bitmap was wrongly set, blk:%u", blkaddr);
2102                        f2fs_bug_on(sbi, 1);
2103                        se->valid_blocks--;
2104                        del = 0;
2105                }
2106
                /* set the block's discard_map bit; count it if newly set */
2107                if (!f2fs_test_and_set_bit(offset, se->discard_map))
2108                        sbi->discard_blks--;
2109
2110                /* don't overwrite by SSR to keep node chain */
2111                if (IS_NODESEG(se->type) &&
2112                                !is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
2113                        if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
2114                                se->ckpt_valid_blocks++;
2115                }
2116        } else {
2117                exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map);
2118#ifdef CONFIG_F2FS_CHECK_FS
                /* the mirror bitmap must always agree with the primary one */
2119                mir_exist = f2fs_test_and_clear_bit(offset,
2120                                                se->cur_valid_map_mir);
2121                if (unlikely(exist != mir_exist)) {
2122                        f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error "
2123                                "when clearing bitmap, blk:%u, old bit:%d",
2124                                blkaddr, exist);
2125                        f2fs_bug_on(sbi, 1);
2126                }
2127#endif
                /* bit was already clear: undo the count change made above */
2128                if (unlikely(!exist)) {
2129                        f2fs_msg(sbi->sb, KERN_ERR,
2130                                "Bitmap was wrongly cleared, blk:%u", blkaddr);
2131                        f2fs_bug_on(sbi, 1);
2132                        se->valid_blocks++;
2133                        del = 0;
2134                } else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
2135                        /*
2136                         * If checkpoints are off, we must not reuse data that
2137                         * was used in the previous checkpoint. If it was used
2138                         * before, we must track that to know how much space we
2139                         * really have.
2140                         */
2141                        if (f2fs_test_bit(offset, se->ckpt_valid_map))
2142                                sbi->unusable_block_count++;
2143                }
2144
                /* clear the block's discard_map bit; count it if it was set */
2145                if (f2fs_test_and_clear_bit(offset, se->discard_map))
2146                        sbi->discard_blks++;
2147        }
        /* ckpt_valid_blocks follows @del only for blocks not in ckpt_valid_map */
2148        if (!f2fs_test_bit(offset, se->ckpt_valid_map))
2149                se->ckpt_valid_blocks += del;
2150
2151        __mark_sit_entry_dirty(sbi, segno);
2152
2153        /* update total number of valid blocks to be written in ckpt area */
2154        SIT_I(sbi)->written_valid_blocks += del;
2155
        /* with large sections the per-section count is kept in step */
2156        if (__is_large_section(sbi))
2157                get_sec_entry(sbi, segno)->valid_blocks += del;
2158}
2159
2160void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
2161{
2162        unsigned int segno = GET_SEGNO(sbi, addr);
2163        struct sit_info *sit_i = SIT_I(sbi);
2164
2165        f2fs_bug_on(sbi, addr == NULL_ADDR);
2166        if (addr == NEW_ADDR)
2167                return;
2168
2169        invalidate_mapping_pages(META_MAPPING(sbi), addr, addr);
2170
2171        /* add it into sit main buffer */
2172        down_write(&sit_i->sentry_lock);
2173
2174        update_sit_entry(sbi, addr, -1);
2175
2176        /* add it into dirty seglist */
2177        locate_dirty_segment(sbi, segno);
2178
2179        up_write(&sit_i->sentry_lock);
2180}
2181
2182bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
2183{
2184        struct sit_info *sit_i = SIT_I(sbi);
2185        unsigned int segno, offset;
2186        struct seg_entry *se;
2187        bool is_cp = false;
2188
2189        if (!is_valid_data_blkaddr(sbi, blkaddr))
2190                return true;
2191
2192        down_read(&sit_i->sentry_lock);
2193
2194        segno = GET_SEGNO(sbi, blkaddr);
2195        se = get_seg_entry(sbi, segno);
2196        offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
2197
2198        if (f2fs_test_bit(offset, se->ckpt_valid_map))
2199                is_cp = true;
2200
2201        up_read(&sit_i->sentry_lock);
2202
2203        return is_cp;
2204}
2205
2206/*
2207 * This function should be resided under the curseg_mutex lock
2208 */
2209static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
2210                                        struct f2fs_summary *sum)
2211{
2212        struct curseg_info *curseg = CURSEG_I(sbi, type);
2213        void *addr = curseg->sum_blk;
2214        addr += curseg->next_blkoff * sizeof(struct f2fs_summary);
2215        memcpy(addr, sum, sizeof(struct f2fs_summary));
2216}
2217
2218/*
2219 * Calculate the number of current summary pages for writing
2220 */
2221int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
2222{
2223        int valid_sum_count = 0;
2224        int i, sum_in_page;
2225
2226        for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
2227                if (sbi->ckpt->alloc_type[i] == SSR)
2228                        valid_sum_count += sbi->blocks_per_seg;
2229                else {
2230                        if (for_ra)
2231                                valid_sum_count += le16_to_cpu(
2232                                        F2FS_CKPT(sbi)->cur_data_blkoff[i]);
2233                        else
2234                                valid_sum_count += curseg_blkoff(sbi, i);
2235                }
2236        }
2237
2238        sum_in_page = (PAGE_SIZE - 2 * SUM_JOURNAL_SIZE -
2239                        SUM_FOOTER_SIZE) / SUMMARY_SIZE;
2240        if (valid_sum_count <= sum_in_page)
2241                return 1;
2242        else if ((valid_sum_count - sum_in_page) <=
2243                (PAGE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
2244                return 2;
2245        return 3;
2246}
2247
2248/*
2249 * Caller should put this summary page
2250 */
2251struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
2252{
2253        return f2fs_get_meta_page_nofail(sbi, GET_SUM_BLOCK(sbi, segno));
2254}
2255
2256void f2fs_update_meta_page(struct f2fs_sb_info *sbi,
2257                                        void *src, block_t blk_addr)
2258{
2259        struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
2260
2261        memcpy(page_address(page), src, PAGE_SIZE);
2262        set_page_dirty(page);
2263        f2fs_put_page(page, 1);
2264}
2265
2266static void write_sum_page(struct f2fs_sb_info *sbi,
2267                        struct f2fs_summary_block *sum_blk, block_t blk_addr)
2268{
2269        f2fs_update_meta_page(sbi, (void *)sum_blk, blk_addr);
2270}
2271
2272static void write_current_sum_page(struct f2fs_sb_info *sbi,
2273                                                int type, block_t blk_addr)
2274{
2275        struct curseg_info *curseg = CURSEG_I(sbi, type);
2276        struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
2277        struct f2fs_summary_block *src = curseg->sum_blk;
2278        struct f2fs_summary_block *dst;
2279
2280        dst = (struct f2fs_summary_block *)page_address(page);
2281        memset(dst, 0, PAGE_SIZE);
2282
2283        mutex_lock(&curseg->curseg_mutex);
2284
2285        down_read(&curseg->journal_rwsem);
2286        memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
2287        up_read(&curseg->journal_rwsem);
2288
2289        memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
2290        memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);
2291
2292        mutex_unlock(&curseg->curseg_mutex);
2293
2294        set_page_dirty(page);
2295        f2fs_put_page(page, 1);
2296}
2297
2298static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
2299{
2300        struct curseg_info *curseg = CURSEG_I(sbi, type);
2301        unsigned int segno = curseg->segno + 1;
2302        struct free_segmap_info *free_i = FREE_I(sbi);
2303
2304        if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
2305                return !test_bit(segno, free_i->free_segmap);
2306        return 0;
2307}
2308
2309/*
 * Find a new segment from the free segment bitmap and mark it in use.
 *
 * @newseg:  in: segment number used as the starting hint;
 *           out: the segment that was picked.
 * @new_sec: when false, the next segment inside the hint's own section is
 *           tried first.
 * @dir:     ALLOC_RIGHT or ALLOC_LEFT; decides what happens when no clear
 *           bit is found in free_secmap at or after the hint (wrap around
 *           to section 0, or scan downwards from the hint).
 *
 * Runs under free_i->segmap_lock.  The function never reports failure:
 * running out of candidates trips f2fs_bug_on().
 */
2313static void get_new_segment(struct f2fs_sb_info *sbi,
2314                        unsigned int *newseg, bool new_sec, int dir)
2315{
2316        struct free_segmap_info *free_i = FREE_I(sbi);
2317        unsigned int segno, secno, zoneno;
2318        unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
2319        unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg);
2320        unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg);
2321        unsigned int left_start = hint;
2322        bool init = true;
2323        int go_left = 0;
2324        int i;
2325
2326        spin_lock(&free_i->segmap_lock);
2327
        /*
         * Unless a new section was requested or *newseg is the last segment
         * of its section, look for a clear bit in the rest of that section.
         */
2328        if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
2329                segno = find_next_zero_bit(free_i->free_segmap,
2330                        GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1);
2331                if (segno < GET_SEG_FROM_SEC(sbi, hint + 1))
2332                        goto got_it;
2333        }
2334find_other_zone:
        /* look for a section with a clear bit at or after the hint */
2335        secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
2336        if (secno >= MAIN_SECS(sbi)) {
2337                if (dir == ALLOC_RIGHT) {
                        /* nothing to the right: wrap around to section 0 */
2338                        secno = find_next_zero_bit(free_i->free_secmap,
2339                                                        MAIN_SECS(sbi), 0);
2340                        f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
2341                } else {
                        /* nothing to the right: search below the hint */
2342                        go_left = 1;
2343                        left_start = hint - 1;
2344                }
2345        }
2346        if (go_left == 0)
2347                goto skip_left;
2348
        /*
         * Step downwards while the section bit is set; once section 0 is
         * reached and is set too, fall back to the first clear bit from 0.
         */
2349        while (test_bit(left_start, free_i->free_secmap)) {
2350                if (left_start > 0) {
2351                        left_start--;
2352                        continue;
2353                }
2354                left_start = find_next_zero_bit(free_i->free_secmap,
2355                                                        MAIN_SECS(sbi), 0);
2356                f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
2357                break;
2358        }
2359        secno = left_start;
2360skip_left:
2361        segno = GET_SEG_FROM_SEC(sbi, secno);
2362        zoneno = GET_ZONE_FROM_SEC(sbi, secno);
2363
2364        /* give up on finding another zone */
        /* (init is false on the second pass, so the retry happens only once) */
2365        if (!init)
2366                goto got_it;
2367        if (sbi->secs_per_zone == 1)
2368                goto got_it;
2369        if (zoneno == old_zoneno)
2370                goto got_it;
2371        if (dir == ALLOC_LEFT) {
2372                if (!go_left && zoneno + 1 >= total_zones)
2373                        goto got_it;
2374                if (go_left && zoneno == 0)
2375                        goto got_it;
2376        }
        /* is one of the current segments already placed in this zone? */
2377        for (i = 0; i < NR_CURSEG_TYPE; i++)
2378                if (CURSEG_I(sbi, i)->zone == zoneno)
2379                        break;
2380
2381        if (i < NR_CURSEG_TYPE) {
2382                /* zone is in user, try another */
2383                if (go_left)
2384                        hint = zoneno * sbi->secs_per_zone - 1;
2385                else if (zoneno + 1 >= total_zones)
2386                        hint = 0;
2387                else
2388                        hint = (zoneno + 1) * sbi->secs_per_zone;
2389                init = false;
2390                goto find_other_zone;
2391        }
2392got_it:
2393        /* set it as dirty segment in free segmap */
2394        f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
2395        __set_inuse(sbi, segno);
2396        *newseg = segno;
2397        spin_unlock(&free_i->segmap_lock);
2398}
2399
2400static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
2401{
2402        struct curseg_info *curseg = CURSEG_I(sbi, type);
2403        struct summary_footer *sum_footer;
2404
2405        curseg->segno = curseg->next_segno;
2406        curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
2407        curseg->next_blkoff = 0;
2408        curseg->next_segno = NULL_SEGNO;
2409
2410        sum_footer = &(curseg->sum_blk->footer);
2411        memset(sum_footer, 0, sizeof(struct summary_footer));
2412        if (IS_DATASEG(type))
2413                SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
2414        if (IS_NODESEG(type))
2415                SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
2416        __set_sit_entry_type(sbi, type, curseg->segno, modified);
2417}
2418
2419static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
2420{
2421        /* if segs_per_sec is large than 1, we need to keep original policy. */
2422        if (__is_large_section(sbi))
2423                return CURSEG_I(sbi, type)->segno;
2424
2425        if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
2426                return 0;
2427
2428        if (test_opt(sbi, NOHEAP) &&
2429                (type == CURSEG_HOT_DATA || IS_NODESEG(type)))
2430                return 0;
2431
2432        if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
2433                return SIT_I(sbi)->last_victim[ALLOC_NEXT];
2434
2435        /* find segments from 0 to reuse freed segments */
2436        if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
2437                return 0;
2438
2439        return CURSEG_I(sbi, type)->segno;
2440}
2441
2442/*
2443 * Allocate a current working segment.
2444 * This function always allocates a free segment in LFS manner.
2445 */
2446static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
2447{
2448        struct curseg_info *curseg = CURSEG_I(sbi, type);
2449        unsigned int segno = curseg->segno;
2450        int dir = ALLOC_LEFT;
2451
2452        write_sum_page(sbi, curseg->sum_blk,
2453                                GET_SUM_BLOCK(sbi, segno));
2454        if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
2455                dir = ALLOC_RIGHT;
2456
2457        if (test_opt(sbi, NOHEAP))
2458                dir = ALLOC_RIGHT;
2459
2460        segno = __get_next_segno(sbi, type);
2461        get_new_segment(sbi, &segno, new_sec, dir);
2462        curseg->next_segno = segno;
2463        reset_curseg(sbi, type, 1);
2464        curseg->alloc_type = LFS;
2465}
2466
2467static void __next_free_blkoff(struct f2fs_sb_info *sbi,
2468                        struct curseg_info *seg, block_t start)
2469{
2470        struct seg_entry *se = get_seg_entry(sbi, seg->segno);
2471        int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
2472        unsigned long *target_map = SIT_I(sbi)->tmp_map;
2473        unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
2474        unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
2475        int i, pos;
2476
2477        for (i = 0; i < entries; i++)
2478                target_map[i] = ckpt_map[i] | cur_map[i];
2479
2480        pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
2481
2482        seg->next_blkoff = pos;
2483}
2484
2485/*
2486 * If a segment is written by LFS manner, next block offset is just obtained
2487 * by increasing the current block offset. However, if a segment is written by
2488 * SSR manner, next block offset obtained by calling __next_free_blkoff
2489 */
2490static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
2491                                struct curseg_info *seg)
2492{
2493        if (seg->alloc_type == SSR)
2494                __next_free_blkoff(sbi, seg, seg->next_blkoff + 1);
2495        else
2496                seg->next_blkoff++;
2497}
2498
2499/*
2500 * This function always allocates a used segment(from dirty seglist) by SSR
2501 * manner, so it should recover the existing segment information of valid blocks
2502 */
2503static void change_curseg(struct f2fs_sb_info *sbi, int type)
2504{
2505        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2506        struct curseg_info *curseg = CURSEG_I(sbi, type);
2507        unsigned int new_segno = curseg->next_segno;
2508        struct f2fs_summary_block *sum_node;
2509        struct page *sum_page;
2510
2511        write_sum_page(sbi, curseg->sum_blk,
2512                                GET_SUM_BLOCK(sbi, curseg->segno));
2513        __set_test_and_inuse(sbi, new_segno);
2514
2515        mutex_lock(&dirty_i->seglist_lock);
2516        __remove_dirty_segment(sbi, new_segno, PRE);
2517        __remove_dirty_segment(sbi, new_segno, DIRTY);
2518        mutex_unlock(&dirty_i->seglist_lock);
2519
2520        reset_curseg(sbi, type, 1);
2521        curseg->alloc_type = SSR;
2522        __next_free_blkoff(sbi, curseg, 0);
2523
2524        sum_page = f2fs_get_sum_page(sbi, new_segno);
2525        f2fs_bug_on(sbi, IS_ERR(sum_page));
2526        sum_node = (struct f2fs_summary_block *)page_address(sum_page);
2527        memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
2528        f2fs_put_page(sum_page, 1);
2529}
2530
2531static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
2532{
2533        struct curseg_info *curseg = CURSEG_I(sbi, type);
2534        const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
2535        unsigned segno = NULL_SEGNO;
2536        int i, cnt;
2537        bool reversed = false;
2538
2539        /* f2fs_need_SSR() already forces to do this */
2540        if (v_ops->get_victim(sbi, &segno, BG_GC, type, SSR)) {
2541                curseg->next_segno = segno;
2542                return 1;
2543        }
2544
2545        /* For node segments, let's do SSR more intensively */
2546        if (IS_NODESEG(type)) {
2547                if (type >= CURSEG_WARM_NODE) {
2548                        reversed = true;
2549                        i = CURSEG_COLD_NODE;
2550                } else {
2551                        i = CURSEG_HOT_NODE;
2552                }
2553                cnt = NR_CURSEG_NODE_TYPE;
2554        } else {
2555                if (type >= CURSEG_WARM_DATA) {
2556                        reversed = true;
2557                        i = CURSEG_COLD_DATA;
2558                } else {
2559                        i = CURSEG_HOT_DATA;
2560                }
2561                cnt = NR_CURSEG_DATA_TYPE;
2562        }
2563
2564        for (; cnt-- > 0; reversed ? i-- : i++) {
2565                if (i == type)
2566                        continue;
2567                if (v_ops->get_victim(sbi, &segno, BG_GC, i, SSR)) {
2568                        curseg->next_segno = segno;
2569                        return 1;
2570                }
2571        }
2572
2573        /* find valid_blocks=0 in dirty list */
2574        if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
2575                segno = get_free_segment(sbi);
2576                if (segno != NULL_SEGNO) {
2577                        curseg->next_segno = segno;
2578                        return 1;
2579                }
2580        }
2581        return 0;
2582}
2583
2584/*
2585 * flush out current segment and replace it with new segment
2586 * This function should be returned with success, otherwise BUG
2587 */
2588static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
2589                                                int type, bool force)
2590{
2591        struct curseg_info *curseg = CURSEG_I(sbi, type);
2592
2593        if (force)
2594                new_curseg(sbi, type, true);
2595        else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
2596                                        type == CURSEG_WARM_NODE)
2597                new_curseg(sbi, type, false);
2598        else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type) &&
2599                        likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
2600                new_curseg(sbi, type, false);
2601        else if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
2602                change_curseg(sbi, type);
2603        else
2604                new_curseg(sbi, type, false);
2605
2606        stat_inc_seg_type(sbi, curseg);
2607}
2608
2609void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
2610{
2611        struct curseg_info *curseg;
2612        unsigned int old_segno;
2613        int i;
2614
2615        down_write(&SIT_I(sbi)->sentry_lock);
2616
2617        for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
2618                curseg = CURSEG_I(sbi, i);
2619                old_segno = curseg->segno;
2620                SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
2621                locate_dirty_segment(sbi, old_segno);
2622        }
2623
2624        up_write(&SIT_I(sbi)->sentry_lock);
2625}
2626
2627static const struct segment_allocation default_salloc_ops = {
2628        .allocate_segment = allocate_segment_by_default,
2629};
2630
2631bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
2632                                                struct cp_control *cpc)
2633{
2634        __u64 trim_start = cpc->trim_start;
2635        bool has_candidate = false;
2636
2637        down_write(&SIT_I(sbi)->sentry_lock);
2638        for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) {
2639                if (add_discard_addrs(sbi, cpc, true)) {
2640                        has_candidate = true;
2641                        break;
2642                }
2643        }
2644        up_write(&SIT_I(sbi)->sentry_lock);
2645
2646        cpc->trim_start = trim_start;
2647        return has_candidate;
2648}
2649
2650static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
2651                                        struct discard_policy *dpolicy,
2652                                        unsigned int start, unsigned int end)
2653{
2654        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2655        struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
2656        struct rb_node **insert_p = NULL, *insert_parent = NULL;
2657        struct discard_cmd *dc;
2658        struct blk_plug plug;
2659        int issued;
2660        unsigned int trimmed = 0;
2661
2662next:
2663        issued = 0;
2664
2665        mutex_lock(&dcc->cmd_lock);
2666        if (unlikely(dcc->rbtree_check))
2667                f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
2668                                                                &dcc->root));
2669
2670        dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
2671                                        NULL, start,
2672                                        (struct rb_entry **)&prev_dc,
2673                                        (struct rb_entry **)&next_dc,
2674                                        &insert_p, &insert_parent, true, NULL);
2675        if (!dc)
2676                dc = next_dc;
2677
2678        blk_start_plug(&plug);
2679
2680        while (dc && dc->lstart <= end) {
2681                struct rb_node *node;
2682                int err = 0;
2683
2684                if (dc->len < dpolicy->granularity)
2685                        goto skip;
2686
2687                if (dc->state != D_PREP) {
2688                        list_move_tail(&dc->list, &dcc->fstrim_list);
2689                        goto skip;
2690                }
2691
2692                err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
2693
2694                if (issued >= dpolicy->max_requests) {
2695                        start = dc->lstart + dc->len;
2696
2697                        if (err)
2698                                __remove_discard_cmd(sbi, dc);
2699
2700                        blk_finish_plug(&plug);
2701                        mutex_unlock(&dcc->cmd_lock);
2702                        trimmed += __wait_all_discard_cmd(sbi, NULL);
2703                        congestion_wait(BLK_RW_ASYNC, HZ/50);
2704                        goto next;
2705                }
2706skip:
2707                node = rb_next(&dc->rb_node);
2708                if (err)
2709                        __remove_discard_cmd(sbi, dc);
2710                dc = rb_entry_safe(node, struct discard_cmd, rb_node);
2711
2712                if (fatal_signal_pending(current))
2713                        break;
2714        }
2715
2716        blk_finish_plug(&plug);
2717        mutex_unlock(&dcc->cmd_lock);
2718
2719        return trimmed;
2720}
2721
2722int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
2723{
2724        __u64 start = F2FS_BYTES_TO_BLK(range->start);
2725        __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
2726        unsigned int start_segno, end_segno;
2727        block_t start_block, end_block;
2728        struct cp_control cpc;
2729        struct discard_policy dpolicy;
2730        unsigned long long trimmed = 0;
2731        int err = 0;
2732        bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi);
2733
2734        if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
2735                return -EINVAL;
2736
2737        if (end < MAIN_BLKADDR(sbi))
2738                goto out;
2739
2740        if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
2741                f2fs_msg(sbi->sb, KERN_WARNING,
2742                        "Found FS corruption, run fsck to fix.");
2743                return -EIO;
2744        }
2745
2746        /* start/end segment number in main_area */
2747        start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
2748        end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
2749                                                GET_SEGNO(sbi, end);
2750        if (need_align) {
2751                start_segno = rounddown(start_segno, sbi->segs_per_sec);
2752                end_segno = roundup(end_segno + 1, sbi->segs_per_sec) - 1;
2753        }
2754
2755        cpc.reason = CP_DISCARD;
2756        cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
2757        cpc.trim_start = start_segno;
2758        cpc.trim_end = end_segno;
2759
2760        if (sbi->discard_blks == 0)
2761                goto out;
2762
2763        mutex_lock(&sbi->gc_mutex);
2764        err = f2fs_write_checkpoint(sbi, &cpc);
2765        mutex_unlock(&sbi->gc_mutex);
2766        if (err)
2767                goto out;
2768
2769        /*
2770         * We filed discard candidates, but actually we don't need to wait for
2771         * all of them, since they'll be issued in idle time along with runtime
2772         * discard option. User configuration looks like using runtime discard
2773         * or periodic fstrim instead of it.
2774         */
2775        if (f2fs_realtime_discard_enable(sbi))
2776                goto out;
2777
2778        start_block = START_BLOCK(sbi, start_segno);
2779        end_block = START_BLOCK(sbi, end_segno + 1);
2780
2781        __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
2782        trimmed = __issue_discard_cmd_range(sbi, &dpolicy,
2783                                        start_block, end_block);
2784
2785        trimmed += __wait_discard_cmd_range(sbi, &dpolicy,
2786                                        start_block, end_block);
2787out:
2788        if (!err)
2789                range->len = F2FS_BLK_TO_BYTES(trimmed);
2790        return err;
2791}
2792
2793static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
2794{
2795        struct curseg_info *curseg = CURSEG_I(sbi, type);
2796        if (curseg->next_blkoff < sbi->blocks_per_seg)
2797                return true;
2798        return false;
2799}
2800
2801int f2fs_rw_hint_to_seg_type(enum rw_hint hint)
2802{
2803        switch (hint) {
2804        case WRITE_LIFE_SHORT:
2805                return CURSEG_HOT_DATA;
2806        case WRITE_LIFE_EXTREME:
2807                return CURSEG_COLD_DATA;
2808        default:
2809                return CURSEG_WARM_DATA;
2810        }
2811}
2812
/* This returns write hints for each segment type. These hints will be
 * passed down to the block layer. There are mapping tables which depend on
 * the mount option 'whint_mode'.
2816 *
2817 * 1) whint_mode=off. F2FS only passes down WRITE_LIFE_NOT_SET.
2818 *
2819 * 2) whint_mode=user-based. F2FS tries to pass down hints given by users.
2820 *
2821 * User                  F2FS                     Block
2822 * ----                  ----                     -----
2823 *                       META                     WRITE_LIFE_NOT_SET
2824 *                       HOT_NODE                 "
2825 *                       WARM_NODE                "
2826 *                       COLD_NODE                "
2827 * ioctl(COLD)           COLD_DATA                WRITE_LIFE_EXTREME
2828 * extension list        "                        "
2829 *
2830 * -- buffered io
2831 * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
2832 * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
2833 * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
2834 * WRITE_LIFE_NONE       "                        "
2835 * WRITE_LIFE_MEDIUM     "                        "
2836 * WRITE_LIFE_LONG       "                        "
2837 *
2838 * -- direct io
2839 * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
2840 * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
2841 * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
2842 * WRITE_LIFE_NONE       "                        WRITE_LIFE_NONE
2843 * WRITE_LIFE_MEDIUM     "                        WRITE_LIFE_MEDIUM
2844 * WRITE_LIFE_LONG       "                        WRITE_LIFE_LONG
2845 *
2846 * 3) whint_mode=fs-based. F2FS passes down hints with its policy.
2847 *
2848 * User                  F2FS                     Block
2849 * ----                  ----                     -----
 *                       META                     WRITE_LIFE_MEDIUM
2851 *                       HOT_NODE                 WRITE_LIFE_NOT_SET
2852 *                       WARM_NODE                "
2853 *                       COLD_NODE                WRITE_LIFE_NONE
2854 * ioctl(COLD)           COLD_DATA                WRITE_LIFE_EXTREME
2855 * extension list        "                        "
2856 *
2857 * -- buffered io
2858 * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
2859 * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
2860 * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_LONG
2861 * WRITE_LIFE_NONE       "                        "
2862 * WRITE_LIFE_MEDIUM     "                        "
2863 * WRITE_LIFE_LONG       "                        "
2864 *
2865 * -- direct io
2866 * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
2867 * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
2868 * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
2869 * WRITE_LIFE_NONE       "                        WRITE_LIFE_NONE
2870 * WRITE_LIFE_MEDIUM     "                        WRITE_LIFE_MEDIUM
2871 * WRITE_LIFE_LONG       "                        WRITE_LIFE_LONG
2872 */
2873
2874enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
2875                                enum page_type type, enum temp_type temp)
2876{
2877        if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER) {
2878                if (type == DATA) {
2879                        if (temp == WARM)
2880                                return WRITE_LIFE_NOT_SET;
2881                        else if (temp == HOT)
2882                                return WRITE_LIFE_SHORT;
2883                        else if (temp == COLD)
2884                                return WRITE_LIFE_EXTREME;
2885                } else {
2886                        return WRITE_LIFE_NOT_SET;
2887                }
2888        } else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS) {
2889                if (type == DATA) {
2890                        if (temp == WARM)
2891                                return WRITE_LIFE_LONG;
2892                        else if (temp == HOT)
2893                                return WRITE_LIFE_SHORT;
2894                        else if (temp == COLD)
2895                                return WRITE_LIFE_EXTREME;
2896                } else if (type == NODE) {
2897                        if (temp == WARM || temp == HOT)
2898                                return WRITE_LIFE_NOT_SET;
2899                        else if (temp == COLD)
2900                                return WRITE_LIFE_NONE;
2901                } else if (type == META) {
2902                        return WRITE_LIFE_MEDIUM;
2903                }
2904        }
2905        return WRITE_LIFE_NOT_SET;
2906}
2907
2908static int __get_segment_type_2(struct f2fs_io_info *fio)
2909{
2910        if (fio->type == DATA)
2911                return CURSEG_HOT_DATA;
2912        else
2913                return CURSEG_HOT_NODE;
2914}
2915
2916static int __get_segment_type_4(struct f2fs_io_info *fio)
2917{
2918        if (fio->type == DATA) {
2919                struct inode *inode = fio->page->mapping->host;
2920
2921                if (S_ISDIR(inode->i_mode))
2922                        return CURSEG_HOT_DATA;
2923                else
2924                        return CURSEG_COLD_DATA;
2925        } else {
2926                if (IS_DNODE(fio->page) && is_cold_node(fio->page))
2927                        return CURSEG_WARM_NODE;
2928                else
2929                        return CURSEG_COLD_NODE;
2930        }
2931}
2932
2933static int __get_segment_type_6(struct f2fs_io_info *fio)
2934{
2935        if (fio->type == DATA) {
2936                struct inode *inode = fio->page->mapping->host;
2937
2938                if (is_cold_data(fio->page) || file_is_cold(inode))
2939                        return CURSEG_COLD_DATA;
2940                if (file_is_hot(inode) ||
2941                                is_inode_flag_set(inode, FI_HOT_DATA) ||
2942                                f2fs_is_atomic_file(inode) ||
2943                                f2fs_is_volatile_file(inode))
2944                        return CURSEG_HOT_DATA;
2945                return f2fs_rw_hint_to_seg_type(inode->i_write_hint);
2946        } else {
2947                if (IS_DNODE(fio->page))
2948                        return is_cold_node(fio->page) ? CURSEG_WARM_NODE :
2949                                                CURSEG_HOT_NODE;
2950                return CURSEG_COLD_NODE;
2951        }
2952}
2953
2954static int __get_segment_type(struct f2fs_io_info *fio)
2955{
2956        int type = 0;
2957
2958        switch (F2FS_OPTION(fio->sbi).active_logs) {
2959        case 2:
2960                type = __get_segment_type_2(fio);
2961                break;
2962        case 4:
2963                type = __get_segment_type_4(fio);
2964                break;
2965        case 6:
2966                type = __get_segment_type_6(fio);
2967                break;
2968        default:
2969                f2fs_bug_on(fio->sbi, true);
2970        }
2971
2972        if (IS_HOT(type))
2973                fio->temp = HOT;
2974        else if (IS_WARM(type))
2975                fio->temp = WARM;
2976        else
2977                fio->temp = COLD;
2978        return type;
2979}
2980
2981void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
2982                block_t old_blkaddr, block_t *new_blkaddr,
2983                struct f2fs_summary *sum, int type,
2984                struct f2fs_io_info *fio, bool add_list)
2985{
2986        struct sit_info *sit_i = SIT_I(sbi);
2987        struct curseg_info *curseg = CURSEG_I(sbi, type);
2988
2989        down_read(&SM_I(sbi)->curseg_lock);
2990
2991        mutex_lock(&curseg->curseg_mutex);
2992        down_write(&sit_i->sentry_lock);
2993
2994        *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
2995
2996        f2fs_wait_discard_bio(sbi, *new_blkaddr);
2997
2998        /*
2999         * __add_sum_entry should be resided under the curseg_mutex
3000         * because, this function updates a summary entry in the
3001         * current summary block.
3002         */
3003        __add_sum_entry(sbi, type, sum);
3004
3005        __refresh_next_blkoff(sbi, curseg);
3006
3007        stat_inc_block_count(sbi, curseg);
3008
3009        /*
3010         * SIT information should be updated before segment allocation,
3011         * since SSR needs latest valid block information.
3012         */
3013        update_sit_entry(sbi, *new_blkaddr, 1);
3014        if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
3015                update_sit_entry(sbi, old_blkaddr, -1);
3016
3017        if (!__has_curseg_space(sbi, type))
3018                sit_i->s_ops->allocate_segment(sbi, type, false);
3019
3020        /*
3021         * segment dirty status should be updated after segment allocation,
3022         * so we just need to update status only one time after previous
3023         * segment being closed.
3024         */
3025        locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
3026        locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr));
3027
3028        up_write(&sit_i->sentry_lock);
3029
3030        if (page && IS_NODESEG(type)) {
3031                fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
3032
3033                f2fs_inode_chksum_set(sbi, page);
3034        }
3035
3036        if (add_list) {
3037                struct f2fs_bio_info *io;
3038
3039                INIT_LIST_HEAD(&fio->list);
3040                fio->in_list = true;
3041                fio->retry = false;
3042                io = sbi->write_io[fio->type] + fio->temp;
3043                spin_lock(&io->io_lock);
3044                list_add_tail(&fio->list, &io->io_list);
3045                spin_unlock(&io->io_lock);
3046        }
3047
3048        mutex_unlock(&curseg->curseg_mutex);
3049
3050        up_read(&SM_I(sbi)->curseg_lock);
3051}
3052
3053static void update_device_state(struct f2fs_io_info *fio)
3054{
3055        struct f2fs_sb_info *sbi = fio->sbi;
3056        unsigned int devidx;
3057
3058        if (!sbi->s_ndevs)
3059                return;
3060
3061        devidx = f2fs_target_device_index(sbi, fio->new_blkaddr);
3062
3063        /* update device state for fsync */
3064        f2fs_set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO);
3065
3066        /* update device state for checkpoint */
3067        if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
3068                spin_lock(&sbi->dev_lock);
3069                f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
3070                spin_unlock(&sbi->dev_lock);
3071        }
3072}
3073
3074static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
3075{
3076        int type = __get_segment_type(fio);
3077        bool keep_order = (test_opt(fio->sbi, LFS) && type == CURSEG_COLD_DATA);
3078
3079        if (keep_order)
3080                down_read(&fio->sbi->io_order_lock);
3081reallocate:
3082        f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
3083                        &fio->new_blkaddr, sum, type, fio, true);
3084        if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO)
3085                invalidate_mapping_pages(META_MAPPING(fio->sbi),
3086                                        fio->old_blkaddr, fio->old_blkaddr);
3087
3088        /* writeout dirty page into bdev */
3089        f2fs_submit_page_write(fio);
3090        if (fio->retry) {
3091                fio->old_blkaddr = fio->new_blkaddr;
3092                goto reallocate;
3093        }
3094
3095        update_device_state(fio);
3096
3097        if (keep_order)
3098                up_read(&fio->sbi->io_order_lock);
3099}
3100
3101void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
3102                                        enum iostat_type io_type)
3103{
3104        struct f2fs_io_info fio = {
3105                .sbi = sbi,
3106                .type = META,
3107                .temp = HOT,
3108                .op = REQ_OP_WRITE,
3109                .op_flags = REQ_SYNC | REQ_META | REQ_PRIO,
3110                .old_blkaddr = page->index,
3111                .new_blkaddr = page->index,
3112                .page = page,
3113                .encrypted_page = NULL,
3114                .in_list = false,
3115        };
3116
3117        if (unlikely(page->index >= MAIN_BLKADDR(sbi)))
3118                fio.op_flags &= ~REQ_META;
3119
3120        set_page_writeback(page);
3121        ClearPageError(page);
3122        f2fs_submit_page_write(&fio);
3123
3124        stat_inc_meta_count(sbi, page->index);
3125        f2fs_update_iostat(sbi, io_type, F2FS_BLKSIZE);
3126}
3127
3128void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio)
3129{
3130        struct f2fs_summary sum;
3131
3132        set_summary(&sum, nid, 0, 0);
3133        do_write_page(&sum, fio);
3134
3135        f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
3136}
3137
3138void f2fs_outplace_write_data(struct dnode_of_data *dn,
3139                                        struct f2fs_io_info *fio)
3140{
3141        struct f2fs_sb_info *sbi = fio->sbi;
3142        struct f2fs_summary sum;
3143
3144        f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
3145        set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version);
3146        do_write_page(&sum, fio);
3147        f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
3148
3149        f2fs_update_iostat(sbi, fio->io_type, F2FS_BLKSIZE);
3150}
3151
3152int f2fs_inplace_write_data(struct f2fs_io_info *fio)
3153{
3154        int err;
3155        struct f2fs_sb_info *sbi = fio->sbi;
3156
3157        fio->new_blkaddr = fio->old_blkaddr;
3158        /* i/o temperature is needed for passing down write hints */
3159        __get_segment_type(fio);
3160
3161        f2fs_bug_on(sbi, !IS_DATASEG(get_seg_entry(sbi,
3162                        GET_SEGNO(sbi, fio->new_blkaddr))->type));
3163
3164        stat_inc_inplace_blocks(fio->sbi);
3165
3166        err = f2fs_submit_page_bio(fio);
3167        if (!err)
3168                update_device_state(fio);
3169
3170        f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
3171
3172        return err;
3173}
3174
3175static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi,
3176                                                unsigned int segno)
3177{
3178        int i;
3179
3180        for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
3181                if (CURSEG_I(sbi, i)->segno == segno)
3182                        break;
3183        }
3184        return i;
3185}
3186
3187void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
3188                                block_t old_blkaddr, block_t new_blkaddr,
3189                                bool recover_curseg, bool recover_newaddr)
3190{
3191        struct sit_info *sit_i = SIT_I(sbi);
3192        struct curseg_info *curseg;
3193        unsigned int segno, old_cursegno;
3194        struct seg_entry *se;
3195        int type;
3196        unsigned short old_blkoff;
3197
3198        segno = GET_SEGNO(sbi, new_blkaddr);
3199        se = get_seg_entry(sbi, segno);
3200        type = se->type;
3201
3202        down_write(&SM_I(sbi)->curseg_lock);
3203
3204        if (!recover_curseg) {
3205                /* for recovery flow */
3206                if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
3207                        if (old_blkaddr == NULL_ADDR)
3208                                type = CURSEG_COLD_DATA;
3209                        else
3210                                type = CURSEG_WARM_DATA;
3211                }
3212        } else {
3213                if (IS_CURSEG(sbi, segno)) {
3214                        /* se->type is volatile as SSR allocation */
3215                        type = __f2fs_get_curseg(sbi, segno);
3216                        f2fs_bug_on(sbi, type == NO_CHECK_TYPE);
3217                } else {
3218                        type = CURSEG_WARM_DATA;
3219                }
3220        }
3221
3222        f2fs_bug_on(sbi, !IS_DATASEG(type));
3223        curseg = CURSEG_I(sbi, type);
3224
3225        mutex_lock(&curseg->curseg_mutex);
3226        down_write(&sit_i->sentry_lock);
3227
3228        old_cursegno = curseg->segno;
3229        old_blkoff = curseg->next_blkoff;
3230
3231        /* change the current segment */
3232        if (segno != curseg->segno) {
3233                curseg->next_segno = segno;
3234                change_curseg(sbi, type);
3235        }
3236
3237        curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
3238        __add_sum_entry(sbi, type, sum);
3239
3240        if (!recover_curseg || recover_newaddr)
3241                update_sit_entry(sbi, new_blkaddr, 1);
3242        if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
3243                invalidate_mapping_pages(META_MAPPING(sbi),
3244                                        old_blkaddr, old_blkaddr);
3245                update_sit_entry(sbi, old_blkaddr, -1);
3246        }
3247
3248        locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
3249        locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr));
3250
3251        locate_dirty_segment(sbi, old_cursegno);
3252
3253        if (recover_curseg) {
3254                if (old_cursegno != curseg->segno) {
3255                        curseg->next_segno = old_cursegno;
3256                        change_curseg(sbi, type);
3257                }
3258                curseg->next_blkoff = old_blkoff;
3259        }
3260
3261        up_write(&sit_i->sentry_lock);
3262        mutex_unlock(&curseg->curseg_mutex);
3263        up_write(&SM_I(sbi)->curseg_lock);
3264}
3265
3266void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
3267                                block_t old_addr, block_t new_addr,
3268                                unsigned char version, bool recover_curseg,
3269                                bool recover_newaddr)
3270{
3271        struct f2fs_summary sum;
3272
3273        set_summary(&sum, dn->nid, dn->ofs_in_node, version);
3274
3275        f2fs_do_replace_block(sbi, &sum, old_addr, new_addr,
3276                                        recover_curseg, recover_newaddr);
3277
3278        f2fs_update_data_blkaddr(dn, new_addr);
3279}
3280
3281void f2fs_wait_on_page_writeback(struct page *page,
3282                                enum page_type type, bool ordered, bool locked)
3283{
3284        if (PageWriteback(page)) {
3285                struct f2fs_sb_info *sbi = F2FS_P_SB(page);
3286
3287                f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type);
3288                if (ordered) {
3289                        wait_on_page_writeback(page);
3290                        f2fs_bug_on(sbi, locked && PageWriteback(page));
3291                } else {
3292                        wait_for_stable_page(page);
3293                }
3294        }
3295}
3296
3297void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr)
3298{
3299        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3300        struct page *cpage;
3301
3302        if (!f2fs_post_read_required(inode))
3303                return;
3304
3305        if (!is_valid_data_blkaddr(sbi, blkaddr))
3306                return;
3307
3308        cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
3309        if (cpage) {
3310                f2fs_wait_on_page_writeback(cpage, DATA, true, true);
3311                f2fs_put_page(cpage, 1);
3312        }
3313}
3314
3315void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
3316                                                                block_t len)
3317{
3318        block_t i;
3319
3320        for (i = 0; i < len; i++)
3321                f2fs_wait_on_block_writeback(inode, blkaddr + i);
3322}
3323
3324static int read_compacted_summaries(struct f2fs_sb_info *sbi)
3325{
3326        struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
3327        struct curseg_info *seg_i;
3328        unsigned char *kaddr;
3329        struct page *page;
3330        block_t start;
3331        int i, j, offset;
3332
3333        start = start_sum_block(sbi);
3334
3335        page = f2fs_get_meta_page(sbi, start++);
3336        if (IS_ERR(page))
3337                return PTR_ERR(page);
3338        kaddr = (unsigned char *)page_address(page);
3339
3340        /* Step 1: restore nat cache */
3341        seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
3342        memcpy(seg_i->journal, kaddr, SUM_JOURNAL_SIZE);
3343
3344        /* Step 2: restore sit cache */
3345        seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
3346        memcpy(seg_i->journal, kaddr + SUM_JOURNAL_SIZE, SUM_JOURNAL_SIZE);
3347        offset = 2 * SUM_JOURNAL_SIZE;
3348
3349        /* Step 3: restore summary entries */
3350        for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
3351                unsigned short blk_off;
3352                unsigned int segno;
3353
3354                seg_i = CURSEG_I(sbi, i);
3355                segno = le32_to_cpu(ckpt->cur_data_segno[i]);
3356                blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
3357                seg_i->next_segno = segno;
3358                reset_curseg(sbi, i, 0);
3359                seg_i->alloc_type = ckpt->alloc_type[i];
3360                seg_i->next_blkoff = blk_off;
3361
3362                if (seg_i->alloc_type == SSR)
3363                        blk_off = sbi->blocks_per_seg;
3364
3365                for (j = 0; j < blk_off; j++) {
3366                        struct f2fs_summary *s;
3367                        s = (struct f2fs_summary *)(kaddr + offset);
3368                        seg_i->sum_blk->entries[j] = *s;
3369                        offset += SUMMARY_SIZE;
3370                        if (offset + SUMMARY_SIZE <= PAGE_SIZE -
3371                                                SUM_FOOTER_SIZE)
3372                                continue;
3373
3374                        f2fs_put_page(page, 1);
3375                        page = NULL;
3376
3377                        page = f2fs_get_meta_page(sbi, start++);
3378                        if (IS_ERR(page))
3379                                return PTR_ERR(page);
3380                        kaddr = (unsigned char *)page_address(page);
3381                        offset = 0;
3382                }
3383        }
3384        f2fs_put_page(page, 1);
3385        return 0;
3386}
3387
3388static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
3389{
3390        struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
3391        struct f2fs_summary_block *sum;
3392        struct curseg_info *curseg;
3393        struct page *new;
3394        unsigned short blk_off;
3395        unsigned int segno = 0;
3396        block_t blk_addr = 0;
3397        int err = 0;
3398
3399        /* get segment number and block addr */
3400        if (IS_DATASEG(type)) {
3401                segno = le32_to_cpu(ckpt->cur_data_segno[type]);
3402                blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
3403                                                        CURSEG_HOT_DATA]);
3404                if (__exist_node_summaries(sbi))
3405                        blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
3406                else
3407                        blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
3408        } else {
3409                segno = le32_to_cpu(ckpt->cur_node_segno[type -
3410                                                        CURSEG_HOT_NODE]);
3411                blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
3412                                                        CURSEG_HOT_NODE]);
3413                if (__exist_node_summaries(sbi))
3414                        blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
3415                                                        type - CURSEG_HOT_NODE);
3416                else
3417                        blk_addr = GET_SUM_BLOCK(sbi, segno);
3418        }
3419
3420        new = f2fs_get_meta_page(sbi, blk_addr);
3421        if (IS_ERR(new))
3422                return PTR_ERR(new);
3423        sum = (struct f2fs_summary_block *)page_address(new);
3424
3425        if (IS_NODESEG(type)) {
3426                if (__exist_node_summaries(sbi)) {
3427                        struct f2fs_summary *ns = &sum->entries[0];
3428                        int i;
3429                        for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
3430                                ns->version = 0;
3431                                ns->ofs_in_node = 0;
3432                        }
3433                } else {
3434                        err = f2fs_restore_node_summary(sbi, segno, sum);
3435                        if (err)
3436                                goto out;
3437                }
3438        }
3439
3440        /* set uncompleted segment to curseg */
3441        curseg = CURSEG_I(sbi, type);
3442        mutex_lock(&curseg->curseg_mutex);
3443
3444        /* update journal info */
3445        down_write(&curseg->journal_rwsem);
3446        memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE);
3447        up_write(&curseg->journal_rwsem);
3448
3449        memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE);
3450        memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE);
3451        curseg->next_segno = segno;
3452        reset_curseg(sbi, type, 0);
3453        curseg->alloc_type = ckpt->alloc_type[type];
3454        curseg->next_blkoff = blk_off;
3455        mutex_unlock(&curseg->curseg_mutex);
3456out:
3457        f2fs_put_page(new, 1);
3458        return err;
3459}
3460
3461static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
3462{
3463        struct f2fs_journal *sit_j = CURSEG_I(sbi, CURSEG_COLD_DATA)->journal;
3464        struct f2fs_journal *nat_j = CURSEG_I(sbi, CURSEG_HOT_DATA)->journal;
3465        int type = CURSEG_HOT_DATA;
3466        int err;
3467
3468        if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) {
3469                int npages = f2fs_npages_for_summary_flush(sbi, true);
3470
3471                if (npages >= 2)
3472                        f2fs_ra_meta_pages(sbi, start_sum_block(sbi), npages,
3473                                                        META_CP, true);
3474
3475                /* restore for compacted data summary */
3476                err = read_compacted_summaries(sbi);
3477                if (err)
3478                        return err;
3479                type = CURSEG_HOT_NODE;
3480        }
3481
3482        if (__exist_node_summaries(sbi))
3483                f2fs_ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
3484                                        NR_CURSEG_TYPE - type, META_CP, true);
3485
3486        for (; type <= CURSEG_COLD_NODE; type++) {
3487                err = read_normal_summaries(sbi, type);
3488                if (err)
3489                        return err;
3490        }
3491
3492        /* sanity check for summary blocks */
3493        if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES ||
3494                        sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES)
3495                return -EINVAL;
3496
3497        return 0;
3498}
3499
3500static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
3501{
3502        struct page *page;
3503        unsigned char *kaddr;
3504        struct f2fs_summary *summary;
3505        struct curseg_info *seg_i;
3506        int written_size = 0;
3507        int i, j;
3508
3509        page = f2fs_grab_meta_page(sbi, blkaddr++);
3510        kaddr = (unsigned char *)page_address(page);
3511        memset(kaddr, 0, PAGE_SIZE);
3512
3513        /* Step 1: write nat cache */
3514        seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
3515        memcpy(kaddr, seg_i->journal, SUM_JOURNAL_SIZE);
3516        written_size += SUM_JOURNAL_SIZE;
3517
3518        /* Step 2: write sit cache */
3519        seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
3520        memcpy(kaddr + written_size, seg_i->journal, SUM_JOURNAL_SIZE);
3521        written_size += SUM_JOURNAL_SIZE;
3522
3523        /* Step 3: write summary entries */
3524        for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
3525                unsigned short blkoff;
3526                seg_i = CURSEG_I(sbi, i);
3527                if (sbi->ckpt->alloc_type[i] == SSR)
3528                        blkoff = sbi->blocks_per_seg;
3529                else
3530                        blkoff = curseg_blkoff(sbi, i);
3531
3532                for (j = 0; j < blkoff; j++) {
3533                        if (!page) {
3534                                page = f2fs_grab_meta_page(sbi, blkaddr++);
3535                                kaddr = (unsigned char *)page_address(page);
3536                                memset(kaddr, 0, PAGE_SIZE);
3537                                written_size = 0;
3538                        }
3539                        summary = (struct f2fs_summary *)(kaddr + written_size);
3540                        *summary = seg_i->sum_blk->entries[j];
3541                        written_size += SUMMARY_SIZE;
3542
3543                        if (written_size + SUMMARY_SIZE <= PAGE_SIZE -
3544                                                        SUM_FOOTER_SIZE)
3545                                continue;
3546
3547                        set_page_dirty(page);
3548                        f2fs_put_page(page, 1);
3549                        page = NULL;
3550                }
3551        }
3552        if (page) {
3553                set_page_dirty(page);
3554                f2fs_put_page(page, 1);
3555        }
3556}
3557
3558static void write_normal_summaries(struct f2fs_sb_info *sbi,
3559                                        block_t blkaddr, int type)
3560{
3561        int i, end;
3562        if (IS_DATASEG(type))
3563                end = type + NR_CURSEG_DATA_TYPE;
3564        else
3565                end = type + NR_CURSEG_NODE_TYPE;
3566
3567        for (i = type; i < end; i++)
3568                write_current_sum_page(sbi, i, blkaddr + (i - type));
3569}
3570
3571void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
3572{
3573        if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG))
3574                write_compacted_summaries(sbi, start_blk);
3575        else
3576                write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
3577}
3578
3579void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
3580{
3581        write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
3582}
3583
3584int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
3585                                        unsigned int val, int alloc)
3586{
3587        int i;
3588
3589        if (type == NAT_JOURNAL) {
3590                for (i = 0; i < nats_in_cursum(journal); i++) {
3591                        if (le32_to_cpu(nid_in_journal(journal, i)) == val)
3592                                return i;
3593                }
3594                if (alloc && __has_cursum_space(journal, 1, NAT_JOURNAL))
3595                        return update_nats_in_cursum(journal, 1);
3596        } else if (type == SIT_JOURNAL) {
3597                for (i = 0; i < sits_in_cursum(journal); i++)
3598                        if (le32_to_cpu(segno_in_journal(journal, i)) == val)
3599                                return i;
3600                if (alloc && __has_cursum_space(journal, 1, SIT_JOURNAL))
3601                        return update_sits_in_cursum(journal, 1);
3602        }
3603        return -1;
3604}
3605
3606static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
3607                                        unsigned int segno)
3608{
3609        return f2fs_get_meta_page_nofail(sbi, current_sit_addr(sbi, segno));
3610}
3611
3612static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
3613                                        unsigned int start)
3614{
3615        struct sit_info *sit_i = SIT_I(sbi);
3616        struct page *page;
3617        pgoff_t src_off, dst_off;
3618
3619        src_off = current_sit_addr(sbi, start);
3620        dst_off = next_sit_addr(sbi, src_off);
3621
3622        page = f2fs_grab_meta_page(sbi, dst_off);
3623        seg_info_to_sit_page(sbi, page, start);
3624
3625        set_page_dirty(page);
3626        set_to_next_sit(sit_i, start);
3627
3628        return page;
3629}
3630
3631static struct sit_entry_set *grab_sit_entry_set(void)
3632{
3633        struct sit_entry_set *ses =
3634                        f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_NOFS);
3635
3636        ses->entry_cnt = 0;
3637        INIT_LIST_HEAD(&ses->set_list);
3638        return ses;
3639}
3640
3641static void release_sit_entry_set(struct sit_entry_set *ses)
3642{
3643        list_del(&ses->set_list);
3644        kmem_cache_free(sit_entry_set_slab, ses);
3645}
3646
3647static void adjust_sit_entry_set(struct sit_entry_set *ses,
3648                                                struct list_head *head)
3649{
3650        struct sit_entry_set *next = ses;
3651
3652        if (list_is_last(&ses->set_list, head))
3653                return;
3654
3655        list_for_each_entry_continue(next, head, set_list)
3656                if (ses->entry_cnt <= next->entry_cnt)
3657                        break;
3658
3659        list_move_tail(&ses->set_list, &next->set_list);
3660}
3661
3662static void add_sit_entry(unsigned int segno, struct list_head *head)
3663{
3664        struct sit_entry_set *ses;
3665        unsigned int start_segno = START_SEGNO(segno);
3666
3667        list_for_each_entry(ses, head, set_list) {
3668                if (ses->start_segno == start_segno) {
3669                        ses->entry_cnt++;
3670                        adjust_sit_entry_set(ses, head);
3671                        return;
3672                }
3673        }
3674
3675        ses = grab_sit_entry_set();
3676
3677        ses->start_segno = start_segno;
3678        ses->entry_cnt++;
3679        list_add(&ses->set_list, head);
3680}
3681
3682static void add_sits_in_set(struct f2fs_sb_info *sbi)
3683{
3684        struct f2fs_sm_info *sm_info = SM_I(sbi);
3685        struct list_head *set_list = &sm_info->sit_entry_set;
3686        unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
3687        unsigned int segno;
3688
3689        for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
3690                add_sit_entry(segno, set_list);
3691}
3692
3693static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
3694{
3695        struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
3696        struct f2fs_journal *journal = curseg->journal;
3697        int i;
3698
3699        down_write(&curseg->journal_rwsem);
3700        for (i = 0; i < sits_in_cursum(journal); i++) {
3701                unsigned int segno;
3702                bool dirtied;
3703
3704                segno = le32_to_cpu(segno_in_journal(journal, i));
3705                dirtied = __mark_sit_entry_dirty(sbi, segno);
3706
3707                if (!dirtied)
3708                        add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
3709        }
3710        update_sits_in_cursum(journal, -i);
3711        up_write(&curseg->journal_rwsem);
3712}
3713
3714/*
3715 * CP calls this function, which flushes SIT entries including sit_journal,
3716 * and moves prefree segs to free segs.
3717 */
3718void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
3719{
3720        struct sit_info *sit_i = SIT_I(sbi);
3721        unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
3722        struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
3723        struct f2fs_journal *journal = curseg->journal;
3724        struct sit_entry_set *ses, *tmp;
3725        struct list_head *head = &SM_I(sbi)->sit_entry_set;
3726        bool to_journal = true;
3727        struct seg_entry *se;
3728
3729        down_write(&sit_i->sentry_lock);
3730
3731        if (!sit_i->dirty_sentries)
3732                goto out;
3733
3734        /*
3735         * add and account sit entries of dirty bitmap in sit entry
3736         * set temporarily
3737         */
3738        add_sits_in_set(sbi);
3739
3740        /*
3741         * if there are no enough space in journal to store dirty sit
3742         * entries, remove all entries from journal and add and account
3743         * them in sit entry set.
3744         */
3745        if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL))
3746                remove_sits_in_journal(sbi);
3747
3748        /*
3749         * there are two steps to flush sit entries:
3750         * #1, flush sit entries to journal in current cold data summary block.
3751         * #2, flush sit entries to sit page.
3752         */
3753        list_for_each_entry_safe(ses, tmp, head, set_list) {
3754                struct page *page = NULL;
3755                struct f2fs_sit_block *raw_sit = NULL;
3756                unsigned int start_segno = ses->start_segno;
3757                unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
3758                                                (unsigned long)MAIN_SEGS(sbi));
3759                unsigned int segno = start_segno;
3760
3761                if (to_journal &&
3762                        !__has_cursum_space(journal, ses->entry_cnt, SIT_JOURNAL))
3763                        to_journal = false;
3764
3765                if (to_journal) {
3766                        down_write(&curseg->journal_rwsem);
3767                } else {
3768                        page = get_next_sit_page(sbi, start_segno);
3769                        raw_sit = page_address(page);
3770                }
3771
3772                /* flush dirty sit entries in region of current sit set */
3773                for_each_set_bit_from(segno, bitmap, end) {
3774                        int offset, sit_offset;
3775
3776                        se = get_seg_entry(sbi, segno);
3777#ifdef CONFIG_F2FS_CHECK_FS
3778                        if (memcmp(se->cur_valid_map, se->cur_valid_map_mir,
3779                                                SIT_VBLOCK_MAP_SIZE))
3780                                f2fs_bug_on(sbi, 1);
3781#endif
3782
3783                        /* add discard candidates */
3784                        if (!(cpc->reason & CP_DISCARD)) {
3785                                cpc->trim_start = segno;
3786                                add_discard_addrs(sbi, cpc, false);
3787                        }
3788
3789                        if (to_journal) {
3790                                offset = f2fs_lookup_journal_in_cursum(journal,
3791                                                        SIT_JOURNAL, segno, 1);
3792                                f2fs_bug_on(sbi, offset < 0);
3793                                segno_in_journal(journal, offset) =
3794                                                        cpu_to_le32(segno);
3795                                seg_info_to_raw_sit(se,
3796                                        &sit_in_journal(journal, offset));
3797                                check_block_count(sbi, segno,
3798                                        &sit_in_journal(journal, offset));
3799                        } else {
3800                                sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
3801                                seg_info_to_raw_sit(se,
3802                                                &raw_sit->entries[sit_offset]);
3803                                check_block_count(sbi, segno,
3804                                                &raw_sit->entries[sit_offset]);
3805                        }
3806
3807                        __clear_bit(segno, bitmap);
3808                        sit_i->dirty_sentries--;
3809                        ses->entry_cnt--;
3810                }
3811
3812                if (to_journal)
3813                        up_write(&curseg->journal_rwsem);
3814                else
3815                        f2fs_put_page(page, 1);
3816
3817                f2fs_bug_on(sbi, ses->entry_cnt);
3818                release_sit_entry_set(ses);
3819        }
3820
3821        f2fs_bug_on(sbi, !list_empty(head));
3822        f2fs_bug_on(sbi, sit_i->dirty_sentries);
3823out:
3824        if (cpc->reason & CP_DISCARD) {
3825                __u64 trim_start = cpc->trim_start;
3826
3827                for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
3828                        add_discard_addrs(sbi, cpc, false);
3829
3830                cpc->trim_start = trim_start;
3831        }
3832        up_write(&sit_i->sentry_lock);
3833
3834        set_prefree_as_free_segments(sbi);
3835}
3836
3837static int build_sit_info(struct f2fs_sb_info *sbi)
3838{
3839        struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
3840        struct sit_info *sit_i;
3841        unsigned int sit_segs, start;
3842        char *src_bitmap;
3843        unsigned int bitmap_size;
3844
3845        /* allocate memory for SIT information */
3846        sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL);
3847        if (!sit_i)
3848                return -ENOMEM;
3849
3850        SM_I(sbi)->sit_info = sit_i;
3851
3852        sit_i->sentries =
3853                f2fs_kvzalloc(sbi, array_size(sizeof(struct seg_entry),
3854                                              MAIN_SEGS(sbi)),
3855                              GFP_KERNEL);
3856        if (!sit_i->sentries)
3857                return -ENOMEM;
3858
3859        bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
3860        sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, bitmap_size,
3861                                                                GFP_KERNEL);
3862        if (!sit_i->dirty_sentries_bitmap)
3863                return -ENOMEM;
3864
3865        for (start = 0; start < MAIN_SEGS(sbi); start++) {
3866                sit_i->sentries[start].cur_valid_map
3867                        = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
3868                sit_i->sentries[start].ckpt_valid_map
3869                        = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
3870                if (!sit_i->sentries[start].cur_valid_map ||
3871                                !sit_i->sentries[start].ckpt_valid_map)
3872                        return -ENOMEM;
3873
3874#ifdef CONFIG_F2FS_CHECK_FS
3875                sit_i->sentries[start].cur_valid_map_mir
3876                        = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
3877                if (!sit_i->sentries[start].cur_valid_map_mir)
3878                        return -ENOMEM;
3879#endif
3880
3881                sit_i->sentries[start].discard_map
3882                        = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE,
3883                                                        GFP_KERNEL);
3884                if (!sit_i->sentries[start].discard_map)
3885                        return -ENOMEM;
3886        }
3887
3888        sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
3889        if (!sit_i->tmp_map)
3890                return -ENOMEM;
3891
3892        if (__is_large_section(sbi)) {
3893                sit_i->sec_entries =
3894                        f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry),
3895                                                      MAIN_SECS(sbi)),
3896                                      GFP_KERNEL);
3897                if (!sit_i->sec_entries)
3898                        return -ENOMEM;
3899        }
3900
3901        /* get information related with SIT */
3902        sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
3903
3904        /* setup SIT bitmap from ckeckpoint pack */
3905        bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
3906        src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
3907
3908        sit_i->sit_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
3909        if (!sit_i->sit_bitmap)
3910                return -ENOMEM;
3911
3912#ifdef CONFIG_F2FS_CHECK_FS
3913        sit_i->sit_bitmap_mir = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
3914        if (!sit_i->sit_bitmap_mir)
3915                return -ENOMEM;
3916#endif
3917
3918        /* init SIT information */
3919        sit_i->s_ops = &default_salloc_ops;
3920
3921        sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
3922        sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
3923        sit_i->written_valid_blocks = 0;
3924        sit_i->bitmap_size = bitmap_size;
3925        sit_i->dirty_sentries = 0;
3926        sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
3927        sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
3928        sit_i->mounted_time = ktime_get_real_seconds();
3929        init_rwsem(&sit_i->sentry_lock);
3930        return 0;
3931}
3932
3933static int build_free_segmap(struct f2fs_sb_info *sbi)
3934{
3935        struct free_segmap_info *free_i;
3936        unsigned int bitmap_size, sec_bitmap_size;
3937
3938        /* allocate memory for free segmap information */
3939        free_i = f2fs_kzalloc(sbi, sizeof(struct free_segmap_info), GFP_KERNEL);
3940        if (!free_i)
3941                return -ENOMEM;
3942
3943        SM_I(sbi)->free_info = free_i;
3944
3945        bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
3946        free_i->free_segmap = f2fs_kvmalloc(sbi, bitmap_size, GFP_KERNEL);
3947        if (!free_i->free_segmap)
3948                return -ENOMEM;
3949
3950        sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
3951        free_i->free_secmap = f2fs_kvmalloc(sbi, sec_bitmap_size, GFP_KERNEL);
3952        if (!free_i->free_secmap)
3953                return -ENOMEM;
3954
3955        /* set all segments as dirty temporarily */
3956        memset(free_i->free_segmap, 0xff, bitmap_size);
3957        memset(free_i->free_secmap, 0xff, sec_bitmap_size);
3958
3959        /* init free segmap information */
3960        free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
3961        free_i->free_segments = 0;
3962        free_i->free_sections = 0;
3963        spin_lock_init(&free_i->segmap_lock);
3964        return 0;
3965}
3966
3967static int build_curseg(struct f2fs_sb_info *sbi)
3968{
3969        struct curseg_info *array;
3970        int i;
3971
3972        array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE, sizeof(*array)),
3973                             GFP_KERNEL);
3974        if (!array)
3975                return -ENOMEM;
3976
3977        SM_I(sbi)->curseg_array = array;
3978
3979        for (i = 0; i < NR_CURSEG_TYPE; i++) {
3980                mutex_init(&array[i].curseg_mutex);
3981                array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
3982                if (!array[i].sum_blk)
3983                        return -ENOMEM;
3984                init_rwsem(&array[i].journal_rwsem);
3985                array[i].journal = f2fs_kzalloc(sbi,
3986                                sizeof(struct f2fs_journal), GFP_KERNEL);
3987                if (!array[i].journal)
3988                        return -ENOMEM;
3989                array[i].segno = NULL_SEGNO;
3990                array[i].next_blkoff = 0;
3991        }
3992        return restore_curseg_summaries(sbi);
3993}
3994
3995static int build_sit_entries(struct f2fs_sb_info *sbi)
3996{
3997        struct sit_info *sit_i = SIT_I(sbi);
3998        struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
3999        struct f2fs_journal *journal = curseg->journal;
4000        struct seg_entry *se;
4001        struct f2fs_sit_entry sit;
4002        int sit_blk_cnt = SIT_BLK_CNT(sbi);
4003        unsigned int i, start, end;
4004        unsigned int readed, start_blk = 0;
4005        int err = 0;
4006        block_t total_node_blocks = 0;
4007
4008        do {
4009                readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES,
4010                                                        META_SIT, true);
4011
4012                start = start_blk * sit_i->sents_per_block;
4013                end = (start_blk + readed) * sit_i->sents_per_block;
4014
4015                for (; start < end && start < MAIN_SEGS(sbi); start++) {
4016                        struct f2fs_sit_block *sit_blk;
4017                        struct page *page;
4018
4019                        se = &sit_i->sentries[start];
4020                        page = get_current_sit_page(sbi, start);
4021                        if (IS_ERR(page))
4022                                return PTR_ERR(page);
4023                        sit_blk = (struct f2fs_sit_block *)page_address(page);
4024                        sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
4025                        f2fs_put_page(page, 1);
4026
4027                        err = check_block_count(sbi, start, &sit);
4028                        if (err)
4029                                return err;
4030                        seg_info_from_raw_sit(se, &sit);
4031                        if (IS_NODESEG(se->type))
4032                                total_node_blocks += se->valid_blocks;
4033
4034                        /* build discard map only one time */
4035                        if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
4036                                memset(se->discard_map, 0xff,
4037                                        SIT_VBLOCK_MAP_SIZE);
4038                        } else {
4039                                memcpy(se->discard_map,
4040                                        se->cur_valid_map,
4041                                        SIT_VBLOCK_MAP_SIZE);
4042                                sbi->discard_blks +=
4043                                        sbi->blocks_per_seg -
4044                                        se->valid_blocks;
4045                        }
4046
4047                        if (__is_large_section(sbi))
4048                                get_sec_entry(sbi, start)->valid_blocks +=
4049                                                        se->valid_blocks;
4050                }
4051                start_blk += readed;
4052        } while (start_blk < sit_blk_cnt);
4053
4054        down_read(&curseg->journal_rwsem);
4055        for (i = 0; i < sits_in_cursum(journal); i++) {
4056                unsigned int old_valid_blocks;
4057
4058                start = le32_to_cpu(segno_in_journal(journal, i));
4059                if (start >= MAIN_SEGS(sbi)) {
4060                        f2fs_msg(sbi->sb, KERN_ERR,
4061                                        "Wrong journal entry on segno %u",
4062                                        start);
4063                        set_sbi_flag(sbi, SBI_NEED_FSCK);
4064                        err = -EINVAL;
4065                        break;
4066                }
4067
4068                se = &sit_i->sentries[start];
4069                sit = sit_in_journal(journal, i);
4070
4071                old_valid_blocks = se->valid_blocks;
4072                if (IS_NODESEG(se->type))
4073                        total_node_blocks -= old_valid_blocks;
4074
4075                err = check_block_count(sbi, start, &sit);
4076                if (err)
4077                        break;
4078                seg_info_from_raw_sit(se, &sit);
4079                if (IS_NODESEG(se->type))
4080                        total_node_blocks += se->valid_blocks;
4081
4082                if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
4083                        memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE);
4084                } else {
4085                        memcpy(se->discard_map, se->cur_valid_map,
4086                                                SIT_VBLOCK_MAP_SIZE);
4087                        sbi->discard_blks += old_valid_blocks;
4088                        sbi->discard_blks -= se->valid_blocks;
4089                }
4090
4091                if (__is_large_section(sbi)) {
4092                        get_sec_entry(sbi, start)->valid_blocks +=
4093                                                        se->valid_blocks;
4094                        get_sec_entry(sbi, start)->valid_blocks -=
4095                                                        old_valid_blocks;
4096                }
4097        }
4098        up_read(&curseg->journal_rwsem);
4099
4100        if (!err && total_node_blocks != valid_node_count(sbi)) {
4101                f2fs_msg(sbi->sb, KERN_ERR,
4102                        "SIT is corrupted node# %u vs %u",
4103                        total_node_blocks, valid_node_count(sbi));
4104                set_sbi_flag(sbi, SBI_NEED_FSCK);
4105                err = -EINVAL;
4106        }
4107
4108        return err;
4109}
4110
4111static void init_free_segmap(struct f2fs_sb_info *sbi)
4112{
4113        unsigned int start;
4114        int type;
4115
4116        for (start = 0; start < MAIN_SEGS(sbi); start++) {
4117                struct seg_entry *sentry = get_seg_entry(sbi, start);
4118                if (!sentry->valid_blocks)
4119                        __set_free(sbi, start);
4120                else
4121                        SIT_I(sbi)->written_valid_blocks +=
4122                                                sentry->valid_blocks;
4123        }
4124
4125        /* set use the current segments */
4126        for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
4127                struct curseg_info *curseg_t = CURSEG_I(sbi, type);
4128                __set_test_and_inuse(sbi, curseg_t->segno);
4129        }
4130}
4131
4132static void init_dirty_segmap(struct f2fs_sb_info *sbi)
4133{
4134        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4135        struct free_segmap_info *free_i = FREE_I(sbi);
4136        unsigned int segno = 0, offset = 0;
4137        unsigned short valid_blocks;
4138
4139        while (1) {
4140                /* find dirty segment based on free segmap */
4141                segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
4142                if (segno >= MAIN_SEGS(sbi))
4143                        break;
4144                offset = segno + 1;
4145                valid_blocks = get_valid_blocks(sbi, segno, false);
4146                if (valid_blocks == sbi->blocks_per_seg || !valid_blocks)
4147                        continue;
4148                if (valid_blocks > sbi->blocks_per_seg) {
4149                        f2fs_bug_on(sbi, 1);
4150                        continue;
4151                }
4152                mutex_lock(&dirty_i->seglist_lock);
4153                __locate_dirty_segment(sbi, segno, DIRTY);
4154                mutex_unlock(&dirty_i->seglist_lock);
4155        }
4156}
4157
4158static int init_victim_secmap(struct f2fs_sb_info *sbi)
4159{
4160        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4161        unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
4162
4163        dirty_i->victim_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
4164        if (!dirty_i->victim_secmap)
4165                return -ENOMEM;
4166        return 0;
4167}
4168
4169static int build_dirty_segmap(struct f2fs_sb_info *sbi)
4170{
4171        struct dirty_seglist_info *dirty_i;
4172        unsigned int bitmap_size, i;
4173
4174        /* allocate memory for dirty segments list information */
4175        dirty_i = f2fs_kzalloc(sbi, sizeof(struct dirty_seglist_info),
4176                                                                GFP_KERNEL);
4177        if (!dirty_i)
4178                return -ENOMEM;
4179
4180        SM_I(sbi)->dirty_info = dirty_i;
4181        mutex_init(&dirty_i->seglist_lock);
4182
4183        bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4184
4185        for (i = 0; i < NR_DIRTY_TYPE; i++) {
4186                dirty_i->dirty_segmap[i] = f2fs_kvzalloc(sbi, bitmap_size,
4187                                                                GFP_KERNEL);
4188                if (!dirty_i->dirty_segmap[i])
4189                        return -ENOMEM;
4190        }
4191
4192        init_dirty_segmap(sbi);
4193        return init_victim_secmap(sbi);
4194}
4195
4196/*
4197 * Update min, max modified time for cost-benefit GC algorithm
4198 */
4199static void init_min_max_mtime(struct f2fs_sb_info *sbi)
4200{
4201        struct sit_info *sit_i = SIT_I(sbi);
4202        unsigned int segno;
4203
4204        down_write(&sit_i->sentry_lock);
4205
4206        sit_i->min_mtime = ULLONG_MAX;
4207
4208        for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
4209                unsigned int i;
4210                unsigned long long mtime = 0;
4211
4212                for (i = 0; i < sbi->segs_per_sec; i++)
4213                        mtime += get_seg_entry(sbi, segno + i)->mtime;
4214
4215                mtime = div_u64(mtime, sbi->segs_per_sec);
4216
4217                if (sit_i->min_mtime > mtime)
4218                        sit_i->min_mtime = mtime;
4219        }
4220        sit_i->max_mtime = get_mtime(sbi, false);
4221        up_write(&sit_i->sentry_lock);
4222}
4223
4224int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
4225{
4226        struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
4227        struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
4228        struct f2fs_sm_info *sm_info;
4229        int err;
4230
4231        sm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_sm_info), GFP_KERNEL);
4232        if (!sm_info)
4233                return -ENOMEM;
4234
4235        /* init sm info */
4236        sbi->sm_info = sm_info;
4237        sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
4238        sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
4239        sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
4240        sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
4241        sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
4242        sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
4243        sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
4244        sm_info->rec_prefree_segments = sm_info->main_segments *
4245                                        DEF_RECLAIM_PREFREE_SEGMENTS / 100;
4246        if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS)
4247                sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS;
4248
4249        if (!test_opt(sbi, LFS))
4250                sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
4251        sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
4252        sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
4253        sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec;
4254        sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
4255        sm_info->min_ssr_sections = reserved_sections(sbi);
4256
4257        INIT_LIST_HEAD(&sm_info->sit_entry_set);
4258
4259        init_rwsem(&sm_info->curseg_lock);
4260
4261        if (!f2fs_readonly(sbi->sb)) {
4262                err = f2fs_create_flush_cmd_control(sbi);
4263                if (err)
4264                        return err;
4265        }
4266
4267        err = create_discard_cmd_control(sbi);
4268        if (err)
4269                return err;
4270
4271        err = build_sit_info(sbi);
4272        if (err)
4273                return err;
4274        err = build_free_segmap(sbi);
4275        if (err)
4276                return err;
4277        err = build_curseg(sbi);
4278        if (err)
4279                return err;
4280
4281        /* reinit free segmap based on SIT */
4282        err = build_sit_entries(sbi);
4283        if (err)
4284                return err;
4285
4286        init_free_segmap(sbi);
4287        err = build_dirty_segmap(sbi);
4288        if (err)
4289                return err;
4290
4291        init_min_max_mtime(sbi);
4292        return 0;
4293}
4294
4295static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
4296                enum dirty_type dirty_type)
4297{
4298        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4299
4300        mutex_lock(&dirty_i->seglist_lock);
4301        kvfree(dirty_i->dirty_segmap[dirty_type]);
4302        dirty_i->nr_dirty[dirty_type] = 0;
4303        mutex_unlock(&dirty_i->seglist_lock);
4304}
4305
4306static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
4307{
4308        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4309        kvfree(dirty_i->victim_secmap);
4310}
4311
4312static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
4313{
4314        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4315        int i;
4316
4317        if (!dirty_i)
4318                return;
4319
4320        /* discard pre-free/dirty segments list */
4321        for (i = 0; i < NR_DIRTY_TYPE; i++)
4322                discard_dirty_segmap(sbi, i);
4323
4324        destroy_victim_secmap(sbi);
4325        SM_I(sbi)->dirty_info = NULL;
4326        kvfree(dirty_i);
4327}
4328
4329static void destroy_curseg(struct f2fs_sb_info *sbi)
4330{
4331        struct curseg_info *array = SM_I(sbi)->curseg_array;
4332        int i;
4333
4334        if (!array)
4335                return;
4336        SM_I(sbi)->curseg_array = NULL;
4337        for (i = 0; i < NR_CURSEG_TYPE; i++) {
4338                kvfree(array[i].sum_blk);
4339                kvfree(array[i].journal);
4340        }
4341        kvfree(array);
4342}
4343
4344static void destroy_free_segmap(struct f2fs_sb_info *sbi)
4345{
4346        struct free_segmap_info *free_i = SM_I(sbi)->free_info;
4347        if (!free_i)
4348                return;
4349        SM_I(sbi)->free_info = NULL;
4350        kvfree(free_i->free_segmap);
4351        kvfree(free_i->free_secmap);
4352        kvfree(free_i);
4353}
4354
4355static void destroy_sit_info(struct f2fs_sb_info *sbi)
4356{
4357        struct sit_info *sit_i = SIT_I(sbi);
4358        unsigned int start;
4359
4360        if (!sit_i)
4361                return;
4362
4363        if (sit_i->sentries) {
4364                for (start = 0; start < MAIN_SEGS(sbi); start++) {
4365                        kvfree(sit_i->sentries[start].cur_valid_map);
4366#ifdef CONFIG_F2FS_CHECK_FS
4367                        kvfree(sit_i->sentries[start].cur_valid_map_mir);
4368#endif
4369                        kvfree(sit_i->sentries[start].ckpt_valid_map);
4370                        kvfree(sit_i->sentries[start].discard_map);
4371                }
4372        }
4373        kvfree(sit_i->tmp_map);
4374
4375        kvfree(sit_i->sentries);
4376        kvfree(sit_i->sec_entries);
4377        kvfree(sit_i->dirty_sentries_bitmap);
4378
4379        SM_I(sbi)->sit_info = NULL;
4380        kvfree(sit_i->sit_bitmap);
4381#ifdef CONFIG_F2FS_CHECK_FS
4382        kvfree(sit_i->sit_bitmap_mir);
4383#endif
4384        kvfree(sit_i);
4385}
4386
4387void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi)
4388{
4389        struct f2fs_sm_info *sm_info = SM_I(sbi);
4390
4391        if (!sm_info)
4392                return;
4393        f2fs_destroy_flush_cmd_control(sbi, true);
4394        destroy_discard_cmd_control(sbi);
4395        destroy_dirty_segmap(sbi);
4396        destroy_curseg(sbi);
4397        destroy_free_segmap(sbi);
4398        destroy_sit_info(sbi);
4399        sbi->sm_info = NULL;
4400        kvfree(sm_info);
4401}
4402
4403int __init f2fs_create_segment_manager_caches(void)
4404{
4405        discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
4406                        sizeof(struct discard_entry));
4407        if (!discard_entry_slab)
4408                goto fail;
4409
4410        discard_cmd_slab = f2fs_kmem_cache_create("discard_cmd",
4411                        sizeof(struct discard_cmd));
4412        if (!discard_cmd_slab)
4413                goto destroy_discard_entry;
4414
4415        sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set",
4416                        sizeof(struct sit_entry_set));
4417        if (!sit_entry_set_slab)
4418                goto destroy_discard_cmd;
4419
4420        inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry",
4421                        sizeof(struct inmem_pages));
4422        if (!inmem_entry_slab)
4423                goto destroy_sit_entry_set;
4424        return 0;
4425
4426destroy_sit_entry_set:
4427        kmem_cache_destroy(sit_entry_set_slab);
4428destroy_discard_cmd:
4429        kmem_cache_destroy(discard_cmd_slab);
4430destroy_discard_entry:
4431        kmem_cache_destroy(discard_entry_slab);
4432fail:
4433        return -ENOMEM;
4434}
4435
4436void f2fs_destroy_segment_manager_caches(void)
4437{
4438        kmem_cache_destroy(sit_entry_set_slab);
4439        kmem_cache_destroy(discard_cmd_slab);
4440        kmem_cache_destroy(discard_entry_slab);
4441        kmem_cache_destroy(inmem_entry_slab);
4442}
4443