linux/fs/f2fs/data.c
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * fs/f2fs/data.c
   4 *
   5 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
   6 *             http://www.samsung.com/
   7 */
   8#include <linux/fs.h>
   9#include <linux/f2fs_fs.h>
  10#include <linux/buffer_head.h>
  11#include <linux/mpage.h>
  12#include <linux/writeback.h>
  13#include <linux/backing-dev.h>
  14#include <linux/pagevec.h>
  15#include <linux/blkdev.h>
  16#include <linux/bio.h>
  17#include <linux/prefetch.h>
  18#include <linux/uio.h>
  19#include <linux/cleancache.h>
  20#include <linux/sched/signal.h>
  21
  22#include "f2fs.h"
  23#include "node.h"
  24#include "segment.h"
  25#include "trace.h"
  26#include <trace/events/f2fs.h>
  27
  28#define NUM_PREALLOC_POST_READ_CTXS     128
  29
  30static struct kmem_cache *bio_post_read_ctx_cache;
  31static mempool_t *bio_post_read_ctx_pool;
  32
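    /*
     * Pages whose writeback completion must be guaranteed by checkpoint:
     * meta/node pages, directory pages, atomic or quota file pages, and
     * pages holding cold data.
     */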
  33static bool __is_cp_guaranteed(struct page *page)
  34{
  35        struct address_space *mapping = page->mapping;
  36        struct inode *inode;
  37        struct f2fs_sb_info *sbi;
  38
  39        if (!mapping)
  40                return false;
  41
  42        inode = mapping->host;
  43        sbi = F2FS_I_SB(inode);
  44
  45        if (inode->i_ino == F2FS_META_INO(sbi) ||
  46                        inode->i_ino ==  F2FS_NODE_INO(sbi) ||
  47                        S_ISDIR(inode->i_mode) ||
  48                        (S_ISREG(inode->i_mode) &&
  49                        (f2fs_is_atomic_file(inode) || IS_NOQUOTA(inode))) ||
  50                        is_cold_data(page))
  51                return true;
  52        return false;
  53}
  54
  55static enum count_type __read_io_type(struct page *page)
  56{
  57        struct address_space *mapping = page->mapping;
  58
  59        if (mapping) {
  60                struct inode *inode = mapping->host;
  61                struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  62
  63                if (inode->i_ino == F2FS_META_INO(sbi))
  64                        return F2FS_RD_META;
  65
  66                if (inode->i_ino == F2FS_NODE_INO(sbi))
  67                        return F2FS_RD_NODE;
  68        }
  69        return F2FS_RD_DATA;
  70}
  71
  72/* postprocessing steps for read bios */
  73enum bio_post_read_step {
  74        STEP_INITIAL = 0,
  75        STEP_DECRYPT,
  76};
  77
  78struct bio_post_read_ctx {
  79        struct bio *bio;
  80        struct work_struct work;
  81        unsigned int cur_step;
  82        unsigned int enabled_steps;
  83};
  84
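    /*
     * Common completion work for read bios: mark each page up-to-date (or
     * clear it on error), drop the in-flight read counter, unlock the page,
     * and free the post-read context if one was attached.
     */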
  85static void __read_end_io(struct bio *bio)
  86{
  87        struct page *page;
  88        struct bio_vec *bv;
  89        int i;
  90        struct bvec_iter_all iter_all;
  91
  92        bio_for_each_segment_all(bv, bio, i, iter_all) {
  93                page = bv->bv_page;
  94
  95                /* PG_error was set if any post_read step failed */
  96                if (bio->bi_status || PageError(page)) {
  97                        ClearPageUptodate(page);
  98                        /* will re-read again later */
  99                        ClearPageError(page);
 100                } else {
 101                        SetPageUptodate(page);
 102                }
 103                dec_page_count(F2FS_P_SB(page), __read_io_type(page));
 104                unlock_page(page);
 105        }
 106        if (bio->bi_private)
 107                mempool_free(bio->bi_private, bio_post_read_ctx_pool);
 108        bio_put(bio);
 109}
 110
 111static void bio_post_read_processing(struct bio_post_read_ctx *ctx);
 112
 113static void decrypt_work(struct work_struct *work)
 114{
 115        struct bio_post_read_ctx *ctx =
 116                container_of(work, struct bio_post_read_ctx, work);
 117
 118        fscrypt_decrypt_bio(ctx->bio);
 119
 120        bio_post_read_processing(ctx);
 121}
 122
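    /*
     * Advance to the next enabled post-read step. Decryption is handed off
     * to the fscrypt workqueue; once no step remains, the bio is completed
     * via __read_end_io().
     */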
 123static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
 124{
 125        switch (++ctx->cur_step) {
 126        case STEP_DECRYPT:
 127                if (ctx->enabled_steps & (1 << STEP_DECRYPT)) {
 128                        INIT_WORK(&ctx->work, decrypt_work);
 129                        fscrypt_enqueue_decrypt_work(&ctx->work);
 130                        return;
 131                }
 132                ctx->cur_step++;
 133                /* fall-through */
 134        default:
 135                __read_end_io(ctx->bio);
 136        }
 137}
 138
 139static bool f2fs_bio_post_read_required(struct bio *bio)
 140{
 141        return bio->bi_private && !bio->bi_status;
 142}
 143
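    /*
     * Read-bio completion: start post-read processing (e.g. decryption)
     * when a context is attached, otherwise finish the pages right away.
     */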
 144static void f2fs_read_end_io(struct bio *bio)
 145{
 146        if (time_to_inject(F2FS_P_SB(bio_first_page_all(bio)),
 147                                                FAULT_READ_IO)) {
 148                f2fs_show_injection_info(FAULT_READ_IO);
 149                bio->bi_status = BLK_STS_IOERR;
 150        }
 151
 152        if (f2fs_bio_post_read_required(bio)) {
 153                struct bio_post_read_ctx *ctx = bio->bi_private;
 154
 155                ctx->cur_step = STEP_INITIAL;
 156                bio_post_read_processing(ctx);
 157                return;
 158        }
 159
 160        __read_end_io(bio);
 161}
 162
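    /*
     * Write-bio completion: release dummy padding pages, propagate I/O
     * errors to the mapping (stopping checkpointing for CP data), drop the
     * writeback counters, and wake up a checkpoint waiter once no
     * F2FS_WB_CP_DATA pages remain.
     */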
 163static void f2fs_write_end_io(struct bio *bio)
 164{
 165        struct f2fs_sb_info *sbi = bio->bi_private;
 166        struct bio_vec *bvec;
 167        int i;
 168        struct bvec_iter_all iter_all;
 169
 170        if (time_to_inject(sbi, FAULT_WRITE_IO)) {
 171                f2fs_show_injection_info(FAULT_WRITE_IO);
 172                bio->bi_status = BLK_STS_IOERR;
 173        }
 174
 175        bio_for_each_segment_all(bvec, bio, i, iter_all) {
 176                struct page *page = bvec->bv_page;
 177                enum count_type type = WB_DATA_TYPE(page);
 178
 179                if (IS_DUMMY_WRITTEN_PAGE(page)) {
 180                        set_page_private(page, (unsigned long)NULL);
 181                        ClearPagePrivate(page);
 182                        unlock_page(page);
 183                        mempool_free(page, sbi->write_io_dummy);
 184
 185                        if (unlikely(bio->bi_status))
 186                                f2fs_stop_checkpoint(sbi, true);
 187                        continue;
 188                }
 189
 190                fscrypt_pullback_bio_page(&page, true);
 191
 192                if (unlikely(bio->bi_status)) {
 193                        mapping_set_error(page->mapping, -EIO);
 194                        if (type == F2FS_WB_CP_DATA)
 195                                f2fs_stop_checkpoint(sbi, true);
 196                }
 197
 198                f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) &&
 199                                        page->index != nid_of_node(page));
 200
 201                dec_page_count(sbi, type);
 202                if (f2fs_in_warm_node_list(sbi, page))
 203                        f2fs_del_fsync_node_entry(sbi, page);
 204                clear_cold_data(page);
 205                end_page_writeback(page);
 206        }
 207        if (!get_pages(sbi, F2FS_WB_CP_DATA) &&
 208                                wq_has_sleeper(&sbi->cp_wait))
 209                wake_up(&sbi->cp_wait);
 210
 211        bio_put(bio);
 212}
 213
 214/*
 215 * Return the target block device of blk_addr; set it on the bio, if given.
 216 */
 217struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
 218                                block_t blk_addr, struct bio *bio)
 219{
 220        struct block_device *bdev = sbi->sb->s_bdev;
 221        int i;
 222
 223        for (i = 0; i < sbi->s_ndevs; i++) {
 224                if (FDEV(i).start_blk <= blk_addr &&
 225                                        FDEV(i).end_blk >= blk_addr) {
 226                        blk_addr -= FDEV(i).start_blk;
 227                        bdev = FDEV(i).bdev;
 228                        break;
 229                }
 230        }
 231        if (bio) {
 232                bio_set_dev(bio, bdev);
 233                bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
 234        }
 235        return bdev;
 236}
 237
 238int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
 239{
 240        int i;
 241
 242        for (i = 0; i < sbi->s_ndevs; i++)
 243                if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
 244                        return i;
 245        return 0;
 246}
 247
 248static bool __same_bdev(struct f2fs_sb_info *sbi,
 249                                block_t blk_addr, struct bio *bio)
 250{
 251        struct block_device *b = f2fs_target_device(sbi, blk_addr, NULL);
 252        return bio->bi_disk == b->bd_disk && bio->bi_partno == b->bd_partno;
 253}
 254
 255/*
 256 * Low-level block read/write IO operations.
 257 */
 258static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
 259                                struct writeback_control *wbc,
 260                                int npages, bool is_read,
 261                                enum page_type type, enum temp_type temp)
 262{
 263        struct bio *bio;
 264
 265        bio = f2fs_bio_alloc(sbi, npages, true);
 266
 267        f2fs_target_device(sbi, blk_addr, bio);
 268        if (is_read) {
 269                bio->bi_end_io = f2fs_read_end_io;
 270                bio->bi_private = NULL;
 271        } else {
 272                bio->bi_end_io = f2fs_write_end_io;
 273                bio->bi_private = sbi;
 274                bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, type, temp);
 275        }
 276        if (wbc)
 277                wbc_init_bio(wbc, bio);
 278
 279        return bio;
 280}
 281
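    /*
     * Submit a bio, padding DATA/NODE write bios with zero-filled dummy
     * pages so that their size is aligned to F2FS_IO_SIZE() (only relevant
     * when F2FS_IO_SIZE() spans more than one block). Padding a NODE bio
     * loses the next block address chain, so SBI_NEED_CP is set there.
     */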
 282static inline void __submit_bio(struct f2fs_sb_info *sbi,
 283                                struct bio *bio, enum page_type type)
 284{
 285        if (!is_read_io(bio_op(bio))) {
 286                unsigned int start;
 287
 288                if (type != DATA && type != NODE)
 289                        goto submit_io;
 290
 291                if (test_opt(sbi, LFS) && current->plug)
 292                        blk_finish_plug(current->plug);
 293
 294                start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS;
 295                start %= F2FS_IO_SIZE(sbi);
 296
 297                if (start == 0)
 298                        goto submit_io;
 299
 300                /* fill dummy pages */
 301                for (; start < F2FS_IO_SIZE(sbi); start++) {
 302                        struct page *page =
 303                                mempool_alloc(sbi->write_io_dummy,
 304                                              GFP_NOIO | __GFP_NOFAIL);
 305                        f2fs_bug_on(sbi, !page);
 306
 307                        zero_user_segment(page, 0, PAGE_SIZE);
 308                        SetPagePrivate(page);
 309                        set_page_private(page, (unsigned long)DUMMY_WRITTEN_PAGE);
 310                        lock_page(page);
 311                        if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
 312                                f2fs_bug_on(sbi, 1);
 313                }
 314                /*
 315                 * In the NODE case, we lose the next block address chain, so we
 316                 * need to do a checkpoint in f2fs_sync_file.
 317                 */
 318                if (type == NODE)
 319                        set_sbi_flag(sbi, SBI_NEED_CP);
 320        }
 321submit_io:
 322        if (is_read_io(bio_op(bio)))
 323                trace_f2fs_submit_read_bio(sbi->sb, type, bio);
 324        else
 325                trace_f2fs_submit_write_bio(sbi->sb, type, bio);
 326        submit_bio(bio);
 327}
 328
 329static void __submit_merged_bio(struct f2fs_bio_info *io)
 330{
 331        struct f2fs_io_info *fio = &io->fio;
 332
 333        if (!io->bio)
 334                return;
 335
 336        bio_set_op_attrs(io->bio, fio->op, fio->op_flags);
 337
 338        if (is_read_io(fio->op))
 339                trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
 340        else
 341                trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio);
 342
 343        __submit_bio(io->sbi, io->bio, fio->type);
 344        io->bio = NULL;
 345}
 346
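    /*
     * Check whether the bio pending in @io already carries a page that
     * belongs to @inode, matches @page, or was written for node @ino.
     * With no filter given, any non-empty bio counts as a match.
     */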
 347static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode,
 348                                                struct page *page, nid_t ino)
 349{
 350        struct bio_vec *bvec;
 351        struct page *target;
 352        int i;
 353        struct bvec_iter_all iter_all;
 354
 355        if (!io->bio)
 356                return false;
 357
 358        if (!inode && !page && !ino)
 359                return true;
 360
 361        bio_for_each_segment_all(bvec, io->bio, i, iter_all) {
 362
 363                if (bvec->bv_page->mapping)
 364                        target = bvec->bv_page;
 365                else
 366                        target = fscrypt_control_page(bvec->bv_page);
 367
 368                if (inode && inode == target->mapping->host)
 369                        return true;
 370                if (page && page == target)
 371                        return true;
 372                if (ino && ino == ino_of_node(target))
 373                        return true;
 374        }
 375
 376        return false;
 377}
 378
 379static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
 380                                enum page_type type, enum temp_type temp)
 381{
 382        enum page_type btype = PAGE_TYPE_OF_BIO(type);
 383        struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
 384
 385        down_write(&io->io_rwsem);
 386
 387        /* change META to META_FLUSH in the checkpoint procedure */
 388        if (type >= META_FLUSH) {
 389                io->fio.type = META_FLUSH;
 390                io->fio.op = REQ_OP_WRITE;
 391                io->fio.op_flags = REQ_META | REQ_PRIO | REQ_SYNC;
 392                if (!test_opt(sbi, NOBARRIER))
 393                        io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
 394        }
 395        __submit_merged_bio(io);
 396        up_write(&io->io_rwsem);
 397}
 398
 399static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
 400                                struct inode *inode, struct page *page,
 401                                nid_t ino, enum page_type type, bool force)
 402{
 403        enum temp_type temp;
 404        bool ret = true;
 405
 406        for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
 407                if (!force)     {
 408                        enum page_type btype = PAGE_TYPE_OF_BIO(type);
 409                        struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
 410
 411                        down_read(&io->io_rwsem);
 412                        ret = __has_merged_page(io, inode, page, ino);
 413                        up_read(&io->io_rwsem);
 414                }
 415                if (ret)
 416                        __f2fs_submit_merged_write(sbi, type, temp);
 417
 418                /* TODO: use HOT temp only for meta pages now. */
 419                if (type >= META)
 420                        break;
 421        }
 422}
 423
 424void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type)
 425{
 426        __submit_merged_write_cond(sbi, NULL, 0, 0, type, true);
 427}
 428
 429void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
 430                                struct inode *inode, struct page *page,
 431                                nid_t ino, enum page_type type)
 432{
 433        __submit_merged_write_cond(sbi, inode, page, ino, type, false);
 434}
 435
 436void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
 437{
 438        f2fs_submit_merged_write(sbi, DATA);
 439        f2fs_submit_merged_write(sbi, NODE);
 440        f2fs_submit_merged_write(sbi, META);
 441}
 442
 443/*
 444 * Fill the locked page with the data located at the given block address.
 445 * The caller needs to unlock the page on failure.
 446 */
 447int f2fs_submit_page_bio(struct f2fs_io_info *fio)
 448{
 449        struct bio *bio;
 450        struct page *page = fio->encrypted_page ?
 451                        fio->encrypted_page : fio->page;
 452
 453        if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
 454                        __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
 455                return -EFAULT;
 456
 457        trace_f2fs_submit_page_bio(page, fio);
 458        f2fs_trace_ios(fio, 0);
 459
 460        /* Allocate a new bio */
 461        bio = __bio_alloc(fio->sbi, fio->new_blkaddr, fio->io_wbc,
 462                                1, is_read_io(fio->op), fio->type, fio->temp);
 463
 464        if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
 465                bio_put(bio);
 466                return -EFAULT;
 467        }
 468
 469        if (fio->io_wbc && !is_read_io(fio->op))
 470                wbc_account_io(fio->io_wbc, page, PAGE_SIZE);
 471
 472        bio_set_op_attrs(bio, fio->op, fio->op_flags);
 473
 474        inc_page_count(fio->sbi, is_read_io(fio->op) ?
 475                        __read_io_type(page): WB_DATA_TYPE(fio->page));
 476
 477        __submit_bio(fio->sbi, bio, fio->type);
 478        return 0;
 479}
 480
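    /*
     * Merge the page described by @fio into the per-(type, temp) write bio,
     * submitting the pending bio first when the new block is not physically
     * contiguous, uses different op flags, or targets another device. Pages
     * queued on io->io_list are drained in the same pass when fio->in_list.
     */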
 481void f2fs_submit_page_write(struct f2fs_io_info *fio)
 482{
 483        struct f2fs_sb_info *sbi = fio->sbi;
 484        enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
 485        struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
 486        struct page *bio_page;
 487
 488        f2fs_bug_on(sbi, is_read_io(fio->op));
 489
 490        down_write(&io->io_rwsem);
 491next:
 492        if (fio->in_list) {
 493                spin_lock(&io->io_lock);
 494                if (list_empty(&io->io_list)) {
 495                        spin_unlock(&io->io_lock);
 496                        goto out;
 497                }
 498                fio = list_first_entry(&io->io_list,
 499                                                struct f2fs_io_info, list);
 500                list_del(&fio->list);
 501                spin_unlock(&io->io_lock);
 502        }
 503
 504        if (__is_valid_data_blkaddr(fio->old_blkaddr))
 505                verify_block_addr(fio, fio->old_blkaddr);
 506        verify_block_addr(fio, fio->new_blkaddr);
 507
 508        bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
 509
 510        /* set submitted = true as a return value */
 511        fio->submitted = true;
 512
 513        inc_page_count(sbi, WB_DATA_TYPE(bio_page));
 514
 515        if (io->bio && (io->last_block_in_bio != fio->new_blkaddr - 1 ||
 516            (io->fio.op != fio->op || io->fio.op_flags != fio->op_flags) ||
 517                        !__same_bdev(sbi, fio->new_blkaddr, io->bio)))
 518                __submit_merged_bio(io);
 519alloc_new:
 520        if (io->bio == NULL) {
 521                if ((fio->type == DATA || fio->type == NODE) &&
 522                                fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
 523                        dec_page_count(sbi, WB_DATA_TYPE(bio_page));
 524                        fio->retry = true;
 525                        goto skip;
 526                }
 527                io->bio = __bio_alloc(sbi, fio->new_blkaddr, fio->io_wbc,
 528                                                BIO_MAX_PAGES, false,
 529                                                fio->type, fio->temp);
 530                io->fio = *fio;
 531        }
 532
 533        if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) {
 534                __submit_merged_bio(io);
 535                goto alloc_new;
 536        }
 537
 538        if (fio->io_wbc)
 539                wbc_account_io(fio->io_wbc, bio_page, PAGE_SIZE);
 540
 541        io->last_block_in_bio = fio->new_blkaddr;
 542        f2fs_trace_ios(fio, 0);
 543
 544        trace_f2fs_submit_page_write(fio->page, fio);
 545skip:
 546        if (fio->in_list)
 547                goto next;
 548out:
 549        if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
 550                                f2fs_is_checkpoint_ready(sbi))
 551                __submit_merged_bio(io);
 552        up_write(&io->io_rwsem);
 553}
 554
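    /*
     * Allocate a read bio aimed at @blkaddr. For encrypted files a
     * bio_post_read_ctx is attached so that the payload gets decrypted on
     * completion.
     */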
 555static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
 556                                        unsigned nr_pages, unsigned op_flag)
 557{
 558        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 559        struct bio *bio;
 560        struct bio_post_read_ctx *ctx;
 561        unsigned int post_read_steps = 0;
 562
 563        if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC))
 564                return ERR_PTR(-EFAULT);
 565
 566        bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false);
 567        if (!bio)
 568                return ERR_PTR(-ENOMEM);
 569        f2fs_target_device(sbi, blkaddr, bio);
 570        bio->bi_end_io = f2fs_read_end_io;
 571        bio_set_op_attrs(bio, REQ_OP_READ, op_flag);
 572
 573        if (f2fs_encrypted_file(inode))
 574                post_read_steps |= 1 << STEP_DECRYPT;
 575        if (post_read_steps) {
 576                ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
 577                if (!ctx) {
 578                        bio_put(bio);
 579                        return ERR_PTR(-ENOMEM);
 580                }
 581                ctx->bio = bio;
 582                ctx->enabled_steps = post_read_steps;
 583                bio->bi_private = ctx;
 584        }
 585
 586        return bio;
 587}
 588
 589/* This can handle encryption stuff */
 590static int f2fs_submit_page_read(struct inode *inode, struct page *page,
 591                                                        block_t blkaddr)
 592{
 593        struct bio *bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0);
 594
 595        if (IS_ERR(bio))
 596                return PTR_ERR(bio);
 597
 598        /* wait for GCed page writeback via META_MAPPING */
 599        f2fs_wait_on_block_writeback(inode, blkaddr);
 600
 601        if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
 602                bio_put(bio);
 603                return -EFAULT;
 604        }
 605        ClearPageError(page);
 606        inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
 607        __submit_bio(F2FS_I_SB(inode), bio, DATA);
 608        return 0;
 609}
 610
 611static void __set_data_blkaddr(struct dnode_of_data *dn)
 612{
 613        struct f2fs_node *rn = F2FS_NODE(dn->node_page);
 614        __le32 *addr_array;
 615        int base = 0;
 616
 617        if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode))
 618                base = get_extra_isize(dn->inode);
 619
 620        /* Get physical address of data block */
 621        addr_array = blkaddr_in_node(rn);
 622        addr_array[base + dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
 623}
 624
 625/*
 626 * Lock ordering for the change of data block address:
 627 * ->data_page
 628 *  ->node_page
 629 *    update block addresses in the node page
 630 */
 631void f2fs_set_data_blkaddr(struct dnode_of_data *dn)
 632{
 633        f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
 634        __set_data_blkaddr(dn);
 635        if (set_page_dirty(dn->node_page))
 636                dn->node_changed = true;
 637}
 638
 639void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
 640{
 641        dn->data_blkaddr = blkaddr;
 642        f2fs_set_data_blkaddr(dn);
 643        f2fs_update_extent_cache(dn);
 644}
 645
 646/* dn->ofs_in_node will be returned with an up-to-date last block pointer */
 647int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
 648{
 649        struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
 650        int err;
 651
 652        if (!count)
 653                return 0;
 654
 655        if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
 656                return -EPERM;
 657        if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
 658                return err;
 659
 660        trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
 661                                                dn->ofs_in_node, count);
 662
 663        f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
 664
 665        for (; count > 0; dn->ofs_in_node++) {
 666                block_t blkaddr = datablock_addr(dn->inode,
 667                                        dn->node_page, dn->ofs_in_node);
 668                if (blkaddr == NULL_ADDR) {
 669                        dn->data_blkaddr = NEW_ADDR;
 670                        __set_data_blkaddr(dn);
 671                        count--;
 672                }
 673        }
 674
 675        if (set_page_dirty(dn->node_page))
 676                dn->node_changed = true;
 677        return 0;
 678}
 679
 680/* Should keep dn->ofs_in_node unchanged */
 681int f2fs_reserve_new_block(struct dnode_of_data *dn)
 682{
 683        unsigned int ofs_in_node = dn->ofs_in_node;
 684        int ret;
 685
 686        ret = f2fs_reserve_new_blocks(dn, 1);
 687        dn->ofs_in_node = ofs_in_node;
 688        return ret;
 689}
 690
 691int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
 692{
 693        bool need_put = dn->inode_page ? false : true;
 694        int err;
 695
 696        err = f2fs_get_dnode_of_data(dn, index, ALLOC_NODE);
 697        if (err)
 698                return err;
 699
 700        if (dn->data_blkaddr == NULL_ADDR)
 701                err = f2fs_reserve_new_block(dn);
 702        if (err || need_put)
 703                f2fs_put_dnode(dn);
 704        return err;
 705}
 706
 707int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
 708{
 709        struct extent_info ei  = {0,0,0};
 710        struct inode *inode = dn->inode;
 711
 712        if (f2fs_lookup_extent_cache(inode, index, &ei)) {
 713                dn->data_blkaddr = ei.blk + index - ei.fofs;
 714                return 0;
 715        }
 716
 717        return f2fs_reserve_block(dn, index);
 718}
 719
 720struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
 721                                                int op_flags, bool for_write)
 722{
 723        struct address_space *mapping = inode->i_mapping;
 724        struct dnode_of_data dn;
 725        struct page *page;
 726        struct extent_info ei = {0,0,0};
 727        int err;
 728
 729        page = f2fs_grab_cache_page(mapping, index, for_write);
 730        if (!page)
 731                return ERR_PTR(-ENOMEM);
 732
 733        if (f2fs_lookup_extent_cache(inode, index, &ei)) {
 734                dn.data_blkaddr = ei.blk + index - ei.fofs;
 735                goto got_it;
 736        }
 737
 738        set_new_dnode(&dn, inode, NULL, NULL, 0);
 739        err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
 740        if (err)
 741                goto put_err;
 742        f2fs_put_dnode(&dn);
 743
 744        if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
 745                err = -ENOENT;
 746                goto put_err;
 747        }
 748got_it:
 749        if (PageUptodate(page)) {
 750                unlock_page(page);
 751                return page;
 752        }
 753
 754        /*
 755         * A new dentry page is allocated but cannot be written, since its
 756         * new inode page couldn't be allocated due to -ENOSPC.
 757         * In that case, its blkaddr remains NEW_ADDR.
 758         * See f2fs_add_link -> f2fs_get_new_data_page ->
 759         * f2fs_init_inode_metadata.
 760         */
 761        if (dn.data_blkaddr == NEW_ADDR) {
 762                zero_user_segment(page, 0, PAGE_SIZE);
 763                if (!PageUptodate(page))
 764                        SetPageUptodate(page);
 765                unlock_page(page);
 766                return page;
 767        }
 768
 769        err = f2fs_submit_page_read(inode, page, dn.data_blkaddr);
 770        if (err)
 771                goto put_err;
 772        return page;
 773
 774put_err:
 775        f2fs_put_page(page, 1);
 776        return ERR_PTR(err);
 777}
 778
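    /*
     * Find the data page in the page cache, reading it in if needed. The
     * page is returned with a reference but not locked, and only when it
     * is up to date.
     */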
 779struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index)
 780{
 781        struct address_space *mapping = inode->i_mapping;
 782        struct page *page;
 783
 784        page = find_get_page(mapping, index);
 785        if (page && PageUptodate(page))
 786                return page;
 787        f2fs_put_page(page, 0);
 788
 789        page = f2fs_get_read_data_page(inode, index, 0, false);
 790        if (IS_ERR(page))
 791                return page;
 792
 793        if (PageUptodate(page))
 794                return page;
 795
 796        wait_on_page_locked(page);
 797        if (unlikely(!PageUptodate(page))) {
 798                f2fs_put_page(page, 0);
 799                return ERR_PTR(-EIO);
 800        }
 801        return page;
 802}
 803
 804/*
 805 * If it tries to access a hole, return an error, because the callers
 806 * (functions in dir.c and GC) should be able to know whether this page
 807 * exists or not.
 808 */
 809struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
 810                                                        bool for_write)
 811{
 812        struct address_space *mapping = inode->i_mapping;
 813        struct page *page;
 814repeat:
 815        page = f2fs_get_read_data_page(inode, index, 0, for_write);
 816        if (IS_ERR(page))
 817                return page;
 818
 819        /* wait for read completion */
 820        lock_page(page);
 821        if (unlikely(page->mapping != mapping)) {
 822                f2fs_put_page(page, 1);
 823                goto repeat;
 824        }
 825        if (unlikely(!PageUptodate(page))) {
 826                f2fs_put_page(page, 1);
 827                return ERR_PTR(-EIO);
 828        }
 829        return page;
 830}
 831
 832/*
 833 * Caller ensures that this data page is never allocated.
 834 * A new zero-filled data page is allocated in the page cache.
 835 *
 836 * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
 837 * f2fs_unlock_op().
 838 * Note that ipage is set only by make_empty_dir, and if any error occurs,
 839 * ipage should be released by this function.
 840 */
 841struct page *f2fs_get_new_data_page(struct inode *inode,
 842                struct page *ipage, pgoff_t index, bool new_i_size)
 843{
 844        struct address_space *mapping = inode->i_mapping;
 845        struct page *page;
 846        struct dnode_of_data dn;
 847        int err;
 848
 849        page = f2fs_grab_cache_page(mapping, index, true);
 850        if (!page) {
 851                /*
 852                 * before exiting, we should make sure ipage will be released
 853                 * if any error occurs.
 854                 */
 855                f2fs_put_page(ipage, 1);
 856                return ERR_PTR(-ENOMEM);
 857        }
 858
 859        set_new_dnode(&dn, inode, ipage, NULL, 0);
 860        err = f2fs_reserve_block(&dn, index);
 861        if (err) {
 862                f2fs_put_page(page, 1);
 863                return ERR_PTR(err);
 864        }
 865        if (!ipage)
 866                f2fs_put_dnode(&dn);
 867
 868        if (PageUptodate(page))
 869                goto got_it;
 870
 871        if (dn.data_blkaddr == NEW_ADDR) {
 872                zero_user_segment(page, 0, PAGE_SIZE);
 873                if (!PageUptodate(page))
 874                        SetPageUptodate(page);
 875        } else {
 876                f2fs_put_page(page, 1);
 877
 878                /* if ipage exists, blkaddr should be NEW_ADDR */
 879                f2fs_bug_on(F2FS_I_SB(inode), ipage);
 880                page = f2fs_get_lock_data_page(inode, index, true);
 881                if (IS_ERR(page))
 882                        return page;
 883        }
 884got_it:
 885        if (new_i_size && i_size_read(inode) <
 886                                ((loff_t)(index + 1) << PAGE_SHIFT))
 887                f2fs_i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT));
 888        return page;
 889}
 890
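    /*
     * Allocate a new data block for @dn (out-of-place, LFS style), record
     * the new address in the dnode, and invalidate any stale copy of the
     * old block that may still sit in META_MAPPING.
     */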
 891static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
 892{
 893        struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
 894        struct f2fs_summary sum;
 895        struct node_info ni;
 896        block_t old_blkaddr;
 897        blkcnt_t count = 1;
 898        int err;
 899
 900        if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
 901                return -EPERM;
 902
 903        err = f2fs_get_node_info(sbi, dn->nid, &ni);
 904        if (err)
 905                return err;
 906
 907        dn->data_blkaddr = datablock_addr(dn->inode,
 908                                dn->node_page, dn->ofs_in_node);
 909        if (dn->data_blkaddr != NULL_ADDR)
 910                goto alloc;
 911
 912        if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
 913                return err;
 914
 915alloc:
 916        set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
 917        old_blkaddr = dn->data_blkaddr;
 918        f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
 919                                        &sum, seg_type, NULL, false);
 920        if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
 921                invalidate_mapping_pages(META_MAPPING(sbi),
 922                                        old_blkaddr, old_blkaddr);
 923        f2fs_set_data_blkaddr(dn);
 924
 925        /*
 926         * i_size will be updated by direct_IO. Otherwise, we'll get stale
 927         * data from an unwritten block via dio_read.
 928         */
 929        return 0;
 930}
 931
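    /*
     * Preallocate blocks for the write described by @iocb and @from,
     * converting inline data first where necessary. On -ENOSPC the
     * buffered path sets FI_NO_PREALLOC and proceeds instead of failing.
     */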
 932int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
 933{
 934        struct inode *inode = file_inode(iocb->ki_filp);
 935        struct f2fs_map_blocks map;
 936        int flag;
 937        int err = 0;
 938        bool direct_io = iocb->ki_flags & IOCB_DIRECT;
 939
 940        /* convert inline data for Direct I/O */
 941        if (direct_io) {
 942                err = f2fs_convert_inline_inode(inode);
 943                if (err)
 944                        return err;
 945        }
 946
 947        if (direct_io && allow_outplace_dio(inode, iocb, from))
 948                return 0;
 949
 950        if (is_inode_flag_set(inode, FI_NO_PREALLOC))
 951                return 0;
 952
 953        map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos);
 954        map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from));
 955        if (map.m_len > map.m_lblk)
 956                map.m_len -= map.m_lblk;
 957        else
 958                map.m_len = 0;
 959
 960        map.m_next_pgofs = NULL;
 961        map.m_next_extent = NULL;
 962        map.m_seg_type = NO_CHECK_TYPE;
 963        map.m_may_create = true;
 964
 965        if (direct_io) {
 966                map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint);
 967                flag = f2fs_force_buffered_io(inode, iocb, from) ?
 968                                        F2FS_GET_BLOCK_PRE_AIO :
 969                                        F2FS_GET_BLOCK_PRE_DIO;
 970                goto map_blocks;
 971        }
 972        if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) {
 973                err = f2fs_convert_inline_inode(inode);
 974                if (err)
 975                        return err;
 976        }
 977        if (f2fs_has_inline_data(inode))
 978                return err;
 979
 980        flag = F2FS_GET_BLOCK_PRE_AIO;
 981
 982map_blocks:
 983        err = f2fs_map_blocks(inode, &map, 1, flag);
 984        if (map.m_len > 0 && err == -ENOSPC) {
 985                if (!direct_io)
 986                        set_inode_flag(inode, FI_NO_PREALLOC);
 987                err = 0;
 988        }
 989        return err;
 990}
 991
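    /*
     * F2FS_GET_BLOCK_PRE_AIO is serialized against node changes with the
     * node_change rwsem; every other mapping flag takes the full
     * f2fs_lock_op()/f2fs_unlock_op() pair.
     */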
 992void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
 993{
 994        if (flag == F2FS_GET_BLOCK_PRE_AIO) {
 995                if (lock)
 996                        down_read(&sbi->node_change);
 997                else
 998                        up_read(&sbi->node_change);
 999        } else {
1000                if (lock)
1001                        f2fs_lock_op(sbi);
1002                else
1003                        f2fs_unlock_op(sbi);
1004        }
1005}
1006
1007/*
1008 * f2fs_map_blocks() now supports readahead/bmap/rw direct_IO with the
1009 * f2fs_map_blocks structure.
1010 * If the original data blocks are allocated, then give them to the blockdev.
1011 * Otherwise,
1012 *     a. preallocate requested block addresses
1013 *     b. do not use extent cache for better performance
1014 *     c. give the block addresses to blockdev
1015 */
1016int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
1017                                                int create, int flag)
1018{
1019        unsigned int maxblocks = map->m_len;
1020        struct dnode_of_data dn;
1021        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1022        int mode = map->m_may_create ? ALLOC_NODE : LOOKUP_NODE;
1023        pgoff_t pgofs, end_offset, end;
1024        int err = 0, ofs = 1;
1025        unsigned int ofs_in_node, last_ofs_in_node;
1026        blkcnt_t prealloc;
1027        struct extent_info ei = {0,0,0};
1028        block_t blkaddr;
1029        unsigned int start_pgofs;
1030
1031        if (!maxblocks)
1032                return 0;
1033
1034        map->m_len = 0;
1035        map->m_flags = 0;
1036
1037        /* it only supports block size == page size */
1038        pgofs = (pgoff_t)map->m_lblk;
1039        end = pgofs + maxblocks;
1040
1041        if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
1042                if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO &&
1043                                                        map->m_may_create)
1044                        goto next_dnode;
1045
1046                map->m_pblk = ei.blk + pgofs - ei.fofs;
1047                map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs);
1048                map->m_flags = F2FS_MAP_MAPPED;
1049                if (map->m_next_extent)
1050                        *map->m_next_extent = pgofs + map->m_len;
1051
1052                /* for hardware encryption, but also to avoid potential issues in the future */
1053                if (flag == F2FS_GET_BLOCK_DIO)
1054                        f2fs_wait_on_block_writeback_range(inode,
1055                                                map->m_pblk, map->m_len);
1056                goto out;
1057        }
1058
1059next_dnode:
1060        if (map->m_may_create)
1061                __do_map_lock(sbi, flag, true);
1062
1063        /* When reading holes, we need its node page */
1064        set_new_dnode(&dn, inode, NULL, NULL, 0);
1065        err = f2fs_get_dnode_of_data(&dn, pgofs, mode);
1066        if (err) {
1067                if (flag == F2FS_GET_BLOCK_BMAP)
1068                        map->m_pblk = 0;
1069                if (err == -ENOENT) {
1070                        err = 0;
1071                        if (map->m_next_pgofs)
1072                                *map->m_next_pgofs =
1073                                        f2fs_get_next_page_offset(&dn, pgofs);
1074                        if (map->m_next_extent)
1075                                *map->m_next_extent =
1076                                        f2fs_get_next_page_offset(&dn, pgofs);
1077                }
1078                goto unlock_out;
1079        }
1080
1081        start_pgofs = pgofs;
1082        prealloc = 0;
1083        last_ofs_in_node = ofs_in_node = dn.ofs_in_node;
1084        end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
1085
1086next_block:
1087        blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node);
1088
1089        if (__is_valid_data_blkaddr(blkaddr) &&
1090                !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) {
1091                err = -EFAULT;
1092                goto sync_out;
1093        }
1094
1095        if (is_valid_data_blkaddr(sbi, blkaddr)) {
1096                /* use out-of-place update for direct IO under LFS mode */
1097                if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO &&
1098                                                        map->m_may_create) {
1099                        err = __allocate_data_block(&dn, map->m_seg_type);
1100                        if (!err) {
1101                                blkaddr = dn.data_blkaddr;
1102                                set_inode_flag(inode, FI_APPEND_WRITE);
1103                        }
1104                }
1105        } else {
1106                if (create) {
1107                        if (unlikely(f2fs_cp_error(sbi))) {
1108                                err = -EIO;
1109                                goto sync_out;
1110                        }
1111                        if (flag == F2FS_GET_BLOCK_PRE_AIO) {
1112                                if (blkaddr == NULL_ADDR) {
1113                                        prealloc++;
1114                                        last_ofs_in_node = dn.ofs_in_node;
1115                                }
1116                        } else {
1117                                WARN_ON(flag != F2FS_GET_BLOCK_PRE_DIO &&
1118                                        flag != F2FS_GET_BLOCK_DIO);
1119                                err = __allocate_data_block(&dn,
1120                                                        map->m_seg_type);
1121                                if (!err)
1122                                        set_inode_flag(inode, FI_APPEND_WRITE);
1123                        }
1124                        if (err)
1125                                goto sync_out;
1126                        map->m_flags |= F2FS_MAP_NEW;
1127                        blkaddr = dn.data_blkaddr;
1128                } else {
1129                        if (flag == F2FS_GET_BLOCK_BMAP) {
1130                                map->m_pblk = 0;
1131                                goto sync_out;
1132                        }
1133                        if (flag == F2FS_GET_BLOCK_PRECACHE)
1134                                goto sync_out;
1135                        if (flag == F2FS_GET_BLOCK_FIEMAP &&
1136                                                blkaddr == NULL_ADDR) {
1137                                if (map->m_next_pgofs)
1138                                        *map->m_next_pgofs = pgofs + 1;
1139                                goto sync_out;
1140                        }
1141                        if (flag != F2FS_GET_BLOCK_FIEMAP) {
1142                                /* for defragment case */
1143                                if (map->m_next_pgofs)
1144                                        *map->m_next_pgofs = pgofs + 1;
1145                                goto sync_out;
1146                        }
1147                }
1148        }
1149
1150        if (flag == F2FS_GET_BLOCK_PRE_AIO)
1151                goto skip;
1152
1153        if (map->m_len == 0) {
1154                /* preallocated unwritten block should be mapped for fiemap. */
1155                if (blkaddr == NEW_ADDR)
1156                        map->m_flags |= F2FS_MAP_UNWRITTEN;
1157                map->m_flags |= F2FS_MAP_MAPPED;
1158
1159                map->m_pblk = blkaddr;
1160                map->m_len = 1;
1161        } else if ((map->m_pblk != NEW_ADDR &&
1162                        blkaddr == (map->m_pblk + ofs)) ||
1163                        (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
1164                        flag == F2FS_GET_BLOCK_PRE_DIO) {
1165                ofs++;
1166                map->m_len++;
1167        } else {
1168                goto sync_out;
1169        }
1170
1171skip:
1172        dn.ofs_in_node++;
1173        pgofs++;
1174
1175        /* preallocate blocks in batch for one dnode page */
1176        if (flag == F2FS_GET_BLOCK_PRE_AIO &&
1177                        (pgofs == end || dn.ofs_in_node == end_offset)) {
1178
1179                dn.ofs_in_node = ofs_in_node;
1180                err = f2fs_reserve_new_blocks(&dn, prealloc);
1181                if (err)
1182                        goto sync_out;
1183
1184                map->m_len += dn.ofs_in_node - ofs_in_node;
1185                if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {
1186                        err = -ENOSPC;
1187                        goto sync_out;
1188                }
1189                dn.ofs_in_node = end_offset;
1190        }
1191
1192        if (pgofs >= end)
1193                goto sync_out;
1194        else if (dn.ofs_in_node < end_offset)
1195                goto next_block;
1196
1197        if (flag == F2FS_GET_BLOCK_PRECACHE) {
1198                if (map->m_flags & F2FS_MAP_MAPPED) {
1199                        unsigned int ofs = start_pgofs - map->m_lblk;
1200
1201                        f2fs_update_extent_cache_range(&dn,
1202                                start_pgofs, map->m_pblk + ofs,
1203                                map->m_len - ofs);
1204                }
1205        }
1206
1207        f2fs_put_dnode(&dn);
1208
1209        if (map->m_may_create) {
1210                __do_map_lock(sbi, flag, false);
1211                f2fs_balance_fs(sbi, dn.node_changed);
1212        }
1213        goto next_dnode;
1214
1215sync_out:
1216
1217        /* for hardware encryption, but also to avoid potential issues in the future */
1218        if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED)
1219                f2fs_wait_on_block_writeback_range(inode,
1220                                                map->m_pblk, map->m_len);
1221
1222        if (flag == F2FS_GET_BLOCK_PRECACHE) {
1223                if (map->m_flags & F2FS_MAP_MAPPED) {
1224                        unsigned int ofs = start_pgofs - map->m_lblk;
1225
1226                        f2fs_update_extent_cache_range(&dn,
1227                                start_pgofs, map->m_pblk + ofs,
1228                                map->m_len - ofs);
1229                }
1230                if (map->m_next_extent)
1231                        *map->m_next_extent = pgofs + 1;
1232        }
1233        f2fs_put_dnode(&dn);
1234unlock_out:
1235        if (map->m_may_create) {
1236                __do_map_lock(sbi, flag, false);
1237                f2fs_balance_fs(sbi, dn.node_changed);
1238        }
1239out:
1240        trace_f2fs_map_blocks(inode, map, err);
1241        return err;
1242}
1243
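    /*
     * Check whether the byte range [pos, pos + len) is fully backed by
     * already-mapped blocks, i.e. whether writing it is a pure overwrite
     * that needs no new allocation.
     */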
1244bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len)
1245{
1246        struct f2fs_map_blocks map;
1247        block_t last_lblk;
1248        int err;
1249
1250        if (pos + len > i_size_read(inode))
1251                return false;
1252
1253        map.m_lblk = F2FS_BYTES_TO_BLK(pos);
1254        map.m_next_pgofs = NULL;
1255        map.m_next_extent = NULL;
1256        map.m_seg_type = NO_CHECK_TYPE;
1257        map.m_may_create = false;
1258        last_lblk = F2FS_BLK_ALIGN(pos + len);
1259
1260        while (map.m_lblk < last_lblk) {
1261                map.m_len = last_lblk - map.m_lblk;
1262                err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT);
1263                if (err || map.m_len == 0)
1264                        return false;
1265                map.m_lblk += map.m_len;
1266        }
1267        return true;
1268}
1269
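    /*
     * buffer_head based wrapper around f2fs_map_blocks() for the
     * get_block_t style callers below (bmap, fiemap and direct IO).
     */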
1270static int __get_data_block(struct inode *inode, sector_t iblock,
1271                        struct buffer_head *bh, int create, int flag,
1272                        pgoff_t *next_pgofs, int seg_type, bool may_write)
1273{
1274        struct f2fs_map_blocks map;
1275        int err;
1276
1277        map.m_lblk = iblock;
1278        map.m_len = bh->b_size >> inode->i_blkbits;
1279        map.m_next_pgofs = next_pgofs;
1280        map.m_next_extent = NULL;
1281        map.m_seg_type = seg_type;
1282        map.m_may_create = may_write;
1283
1284        err = f2fs_map_blocks(inode, &map, create, flag);
1285        if (!err) {
1286                map_bh(bh, inode->i_sb, map.m_pblk);
1287                bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
1288                bh->b_size = (u64)map.m_len << inode->i_blkbits;
1289        }
1290        return err;
1291}
1292
1293static int get_data_block(struct inode *inode, sector_t iblock,
1294                        struct buffer_head *bh_result, int create, int flag,
1295                        pgoff_t *next_pgofs)
1296{
1297        return __get_data_block(inode, iblock, bh_result, create,
1298                                                        flag, next_pgofs,
1299                                                        NO_CHECK_TYPE, create);
1300}
1301
1302static int get_data_block_dio_write(struct inode *inode, sector_t iblock,
1303                        struct buffer_head *bh_result, int create)
1304{
1305        return __get_data_block(inode, iblock, bh_result, create,
1306                                F2FS_GET_BLOCK_DIO, NULL,
1307                                f2fs_rw_hint_to_seg_type(inode->i_write_hint),
1308                                true);
1309}
1310
1311static int get_data_block_dio(struct inode *inode, sector_t iblock,
1312                        struct buffer_head *bh_result, int create)
1313{
1314        return __get_data_block(inode, iblock, bh_result, create,
1315                                F2FS_GET_BLOCK_DIO, NULL,
1316                                f2fs_rw_hint_to_seg_type(inode->i_write_hint),
1317                                false);
1318}
1319
1320static int get_data_block_bmap(struct inode *inode, sector_t iblock,
1321                        struct buffer_head *bh_result, int create)
1322{
1323        /* The block number must be less than the maximum number of file blocks */
1324        if (unlikely(iblock >= F2FS_I_SB(inode)->max_file_blocks))
1325                return -EFBIG;
1326
1327        return __get_data_block(inode, iblock, bh_result, create,
1328                                                F2FS_GET_BLOCK_BMAP, NULL,
1329                                                NO_CHECK_TYPE, create);
1330}
1331
1332static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
1333{
1334        return (offset >> inode->i_blkbits);
1335}
1336
1337static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
1338{
1339        return (blk << inode->i_blkbits);
1340}
1341
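    /*
     * Report the xattr area through fiemap: first the inline xattr space
     * inside the inode block (if any), then the separate xattr node block.
     */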
1342static int f2fs_xattr_fiemap(struct inode *inode,
1343                                struct fiemap_extent_info *fieinfo)
1344{
1345        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1346        struct page *page;
1347        struct node_info ni;
1348        __u64 phys = 0, len;
1349        __u32 flags;
1350        nid_t xnid = F2FS_I(inode)->i_xattr_nid;
1351        int err = 0;
1352
1353        if (f2fs_has_inline_xattr(inode)) {
1354                int offset;
1355
1356                page = f2fs_grab_cache_page(NODE_MAPPING(sbi),
1357                                                inode->i_ino, false);
1358                if (!page)
1359                        return -ENOMEM;
1360
1361                err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
1362                if (err) {
1363                        f2fs_put_page(page, 1);
1364                        return err;
1365                }
1366
1367                phys = (__u64)blk_to_logical(inode, ni.blk_addr);
1368                offset = offsetof(struct f2fs_inode, i_addr) +
1369                                        sizeof(__le32) * (DEF_ADDRS_PER_INODE -
1370                                        get_inline_xattr_addrs(inode));
1371
1372                phys += offset;
1373                len = inline_xattr_size(inode);
1374
1375                f2fs_put_page(page, 1);
1376
1377                flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED;
1378
1379                if (!xnid)
1380                        flags |= FIEMAP_EXTENT_LAST;
1381
1382                err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
1383                if (err || err == 1)
1384                        return err;
1385        }
1386
1387        if (xnid) {
1388                page = f2fs_grab_cache_page(NODE_MAPPING(sbi), xnid, false);
1389                if (!page)
1390                        return -ENOMEM;
1391
1392                err = f2fs_get_node_info(sbi, xnid, &ni);
1393                if (err) {
1394                        f2fs_put_page(page, 1);
1395                        return err;
1396                }
1397
1398                phys = (__u64)blk_to_logical(inode, ni.blk_addr);
1399                len = inode->i_sb->s_blocksize;
1400
1401                f2fs_put_page(page, 1);
1402
1403                flags = FIEMAP_EXTENT_LAST;
1404        }
1405
1406        if (phys)
1407                err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
1408
1409        return (err < 0 ? err : 0);
1410}
1411
1412int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
1413                u64 start, u64 len)
1414{
1415        struct buffer_head map_bh;
1416        sector_t start_blk, last_blk;
1417        pgoff_t next_pgofs;
1418        u64 logical = 0, phys = 0, size = 0;
1419        u32 flags = 0;
1420        int ret = 0;
1421
1422        if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
1423                ret = f2fs_precache_extents(inode);
1424                if (ret)
1425                        return ret;
1426        }
1427
1428        ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR);
1429        if (ret)
1430                return ret;
1431
1432        inode_lock(inode);
1433
1434        if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
1435                ret = f2fs_xattr_fiemap(inode, fieinfo);
1436                goto out;
1437        }
1438
1439        if (f2fs_has_inline_data(inode)) {
1440                ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len);
1441                if (ret != -EAGAIN)
1442                        goto out;
1443        }
1444
1445        if (logical_to_blk(inode, len) == 0)
1446                len = blk_to_logical(inode, 1);
1447
1448        start_blk = logical_to_blk(inode, start);
1449        last_blk = logical_to_blk(inode, start + len - 1);
1450
1451next:
1452        memset(&map_bh, 0, sizeof(struct buffer_head));
1453        map_bh.b_size = len;
1454
1455        ret = get_data_block(inode, start_blk, &map_bh, 0,
1456                                        F2FS_GET_BLOCK_FIEMAP, &next_pgofs);
1457        if (ret)
1458                goto out;
1459
1460        /* HOLE */
1461        if (!buffer_mapped(&map_bh)) {
1462                start_blk = next_pgofs;
1463
1464                if (blk_to_logical(inode, start_blk) < blk_to_logical(inode,
1465                                        F2FS_I_SB(inode)->max_file_blocks))
1466                        goto prep_next;
1467
1468                flags |= FIEMAP_EXTENT_LAST;
1469        }
1470
1471        if (size) {
1472                if (IS_ENCRYPTED(inode))
1473                        flags |= FIEMAP_EXTENT_DATA_ENCRYPTED;
1474
1475                ret = fiemap_fill_next_extent(fieinfo, logical,
1476                                phys, size, flags);
1477        }
1478
1479        if (start_blk > last_blk || ret)
1480                goto out;
1481
1482        logical = blk_to_logical(inode, start_blk);
1483        phys = blk_to_logical(inode, map_bh.b_blocknr);
1484        size = map_bh.b_size;
1485        flags = 0;
1486        if (buffer_unwritten(&map_bh))
1487                flags = FIEMAP_EXTENT_UNWRITTEN;
1488
1489        start_blk += logical_to_blk(inode, size);
1490
1491prep_next:
1492        cond_resched();
1493        if (fatal_signal_pending(current))
1494                ret = -EINTR;
1495        else
1496                goto next;
1497out:
1498        if (ret == 1)
1499                ret = 0;
1500
1501        inode_unlock(inode);
1502        return ret;
1503}
1504
1505/*
1506 * This function was originally taken from fs/mpage.c, and customized for f2fs.
1508 * The major change is that block_size == page_size in f2fs by default.
1508 *
1509 * Note that the aops->readpages() function is ONLY used for read-ahead. If
1510 * this function ever deviates from doing just read-ahead, it should either
1511 * use ->readpage() or do the necessary surgery to decouple ->readpages()
1512 * from read-ahead.
1513 */
1514static int f2fs_mpage_readpages(struct address_space *mapping,
1515                        struct list_head *pages, struct page *page,
1516                        unsigned nr_pages, bool is_readahead)
1517{
1518        struct bio *bio = NULL;
1519        sector_t last_block_in_bio = 0;
1520        struct inode *inode = mapping->host;
1521        const unsigned blkbits = inode->i_blkbits;
1522        const unsigned blocksize = 1 << blkbits;
1523        sector_t block_in_file;
1524        sector_t last_block;
1525        sector_t last_block_in_file;
1526        sector_t block_nr;
1527        struct f2fs_map_blocks map;
1528
1529        map.m_pblk = 0;
1530        map.m_lblk = 0;
1531        map.m_len = 0;
1532        map.m_flags = 0;
1533        map.m_next_pgofs = NULL;
1534        map.m_next_extent = NULL;
1535        map.m_seg_type = NO_CHECK_TYPE;
1536        map.m_may_create = false;
1537
1538        for (; nr_pages; nr_pages--) {
1539                if (pages) {
1540                        page = list_last_entry(pages, struct page, lru);
1541
1542                        prefetchw(&page->flags);
1543                        list_del(&page->lru);
1544                        if (add_to_page_cache_lru(page, mapping,
1545                                                  page->index,
1546                                                  readahead_gfp_mask(mapping)))
1547                                goto next_page;
1548                }
1549
1550                block_in_file = (sector_t)page->index;
1551                last_block = block_in_file + nr_pages;
1552                last_block_in_file = (i_size_read(inode) + blocksize - 1) >>
1553                                                                blkbits;
1554                if (last_block > last_block_in_file)
1555                        last_block = last_block_in_file;
1556
1557                /* just zero out the page which is beyond EOF */
1558                if (block_in_file >= last_block)
1559                        goto zero_out;
1560                /*
1561                 * Map blocks using the previous result first.
1562                 */
1563                if ((map.m_flags & F2FS_MAP_MAPPED) &&
1564                                block_in_file > map.m_lblk &&
1565                                block_in_file < (map.m_lblk + map.m_len))
1566                        goto got_it;
1567
1568                /*
1569                 * Then do more f2fs_map_blocks() calls until we are
1570                 * done with this page.
1571                 */
1572                map.m_lblk = block_in_file;
1573                map.m_len = last_block - block_in_file;
1574
1575                if (f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT))
1576                        goto set_error_page;
1577got_it:
1578                if ((map.m_flags & F2FS_MAP_MAPPED)) {
1579                        block_nr = map.m_pblk + block_in_file - map.m_lblk;
1580                        SetPageMappedToDisk(page);
1581
1582                        if (!PageUptodate(page) && !cleancache_get_page(page)) {
1583                                SetPageUptodate(page);
1584                                goto confused;
1585                        }
1586
1587                        if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
1588                                                                DATA_GENERIC))
1589                                goto set_error_page;
1590                } else {
1591zero_out:
1592                        zero_user_segment(page, 0, PAGE_SIZE);
1593                        if (!PageUptodate(page))
1594                                SetPageUptodate(page);
1595                        unlock_page(page);
1596                        goto next_page;
1597                }
1598
1599                /*
1600                 * This page will go to BIO.  Do we need to send this
1601                 * BIO off first?
1602                 */
1603                if (bio && (last_block_in_bio != block_nr - 1 ||
1604                        !__same_bdev(F2FS_I_SB(inode), block_nr, bio))) {
1605submit_and_realloc:
1606                        __submit_bio(F2FS_I_SB(inode), bio, DATA);
1607                        bio = NULL;
1608                }
1609                if (bio == NULL) {
1610                        bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
1611                                        is_readahead ? REQ_RAHEAD : 0);
1612                        if (IS_ERR(bio)) {
1613                                bio = NULL;
1614                                goto set_error_page;
1615                        }
1616                }
1617
1618                /*
1619                 * If the page is under writeback, we need to wait for
1620                 * its completion to see the correct decrypted data.
1621                 */
1622                f2fs_wait_on_block_writeback(inode, block_nr);
1623
1624                if (bio_add_page(bio, page, blocksize, 0) < blocksize)
1625                        goto submit_and_realloc;
1626
1627                inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
1628                ClearPageError(page);
1629                last_block_in_bio = block_nr;
1630                goto next_page;
1631set_error_page:
1632                SetPageError(page);
1633                zero_user_segment(page, 0, PAGE_SIZE);
1634                unlock_page(page);
1635                goto next_page;
1636confused:
1637                if (bio) {
1638                        __submit_bio(F2FS_I_SB(inode), bio, DATA);
1639                        bio = NULL;
1640                }
1641                unlock_page(page);
1642next_page:
1643                if (pages)
1644                        put_page(page);
1645        }
1646        BUG_ON(pages && !list_empty(pages));
1647        if (bio)
1648                __submit_bio(F2FS_I_SB(inode), bio, DATA);
1649        return 0;
1650}
1651
1652static int f2fs_read_data_page(struct file *file, struct page *page)
1653{
1654        struct inode *inode = page->mapping->host;
1655        int ret = -EAGAIN;
1656
1657        trace_f2fs_readpage(page, DATA);
1658
1659        /* If the file has inline data, try to read it directly */
1660        if (f2fs_has_inline_data(inode))
1661                ret = f2fs_read_inline_data(inode, page);
1662        if (ret == -EAGAIN)
1663                ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1, false);
1664        return ret;
1665}
1666
1667static int f2fs_read_data_pages(struct file *file,
1668                        struct address_space *mapping,
1669                        struct list_head *pages, unsigned nr_pages)
1670{
1671        struct inode *inode = mapping->host;
1672        struct page *page = list_last_entry(pages, struct page, lru);
1673
1674        trace_f2fs_readpages(inode, page, nr_pages);
1675
1676        /* If the file has inline data, skip readpages */
1677        if (f2fs_has_inline_data(inode))
1678                return 0;
1679
1680        return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages, true);
1681}
1682
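    /*
     * Encrypt the data page into fio->encrypted_page before write-out.
     * On ENOMEM, flush merged writes, back off for a while and retry with
     * __GFP_NOFAIL.  If a copy of the old block is cached in META_MAPPING
     * (used during GC), refresh it with the new ciphertext.
     */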
1683static int encrypt_one_page(struct f2fs_io_info *fio)
1684{
1685        struct inode *inode = fio->page->mapping->host;
1686        struct page *mpage;
1687        gfp_t gfp_flags = GFP_NOFS;
1688
1689        if (!f2fs_encrypted_file(inode))
1690                return 0;
1691
1692        /* wait for GCed page writeback via META_MAPPING */
1693        f2fs_wait_on_block_writeback(inode, fio->old_blkaddr);
1694
1695retry_encrypt:
1696        fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page,
1697                        PAGE_SIZE, 0, fio->page->index, gfp_flags);
1698        if (IS_ERR(fio->encrypted_page)) {
1699                /* flush pending IOs and wait for a while in the ENOMEM case */
1700                if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
1701                        f2fs_flush_merged_writes(fio->sbi);
1702                        congestion_wait(BLK_RW_ASYNC, HZ/50);
1703                        gfp_flags |= __GFP_NOFAIL;
1704                        goto retry_encrypt;
1705                }
1706                return PTR_ERR(fio->encrypted_page);
1707        }
1708
1709        mpage = find_lock_page(META_MAPPING(fio->sbi), fio->old_blkaddr);
1710        if (mpage) {
1711                if (PageUptodate(mpage))
1712                        memcpy(page_address(mpage),
1713                                page_address(fio->encrypted_page), PAGE_SIZE);
1714                f2fs_put_page(mpage, 1);
1715        }
1716        return 0;
1717}
1718
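    /*
     * Decide whether an in-place update (IPU) is allowed by the policy
     * bits in SM_I(sbi)->ipu_policy (F2FS_IPU_FORCE, _SSR, _UTIL,
     * _SSR_UTIL, _ASYNC, _FSYNC), normally configured through the
     * ipu_policy sysfs entry.  IPU is also used while checkpointing is
     * disabled and the old block is not protected by a checkpoint.
     */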
1719static inline bool check_inplace_update_policy(struct inode *inode,
1720                                struct f2fs_io_info *fio)
1721{
1722        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1723        unsigned int policy = SM_I(sbi)->ipu_policy;
1724
1725        if (policy & (0x1 << F2FS_IPU_FORCE))
1726                return true;
1727        if (policy & (0x1 << F2FS_IPU_SSR) && f2fs_need_SSR(sbi))
1728                return true;
1729        if (policy & (0x1 << F2FS_IPU_UTIL) &&
1730                        utilization(sbi) > SM_I(sbi)->min_ipu_util)
1731                return true;
1732        if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && f2fs_need_SSR(sbi) &&
1733                        utilization(sbi) > SM_I(sbi)->min_ipu_util)
1734                return true;
1735
1736        /*
1737         * IPU for rewriting async pages
1738         */
1739        if (policy & (0x1 << F2FS_IPU_ASYNC) &&
1740                        fio && fio->op == REQ_OP_WRITE &&
1741                        !(fio->op_flags & REQ_SYNC) &&
1742                        !IS_ENCRYPTED(inode))
1743                return true;
1744
1745        /* this is only set during fdatasync */
1746        if (policy & (0x1 << F2FS_IPU_FSYNC) &&
1747                        is_inode_flag_set(inode, FI_NEED_IPU))
1748                return true;
1749
1750        if (unlikely(fio && is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
1751                        !f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
1752                return true;
1753
1754        return false;
1755}
1756
1757bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio)
1758{
1759        if (f2fs_is_pinned_file(inode))
1760                return true;
1761
1762        /* if this is a cold file, we should overwrite it to avoid fragmentation */
1763        if (file_is_cold(inode))
1764                return true;
1765
1766        return check_inplace_update_policy(inode, fio);
1767}
1768
1769bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
1770{
1771        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1772
1773        if (test_opt(sbi, LFS))
1774                return true;
1775        if (S_ISDIR(inode->i_mode))
1776                return true;
1777        if (IS_NOQUOTA(inode))
1778                return true;
1779        if (f2fs_is_atomic_file(inode))
1780                return true;
1781        if (fio) {
1782                if (is_cold_data(fio->page))
1783                        return true;
1784                if (IS_ATOMIC_WRITTEN_PAGE(fio->page))
1785                        return true;
1786                if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
1787                        f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
1788                        return true;
1789        }
1790        return false;
1791}
1792
1793static inline bool need_inplace_update(struct f2fs_io_info *fio)
1794{
1795        struct inode *inode = fio->page->mapping->host;
1796
1797        if (f2fs_should_update_outplace(inode, fio))
1798                return false;
1799
1800        return f2fs_should_update_inplace(inode, fio);
1801}
1802
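    /*
     * Write one dirty data page either in place (IPU) over its existing
     * block address or out of place (OPU) through
     * f2fs_outplace_write_data().  A hit in the extent cache lets an IPU
     * proceed without looking up the dnode.
     */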
1803int f2fs_do_write_data_page(struct f2fs_io_info *fio)
1804{
1805        struct page *page = fio->page;
1806        struct inode *inode = page->mapping->host;
1807        struct dnode_of_data dn;
1808        struct extent_info ei = {0,0,0};
1809        struct node_info ni;
1810        bool ipu_force = false;
1811        int err = 0;
1812
1813        set_new_dnode(&dn, inode, NULL, NULL, 0);
1814        if (need_inplace_update(fio) &&
1815                        f2fs_lookup_extent_cache(inode, page->index, &ei)) {
1816                fio->old_blkaddr = ei.blk + page->index - ei.fofs;
1817
1818                if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
1819                                                        DATA_GENERIC))
1820                        return -EFAULT;
1821
1822                ipu_force = true;
1823                fio->need_lock = LOCK_DONE;
1824                goto got_it;
1825        }
1826
1827        /* Avoid a deadlock between page->lock and f2fs_lock_op */
1828        if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi))
1829                return -EAGAIN;
1830
1831        err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
1832        if (err)
1833                goto out;
1834
1835        fio->old_blkaddr = dn.data_blkaddr;
1836
1837        /* This page is already truncated */
1838        if (fio->old_blkaddr == NULL_ADDR) {
1839                ClearPageUptodate(page);
1840                clear_cold_data(page);
1841                goto out_writepage;
1842        }
1843got_it:
1844        if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
1845                !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
1846                                                        DATA_GENERIC)) {
1847                err = -EFAULT;
1848                goto out_writepage;
1849        }
1850        /*
1851         * If the current allocation needs SSR,
1852         * in-place writes are preferred for updated data.
1853         */
1854        if (ipu_force || (is_valid_data_blkaddr(fio->sbi, fio->old_blkaddr) &&
1855                                        need_inplace_update(fio))) {
1856                err = encrypt_one_page(fio);
1857                if (err)
1858                        goto out_writepage;
1859
1860                set_page_writeback(page);
1861                ClearPageError(page);
1862                f2fs_put_dnode(&dn);
1863                if (fio->need_lock == LOCK_REQ)
1864                        f2fs_unlock_op(fio->sbi);
1865                err = f2fs_inplace_write_data(fio);
1866                if (err) {
1867                        if (f2fs_encrypted_file(inode))
1868                                fscrypt_pullback_bio_page(&fio->encrypted_page,
1869                                                                        true);
1870                        if (PageWriteback(page))
1871                                end_page_writeback(page);
1872                }
1873                trace_f2fs_do_write_data_page(fio->page, IPU);
1874                set_inode_flag(inode, FI_UPDATE_WRITE);
1875                return err;
1876        }
1877
1878        if (fio->need_lock == LOCK_RETRY) {
1879                if (!f2fs_trylock_op(fio->sbi)) {
1880                        err = -EAGAIN;
1881                        goto out_writepage;
1882                }
1883                fio->need_lock = LOCK_REQ;
1884        }
1885
1886        err = f2fs_get_node_info(fio->sbi, dn.nid, &ni);
1887        if (err)
1888                goto out_writepage;
1889
1890        fio->version = ni.version;
1891
1892        err = encrypt_one_page(fio);
1893        if (err)
1894                goto out_writepage;
1895
1896        set_page_writeback(page);
1897        ClearPageError(page);
1898
1899        /* LFS mode write path */
1900        f2fs_outplace_write_data(&dn, fio);
1901        trace_f2fs_do_write_data_page(page, OPU);
1902        set_inode_flag(inode, FI_APPEND_WRITE);
1903        if (page->index == 0)
1904                set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
1905out_writepage:
1906        f2fs_put_dnode(&dn);
1907out:
1908        if (fio->need_lock == LOCK_REQ)
1909                f2fs_unlock_op(fio->sbi);
1910        return err;
1911}
1912
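    /*
     * Write back a single data page for ->writepage()/->writepages().
     * Handles pages beyond EOF, inline data and dentry pages controlled
     * by checkpoint, and redirties the page when writeback cannot
     * proceed (e.g. during power-on recovery).
     */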
1913static int __write_data_page(struct page *page, bool *submitted,
1914                                struct writeback_control *wbc,
1915                                enum iostat_type io_type)
1916{
1917        struct inode *inode = page->mapping->host;
1918        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1919        loff_t i_size = i_size_read(inode);
1920        const pgoff_t end_index = ((unsigned long long) i_size)
1921                                                        >> PAGE_SHIFT;
1922        loff_t psize = (page->index + 1) << PAGE_SHIFT;
1923        unsigned offset = 0;
1924        bool need_balance_fs = false;
1925        int err = 0;
1926        struct f2fs_io_info fio = {
1927                .sbi = sbi,
1928                .ino = inode->i_ino,
1929                .type = DATA,
1930                .op = REQ_OP_WRITE,
1931                .op_flags = wbc_to_write_flags(wbc),
1932                .old_blkaddr = NULL_ADDR,
1933                .page = page,
1934                .encrypted_page = NULL,
1935                .submitted = false,
1936                .need_lock = LOCK_RETRY,
1937                .io_type = io_type,
1938                .io_wbc = wbc,
1939        };
1940
1941        trace_f2fs_writepage(page, DATA);
1942
1943        /* we should bypass data pages to let the kworker jobs proceed */
1944        if (unlikely(f2fs_cp_error(sbi))) {
1945                mapping_set_error(page->mapping, -EIO);
1946                /*
1947                 * don't drop any dirty dentry pages, in order to keep the
1948                 * latest directory structure.
1949                 */
1950                if (S_ISDIR(inode->i_mode))
1951                        goto redirty_out;
1952                goto out;
1953        }
1954
1955        if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
1956                goto redirty_out;
1957
1958        if (page->index < end_index)
1959                goto write;
1960
1961        /*
1962         * If the offset is beyond the file size,
1963         * this page does not have to be written to disk.
1964         */
1965        offset = i_size & (PAGE_SIZE - 1);
1966        if ((page->index >= end_index + 1) || !offset)
1967                goto out;
1968
1969        zero_user_segment(page, offset, PAGE_SIZE);
1970write:
1971        if (f2fs_is_drop_cache(inode))
1972                goto out;
1973        /* we should not write the 0'th page, which holds the journal header */
1974        if (f2fs_is_volatile_file(inode) && (!page->index ||
1975                        (!wbc->for_reclaim &&
1976                        f2fs_available_free_memory(sbi, BASE_CHECK))))
1977                goto redirty_out;
1978
1979        /* Dentry blocks are controlled by checkpoint */
1980        if (S_ISDIR(inode->i_mode)) {
1981                fio.need_lock = LOCK_DONE;
1982                err = f2fs_do_write_data_page(&fio);
1983                goto done;
1984        }
1985
1986        if (!wbc->for_reclaim)
1987                need_balance_fs = true;
1988        else if (has_not_enough_free_secs(sbi, 0, 0))
1989                goto redirty_out;
1990        else
1991                set_inode_flag(inode, FI_HOT_DATA);
1992
1993        err = -EAGAIN;
1994        if (f2fs_has_inline_data(inode)) {
1995                err = f2fs_write_inline_data(inode, page);
1996                if (!err)
1997                        goto out;
1998        }
1999
2000        if (err == -EAGAIN) {
2001                err = f2fs_do_write_data_page(&fio);
2002                if (err == -EAGAIN) {
2003                        fio.need_lock = LOCK_REQ;
2004                        err = f2fs_do_write_data_page(&fio);
2005                }
2006        }
2007
2008        if (err) {
2009                file_set_keep_isize(inode);
2010        } else {
2011                down_write(&F2FS_I(inode)->i_sem);
2012                if (F2FS_I(inode)->last_disk_size < psize)
2013                        F2FS_I(inode)->last_disk_size = psize;
2014                up_write(&F2FS_I(inode)->i_sem);
2015        }
2016
2017done:
2018        if (err && err != -ENOENT)
2019                goto redirty_out;
2020
2021out:
2022        inode_dec_dirty_pages(inode);
2023        if (err) {
2024                ClearPageUptodate(page);
2025                clear_cold_data(page);
2026        }
2027
2028        if (wbc->for_reclaim) {
2029                f2fs_submit_merged_write_cond(sbi, NULL, page, 0, DATA);
2030                clear_inode_flag(inode, FI_HOT_DATA);
2031                f2fs_remove_dirty_inode(inode);
2032                submitted = NULL;
2033        }
2034
2035        unlock_page(page);
2036        if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode))
2037                f2fs_balance_fs(sbi, need_balance_fs);
2038
2039        if (unlikely(f2fs_cp_error(sbi))) {
2040                f2fs_submit_merged_write(sbi, DATA);
2041                submitted = NULL;
2042        }
2043
2044        if (submitted)
2045                *submitted = fio.submitted;
2046
2047        return 0;
2048
2049redirty_out:
2050        redirty_page_for_writepage(wbc, page);
2051        /*
2052         * pageout() in MM translates EAGAIN, so it calls handle_write_error()
2053         * -> mapping_set_error() -> set_bit(AS_EIO, ...).
2054         * file_write_and_wait_range() will then see the EIO error, which is
2055         * critical for fsync() to report the atomic_write failure to the user.
2056         */
2057        if (!err || wbc->for_reclaim)
2058                return AOP_WRITEPAGE_ACTIVATE;
2059        unlock_page(page);
2060        return err;
2061}
2062
2063static int f2fs_write_data_page(struct page *page,
2064                                        struct writeback_control *wbc)
2065{
2066        return __write_data_page(page, NULL, wbc, FS_DATA_IO);
2067}
2068
2069/*
2070 * This function was copied from write_cache_pages in mm/page-writeback.c.
2071 * The major change is that cold data pages are written in a separate step
2072 * from warm/hot data pages.
2073 */
2074static int f2fs_write_cache_pages(struct address_space *mapping,
2075                                        struct writeback_control *wbc,
2076                                        enum iostat_type io_type)
2077{
2078        int ret = 0;
2079        int done = 0;
2080        struct pagevec pvec;
2081        struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
2082        int nr_pages;
2083        pgoff_t uninitialized_var(writeback_index);
2084        pgoff_t index;
2085        pgoff_t end;            /* Inclusive */
2086        pgoff_t done_index;
2087        int cycled;
2088        int range_whole = 0;
2089        xa_mark_t tag;
2090        int nwritten = 0;
2091
2092        pagevec_init(&pvec);
2093
2094        if (get_dirty_pages(mapping->host) <=
2095                                SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
2096                set_inode_flag(mapping->host, FI_HOT_DATA);
2097        else
2098                clear_inode_flag(mapping->host, FI_HOT_DATA);
2099
2100        if (wbc->range_cyclic) {
2101                writeback_index = mapping->writeback_index; /* prev offset */
2102                index = writeback_index;
2103                if (index == 0)
2104                        cycled = 1;
2105                else
2106                        cycled = 0;
2107                end = -1;
2108        } else {
2109                index = wbc->range_start >> PAGE_SHIFT;
2110                end = wbc->range_end >> PAGE_SHIFT;
2111                if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2112                        range_whole = 1;
2113                cycled = 1; /* ignore range_cyclic tests */
2114        }
2115        if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
2116                tag = PAGECACHE_TAG_TOWRITE;
2117        else
2118                tag = PAGECACHE_TAG_DIRTY;
2119retry:
2120        if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
2121                tag_pages_for_writeback(mapping, index, end);
2122        done_index = index;
2123        while (!done && (index <= end)) {
2124                int i;
2125
2126                nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
2127                                tag);
2128                if (nr_pages == 0)
2129                        break;
2130
2131                for (i = 0; i < nr_pages; i++) {
2132                        struct page *page = pvec.pages[i];
2133                        bool submitted = false;
2134
2135                        /* give priority to WB_SYNC threads */
2136                        if (atomic_read(&sbi->wb_sync_req[DATA]) &&
2137                                        wbc->sync_mode == WB_SYNC_NONE) {
2138                                done = 1;
2139                                break;
2140                        }
2141
2142                        done_index = page->index;
2143retry_write:
2144                        lock_page(page);
2145
2146                        if (unlikely(page->mapping != mapping)) {
2147continue_unlock:
2148                                unlock_page(page);
2149                                continue;
2150                        }
2151
2152                        if (!PageDirty(page)) {
2153                                /* someone wrote it for us */
2154                                goto continue_unlock;
2155                        }
2156
2157                        if (PageWriteback(page)) {
2158                                if (wbc->sync_mode != WB_SYNC_NONE)
2159                                        f2fs_wait_on_page_writeback(page,
2160                                                        DATA, true, true);
2161                                else
2162                                        goto continue_unlock;
2163                        }
2164
2165                        if (!clear_page_dirty_for_io(page))
2166                                goto continue_unlock;
2167
2168                        ret = __write_data_page(page, &submitted, wbc, io_type);
2169                        if (unlikely(ret)) {
2170                                /*
2171                                 * keep nr_to_write, since vfs uses this to
2172                                 * get # of written pages.
2173                                 */
2174                                if (ret == AOP_WRITEPAGE_ACTIVATE) {
2175                                        unlock_page(page);
2176                                        ret = 0;
2177                                        continue;
2178                                } else if (ret == -EAGAIN) {
2179                                        ret = 0;
2180                                        if (wbc->sync_mode == WB_SYNC_ALL) {
2181                                                cond_resched();
2182                                                congestion_wait(BLK_RW_ASYNC,
2183                                                                        HZ/50);
2184                                                goto retry_write;
2185                                        }
2186                                        continue;
2187                                }
2188                                done_index = page->index + 1;
2189                                done = 1;
2190                                break;
2191                        } else if (submitted) {
2192                                nwritten++;
2193                        }
2194
2195                        if (--wbc->nr_to_write <= 0 &&
2196                                        wbc->sync_mode == WB_SYNC_NONE) {
2197                                done = 1;
2198                                break;
2199                        }
2200                }
2201                pagevec_release(&pvec);
2202                cond_resched();
2203        }
2204
2205        if (!cycled && !done) {
2206                cycled = 1;
2207                index = 0;
2208                end = writeback_index - 1;
2209                goto retry;
2210        }
2211        if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2212                mapping->writeback_index = done_index;
2213
2214        if (nwritten)
2215                f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host,
2216                                                                NULL, 0, DATA);
2217
2218        return ret;
2219}
2220
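    /*
     * Decide whether ->writepages() for this inode should be serialized
     * under sbi->writepages, so that block allocation for large or
     * background writeback is not interleaved with other writers.
     */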
2221static inline bool __should_serialize_io(struct inode *inode,
2222                                        struct writeback_control *wbc)
2223{
2224        if (!S_ISREG(inode->i_mode))
2225                return false;
2226        if (IS_NOQUOTA(inode))
2227                return false;
2228        if (wbc->sync_mode != WB_SYNC_ALL)
2229                return true;
2230        if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
2231                return true;
2232        return false;
2233}
2234
2235static int __f2fs_write_data_pages(struct address_space *mapping,
2236                                                struct writeback_control *wbc,
2237                                                enum iostat_type io_type)
2238{
2239        struct inode *inode = mapping->host;
2240        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2241        struct blk_plug plug;
2242        int ret;
2243        bool locked = false;
2244
2245        /* deal with chardevs and other special files */
2246        if (!mapping->a_ops->writepage)
2247                return 0;
2248
2249        /* skip writing if there is no dirty page in this inode */
2250        if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE)
2251                return 0;
2252
2253        /* during POR, we don't need to trigger writepage at all. */
2254        if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
2255                goto skip_write;
2256
2257        if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) &&
2258                        wbc->sync_mode == WB_SYNC_NONE &&
2259                        get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
2260                        f2fs_available_free_memory(sbi, DIRTY_DENTS))
2261                goto skip_write;
2262
2263        /* skip writing during file defragmentation */
2264        if (is_inode_flag_set(inode, FI_DO_DEFRAG))
2265                goto skip_write;
2266
2267        trace_f2fs_writepages(mapping->host, wbc, DATA);
2268
2269        /* to avoid splitting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
2270        if (wbc->sync_mode == WB_SYNC_ALL)
2271                atomic_inc(&sbi->wb_sync_req[DATA]);
2272        else if (atomic_read(&sbi->wb_sync_req[DATA]))
2273                goto skip_write;
2274
2275        if (__should_serialize_io(inode, wbc)) {
2276                mutex_lock(&sbi->writepages);
2277                locked = true;
2278        }
2279
2280        blk_start_plug(&plug);
2281        ret = f2fs_write_cache_pages(mapping, wbc, io_type);
2282        blk_finish_plug(&plug);
2283
2284        if (locked)
2285                mutex_unlock(&sbi->writepages);
2286
2287        if (wbc->sync_mode == WB_SYNC_ALL)
2288                atomic_dec(&sbi->wb_sync_req[DATA]);
2289        /*
2290         * if some pages were truncated, we cannot rely on mapping->host
2291         * to detect pending bios.
2292         */
2293
2294        f2fs_remove_dirty_inode(inode);
2295        return ret;
2296
2297skip_write:
2298        wbc->pages_skipped += get_dirty_pages(inode);
2299        trace_f2fs_writepages(mapping->host, wbc, DATA);
2300        return 0;
2301}
2302
2303static int f2fs_write_data_pages(struct address_space *mapping,
2304                            struct writeback_control *wbc)
2305{
2306        struct inode *inode = mapping->host;
2307
2308        return __f2fs_write_data_pages(mapping, wbc,
2309                        F2FS_I(inode)->cp_task == current ?
2310                        FS_CP_DATA_IO : FS_DATA_IO);
2311}
2312
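    /*
     * Undo a failed or short write: drop the page cache and truncate any
     * blocks that were preallocated beyond the current i_size.
     */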
2313static void f2fs_write_failed(struct address_space *mapping, loff_t to)
2314{
2315        struct inode *inode = mapping->host;
2316        loff_t i_size = i_size_read(inode);
2317
2318        if (to > i_size) {
2319                down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
2320                down_write(&F2FS_I(inode)->i_mmap_sem);
2321
2322                truncate_pagecache(inode, i_size);
2323                if (!IS_NOQUOTA(inode))
2324                        f2fs_truncate_blocks(inode, i_size, true);
2325
2326                up_write(&F2FS_I(inode)->i_mmap_sem);
2327                up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
2328        }
2329}
2330
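    /*
     * Look up or reserve the block backing @page for write_begin(): read
     * or convert inline data when needed, try the extent cache, and fall
     * back to f2fs_get_block() under the proper map lock.
     */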
2331static int prepare_write_begin(struct f2fs_sb_info *sbi,
2332                        struct page *page, loff_t pos, unsigned len,
2333                        block_t *blk_addr, bool *node_changed)
2334{
2335        struct inode *inode = page->mapping->host;
2336        pgoff_t index = page->index;
2337        struct dnode_of_data dn;
2338        struct page *ipage;
2339        bool locked = false;
2340        struct extent_info ei = {0,0,0};
2341        int err = 0;
2342        int flag;
2343
2344        /*
2345         * we already allocated all the blocks, so we don't need to get
2346         * the block addresses when there is no need to fill the page.
2347         */
2348        if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE &&
2349                        !is_inode_flag_set(inode, FI_NO_PREALLOC))
2350                return 0;
2351
2352        /* f2fs_lock_op avoids race between write CP and convert_inline_page */
2353        if (f2fs_has_inline_data(inode) && pos + len > MAX_INLINE_DATA(inode))
2354                flag = F2FS_GET_BLOCK_DEFAULT;
2355        else
2356                flag = F2FS_GET_BLOCK_PRE_AIO;
2357
2358        if (f2fs_has_inline_data(inode) ||
2359                        (pos & PAGE_MASK) >= i_size_read(inode)) {
2360                __do_map_lock(sbi, flag, true);
2361                locked = true;
2362        }
2363restart:
2364        /* check inline_data */
2365        ipage = f2fs_get_node_page(sbi, inode->i_ino);
2366        if (IS_ERR(ipage)) {
2367                err = PTR_ERR(ipage);
2368                goto unlock_out;
2369        }
2370
2371        set_new_dnode(&dn, inode, ipage, ipage, 0);
2372
2373        if (f2fs_has_inline_data(inode)) {
2374                if (pos + len <= MAX_INLINE_DATA(inode)) {
2375                        f2fs_do_read_inline_data(page, ipage);
2376                        set_inode_flag(inode, FI_DATA_EXIST);
2377                        if (inode->i_nlink)
2378                                set_inline_node(ipage);
2379                } else {
2380                        err = f2fs_convert_inline_page(&dn, page);
2381                        if (err)
2382                                goto out;
2383                        if (dn.data_blkaddr == NULL_ADDR)
2384                                err = f2fs_get_block(&dn, index);
2385                }
2386        } else if (locked) {
2387                err = f2fs_get_block(&dn, index);
2388        } else {
2389                if (f2fs_lookup_extent_cache(inode, index, &ei)) {
2390                        dn.data_blkaddr = ei.blk + index - ei.fofs;
2391                } else {
2392                        /* hole case */
2393                        err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
2394                        if (err || dn.data_blkaddr == NULL_ADDR) {
2395                                f2fs_put_dnode(&dn);
2396                                __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO,
2397                                                                true);
2398                                WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO);
2399                                locked = true;
2400                                goto restart;
2401                        }
2402                }
2403        }
2404
2405        /* convert_inline_page can make node_changed */
2406        *blk_addr = dn.data_blkaddr;
2407        *node_changed = dn.node_changed;
2408out:
2409        f2fs_put_dnode(&dn);
2410unlock_out:
2411        if (locked)
2412                __do_map_lock(sbi, flag, false);
2413        return err;
2414}
2415
2416static int f2fs_write_begin(struct file *file, struct address_space *mapping,
2417                loff_t pos, unsigned len, unsigned flags,
2418                struct page **pagep, void **fsdata)
2419{
2420        struct inode *inode = mapping->host;
2421        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2422        struct page *page = NULL;
2423        pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT;
2424        bool need_balance = false, drop_atomic = false;
2425        block_t blkaddr = NULL_ADDR;
2426        int err = 0;
2427
2428        trace_f2fs_write_begin(inode, pos, len, flags);
2429
2430        err = f2fs_is_checkpoint_ready(sbi);
2431        if (err)
2432                goto fail;
2433
2434        if ((f2fs_is_atomic_file(inode) &&
2435                        !f2fs_available_free_memory(sbi, INMEM_PAGES)) ||
2436                        is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) {
2437                err = -ENOMEM;
2438                drop_atomic = true;
2439                goto fail;
2440        }
2441
2442        /*
2443         * We should check this at this moment to avoid deadlock on inode page
2444         * and #0 page. The locking rule for inline_data conversion should be:
2445         * lock_page(page #0) -> lock_page(inode_page)
2446         */
2447        if (index != 0) {
2448                err = f2fs_convert_inline_inode(inode);
2449                if (err)
2450                        goto fail;
2451        }
2452repeat:
2453        /*
2454         * Do not use grab_cache_page_write_begin() to avoid deadlock due to
2455         * wait_for_stable_page. We will wait for it below with our own IO control.
2456         */
2457        page = f2fs_pagecache_get_page(mapping, index,
2458                                FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS);
2459        if (!page) {
2460                err = -ENOMEM;
2461                goto fail;
2462        }
2463
2464        *pagep = page;
2465
2466        err = prepare_write_begin(sbi, page, pos, len,
2467                                        &blkaddr, &need_balance);
2468        if (err)
2469                goto fail;
2470
2471        if (need_balance && !IS_NOQUOTA(inode) &&
2472                        has_not_enough_free_secs(sbi, 0, 0)) {
2473                unlock_page(page);
2474                f2fs_balance_fs(sbi, true);
2475                lock_page(page);
2476                if (page->mapping != mapping) {
2477                        /* The page got truncated from under us */
2478                        f2fs_put_page(page, 1);
2479                        goto repeat;
2480                }
2481        }
2482
2483        f2fs_wait_on_page_writeback(page, DATA, false, true);
2484
2485        if (len == PAGE_SIZE || PageUptodate(page))
2486                return 0;
2487
2488        if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode)) {
2489                zero_user_segment(page, len, PAGE_SIZE);
2490                return 0;
2491        }
2492
2493        if (blkaddr == NEW_ADDR) {
2494                zero_user_segment(page, 0, PAGE_SIZE);
2495                SetPageUptodate(page);
2496        } else {
2497                err = f2fs_submit_page_read(inode, page, blkaddr);
2498                if (err)
2499                        goto fail;
2500
2501                lock_page(page);
2502                if (unlikely(page->mapping != mapping)) {
2503                        f2fs_put_page(page, 1);
2504                        goto repeat;
2505                }
2506                if (unlikely(!PageUptodate(page))) {
2507                        err = -EIO;
2508                        goto fail;
2509                }
2510        }
2511        return 0;
2512
2513fail:
2514        f2fs_put_page(page, 1);
2515        f2fs_write_failed(mapping, pos + len);
2516        if (drop_atomic)
2517                f2fs_drop_inmem_pages_all(sbi, false);
2518        return err;
2519}
2520
2521static int f2fs_write_end(struct file *file,
2522                        struct address_space *mapping,
2523                        loff_t pos, unsigned len, unsigned copied,
2524                        struct page *page, void *fsdata)
2525{
2526        struct inode *inode = page->mapping->host;
2527
2528        trace_f2fs_write_end(inode, pos, len, copied);
2529
2530        /*
2531         * This should come from len == PAGE_SIZE, and we expect copied
2532         * to be PAGE_SIZE. Otherwise, we treat it as zero copied and let
2533         * generic_perform_write() try to copy the data again via copied=0.
2534         */
2535        if (!PageUptodate(page)) {
2536                if (unlikely(copied != len))
2537                        copied = 0;
2538                else
2539                        SetPageUptodate(page);
2540        }
2541        if (!copied)
2542                goto unlock_out;
2543
2544        set_page_dirty(page);
2545
2546        if (pos + copied > i_size_read(inode))
2547                f2fs_i_size_write(inode, pos + copied);
2548unlock_out:
2549        f2fs_put_page(page, 1);
2550        f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
2551        return copied;
2552}
2553
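    /*
     * Validate direct I/O alignment: returns 0 when the request is
     * aligned to the filesystem block size, 1 to fall back to buffered
     * I/O when only the device logical block size is matched, and
     * -EINVAL when even that alignment is violated.
     */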
2554static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
2555                           loff_t offset)
2556{
2557        unsigned i_blkbits = READ_ONCE(inode->i_blkbits);
2558        unsigned blkbits = i_blkbits;
2559        unsigned blocksize_mask = (1 << blkbits) - 1;
2560        unsigned long align = offset | iov_iter_alignment(iter);
2561        struct block_device *bdev = inode->i_sb->s_bdev;
2562
2563        if (align & blocksize_mask) {
2564                if (bdev)
2565                        blkbits = blksize_bits(bdev_logical_block_size(bdev));
2566                blocksize_mask = (1 << blkbits) - 1;
2567                if (align & blocksize_mask)
2568                        return -EINVAL;
2569                return 1;
2570        }
2571        return 0;
2572}
2573
2574static void f2fs_dio_end_io(struct bio *bio)
2575{
2576        struct f2fs_private_dio *dio = bio->bi_private;
2577
2578        dec_page_count(F2FS_I_SB(dio->inode),
2579                        dio->write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
2580
2581        bio->bi_private = dio->orig_private;
2582        bio->bi_end_io = dio->orig_end_io;
2583
2584        kvfree(dio);
2585
2586        bio_endio(bio);
2587}
2588
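    /*
     * Wrap a direct I/O bio so that its completion runs through
     * f2fs_dio_end_io(): f2fs_private_dio keeps the original
     * bi_end_io/bi_private, and the in-flight F2FS_DIO_READ/WRITE page
     * counters are bumped here and dropped at completion.
     */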
2589static void f2fs_dio_submit_bio(struct bio *bio, struct inode *inode,
2590                                                        loff_t file_offset)
2591{
2592        struct f2fs_private_dio *dio;
2593        bool write = (bio_op(bio) == REQ_OP_WRITE);
2594
2595        dio = f2fs_kzalloc(F2FS_I_SB(inode),
2596                        sizeof(struct f2fs_private_dio), GFP_NOFS);
2597        if (!dio)
2598                goto out;
2599
2600        dio->inode = inode;
2601        dio->orig_end_io = bio->bi_end_io;
2602        dio->orig_private = bio->bi_private;
2603        dio->write = write;
2604
2605        bio->bi_end_io = f2fs_dio_end_io;
2606        bio->bi_private = dio;
2607
2608        inc_page_count(F2FS_I_SB(inode),
2609                        write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
2610
2611        submit_bio(bio);
2612        return;
2613out:
2614        bio->bi_status = BLK_STS_IOERR;
2615        bio_endio(bio);
2616}
2617
2618static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
2619{
2620        struct address_space *mapping = iocb->ki_filp->f_mapping;
2621        struct inode *inode = mapping->host;
2622        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2623        struct f2fs_inode_info *fi = F2FS_I(inode);
2624        size_t count = iov_iter_count(iter);
2625        loff_t offset = iocb->ki_pos;
2626        int rw = iov_iter_rw(iter);
2627        int err;
2628        enum rw_hint hint = iocb->ki_hint;
2629        int whint_mode = F2FS_OPTION(sbi).whint_mode;
2630        bool do_opu;
2631
2632        err = check_direct_IO(inode, iter, offset);
2633        if (err)
2634                return err < 0 ? err : 0;
2635
2636        if (f2fs_force_buffered_io(inode, iocb, iter))
2637                return 0;
2638
2639        do_opu = allow_outplace_dio(inode, iocb, iter);
2640
2641        trace_f2fs_direct_IO_enter(inode, offset, count, rw);
2642
2643        if (rw == WRITE && whint_mode == WHINT_MODE_OFF)
2644                iocb->ki_hint = WRITE_LIFE_NOT_SET;
2645
2646        if (iocb->ki_flags & IOCB_NOWAIT) {
2647                if (!down_read_trylock(&fi->i_gc_rwsem[rw])) {
2648                        iocb->ki_hint = hint;
2649                        err = -EAGAIN;
2650                        goto out;
2651                }
2652                if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) {
2653                        up_read(&fi->i_gc_rwsem[rw]);
2654                        iocb->ki_hint = hint;
2655                        err = -EAGAIN;
2656                        goto out;
2657                }
2658        } else {
2659                down_read(&fi->i_gc_rwsem[rw]);
2660                if (do_opu)
2661                        down_read(&fi->i_gc_rwsem[READ]);
2662        }
2663
2664        err = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
2665                        iter, rw == WRITE ? get_data_block_dio_write :
2666                        get_data_block_dio, NULL, f2fs_dio_submit_bio,
2667                        DIO_LOCKING | DIO_SKIP_HOLES);
2668
2669        if (do_opu)
2670                up_read(&fi->i_gc_rwsem[READ]);
2671
2672        up_read(&fi->i_gc_rwsem[rw]);
2673
2674        if (rw == WRITE) {
2675                if (whint_mode == WHINT_MODE_OFF)
2676                        iocb->ki_hint = hint;
2677                if (err > 0) {
2678                        f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
2679                                                                        err);
2680                        if (!do_opu)
2681                                set_inode_flag(inode, FI_UPDATE_WRITE);
2682                } else if (err < 0) {
2683                        f2fs_write_failed(mapping, offset + count);
2684                }
2685        }
2686
2687out:
2688        trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);
2689
2690        return err;
2691}
2692
2693void f2fs_invalidate_page(struct page *page, unsigned int offset,
2694                                                        unsigned int length)
2695{
2696        struct inode *inode = page->mapping->host;
2697        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2698
2699        if (inode->i_ino >= F2FS_ROOT_INO(sbi) &&
2700                (offset % PAGE_SIZE || length != PAGE_SIZE))
2701                return;
2702
2703        if (PageDirty(page)) {
2704                if (inode->i_ino == F2FS_META_INO(sbi)) {
2705                        dec_page_count(sbi, F2FS_DIRTY_META);
2706                } else if (inode->i_ino == F2FS_NODE_INO(sbi)) {
2707                        dec_page_count(sbi, F2FS_DIRTY_NODES);
2708                } else {
2709                        inode_dec_dirty_pages(inode);
2710                        f2fs_remove_dirty_inode(inode);
2711                }
2712        }
2713
2714        clear_cold_data(page);
2715
2716        if (IS_ATOMIC_WRITTEN_PAGE(page))
2717                return f2fs_drop_inmem_page(inode, page);
2718
2719        f2fs_clear_page_private(page);
2720}
2721
2722int f2fs_release_page(struct page *page, gfp_t wait)
2723{
2724        /* If this is a dirty page, keep PagePrivate */
2725        if (PageDirty(page))
2726                return 0;
2727
2728        /* This is an atomic written page, keep Private */
2729        if (IS_ATOMIC_WRITTEN_PAGE(page))
2730                return 0;
2731
2732        clear_cold_data(page);
2733        f2fs_clear_page_private(page);
2734        return 1;
2735}
2736
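    /*
     * ->set_page_dirty() for data pages.  Pages of an in-progress atomic
     * write are registered as in-memory pages instead of being added to
     * the dirty list; everything else goes through
     * __set_page_dirty_nobuffers().
     */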
2737static int f2fs_set_data_page_dirty(struct page *page)
2738{
2739        struct address_space *mapping = page->mapping;
2740        struct inode *inode = mapping->host;
2741
2742        trace_f2fs_set_page_dirty(page, DATA);
2743
2744        if (!PageUptodate(page))
2745                SetPageUptodate(page);
2746
2747        if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) {
2748                if (!IS_ATOMIC_WRITTEN_PAGE(page)) {
2749                        f2fs_register_inmem_page(inode, page);
2750                        return 1;
2751                }
2752                /*
2753                 * This page has already been registered, so we just
2754                 * return here.
2755                 */
2756                return 0;
2757        }
2758
2759        if (!PageDirty(page)) {
2760                __set_page_dirty_nobuffers(page);
2761                f2fs_update_dirty_page(inode, page);
2762                return 1;
2763        }
2764        return 0;
2765}
2766
2767static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
2768{
2769        struct inode *inode = mapping->host;
2770
2771        if (f2fs_has_inline_data(inode))
2772                return 0;
2773
2774        /* make sure whole blocks are allocated */
2775        if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
2776                filemap_write_and_wait(mapping);
2777
2778        return generic_block_bmap(mapping, block, get_data_block_bmap);
2779}
2780
2781#ifdef CONFIG_MIGRATION
2782#include <linux/migrate.h>
2783
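    /*
     * ->migratepage() for data pages.  Pages of an in-flight atomic write
     * are migrated under fi->inmem_lock so the inmem_pages list can be
     * repointed to the new page; private page data is carried over as
     * well.
     */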
2784int f2fs_migrate_page(struct address_space *mapping,
2785                struct page *newpage, struct page *page, enum migrate_mode mode)
2786{
2787        int rc, extra_count;
2788        struct f2fs_inode_info *fi = F2FS_I(mapping->host);
2789        bool atomic_written = IS_ATOMIC_WRITTEN_PAGE(page);
2790
2791        BUG_ON(PageWriteback(page));
2792
2793        /* migrating an atomic written page is safe with the inmem_lock held */
2794        if (atomic_written) {
2795                if (mode != MIGRATE_SYNC)
2796                        return -EBUSY;
2797                if (!mutex_trylock(&fi->inmem_lock))
2798                        return -EAGAIN;
2799        }
2800
2801        /* one extra reference was held for atomic_write page */
2802        extra_count = atomic_written ? 1 : 0;
2803        rc = migrate_page_move_mapping(mapping, newpage,
2804                                page, mode, extra_count);
2805        if (rc != MIGRATEPAGE_SUCCESS) {
2806                if (atomic_written)
2807                        mutex_unlock(&fi->inmem_lock);
2808                return rc;
2809        }
2810
2811        if (atomic_written) {
2812                struct inmem_pages *cur;
2813                list_for_each_entry(cur, &fi->inmem_pages, list)
2814                        if (cur->page == page) {
2815                                cur->page = newpage;
2816                                break;
2817                        }
2818                mutex_unlock(&fi->inmem_lock);
2819                put_page(page);
2820                get_page(newpage);
2821        }
2822
2823        if (PagePrivate(page)) {
2824                f2fs_set_page_private(newpage, page_private(page));
2825                f2fs_clear_page_private(page);
2826        }
2827
2828        if (mode != MIGRATE_SYNC_NO_COPY)
2829                migrate_page_copy(newpage, page);
2830        else
2831                migrate_page_states(newpage, page);
2832
2833        return MIGRATEPAGE_SUCCESS;
2834}
2835#endif
2836
2837const struct address_space_operations f2fs_dblock_aops = {
2838        .readpage       = f2fs_read_data_page,
2839        .readpages      = f2fs_read_data_pages,
2840        .writepage      = f2fs_write_data_page,
2841        .writepages     = f2fs_write_data_pages,
2842        .write_begin    = f2fs_write_begin,
2843        .write_end      = f2fs_write_end,
2844        .set_page_dirty = f2fs_set_data_page_dirty,
2845        .invalidatepage = f2fs_invalidate_page,
2846        .releasepage    = f2fs_release_page,
2847        .direct_IO      = f2fs_direct_IO,
2848        .bmap           = f2fs_bmap,
2849#ifdef CONFIG_MIGRATION
2850        .migratepage    = f2fs_migrate_page,
2851#endif
2852};
2853
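    /*
     * Clear only the PAGECACHE_TAG_DIRTY xarray mark for @page, without
     * touching the page's own dirty bit.
     */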
2854void f2fs_clear_page_cache_dirty_tag(struct page *page)
2855{
2856        struct address_space *mapping = page_mapping(page);
2857        unsigned long flags;
2858
2859        xa_lock_irqsave(&mapping->i_pages, flags);
2860        __xa_clear_mark(&mapping->i_pages, page_index(page),
2861                                                PAGECACHE_TAG_DIRTY);
2862        xa_unlock_irqrestore(&mapping->i_pages, flags);
2863}
2864
2865int __init f2fs_init_post_read_processing(void)
2866{
2867        bio_post_read_ctx_cache = KMEM_CACHE(bio_post_read_ctx, 0);
2868        if (!bio_post_read_ctx_cache)
2869                goto fail;
2870        bio_post_read_ctx_pool =
2871                mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS,
2872                                         bio_post_read_ctx_cache);
2873        if (!bio_post_read_ctx_pool)
2874                goto fail_free_cache;
2875        return 0;
2876
2877fail_free_cache:
2878        kmem_cache_destroy(bio_post_read_ctx_cache);
2879fail:
2880        return -ENOMEM;
2881}
2882
2883void __exit f2fs_destroy_post_read_processing(void)
2884{
2885        mempool_destroy(bio_post_read_ctx_pool);
2886        kmem_cache_destroy(bio_post_read_ctx_cache);
2887}
2888