linux/fs/nfs/write.c
/*
 * linux/fs/nfs/write.c
 *
 * Write file data over NFS.
 *
 * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
 */

#include <linux/types.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/writeback.h>
#include <linux/swap.h>
#include <linux/migrate.h>

#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/nfs_page.h>
#include <linux/backing-dev.h>
#include <linux/export.h>
#include <linux/freezer.h>
#include <linux/wait.h>

#include <linux/uaccess.h>

#include "delegation.h"
#include "internal.h"
#include "iostat.h"
#include "nfs4_fs.h"
#include "fscache.h"
#include "pnfs.h"

#include "nfstrace.h"

#define NFSDBG_FACILITY		NFSDBG_PAGECACHE

#define MIN_POOL_WRITE		(32)
#define MIN_POOL_COMMIT		(4)

/*
 * Local function declarations
 */
static void nfs_redirty_request(struct nfs_page *req);
static const struct rpc_call_ops nfs_commit_ops;
static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
static const struct nfs_rw_ops nfs_rw_write_ops;
static void nfs_clear_request_commit(struct nfs_page *req);
static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
				      struct inode *inode);
static struct nfs_page *
nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi,
						struct page *page);

static struct kmem_cache *nfs_wdata_cachep;
static mempool_t *nfs_wdata_mempool;
static struct kmem_cache *nfs_cdata_cachep;
static mempool_t *nfs_commit_mempool;

struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail)
{
	struct nfs_commit_data *p;

	if (never_fail)
		p = mempool_alloc(nfs_commit_mempool, GFP_NOIO);
	else {
		/* It is OK to do some reclaim, but it is not safe to wait
		 * for anything to be returned to the pool.
		 * mempool_alloc() cannot handle that particular combination,
		 * so we need two separate attempts.
		 */
		p = mempool_alloc(nfs_commit_mempool, GFP_NOWAIT);
		if (!p)
			p = kmem_cache_alloc(nfs_cdata_cachep, GFP_NOIO |
					     __GFP_NOWARN | __GFP_NORETRY);
		if (!p)
			return NULL;
	}

	memset(p, 0, sizeof(*p));
	INIT_LIST_HEAD(&p->pages);
	return p;
}
EXPORT_SYMBOL_GPL(nfs_commitdata_alloc);
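
/*
 * Illustrative sketch (editorial, not part of the original file): how the
 * two allocation modes above are typically used by a caller. The error
 * handling shown is hypothetical.
 *
 *	// commit path that must make progress: may sleep, never fails
 *	data = nfs_commitdata_alloc(true);
 *
 *	// opportunistic caller: may fail instead of blocking on the pool
 *	data = nfs_commitdata_alloc(false);
 *	if (!data)
 *		return -ENOMEM;
 */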

void nfs_commit_free(struct nfs_commit_data *p)
{
	mempool_free(p, nfs_commit_mempool);
}
EXPORT_SYMBOL_GPL(nfs_commit_free);

static struct nfs_pgio_header *nfs_writehdr_alloc(void)
{
	struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);

	if (p) {
		memset(p, 0, sizeof(*p));
		p->rw_mode = FMODE_WRITE;
	}
	return p;
}

static void nfs_writehdr_free(struct nfs_pgio_header *hdr)
{
	mempool_free(hdr, nfs_wdata_mempool);
}

static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
{
	ctx->error = error;
	smp_wmb();
	set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
}

/*
 * nfs_page_find_head_request_locked - find head request associated with @page
 *
 * Must be called while holding the inode lock.
 *
 * Returns the matching head request with a reference held, or NULL if not found.
 */
static struct nfs_page *
nfs_page_find_head_request_locked(struct nfs_inode *nfsi, struct page *page)
{
	struct nfs_page *req = NULL;

	if (PagePrivate(page))
		req = (struct nfs_page *)page_private(page);
	else if (unlikely(PageSwapCache(page)))
		req = nfs_page_search_commits_for_head_request_locked(nfsi,
			page);

	if (req) {
		WARN_ON_ONCE(req->wb_head != req);
		kref_get(&req->wb_kref);
	}

	return req;
}

/*
 * nfs_page_find_head_request - find head request associated with @page
 *
 * Returns the matching head request with a reference held, or NULL if not found.
 */
static struct nfs_page *nfs_page_find_head_request(struct page *page)
{
	struct inode *inode = page_file_mapping(page)->host;
	struct nfs_page *req = NULL;

	spin_lock(&inode->i_lock);
	req = nfs_page_find_head_request_locked(NFS_I(inode), page);
	spin_unlock(&inode->i_lock);
	return req;
}

/* Adjust the file length if we're writing beyond the end */
static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
{
	struct inode *inode = page_file_mapping(page)->host;
	loff_t end, i_size;
	pgoff_t end_index;

	spin_lock(&inode->i_lock);
	i_size = i_size_read(inode);
	end_index = (i_size - 1) >> PAGE_SHIFT;
	if (i_size > 0 && page_index(page) < end_index)
		goto out;
	end = page_file_offset(page) + ((loff_t)offset+count);
	if (i_size >= end)
		goto out;
	i_size_write(inode, end);
	nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
out:
	spin_unlock(&inode->i_lock);
}

/* A writeback failed: mark the page as bad, and invalidate the page cache */
static void nfs_set_pageerror(struct page *page)
{
	nfs_zap_mapping(page_file_mapping(page)->host, page_file_mapping(page));
}

/*
 * nfs_page_group_search_locked
 * @head - head request of page group
 * @page_offset - offset into page
 *
 * Search page group with head @head to find a request that contains the
 * page offset @page_offset.
 *
 * Returns a pointer to the first matching nfs request, or NULL if no
 * match is found.
 *
 * Must be called with the page group lock held.
 */
static struct nfs_page *
nfs_page_group_search_locked(struct nfs_page *head, unsigned int page_offset)
{
	struct nfs_page *req;

	WARN_ON_ONCE(head != head->wb_head);
	WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &head->wb_head->wb_flags));

	req = head;
	do {
		if (page_offset >= req->wb_pgbase &&
		    page_offset < (req->wb_pgbase + req->wb_bytes))
			return req;

		req = req->wb_this_page;
	} while (req != head);

	return NULL;
}

/*
 * nfs_page_group_covers_page
 * @head - head request of page group
 *
 * Returns true if the page group with head @head covers the whole page,
 * false otherwise.
 */
static bool nfs_page_group_covers_page(struct nfs_page *req)
{
	struct nfs_page *tmp;
	unsigned int pos = 0;
	unsigned int len = nfs_page_length(req->wb_page);

	nfs_page_group_lock(req, false);

	do {
		tmp = nfs_page_group_search_locked(req->wb_head, pos);
		if (tmp) {
			/* no way this should happen */
			WARN_ON_ONCE(tmp->wb_pgbase != pos);
			pos += tmp->wb_bytes - (pos - tmp->wb_pgbase);
		}
	} while (tmp && pos < len);

	nfs_page_group_unlock(req);
	WARN_ON_ONCE(pos > len);
	return pos == len;
}
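
/*
 * Worked example (editorial, illustrative only): on a 4096-byte page, a
 * group of two subrequests covering bytes 0-2047 and 2048-4095 makes the
 * walk above advance pos 0 -> 2048 -> 4096 == len, so the group covers
 * the page and it may be marked uptodate. A group covering only 0-2047
 * stops at pos == 2048 < len and the page is left as-is.
 */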

/* We can set the PG_uptodate flag if we see that a write request
 * covers the full page.
 */
static void nfs_mark_uptodate(struct nfs_page *req)
{
	if (PageUptodate(req->wb_page))
		return;
	if (!nfs_page_group_covers_page(req))
		return;
	SetPageUptodate(req->wb_page);
}

static int wb_priority(struct writeback_control *wbc)
{
	int ret = 0;

	if (wbc->sync_mode == WB_SYNC_ALL)
		ret = FLUSH_COND_STABLE;
	return ret;
}

/*
 * NFS congestion control
 */

int nfs_congestion_kb;

#define NFS_CONGESTION_ON_THRESH	(nfs_congestion_kb >> (PAGE_SHIFT-10))
#define NFS_CONGESTION_OFF_THRESH	\
	(NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2))
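
/*
 * Worked example (editorial): the thresholds are expressed in pages.
 * With nfs_congestion_kb = 65536 (64 MB) and 4 KB pages (PAGE_SHIFT 12),
 * NFS_CONGESTION_ON_THRESH = 65536 >> 2 = 16384 pages, and the bdi is
 * only marked uncongested again once writeback falls below
 * NFS_CONGESTION_OFF_THRESH = 16384 - 4096 = 12288 pages, i.e. 75% of
 * the on-threshold.
 */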

static void nfs_set_page_writeback(struct page *page)
{
	struct inode *inode = page_file_mapping(page)->host;
	struct nfs_server *nfss = NFS_SERVER(inode);
	int ret = test_set_page_writeback(page);

	WARN_ON_ONCE(ret != 0);

	if (atomic_long_inc_return(&nfss->writeback) >
			NFS_CONGESTION_ON_THRESH)
		set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
}

static void nfs_end_page_writeback(struct nfs_page *req)
{
	struct inode *inode = page_file_mapping(req->wb_page)->host;
	struct nfs_server *nfss = NFS_SERVER(inode);

	if (!nfs_page_group_sync_on_bit(req, PG_WB_END))
		return;

	end_page_writeback(req->wb_page);
	if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
		clear_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
}


/* nfs_page_group_clear_bits
 *   @req - an nfs request
 * clears all page group related bits from @req
 */
static void
nfs_page_group_clear_bits(struct nfs_page *req)
{
	clear_bit(PG_TEARDOWN, &req->wb_flags);
	clear_bit(PG_UNLOCKPAGE, &req->wb_flags);
	clear_bit(PG_UPTODATE, &req->wb_flags);
	clear_bit(PG_WB_END, &req->wb_flags);
	clear_bit(PG_REMOVE, &req->wb_flags);
}


/*
 * nfs_unroll_locks_and_wait - unlock all newly locked reqs and wait on @req
 *
 * this is a helper function for nfs_lock_and_join_requests
 *
 * @inode - inode associated with request page group, must be holding inode lock
 * @head  - head request of page group, must be holding head lock
 * @req   - request that couldn't lock and needs to wait on the req bit lock
 * @nonblock - if true, don't actually wait
 *
 * NOTE: this must be called holding page_group bit lock and inode spin lock
 *       and BOTH will be released before returning.
 *
 * returns 0 on success, < 0 on error.
 */
static int
nfs_unroll_locks_and_wait(struct inode *inode, struct nfs_page *head,
			  struct nfs_page *req, bool nonblock)
	__releases(&inode->i_lock)
{
	struct nfs_page *tmp;
	int ret;

	/* relinquish all the locks successfully grabbed this run */
	for (tmp = head; tmp != req; tmp = tmp->wb_this_page)
		nfs_unlock_request(tmp);

	WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags));

	/* grab a ref on the request that will be waited on */
	kref_get(&req->wb_kref);

	nfs_page_group_unlock(head);
	spin_unlock(&inode->i_lock);

	/* release ref from nfs_page_find_head_request_locked */
	nfs_release_request(head);

	if (!nonblock)
		ret = nfs_wait_on_request(req);
	else
		ret = -EAGAIN;
	nfs_release_request(req);

	return ret;
}

/*
 * nfs_destroy_unlinked_subrequests - destroy recently unlinked subrequests
 *
 * @destroy_list - request list (using wb_this_page) terminated by @old_head
 * @old_head - the old head of the list
 *
 * All subrequests must be locked and removed from all lists, so at this point
 * they are only "active" in this function, and possibly in nfs_wait_on_request
 * with a reference held by some other context.
 */
static void
nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
				 struct nfs_page *old_head)
{
	while (destroy_list) {
		struct nfs_page *subreq = destroy_list;

		destroy_list = (subreq->wb_this_page == old_head) ?
				   NULL : subreq->wb_this_page;

		WARN_ON_ONCE(old_head != subreq->wb_head);

		/* make sure old group is not used */
		subreq->wb_head = subreq;
		subreq->wb_this_page = subreq;

		/* subreq is now totally disconnected from page group or any
		 * write / commit lists. Last chance to wake any waiters */
		nfs_unlock_request(subreq);

		if (!test_bit(PG_TEARDOWN, &subreq->wb_flags)) {
			/* release ref on old head request */
			nfs_release_request(old_head);

			nfs_page_group_clear_bits(subreq);

			/* release the PG_INODE_REF reference */
			if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags))
				nfs_release_request(subreq);
			else
				WARN_ON_ONCE(1);
		} else {
			WARN_ON_ONCE(test_bit(PG_CLEAN, &subreq->wb_flags));
			/* zombie requests have already released the last
			 * reference and were waiting on the rest of the
			 * group to complete. Since it's no longer part of a
			 * group, simply free the request */
			nfs_page_group_clear_bits(subreq);
			nfs_free_request(subreq);
		}
	}
}

/*
 * nfs_lock_and_join_requests - join all subreqs to the head req and return
 *                              a locked reference, cancelling any pending
 *                              operations for this page.
 *
 * @page - the page used to lookup the "page group" of nfs_page structures
 * @nonblock - if true, don't block waiting for request locks
 *
 * This function joins all sub requests to the head request by first
 * locking all requests in the group, cancelling any pending operations
 * and finally updating the head request to cover the whole range covered by
 * the (former) group.  All subrequests are removed from any write or commit
 * lists, unlinked from the group and destroyed.
 *
 * Returns a locked, referenced pointer to the head request - which after
 * this call is guaranteed to be the only request associated with the page.
 * Returns NULL if no requests are found for @page, or an ERR_PTR if an
 * error was encountered.
 */
static struct nfs_page *
nfs_lock_and_join_requests(struct page *page, bool nonblock)
{
	struct inode *inode = page_file_mapping(page)->host;
	struct nfs_page *head, *subreq;
	struct nfs_page *destroy_list = NULL;
	unsigned int total_bytes;
	int ret;

try_again:
	total_bytes = 0;

	WARN_ON_ONCE(destroy_list);

	spin_lock(&inode->i_lock);

	/*
	 * A reference is taken only on the head request which acts as a
	 * reference to the whole page group - the group will not be destroyed
	 * until the head reference is released.
	 */
	head = nfs_page_find_head_request_locked(NFS_I(inode), page);

	if (!head) {
		spin_unlock(&inode->i_lock);
		return NULL;
	}

	/* holding inode lock, so always make a non-blocking call to try the
	 * page group lock */
	ret = nfs_page_group_lock(head, true);
	if (ret < 0) {
		spin_unlock(&inode->i_lock);

		if (!nonblock && ret == -EAGAIN) {
			nfs_page_group_lock_wait(head);
			nfs_release_request(head);
			goto try_again;
		}

		nfs_release_request(head);
		return ERR_PTR(ret);
	}

	/* lock each request in the page group */
	subreq = head;
	do {
		/*
		 * Subrequests are always contiguous, non-overlapping
		 * and in order - but may be repeated (mirrored writes).
		 */
		if (subreq->wb_offset == (head->wb_offset + total_bytes)) {
			/* keep track of how many bytes this group covers */
			total_bytes += subreq->wb_bytes;
		} else if (WARN_ON_ONCE(subreq->wb_offset < head->wb_offset ||
			    ((subreq->wb_offset + subreq->wb_bytes) >
			     (head->wb_offset + total_bytes)))) {
			nfs_page_group_unlock(head);
			spin_unlock(&inode->i_lock);
			return ERR_PTR(-EIO);
		}

		if (!nfs_lock_request(subreq)) {
			/* releases page group bit lock and
			 * inode spin lock and all references */
			ret = nfs_unroll_locks_and_wait(inode, head,
				subreq, nonblock);

			if (ret == 0)
				goto try_again;

			return ERR_PTR(ret);
		}

		subreq = subreq->wb_this_page;
	} while (subreq != head);

	/* Now that all requests are locked, make sure they aren't on any list.
	 * Commit list removal accounting is done after locks are dropped */
	subreq = head;
	do {
		nfs_clear_request_commit(subreq);
		subreq = subreq->wb_this_page;
	} while (subreq != head);

	/* unlink subrequests from head, destroy them later */
	if (head->wb_this_page != head) {
		/* destroy list will be terminated by head */
		destroy_list = head->wb_this_page;
		head->wb_this_page = head;

		/* change head request to cover whole range that
		 * the former page group covered */
		head->wb_bytes = total_bytes;
	}

	/*
	 * prepare head request to be added to new pgio descriptor
	 */
	nfs_page_group_clear_bits(head);

	/*
	 * some part of the group was still on the inode list - otherwise
	 * the group wouldn't be involved in async write.
	 * grab a reference for the head request, iff it needs one.
	 */
	if (!test_and_set_bit(PG_INODE_REF, &head->wb_flags))
		kref_get(&head->wb_kref);

	nfs_page_group_unlock(head);

	/* drop lock to clean up requests on destroy list */
	spin_unlock(&inode->i_lock);

	nfs_destroy_unlinked_subrequests(destroy_list, head);

	/* still holds ref on head from nfs_page_find_head_request_locked
	 * and still has lock on head from lock loop */
	return head;
}
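
/*
 * Illustrative before/after sketch (editorial, not from the original
 * source) of the join performed above on a page written in two pieces:
 *
 *	before:	head(0-2047) -> subreq(2048-4095) -> head (circular)
 *	after:	head(0-4095), subreq unlinked and destroyed
 *
 * The returned head is locked, carries PG_INODE_REF, and is the only
 * request left attached to the page.
 */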

static void nfs_write_error_remove_page(struct nfs_page *req)
{
	nfs_unlock_request(req);
	nfs_end_page_writeback(req);
	generic_error_remove_page(page_file_mapping(req->wb_page),
				  req->wb_page);
	nfs_release_request(req);
}

static bool
nfs_error_is_fatal_on_server(int err)
{
	switch (err) {
	case 0:
	case -ERESTARTSYS:
	case -EINTR:
		return false;
	}
	return nfs_error_is_fatal(err);
}

/*
 * Find an associated nfs write request, and prepare to flush it out.
 * May return an error if the user signalled nfs_wait_on_request().
 */
static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
				struct page *page, bool nonblock)
{
	struct nfs_page *req;
	int ret = 0;

	req = nfs_lock_and_join_requests(page, nonblock);
	if (!req)
		goto out;
	ret = PTR_ERR(req);
	if (IS_ERR(req))
		goto out;

	nfs_set_page_writeback(page);
	WARN_ON_ONCE(test_bit(PG_CLEAN, &req->wb_flags));

	ret = 0;
	/* If there is a fatal error that covers this write, just exit */
	if (nfs_error_is_fatal_on_server(req->wb_context->error))
		goto out_launder;

	if (!nfs_pageio_add_request(pgio, req)) {
		ret = pgio->pg_error;
		/*
		 * Remove the problematic req upon fatal errors on the server
		 */
		if (nfs_error_is_fatal(ret)) {
			nfs_context_set_write_error(req->wb_context, ret);
			if (nfs_error_is_fatal_on_server(ret))
				goto out_launder;
		}
		nfs_redirty_request(req);
		ret = -EAGAIN;
	} else
		nfs_add_stats(page_file_mapping(page)->host,
				NFSIOS_WRITEPAGES, 1);
out:
	return ret;
out_launder:
	nfs_write_error_remove_page(req);
	return ret;
}

static int nfs_do_writepage(struct page *page, struct writeback_control *wbc,
			    struct nfs_pageio_descriptor *pgio)
{
	int ret;

	nfs_pageio_cond_complete(pgio, page_index(page));
	ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE);
	if (ret == -EAGAIN) {
		redirty_page_for_writepage(wbc, page);
		ret = 0;
	}
	return ret;
}

/*
 * Write an mmapped page to the server.
 */
static int nfs_writepage_locked(struct page *page,
				struct writeback_control *wbc)
{
	struct nfs_pageio_descriptor pgio;
	struct inode *inode = page_file_mapping(page)->host;
	int err;

	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
	nfs_pageio_init_write(&pgio, inode, 0,
				false, &nfs_async_write_completion_ops);
	err = nfs_do_writepage(page, wbc, &pgio);
	nfs_pageio_complete(&pgio);
	if (err < 0)
		return err;
	if (pgio.pg_error < 0)
		return pgio.pg_error;
	return 0;
}

int nfs_writepage(struct page *page, struct writeback_control *wbc)
{
	int ret;

	ret = nfs_writepage_locked(page, wbc);
	unlock_page(page);
	return ret;
}

static int nfs_writepages_callback(struct page *page, struct writeback_control *wbc, void *data)
{
	int ret;

	ret = nfs_do_writepage(page, wbc, data);
	unlock_page(page);
	return ret;
}

int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
	struct nfs_pageio_descriptor pgio;
	int err;

	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);

	nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), false,
				&nfs_async_write_completion_ops);
	err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
	nfs_pageio_complete(&pgio);

	if (err < 0)
		goto out_err;
	err = pgio.pg_error;
	if (err < 0)
		goto out_err;
	return 0;
out_err:
	return err;
}

/*
 * Insert a write request into an inode
 */
static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
{
	struct nfs_inode *nfsi = NFS_I(inode);

	WARN_ON_ONCE(req->wb_this_page != req);

	/* Lock the request! */
	nfs_lock_request(req);

	spin_lock(&inode->i_lock);
	if (!nfsi->nrequests &&
	    NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
		inode->i_version++;
	/*
	 * Swap-space should not get truncated. Hence no need to plug the race
	 * with invalidate/truncate.
	 */
	if (likely(!PageSwapCache(req->wb_page))) {
		set_bit(PG_MAPPED, &req->wb_flags);
		SetPagePrivate(req->wb_page);
		set_page_private(req->wb_page, (unsigned long)req);
	}
	nfsi->nrequests++;
	/* this is a head request for a page group - mark it as having an
	 * extra reference so sub groups can follow suit.
	 * This flag also informs pgio layer when to bump nrequests when
	 * adding subrequests. */
	WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags));
	kref_get(&req->wb_kref);
	spin_unlock(&inode->i_lock);
}

/*
 * Remove a write request from an inode
 */
static void nfs_inode_remove_request(struct nfs_page *req)
{
	struct inode *inode = d_inode(req->wb_context->dentry);
	struct nfs_inode *nfsi = NFS_I(inode);
	struct nfs_page *head;

	if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) {
		head = req->wb_head;

		spin_lock(&inode->i_lock);
		if (likely(head->wb_page && !PageSwapCache(head->wb_page))) {
			set_page_private(head->wb_page, 0);
			ClearPagePrivate(head->wb_page);
			clear_bit(PG_MAPPED, &head->wb_flags);
		}
		nfsi->nrequests--;
		spin_unlock(&inode->i_lock);
	} else {
		spin_lock(&inode->i_lock);
		nfsi->nrequests--;
		spin_unlock(&inode->i_lock);
	}

	if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags))
		nfs_release_request(req);
}

static void
nfs_mark_request_dirty(struct nfs_page *req)
{
	if (req->wb_page)
		__set_page_dirty_nobuffers(req->wb_page);
}

/*
 * nfs_page_search_commits_for_head_request_locked
 *
 * Search through commit lists on @inode for the head request for @page.
 * Must be called while holding the inode (which is cinfo) lock.
 *
 * Returns the head request if found, or NULL if not found.
 */
static struct nfs_page *
nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi,
						struct page *page)
{
	struct nfs_page *freq, *t;
	struct nfs_commit_info cinfo;
	struct inode *inode = &nfsi->vfs_inode;

	nfs_init_cinfo_from_inode(&cinfo, inode);

	/* search through pnfs commit lists */
	freq = pnfs_search_commit_reqs(inode, &cinfo, page);
	if (freq)
		return freq->wb_head;

	/* Linearly search the commit list for the correct request */
	list_for_each_entry_safe(freq, t, &cinfo.mds->list, wb_list) {
		if (freq->wb_page == page)
			return freq->wb_head;
	}

	return NULL;
}

/**
 * nfs_request_add_commit_list_locked - add request to a commit list
 * @req: pointer to a struct nfs_page
 * @dst: commit list head
 * @cinfo: holds list lock and accounting info
 *
 * This sets the PG_CLEAN bit and updates the cinfo count of
 * outstanding requests requiring a commit as well as
 * the MM page stats.
 *
 * The caller must hold cinfo->inode->i_lock, and the nfs_page lock.
 */
void
nfs_request_add_commit_list_locked(struct nfs_page *req, struct list_head *dst,
			    struct nfs_commit_info *cinfo)
{
	set_bit(PG_CLEAN, &req->wb_flags);
	nfs_list_add_request(req, dst);
	cinfo->mds->ncommit++;
}
EXPORT_SYMBOL_GPL(nfs_request_add_commit_list_locked);

/**
 * nfs_request_add_commit_list - add request to a commit list
 * @req: pointer to a struct nfs_page
 * @cinfo: holds list lock and accounting info
 *
 * This sets the PG_CLEAN bit and updates the cinfo count of
 * outstanding requests requiring a commit as well as
 * the MM page stats.
 *
 * The caller must _not_ hold the cinfo->lock, but must be
 * holding the nfs_page lock.
 */
void
nfs_request_add_commit_list(struct nfs_page *req, struct nfs_commit_info *cinfo)
{
	spin_lock(&cinfo->inode->i_lock);
	nfs_request_add_commit_list_locked(req, &cinfo->mds->list, cinfo);
	spin_unlock(&cinfo->inode->i_lock);
	if (req->wb_page)
		nfs_mark_page_unstable(req->wb_page, cinfo);
}
EXPORT_SYMBOL_GPL(nfs_request_add_commit_list);

/**
 * nfs_request_remove_commit_list - Remove request from a commit list
 * @req: pointer to a nfs_page
 * @cinfo: holds list lock and accounting info
 *
 * This clears the PG_CLEAN bit, and updates the cinfo's count of
 * outstanding requests requiring a commit.
 * It does not update the MM page stats.
 *
 * The caller _must_ hold the cinfo->lock and the nfs_page lock.
 */
void
nfs_request_remove_commit_list(struct nfs_page *req,
			       struct nfs_commit_info *cinfo)
{
	if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags))
		return;
	nfs_list_remove_request(req);
	cinfo->mds->ncommit--;
}
EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list);

static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
				      struct inode *inode)
{
	cinfo->inode = inode;
	cinfo->mds = &NFS_I(inode)->commit_info;
	cinfo->ds = pnfs_get_ds_info(inode);
	cinfo->dreq = NULL;
	cinfo->completion_ops = &nfs_commit_completion_ops;
}

void nfs_init_cinfo(struct nfs_commit_info *cinfo,
		    struct inode *inode,
		    struct nfs_direct_req *dreq)
{
	if (dreq)
		nfs_init_cinfo_from_dreq(cinfo, dreq);
	else
		nfs_init_cinfo_from_inode(cinfo, inode);
}
EXPORT_SYMBOL_GPL(nfs_init_cinfo);

/*
 * Add a request to the inode's commit list.
 */
void
nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
			struct nfs_commit_info *cinfo, u32 ds_commit_idx)
{
	if (pnfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx))
		return;
	nfs_request_add_commit_list(req, cinfo);
}

static void
nfs_clear_page_commit(struct page *page)
{
	dec_node_page_state(page, NR_UNSTABLE_NFS);
	dec_wb_stat(&inode_to_bdi(page_file_mapping(page)->host)->wb,
		    WB_RECLAIMABLE);
}

/* Called holding the inode (/cinfo) lock */
static void
nfs_clear_request_commit(struct nfs_page *req)
{
	if (test_bit(PG_CLEAN, &req->wb_flags)) {
		struct inode *inode = d_inode(req->wb_context->dentry);
		struct nfs_commit_info cinfo;

		nfs_init_cinfo_from_inode(&cinfo, inode);
		if (!pnfs_clear_request_commit(req, &cinfo)) {
			nfs_request_remove_commit_list(req, &cinfo);
		}
		nfs_clear_page_commit(req->wb_page);
	}
}

int nfs_write_need_commit(struct nfs_pgio_header *hdr)
{
	if (hdr->verf.committed == NFS_DATA_SYNC)
		return hdr->lseg == NULL;
	return hdr->verf.committed != NFS_FILE_SYNC;
}
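
/*
 * Background note (editorial): NFS writes carry a stability level -
 * NFS_UNSTABLE, NFS_DATA_SYNC or NFS_FILE_SYNC. NFS_FILE_SYNC means data
 * and metadata are on stable storage, so no COMMIT is ever needed, and
 * NFS_UNSTABLE always needs one. NFS_DATA_SYNC syncs the data but not
 * the metadata, hence the special case above: a commit is still needed
 * for writes through the MDS (hdr->lseg == NULL), while pNFS layout
 * drivers handle that case themselves.
 */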

static void nfs_write_completion(struct nfs_pgio_header *hdr)
{
	struct nfs_commit_info cinfo;
	unsigned long bytes = 0;

	if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
		goto out;
	nfs_init_cinfo_from_inode(&cinfo, hdr->inode);
	while (!list_empty(&hdr->pages)) {
		struct nfs_page *req = nfs_list_entry(hdr->pages.next);

		bytes += req->wb_bytes;
		nfs_list_remove_request(req);
		if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) &&
		    (hdr->good_bytes < bytes)) {
			nfs_set_pageerror(req->wb_page);
			nfs_context_set_write_error(req->wb_context, hdr->error);
			goto remove_req;
		}
		if (nfs_write_need_commit(hdr)) {
			memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf));
			nfs_mark_request_commit(req, hdr->lseg, &cinfo,
				hdr->pgio_mirror_idx);
			goto next;
		}
remove_req:
		nfs_inode_remove_request(req);
next:
		nfs_unlock_request(req);
		nfs_end_page_writeback(req);
		nfs_release_request(req);
	}
out:
	hdr->release(hdr);
}

unsigned long
nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
{
	return cinfo->mds->ncommit;
}

/* cinfo->inode->i_lock held by caller */
int
nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
		     struct nfs_commit_info *cinfo, int max)
{
	struct nfs_page *req, *tmp;
	int ret = 0;

	list_for_each_entry_safe(req, tmp, src, wb_list) {
		if (!nfs_lock_request(req))
			continue;
		kref_get(&req->wb_kref);
		if (cond_resched_lock(&cinfo->inode->i_lock))
			list_safe_reset_next(req, tmp, wb_list);
		nfs_request_remove_commit_list(req, cinfo);
		nfs_list_add_request(req, dst);
		ret++;
		if ((ret == max) && !cinfo->dreq)
			break;
	}
	return ret;
}

/*
 * nfs_scan_commit - Scan an inode for commit requests
 * @inode: NFS inode to scan
 * @dst: mds destination list
 * @cinfo: mds and ds lists of reqs ready to commit
 *
 * Moves requests from the inode's 'commit' request list.
 * The requests are *not* checked to ensure that they form a contiguous set.
 */
int
nfs_scan_commit(struct inode *inode, struct list_head *dst,
		struct nfs_commit_info *cinfo)
{
	int ret = 0;

	spin_lock(&cinfo->inode->i_lock);
	if (cinfo->mds->ncommit > 0) {
		const int max = INT_MAX;

		ret = nfs_scan_commit_list(&cinfo->mds->list, dst,
					   cinfo, max);
		ret += pnfs_scan_commit_lists(inode, cinfo, max - ret);
	}
	spin_unlock(&cinfo->inode->i_lock);
	return ret;
}

/*
 * Search for an existing write request, and attempt to update
 * it to reflect a new dirty region on a given page.
 *
 * If the attempt fails, then the existing request is flushed out
 * to disk.
 */
static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
		struct page *page,
		unsigned int offset,
		unsigned int bytes)
{
	struct nfs_page *req;
	unsigned int rqend;
	unsigned int end;
	int error;

	if (!PagePrivate(page))
		return NULL;

	end = offset + bytes;
	spin_lock(&inode->i_lock);

	for (;;) {
		req = nfs_page_find_head_request_locked(NFS_I(inode), page);
		if (req == NULL)
			goto out_unlock;

		/* should be handled by nfs_flush_incompatible */
		WARN_ON_ONCE(req->wb_head != req);
		WARN_ON_ONCE(req->wb_this_page != req);

		rqend = req->wb_offset + req->wb_bytes;
		/*
		 * Tell the caller to flush out the request if
		 * the offsets are non-contiguous.
		 * Note: nfs_flush_incompatible() will already
		 * have flushed out requests having wrong owners.
		 */
		if (offset > rqend
		    || end < req->wb_offset)
			goto out_flushme;

		if (nfs_lock_request(req))
			break;

		/* The request is locked, so wait and then retry */
		spin_unlock(&inode->i_lock);
		error = nfs_wait_on_request(req);
		nfs_release_request(req);
		if (error != 0)
			goto out_err;
		spin_lock(&inode->i_lock);
	}

	/* Okay, the request matches. Update the region */
	if (offset < req->wb_offset) {
		req->wb_offset = offset;
		req->wb_pgbase = offset;
	}
	if (end > rqend)
		req->wb_bytes = end - req->wb_offset;
	else
		req->wb_bytes = rqend - req->wb_offset;
out_unlock:
	if (req)
		nfs_clear_request_commit(req);
	spin_unlock(&inode->i_lock);
	return req;
out_flushme:
	spin_unlock(&inode->i_lock);
	nfs_release_request(req);
	error = nfs_wb_page(inode, page);
out_err:
	return ERR_PTR(error);
}
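
/*
 * Worked example (editorial, illustrative only): an existing request
 * covers bytes 0-511 of the page (wb_offset 0, wb_bytes 512) and a new
 * write dirties bytes 256-1023. The regions touch, so the request is
 * extended in place: wb_offset stays 0 and wb_bytes becomes 1024. A
 * write starting at byte 600 instead (offset > rqend) would take the
 * out_flushme path and force the old request to disk via nfs_wb_page().
 */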

/*
 * Try to update an existing write request, or create one if there is none.
 *
 * Note: Should always be called with the Page Lock held to prevent races
 * if we have to add a new request. Also assumes that the caller has
 * already called nfs_flush_incompatible() if necessary.
 */
static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
		struct page *page, unsigned int offset, unsigned int bytes)
{
	struct inode *inode = page_file_mapping(page)->host;
	struct nfs_page *req;

	req = nfs_try_to_update_request(inode, page, offset, bytes);
	if (req != NULL)
		goto out;
	req = nfs_create_request(ctx, page, NULL, offset, bytes);
	if (IS_ERR(req))
		goto out;
	nfs_inode_add_request(inode, req);
out:
	return req;
}

static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
		unsigned int offset, unsigned int count)
{
	struct nfs_page *req;

	req = nfs_setup_write_request(ctx, page, offset, count);
	if (IS_ERR(req))
		return PTR_ERR(req);
	/* Update file length */
	nfs_grow_file(page, offset, count);
	nfs_mark_uptodate(req);
	nfs_mark_request_dirty(req);
	nfs_unlock_and_release_request(req);
	return 0;
}

int nfs_flush_incompatible(struct file *file, struct page *page)
{
	struct nfs_open_context *ctx = nfs_file_open_context(file);
	struct nfs_lock_context *l_ctx;
	struct file_lock_context *flctx = file_inode(file)->i_flctx;
	struct nfs_page *req;
	int do_flush, status;
	/*
	 * Look for a request corresponding to this page. If there
	 * is one, and it belongs to another file, we flush it out
	 * before we try to copy anything into the page. Do this
	 * due to the lack of an ACCESS-type call in NFSv2.
	 * Also do the same if we find a request from an existing
	 * dropped page.
	 */
	do {
		req = nfs_page_find_head_request(page);
		if (req == NULL)
			return 0;
		l_ctx = req->wb_lock_context;
		do_flush = req->wb_page != page ||
			!nfs_match_open_context(req->wb_context, ctx);
		/* for now, flush if more than 1 request in page_group */
		do_flush |= req->wb_this_page != req;
		if (l_ctx && flctx &&
		    !(list_empty_careful(&flctx->flc_posix) &&
		      list_empty_careful(&flctx->flc_flock))) {
			do_flush |= l_ctx->lockowner != current->files;
		}
		nfs_release_request(req);
		if (!do_flush)
			return 0;
		status = nfs_wb_page(page_file_mapping(page)->host, page);
	} while (status == 0);
	return status;
}

/*
 * Avoid buffered writes when an open context credential's key would
 * expire soon.
 *
 * Returns -EACCES if the key will expire within RPC_KEY_EXPIRE_FAIL.
 *
 * Returns 0 and sets a credential flag which triggers the inode to flush
 * and perform NFS_FILE_SYNC writes if the key will expire within
 * RPC_KEY_EXPIRE_TIMEO.
 */
int
nfs_key_timeout_notify(struct file *filp, struct inode *inode)
{
	struct nfs_open_context *ctx = nfs_file_open_context(filp);
	struct rpc_auth *auth = NFS_SERVER(inode)->client->cl_auth;

	return rpcauth_key_timeout_notify(auth, ctx->cred);
}

/*
 * Test if the open context credential key is marked to expire soon.
 */
bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx, struct inode *inode)
{
	struct rpc_auth *auth = NFS_SERVER(inode)->client->cl_auth;

	return rpcauth_cred_key_to_expire(auth, ctx->cred);
}

/*
 * If the page cache is marked as unsafe or invalid, then we can't rely on
 * the PageUptodate() flag. In this case, we will need to turn off
 * write optimisations that depend on the page contents being correct.
 */
static bool nfs_write_pageuptodate(struct page *page, struct inode *inode)
{
	struct nfs_inode *nfsi = NFS_I(inode);

	if (nfs_have_delegated_attributes(inode))
		goto out;
	if (nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
		return false;
	smp_rmb();
	if (test_bit(NFS_INO_INVALIDATING, &nfsi->flags))
		return false;
out:
	if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
		return false;
	return PageUptodate(page) != 0;
}

static bool
is_whole_file_wrlock(struct file_lock *fl)
{
	return fl->fl_start == 0 && fl->fl_end == OFFSET_MAX &&
			fl->fl_type == F_WRLCK;
}

/* If we know the page is up to date, and we're not using byte range locks (or
 * if we have the whole file locked for writing), it may be more efficient to
 * extend the write to cover the entire page in order to avoid fragmentation
 * inefficiencies.
 *
 * If the file is opened for synchronous writes then we can just skip the rest
 * of the checks.
 */
static int nfs_can_extend_write(struct file *file, struct page *page, struct inode *inode)
{
	int ret;
	struct file_lock_context *flctx = inode->i_flctx;
	struct file_lock *fl;

	if (file->f_flags & O_DSYNC)
		return 0;
	if (!nfs_write_pageuptodate(page, inode))
		return 0;
	if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
		return 1;
	if (!flctx || (list_empty_careful(&flctx->flc_flock) &&
		       list_empty_careful(&flctx->flc_posix)))
		return 1;

	/* Check to see if there are whole file write locks */
	ret = 0;
	spin_lock(&flctx->flc_lock);
	if (!list_empty(&flctx->flc_posix)) {
		fl = list_first_entry(&flctx->flc_posix, struct file_lock,
					fl_list);
		if (is_whole_file_wrlock(fl))
			ret = 1;
	} else if (!list_empty(&flctx->flc_flock)) {
		fl = list_first_entry(&flctx->flc_flock, struct file_lock,
					fl_list);
		if (fl->fl_type == F_WRLCK)
			ret = 1;
	}
	spin_unlock(&flctx->flc_lock);
	return ret;
}
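
/*
 * Worked example (editorial): nfs_updatepage() below uses this check.
 * For a 10-byte write at offset 100 of an up-to-date 4096-byte page with
 * no conflicting byte-range locks, the write is widened to offset 0,
 * count 4096 (count = max(count + offset, nfs_page_length(page))), so
 * the whole page goes out in one request instead of a 10-byte fragment.
 */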

/*
 * Update and possibly write a cached page of an NFS file.
 *
 * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad
 * things with a page scheduled for an RPC call (e.g. invalidate it).
 */
int nfs_updatepage(struct file *file, struct page *page,
		unsigned int offset, unsigned int count)
{
	struct nfs_open_context *ctx = nfs_file_open_context(file);
	struct inode	*inode = page_file_mapping(page)->host;
	int		status = 0;

	nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);

	dprintk("NFS:       nfs_updatepage(%pD2 %d@%lld)\n",
		file, count, (long long)(page_file_offset(page) + offset));

	if (!count)
		goto out;

	if (nfs_can_extend_write(file, page, inode)) {
		count = max(count + offset, nfs_page_length(page));
		offset = 0;
	}

	status = nfs_writepage_setup(ctx, page, offset, count);
	if (status < 0)
		nfs_set_pageerror(page);
	else
		__set_page_dirty_nobuffers(page);
out:
	dprintk("NFS:       nfs_updatepage returns %d (isize %lld)\n",
			status, (long long)i_size_read(inode));
	return status;
}

static int flush_task_priority(int how)
{
	switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) {
		case FLUSH_HIGHPRI:
			return RPC_PRIORITY_HIGH;
		case FLUSH_LOWPRI:
			return RPC_PRIORITY_LOW;
	}
	return RPC_PRIORITY_NORMAL;
}

static void nfs_initiate_write(struct nfs_pgio_header *hdr,
			       struct rpc_message *msg,
			       const struct nfs_rpc_ops *rpc_ops,
			       struct rpc_task_setup *task_setup_data, int how)
{
	int priority = flush_task_priority(how);

	task_setup_data->priority = priority;
	rpc_ops->write_setup(hdr, msg);

	nfs4_state_protect_write(NFS_SERVER(hdr->inode)->nfs_client,
				 &task_setup_data->rpc_client, msg, hdr);
}

/* If an nfs_flush_* function fails, it should remove reqs from @head and
 * call this on each, which will prepare them to be retried on next
 * writeback using standard nfs.
 */
static void nfs_redirty_request(struct nfs_page *req)
{
	nfs_mark_request_dirty(req);
	set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags);
	nfs_unlock_request(req);
	nfs_end_page_writeback(req);
	nfs_release_request(req);
}

static void nfs_async_write_error(struct list_head *head)
{
	struct nfs_page *req;

	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_redirty_request(req);
	}
}

static void nfs_async_write_reschedule_io(struct nfs_pgio_header *hdr)
{
	nfs_async_write_error(&hdr->pages);
}

static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = {
	.error_cleanup = nfs_async_write_error,
	.completion = nfs_write_completion,
	.reschedule_io = nfs_async_write_reschedule_io,
};

void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
			       struct inode *inode, int ioflags, bool force_mds,
			       const struct nfs_pgio_completion_ops *compl_ops)
{
	struct nfs_server *server = NFS_SERVER(inode);
	const struct nfs_pageio_ops *pg_ops = &nfs_pgio_rw_ops;

#ifdef CONFIG_NFS_V4_1
	if (server->pnfs_curr_ld && !force_mds)
		pg_ops = server->pnfs_curr_ld->pg_write_ops;
#endif
	nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_write_ops,
			server->wsize, ioflags, GFP_NOIO);
}
EXPORT_SYMBOL_GPL(nfs_pageio_init_write);

void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
{
	struct nfs_pgio_mirror *mirror;

	if (pgio->pg_ops && pgio->pg_ops->pg_cleanup)
		pgio->pg_ops->pg_cleanup(pgio);

	pgio->pg_ops = &nfs_pgio_rw_ops;

	nfs_pageio_stop_mirroring(pgio);

	mirror = &pgio->pg_mirrors[0];
	mirror->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize;
}
EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);


void nfs_commit_prepare(struct rpc_task *task, void *calldata)
{
	struct nfs_commit_data *data = calldata;

	NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
}

/*
 * Special version of should_remove_suid() that ignores capabilities.
 */
static int nfs_should_remove_suid(const struct inode *inode)
{
	umode_t mode = inode->i_mode;
	int kill = 0;

	/* suid always must be killed */
	if (unlikely(mode & S_ISUID))
		kill = ATTR_KILL_SUID;

	/*
	 * sgid without any exec bits is just a mandatory locking mark; leave
	 * it alone.  If some exec bits are set, it's a real sgid; kill it.
	 */
	if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
		kill |= ATTR_KILL_SGID;

	if (unlikely(kill && S_ISREG(mode)))
		return kill;

	return 0;
}
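
/*
 * Worked example (editorial): a binary with mode 04755 (suid + exec)
 * yields ATTR_KILL_SUID; mode 02770 (sgid with group exec) yields
 * ATTR_KILL_SGID; mode 02660 (sgid without any exec bit) is the
 * mandatory-locking marker and yields 0, as does anything that is not
 * a regular file.
 */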
1450
1451static void nfs_writeback_check_extend(struct nfs_pgio_header *hdr,
1452                struct nfs_fattr *fattr)
1453{
1454        struct nfs_pgio_args *argp = &hdr->args;
1455        struct nfs_pgio_res *resp = &hdr->res;
1456        u64 size = argp->offset + resp->count;
1457
1458        if (!(fattr->valid & NFS_ATTR_FATTR_SIZE))
1459                fattr->size = size;
1460        if (nfs_size_to_loff_t(fattr->size) < i_size_read(hdr->inode)) {
1461                fattr->valid &= ~NFS_ATTR_FATTR_SIZE;
1462                return;
1463        }
1464        if (size != fattr->size)
1465                return;
1466        /* Set attribute barrier */
1467        nfs_fattr_set_barrier(fattr);
1468        /* ...and update size */
1469        fattr->valid |= NFS_ATTR_FATTR_SIZE;
1470}
1471
1472void nfs_writeback_update_inode(struct nfs_pgio_header *hdr)
1473{
1474        struct nfs_fattr *fattr = &hdr->fattr;
1475        struct inode *inode = hdr->inode;
1476
1477        spin_lock(&inode->i_lock);
1478        nfs_writeback_check_extend(hdr, fattr);
1479        nfs_post_op_update_inode_force_wcc_locked(inode, fattr);
1480        spin_unlock(&inode->i_lock);
1481}
1482EXPORT_SYMBOL_GPL(nfs_writeback_update_inode);
1483
1484/*
1485 * This function is called when the WRITE call is complete.
1486 */
1487static int nfs_writeback_done(struct rpc_task *task,
1488                              struct nfs_pgio_header *hdr,
1489                              struct inode *inode)
1490{
1491        int status;
1492
1493        /*
1494         * ->write_done will attempt to use post-op attributes to detect
1495         * conflicting writes by other clients.  A strict interpretation
1496         * of close-to-open would allow us to continue caching even if
1497         * another writer had changed the file, but some applications
1498         * depend on tighter cache coherency when writing.
1499         */
1500        status = NFS_PROTO(inode)->write_done(task, hdr);
1501        if (status != 0)
1502                return status;
1503        nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count);
1504
1505        if (hdr->res.verf->committed < hdr->args.stable &&
1506            task->tk_status >= 0) {
1507                /* We tried a write call, but the server did not
1508                 * commit data to stable storage even though we
1509                 * requested it.
1510                 * Note: There is a known bug in Tru64 < 5.0 in which
1511                 *       the server reports NFS_DATA_SYNC, but performs
1512                 *       NFS_FILE_SYNC. We therefore implement this checking
1513                 *       as a dprintk() in order to avoid filling syslog.
1514                 */
1515                static unsigned long    complain;
1516
1517                /* Note this will print the MDS for a DS write */
1518                if (time_before(complain, jiffies)) {
1519                        dprintk("NFS:       faulty NFS server %s:"
1520                                " (committed = %d) != (stable = %d)\n",
1521                                NFS_SERVER(inode)->nfs_client->cl_hostname,
1522                                hdr->res.verf->committed, hdr->args.stable);
1523                        complain = jiffies + 300 * HZ;
1524                }
1525        }
1526
1527        /* Deal with the suid/sgid bit corner case */
1528        if (nfs_should_remove_suid(inode))
1529                nfs_mark_for_revalidate(inode);
1530        return 0;
1531}
1532
1533/*
1534 * This function is called when the WRITE call is complete.
1535 */
1536static void nfs_writeback_result(struct rpc_task *task,
1537                                 struct nfs_pgio_header *hdr)
1538{
1539        struct nfs_pgio_args    *argp = &hdr->args;
1540        struct nfs_pgio_res     *resp = &hdr->res;
1541
1542        if (resp->count < argp->count) {
1543                static unsigned long    complain;
1544
1545                /* This is a short write! */
1546                nfs_inc_stats(hdr->inode, NFSIOS_SHORTWRITE);
1547
1548                /* Has the server at least made some progress? */
1549                if (resp->count == 0) {
1550                        if (time_before(complain, jiffies)) {
1551                                printk(KERN_WARNING
1552                                       "NFS: Server wrote zero bytes, expected %u.\n",
1553                                       argp->count);
1554                                complain = jiffies + 300 * HZ;
1555                        }
1556                        nfs_set_pgio_error(hdr, -EIO, argp->offset);
1557                        task->tk_status = -EIO;
1558                        return;
1559                }
1560
1561                /* For non-RPC-based layout drivers, retry through the MDS */
1562                if (!task->tk_ops) {
1563                        hdr->pnfs_error = -EAGAIN;
1564                        return;
1565                }
1566
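                /* Worked example: in the stable case below, a 16384-byte
                 * write at offset 0 for which the server returns
                 * resp->count == 4096 is restarted as a 12288-byte write
                 * at offset 4096.
                 */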
1567                /* Was this an NFSv2 write or an NFSv3 stable write? */
1568                if (resp->verf->committed != NFS_UNSTABLE) {
1569                        /* Resend from where the server left off */
1570                        hdr->mds_offset += resp->count;
1571                        argp->offset += resp->count;
1572                        argp->pgbase += resp->count;
1573                        argp->count -= resp->count;
1574                } else {
1575                        /* Resend as a stable write in order to avoid
1576                         * headaches in the case of a server crash.
1577                         */
1578                        argp->stable = NFS_FILE_SYNC;
1579                }
1580                rpc_restart_call_prepare(task);
1581        }
1582}
1583
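/*
 * Outstanding COMMIT RPCs are tracked in cinfo->rpcs_out: nfs_commit_begin()
 * takes a count, nfs_commit_end() drops it and wakes waiters when it reaches
 * zero, and wait_on_commit() sleeps killably until that happens.
 */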
1584static int wait_on_commit(struct nfs_mds_commit_info *cinfo)
1585{
1586        return wait_on_atomic_t(&cinfo->rpcs_out,
1587                        nfs_wait_atomic_killable, TASK_KILLABLE);
1588}
1589
1590static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo)
1591{
1592        atomic_inc(&cinfo->rpcs_out);
1593}
1594
1595static void nfs_commit_end(struct nfs_mds_commit_info *cinfo)
1596{
1597        if (atomic_dec_and_test(&cinfo->rpcs_out))
1598                wake_up_atomic_t(&cinfo->rpcs_out);
1599}
1600
1601void nfs_commitdata_release(struct nfs_commit_data *data)
1602{
1603        put_nfs_open_context(data->context);
1604        nfs_commit_free(data);
1605}
1606EXPORT_SYMBOL_GPL(nfs_commitdata_release);
1607
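/*
 * Start a COMMIT RPC.  The task always runs asynchronously on the nfsiod
 * workqueue; callers that pass FLUSH_SYNC in @how additionally wait here
 * for the task to complete before returning.
 */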
1608int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data,
1609                        const struct nfs_rpc_ops *nfs_ops,
1610                        const struct rpc_call_ops *call_ops,
1611                        int how, int flags)
1612{
1613        struct rpc_task *task;
1614        int priority = flush_task_priority(how);
1615        struct rpc_message msg = {
1616                .rpc_argp = &data->args,
1617                .rpc_resp = &data->res,
1618                .rpc_cred = data->cred,
1619        };
1620        struct rpc_task_setup task_setup_data = {
1621                .task = &data->task,
1622                .rpc_client = clnt,
1623                .rpc_message = &msg,
1624                .callback_ops = call_ops,
1625                .callback_data = data,
1626                .workqueue = nfsiod_workqueue,
1627                .flags = RPC_TASK_ASYNC | flags,
1628                .priority = priority,
1629        };
1630        /* Set up the initial task struct.  */
1631        nfs_ops->commit_setup(data, &msg);
1632
1633        dprintk("NFS: initiated commit call\n");
1634
1635        nfs4_state_protect(NFS_SERVER(data->inode)->nfs_client,
1636                NFS_SP4_MACH_CRED_COMMIT, &task_setup_data.rpc_client, &msg);
1637
1638        task = rpc_run_task(&task_setup_data);
1639        if (IS_ERR(task))
1640                return PTR_ERR(task);
1641        if (how & FLUSH_SYNC)
1642                rpc_wait_for_completion_task(task);
1643        rpc_put_task(task);
1644        return 0;
1645}
1646EXPORT_SYMBOL_GPL(nfs_initiate_commit);
1647
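/*
 * Return the end offset (req_offset + wb_bytes) of the furthest-reaching
 * request on @head; nfs_init_commit() uses this as the last write byte
 * (lwb) for a pNFS commit.
 */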
1648static loff_t nfs_get_lwb(struct list_head *head)
1649{
1650        loff_t lwb = 0;
1651        struct nfs_page *req;
1652
1653        list_for_each_entry(req, head, wb_list)
1654                if (lwb < (req_offset(req) + req->wb_bytes))
1655                        lwb = req_offset(req) + req->wb_bytes;
1656
1657        return lwb;
1658}
1659
1660/*
1661 * Set up the argument/result storage required for the RPC call.
1662 */
1663void nfs_init_commit(struct nfs_commit_data *data,
1664                     struct list_head *head,
1665                     struct pnfs_layout_segment *lseg,
1666                     struct nfs_commit_info *cinfo)
1667{
1668        struct nfs_page *first = nfs_list_entry(head->next);
1669        struct inode *inode = d_inode(first->wb_context->dentry);
1670
1671        /* Set up the RPC argument and reply structs
1672         * NB: take care not to mess about with data->commit et al. */
1673
1674        list_splice_init(head, &data->pages);
1675
1676        data->inode       = inode;
1677        data->cred        = first->wb_context->cred;
1678        data->lseg        = lseg; /* reference transferred */
1679        /* only set lwb for pnfs commit */
1680        if (lseg)
1681                data->lwb = nfs_get_lwb(&data->pages);
1682        data->mds_ops     = &nfs_commit_ops;
1683        data->completion_ops = cinfo->completion_ops;
1684        data->dreq        = cinfo->dreq;
1685
1686        data->args.fh     = NFS_FH(data->inode);
1687        /* Note: offset == 0 and count == 0 request a commit of the entire inode */
1688        data->args.offset = 0;
1689        data->args.count  = 0;
1690        data->context     = get_nfs_open_context(first->wb_context);
1691        data->res.fattr   = &data->fattr;
1692        data->res.verf    = &data->verf;
1693        nfs_fattr_init(&data->fattr);
1694}
1695EXPORT_SYMBOL_GPL(nfs_init_commit);
1696
1697void nfs_retry_commit(struct list_head *page_list,
1698                      struct pnfs_layout_segment *lseg,
1699                      struct nfs_commit_info *cinfo,
1700                      u32 ds_commit_idx)
1701{
1702        struct nfs_page *req;
1703
1704        while (!list_empty(page_list)) {
1705                req = nfs_list_entry(page_list->next);
1706                nfs_list_remove_request(req);
1707                nfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx);
1708                if (!cinfo->dreq)
1709                        nfs_clear_page_commit(req->wb_page);
1710                nfs_unlock_and_release_request(req);
1711        }
1712}
1713EXPORT_SYMBOL_GPL(nfs_retry_commit);
1714
1715static void
1716nfs_commit_resched_write(struct nfs_commit_info *cinfo,
1717                struct nfs_page *req)
1718{
1719        __set_page_dirty_nobuffers(req->wb_page);
1720}
1721
1722/*
1723 * Commit dirty pages
1724 */
1725static int
1726nfs_commit_list(struct inode *inode, struct list_head *head, int how,
1727                struct nfs_commit_info *cinfo)
1728{
1729        struct nfs_commit_data  *data;
1730
1731        /* another commit raced with us */
1732        if (list_empty(head))
1733                return 0;
1734
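        /* The requests on @head are already off the commit list, so the
         * allocation must not fail; never_fail == true uses a mempool
         * allocation that can wait but will not fail.
         */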
1735        data = nfs_commitdata_alloc(true);
1736
1737        /* Set up the argument struct */
1738        nfs_init_commit(data, head, NULL, cinfo);
1739        atomic_inc(&cinfo->mds->rpcs_out);
1740        return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode),
1741                                   data->mds_ops, how, 0);
1742}
1743
1744/*
1745 * COMMIT call returned
1746 */
1747static void nfs_commit_done(struct rpc_task *task, void *calldata)
1748{
1749        struct nfs_commit_data  *data = calldata;
1750
1751        dprintk("NFS: %5u nfs_commit_done (status %d)\n",
1752                                task->tk_pid, task->tk_status);
1753
1754        /* Call the NFS version-specific code */
1755        NFS_PROTO(data->inode)->commit_done(task, data);
1756}
1757
1758static void nfs_commit_release_pages(struct nfs_commit_data *data)
1759{
1760        struct nfs_page *req;
1761        int status = data->task.tk_status;
1762        struct nfs_commit_info cinfo;
1763        struct nfs_server *nfss;
1764
1765        while (!list_empty(&data->pages)) {
1766                req = nfs_list_entry(data->pages.next);
1767                nfs_list_remove_request(req);
1768                if (req->wb_page)
1769                        nfs_clear_page_commit(req->wb_page);
1770
1771                dprintk("NFS:       commit (%s/%llu %d@%lld)",
1772                        req->wb_context->dentry->d_sb->s_id,
1773                        (unsigned long long)NFS_FILEID(d_inode(req->wb_context->dentry)),
1774                        req->wb_bytes,
1775                        (long long)req_offset(req));
1776                if (status < 0) {
1777                        nfs_context_set_write_error(req->wb_context, status);
1778                        if (req->wb_page)
1779                                nfs_inode_remove_request(req);
1780                        dprintk_cont(", error = %d\n", status);
1781                        goto next;
1782                }
1783
1784                /* Okay, COMMIT succeeded, apparently. Check the verifier
1785                 * returned by the server against all stored verfs. */
1786                if (!nfs_write_verifier_cmp(&req->wb_verf, &data->verf.verifier)) {
1787                        /* We have a match */
1788                        if (req->wb_page)
1789                                nfs_inode_remove_request(req);
1790                        dprintk_cont(" OK\n");
1791                        goto next;
1792                }
1793                /* Verifier mismatch: the server may have rebooted, so write the page again */
1794                dprintk_cont(" mismatch\n");
1795                nfs_mark_request_dirty(req);
1796                set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags);
1797        next:
1798                nfs_unlock_and_release_request(req);
1799        }
1800        nfss = NFS_SERVER(data->inode);
1801        if (atomic_long_read(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
1802                clear_bdi_congested(inode_to_bdi(data->inode), BLK_RW_ASYNC);
1803
1804        nfs_init_cinfo(&cinfo, data->inode, data->dreq);
1805        nfs_commit_end(cinfo.mds);
1806}
1807
1808static void nfs_commit_release(void *calldata)
1809{
1810        struct nfs_commit_data *data = calldata;
1811
1812        data->completion_ops->completion(data);
1813        nfs_commitdata_release(calldata);
1814}
1815
1816static const struct rpc_call_ops nfs_commit_ops = {
1817        .rpc_call_prepare = nfs_commit_prepare,
1818        .rpc_call_done = nfs_commit_done,
1819        .rpc_release = nfs_commit_release,
1820};
1821
1822static const struct nfs_commit_completion_ops nfs_commit_completion_ops = {
1823        .completion = nfs_commit_release_pages,
1824        .resched_write = nfs_commit_resched_write,
1825};
1826
1827int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
1828                            int how, struct nfs_commit_info *cinfo)
1829{
1830        int status;
1831
1832        status = pnfs_commit_list(inode, head, how, cinfo);
1833        if (status == PNFS_NOT_ATTEMPTED)
1834                status = nfs_commit_list(inode, head, how, cinfo);
1835        return status;
1836}
1837
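/*
 * Flush outstanding COMMIT requests for the whole inode.  Callers needing
 * synchronous behaviour pass FLUSH_SYNC in @how and wait for all commit
 * RPCs to finish; others merely start them.  Returns the number of
 * requests scanned, or a negative error.
 */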
1838int nfs_commit_inode(struct inode *inode, int how)
1839{
1840        LIST_HEAD(head);
1841        struct nfs_commit_info cinfo;
1842        int may_wait = how & FLUSH_SYNC;
1843        int error = 0;
1844        int res;
1845
1846        nfs_init_cinfo_from_inode(&cinfo, inode);
1847        nfs_commit_begin(cinfo.mds);
1848        res = nfs_scan_commit(inode, &head, &cinfo);
1849        if (res)
1850                error = nfs_generic_commit_list(inode, &head, how, &cinfo);
1851        nfs_commit_end(cinfo.mds);
1852        if (error < 0)
1853                goto out_error;
1854        if (!may_wait)
1855                goto out_mark_dirty;
1856        error = wait_on_commit(cinfo.mds);
1857        if (error < 0)
1858                return error;
1859        return res;
1860out_error:
1861        res = error;
1862        /* Note: If we exit without ensuring that the commit is complete,
1863         * we must mark the inode as dirty. Otherwise, future calls to
1864         * sync_inode() with the WB_SYNC_ALL flag set will fail to ensure
1865         * that the data is on the disk.
1866         */
1867out_mark_dirty:
1868        __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
1869        return res;
1870}
1871EXPORT_SYMBOL_GPL(nfs_commit_inode);
1872
1873int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
1874{
1875        struct nfs_inode *nfsi = NFS_I(inode);
1876        int flags = FLUSH_SYNC;
1877        int ret = 0;
1878
1879        /* no commits means nothing needs to be done */
1880        if (!nfsi->commit_info.ncommit)
1881                return ret;
1882
1883        if (wbc->sync_mode == WB_SYNC_NONE) {
1884                /* Don't commit yet if this is a non-blocking flush and there
1885                 * are a lot of outstanding writes for this mapping.
1886                 */
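                /* e.g. with nrequests == 100, the commit is deferred until
                 * more than 50 requests are awaiting commit.
                 */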
1887                if (nfsi->commit_info.ncommit <= (nfsi->nrequests >> 1))
1888                        goto out_mark_dirty;
1889
1890                /* don't wait for the COMMIT response */
1891                flags = 0;
1892        }
1893
1894        ret = nfs_commit_inode(inode, flags);
1895        if (ret >= 0) {
1896                if (wbc->sync_mode == WB_SYNC_NONE) {
1897                        if (ret < wbc->nr_to_write)
1898                                wbc->nr_to_write -= ret;
1899                        else
1900                                wbc->nr_to_write = 0;
1901                }
1902                return 0;
1903        }
1904out_mark_dirty:
1905        __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
1906        return ret;
1907}
1908EXPORT_SYMBOL_GPL(nfs_write_inode);
1909
1910/*
1911 * Wrapper for filemap_write_and_wait_range()
1912 *
1913 * Needed for pNFS in order to ensure data becomes visible to the
1914 * client.
1915 */
1916int nfs_filemap_write_and_wait_range(struct address_space *mapping,
1917                loff_t lstart, loff_t lend)
1918{
1919        int ret;
1920
1921        ret = filemap_write_and_wait_range(mapping, lstart, lend);
1922        if (ret == 0)
1923                ret = pnfs_sync_inode(mapping->host, true);
1924        return ret;
1925}
1926EXPORT_SYMBOL_GPL(nfs_filemap_write_and_wait_range);
1927
1928/*
1929 * Flush the inode to disk.
1930 */
1931int nfs_wb_all(struct inode *inode)
1932{
1933        int ret;
1934
1935        trace_nfs_writeback_inode_enter(inode);
1936
1937        ret = filemap_write_and_wait(inode->i_mapping);
1938        if (ret)
1939                goto out;
1940        ret = nfs_commit_inode(inode, FLUSH_SYNC);
1941        if (ret < 0)
1942                goto out;
1943        pnfs_sync_inode(inode, true);
1944        ret = 0;
1945
1946out:
1947        trace_nfs_writeback_inode_exit(inode, ret);
1948        return ret;
1949}
1950EXPORT_SYMBOL_GPL(nfs_wb_all);
1951
1952int nfs_wb_page_cancel(struct inode *inode, struct page *page)
1953{
1954        struct nfs_page *req;
1955        int ret = 0;
1956
1957        wait_on_page_writeback(page);
1958
1959        /* Blocking call that cancels all requests for this page and joins
1960         * them into a single (head) request */
1961        req = nfs_lock_and_join_requests(page, false);
1962
1963        if (IS_ERR(req)) {
1964                ret = PTR_ERR(req);
1965        } else if (req) {
1966                /* all requests from this page have been cancelled by
1967                 * nfs_lock_and_join_requests, so just remove the head
1968                 * request from the inode / page_private pointer and
1969                 * release it */
1970                nfs_inode_remove_request(req);
1971                nfs_unlock_and_release_request(req);
1972        }
1973
1974        return ret;
1975}
1976
1977/*
1978 * Write back all requests on one page - we do this before reading it.
1979 */
1980int nfs_wb_page(struct inode *inode, struct page *page)
1981{
1982        loff_t range_start = page_file_offset(page);
1983        loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
1984        struct writeback_control wbc = {
1985                .sync_mode = WB_SYNC_ALL,
1986                .nr_to_write = 0,
1987                .range_start = range_start,
1988                .range_end = range_end,
1989        };
1990        int ret;
1991
1992        trace_nfs_writeback_page_enter(inode);
1993
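        /* Loop until the page is clean: write it back while it is dirty,
         * then commit until its nfs_page request (PagePrivate) is gone.
         */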
1994        for (;;) {
1995                wait_on_page_writeback(page);
1996                if (clear_page_dirty_for_io(page)) {
1997                        ret = nfs_writepage_locked(page, &wbc);
1998                        if (ret < 0)
1999                                goto out_error;
2000                        continue;
2001                }
2002                ret = 0;
2003                if (!PagePrivate(page))
2004                        break;
2005                ret = nfs_commit_inode(inode, FLUSH_SYNC);
2006                if (ret < 0)
2007                        goto out_error;
2008        }
2009out_error:
2010        trace_nfs_writeback_page_exit(inode, ret);
2011        return ret;
2012}
2013
2014#ifdef CONFIG_MIGRATION
2015int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
2016                struct page *page, enum migrate_mode mode)
2017{
2018        /*
2019         * If PagePrivate is set, then the page is currently associated with
2020         * an in-progress read or write request. Don't try to migrate it.
2021         *
2022         * FIXME: we could do this in principle, but we'll need a way to ensure
2023         *        that we can safely release the inode reference while holding
2024         *        the page lock.
2025         */
2026        if (PagePrivate(page))
2027                return -EBUSY;
2028
2029        if (!nfs_fscache_release_page(page, GFP_KERNEL))
2030                return -EBUSY;
2031
2032        return migrate_page(mapping, newpage, page, mode);
2033}
2034#endif
2035
2036int __init nfs_init_writepagecache(void)
2037{
2038        nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
2039                                             sizeof(struct nfs_pgio_header),
2040                                             0, SLAB_HWCACHE_ALIGN,
2041                                             NULL);
2042        if (nfs_wdata_cachep == NULL)
2043                return -ENOMEM;
2044
2045        nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE,
2046                                                     nfs_wdata_cachep);
2047        if (nfs_wdata_mempool == NULL)
2048                goto out_destroy_write_cache;
2049
2050        nfs_cdata_cachep = kmem_cache_create("nfs_commit_data",
2051                                             sizeof(struct nfs_commit_data),
2052                                             0, SLAB_HWCACHE_ALIGN,
2053                                             NULL);
2054        if (nfs_cdata_cachep == NULL)
2055                goto out_destroy_write_mempool;
2056
2057        nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT,
2058                                                      nfs_cdata_cachep);
2059        if (nfs_commit_mempool == NULL)
2060                goto out_destroy_commit_cache;
2061
2062        /*
2063         * NFS congestion size, scale with available memory.
2064         *
2065         *  64MB:    8192k
2066         * 128MB:   11585k
2067         * 256MB:   16384k
2068         * 512MB:   23170k
2069         *   1GB:   32768k
2070         *   2GB:   46340k
2071         *   4GB:   65536k
2072         *   8GB:   92681k
2073         *  16GB:  131072k
2074         *
2075         * This allows larger machines to have larger/more transfers.
2076         * Limit the default to 256M
2077         */
2078        nfs_congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10);
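        /* e.g. with 4KB pages and 1GB of RAM: totalram_pages == 262144,
         * int_sqrt() == 512, so 16 * 512 << (12 - 10) == 32768k, matching
         * the table above.
         */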
2079        if (nfs_congestion_kb > 256*1024)
2080                nfs_congestion_kb = 256*1024;
2081
2082        return 0;
2083
2084out_destroy_commit_cache:
2085        kmem_cache_destroy(nfs_cdata_cachep);
2086out_destroy_write_mempool:
2087        mempool_destroy(nfs_wdata_mempool);
2088out_destroy_write_cache:
2089        kmem_cache_destroy(nfs_wdata_cachep);
2090        return -ENOMEM;
2091}
2092
2093void nfs_destroy_writepagecache(void)
2094{
2095        mempool_destroy(nfs_commit_mempool);
2096        kmem_cache_destroy(nfs_cdata_cachep);
2097        mempool_destroy(nfs_wdata_mempool);
2098        kmem_cache_destroy(nfs_wdata_cachep);
2099}
2100
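/*
 * Hooks the write path into the generic NFS pageio layer: header alloc and
 * free, RPC completion (rw_done), short-write fixup (rw_result) and RPC
 * initiation.
 */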
2101static const struct nfs_rw_ops nfs_rw_write_ops = {
2102        .rw_alloc_header        = nfs_writehdr_alloc,
2103        .rw_free_header         = nfs_writehdr_free,
2104        .rw_done                = nfs_writeback_done,
2105        .rw_result              = nfs_writeback_result,
2106        .rw_initiate            = nfs_initiate_write,
2107};
2108