linux/fs/nfs/read.c
/*
 * linux/fs/nfs/read.c
 *
 * Block I/O for NFS
 *
 * Partial copy of Linus' read cache modifications to fs/nfs/file.c
 * modified for async RPC by okir@monad.swb.de
 */

#include <linux/time.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>

#include <asm/system.h>

#include "nfs4_fs.h"
#include "internal.h"
#include "iostat.h"
#include "fscache.h"

#define NFSDBG_FACILITY         NFSDBG_PAGECACHE

static int nfs_pagein_multi(struct inode *, struct list_head *, unsigned int, size_t, int);
static int nfs_pagein_one(struct inode *, struct list_head *, unsigned int, size_t, int);
static const struct rpc_call_ops nfs_read_partial_ops;
static const struct rpc_call_ops nfs_read_full_ops;

static struct kmem_cache *nfs_rdata_cachep;
static mempool_t *nfs_rdata_mempool;

#define MIN_POOL_READ   (32)

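/*
 * nfs_readdata_alloc - allocate a read descriptor for @pagecount pages
 *
 * Descriptors come from a mempool, which keeps a reserve of
 * MIN_POOL_READ entries so that reads can make progress under memory
 * pressure.  Requests small enough to fit in the embedded page_array
 * need no second allocation; larger requests fall back to a
 * kcalloc()ed page vector and return NULL if that fails.  Pair with
 * nfs_readdata_free().
 */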
struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
{
        struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_NOFS);

        if (p) {
                memset(p, 0, sizeof(*p));
                INIT_LIST_HEAD(&p->pages);
                p->npages = pagecount;
                p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
                if (pagecount <= ARRAY_SIZE(p->page_array))
                        p->pagevec = p->page_array;
                else {
                        p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
                        if (!p->pagevec) {
                                mempool_free(p, nfs_rdata_mempool);
                                p = NULL;
                        }
                }
        }
        return p;
}

void nfs_readdata_free(struct nfs_read_data *p)
{
        if (p && (p->pagevec != &p->page_array[0]))
                kfree(p->pagevec);
        mempool_free(p, nfs_rdata_mempool);
}

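/*
 * nfs_readdata_release - drop the open context pinned by
 * nfs_read_rpcsetup(), then free the descriptor itself.
 */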
static void nfs_readdata_release(struct nfs_read_data *rdata)
{
        put_nfs_open_context(rdata->args.context);
        nfs_readdata_free(rdata);
}

static int nfs_return_empty_page(struct page *page)
{
        zero_user(page, 0, PAGE_CACHE_SIZE);
        SetPageUptodate(page);
        unlock_page(page);
        return 0;
}

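/*
 * Zero out whatever part of the page(s) the server never sent because
 * the read hit end-of-file.  For example, if args.count is 4096 but
 * the server returned res.count of 1000 with eof set, the remaining
 * 3096 bytes were never filled and are cleared here so that stale data
 * cannot leak to userspace.
 */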
static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
{
        unsigned int remainder = data->args.count - data->res.count;
        unsigned int base = data->args.pgbase + data->res.count;
        unsigned int pglen;
        struct page **pages;

        if (data->res.eof == 0 || remainder == 0)
                return;
        /*
         * Note: "remainder" can never be negative, since we check for
         *      this in the XDR code.
         */
        pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
        base &= ~PAGE_CACHE_MASK;
        pglen = PAGE_CACHE_SIZE - base;
        for (;;) {
                if (remainder <= pglen) {
                        zero_user(*pages, base, remainder);
                        break;
                }
                zero_user(*pages, base, pglen);
                pages++;
                remainder -= pglen;
                pglen = PAGE_CACHE_SIZE;
                base = 0;
        }
}

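/*
 * nfs_readpage_async - read one page without blocking the caller
 *
 * Builds a single nfs_page request covering the valid part of the page
 * (zeroing any tail beyond i_size up front) and dispatches it, split
 * into rsize-sized chunks when the server's rsize is smaller than a
 * page.  The page is unlocked on completion, or here if the request
 * cannot be created.
 */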
int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
                       struct page *page)
{
        LIST_HEAD(one_request);
        struct nfs_page *new;
        unsigned int len;

        len = nfs_page_length(page);
        if (len == 0)
                return nfs_return_empty_page(page);
        new = nfs_create_request(ctx, inode, page, 0, len);
        if (IS_ERR(new)) {
                unlock_page(page);
                return PTR_ERR(new);
        }
        if (len < PAGE_CACHE_SIZE)
                zero_user_segment(page, len, PAGE_CACHE_SIZE);

        nfs_list_add_request(new, &one_request);
        if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
                nfs_pagein_multi(inode, &one_request, 1, len, 0);
        else
                nfs_pagein_one(inode, &one_request, 1, len, 0);
        return 0;
}

static void nfs_readpage_release(struct nfs_page *req)
{
        struct inode *d_inode = req->wb_context->path.dentry->d_inode;

        if (PageUptodate(req->wb_page))
                nfs_readpage_to_fscache(d_inode, req->wb_page, 0);

        unlock_page(req->wb_page);

        dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
                        req->wb_context->path.dentry->d_inode->i_sb->s_id,
                        (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
                        req->wb_bytes,
                        (long long)req_offset(req));
        nfs_clear_request(req);
        nfs_release_request(req);
}

/*
 * Set up the NFS read request struct
 */
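/*
 * The resulting RPC task runs asynchronously (RPC_TASK_ASYNC):
 * rpc_run_task() returns a referenced task that is dropped straight
 * away, and all completion work happens in the call_ops callbacks,
 * whose release handler eventually frees the nfs_read_data.
 */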
static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
                const struct rpc_call_ops *call_ops,
                unsigned int count, unsigned int offset)
{
        struct inode *inode = req->wb_context->path.dentry->d_inode;
        int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
        struct rpc_task *task;
        struct rpc_message msg = {
                .rpc_argp = &data->args,
                .rpc_resp = &data->res,
                .rpc_cred = req->wb_context->cred,
        };
        struct rpc_task_setup task_setup_data = {
                .task = &data->task,
                .rpc_client = NFS_CLIENT(inode),
                .rpc_message = &msg,
                .callback_ops = call_ops,
                .callback_data = data,
                .workqueue = nfsiod_workqueue,
                .flags = RPC_TASK_ASYNC | swap_flags,
        };

        data->req         = req;
        data->inode       = inode;
        data->cred        = msg.rpc_cred;

        data->args.fh     = NFS_FH(inode);
        data->args.offset = req_offset(req) + offset;
        data->args.pgbase = req->wb_pgbase + offset;
        data->args.pages  = data->pagevec;
        data->args.count  = count;
        data->args.context = get_nfs_open_context(req->wb_context);

        data->res.fattr   = &data->fattr;
        data->res.count   = count;
        data->res.eof     = 0;
        nfs_fattr_init(&data->fattr);

        /* Set up the initial task struct. */
        NFS_PROTO(inode)->read_setup(data, &msg);

        dprintk("NFS: %5u initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
                        data->task.tk_pid,
                        inode->i_sb->s_id,
                        (long long)NFS_FILEID(inode),
                        count,
                        (unsigned long long)data->args.offset);

        task = rpc_run_task(&task_setup_data);
        if (IS_ERR(task))
                return PTR_ERR(task);
        rpc_put_task(task);
        return 0;
}

static void
nfs_async_read_error(struct list_head *head)
{
        struct nfs_page *req;

        while (!list_empty(head)) {
                req = nfs_list_entry(head->next);
                nfs_list_remove_request(req);
                SetPageError(req->wb_page);
                nfs_readpage_release(req);
        }
}

/*
 * Generate multiple requests to fill a single page.
 *
 * We optimize to reduce the number of read operations on the wire.  If we
 * detect that we're reading a page, or an area of a page, that is past the
 * end of file, we do not generate NFS read operations but just clear the
 * parts of the page that would have come back zero from the server anyway.
 *
 * We rely on the cached value of i_size to make this determination; another
 * client can fill pages on the server past our cached end-of-file, but we
 * won't see the new data until our attribute cache is updated.  This is more
 * or less conventional NFS client behavior.
 */
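/*
 * Example: with PAGE_CACHE_SIZE of 4096 and an rsize of 1024, a full
 * page is filled by four 1024-byte READs at page offsets 0, 1024, 2048
 * and 3072.  req->wb_complete counts the outstanding sub-requests, and
 * the page is only unlocked once all four have completed.
 */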
static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags)
{
        struct nfs_page *req = nfs_list_entry(head->next);
        struct page *page = req->wb_page;
        struct nfs_read_data *data;
        size_t rsize = NFS_SERVER(inode)->rsize, nbytes;
        unsigned int offset;
        int requests = 0;
        int ret = 0;
        LIST_HEAD(list);

        nfs_list_remove_request(req);

        nbytes = count;
        do {
                size_t len = min(nbytes, rsize);

                data = nfs_readdata_alloc(1);
                if (!data)
                        goto out_bad;
                list_add(&data->pages, &list);
                requests++;
                nbytes -= len;
        } while (nbytes != 0);
        atomic_set(&req->wb_complete, requests);

        ClearPageError(page);
        offset = 0;
        nbytes = count;
        do {
                int ret2;

                data = list_entry(list.next, struct nfs_read_data, pages);
                list_del_init(&data->pages);

                data->pagevec[0] = page;

                if (nbytes < rsize)
                        rsize = nbytes;
                ret2 = nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
                                  rsize, offset);
                if (ret == 0)
                        ret = ret2;
                offset += rsize;
                nbytes -= rsize;
        } while (nbytes != 0);

        return ret;

out_bad:
        while (!list_empty(&list)) {
                data = list_entry(list.next, struct nfs_read_data, pages);
                list_del(&data->pages);
                nfs_readdata_free(data);
        }
        SetPageError(page);
        nfs_readpage_release(req);
        return -ENOMEM;
}

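/*
 * nfs_pagein_one - coalesce a list of page requests into a single READ
 *
 * All pages on @head are gathered into one descriptor and sent as a
 * single READ of @count bytes; on allocation failure every queued
 * request is failed via nfs_async_read_error().
 */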
static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags)
{
        struct nfs_page         *req;
        struct page             **pages;
        struct nfs_read_data    *data;
        int ret = -ENOMEM;

        data = nfs_readdata_alloc(npages);
        if (!data)
                goto out_bad;

        pages = data->pagevec;
        while (!list_empty(head)) {
                req = nfs_list_entry(head->next);
                nfs_list_remove_request(req);
                nfs_list_add_request(req, &data->pages);
                ClearPageError(req->wb_page);
                *pages++ = req->wb_page;
        }
        req = nfs_list_entry(data->pages.next);

        return nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0);
out_bad:
        nfs_async_read_error(head);
        return ret;
}

/*
 * This is the callback from RPC telling us whether a reply was
 * received or some error occurred (timeout or socket shutdown).
 */
int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
{
        int status;

        dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid,
                        task->tk_status);

        status = NFS_PROTO(data->inode)->read_done(task, data);
        if (status != 0)
                return status;

        nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count);

        if (task->tk_status == -ESTALE) {
                set_bit(NFS_INO_STALE, &NFS_I(data->inode)->flags);
                nfs_mark_for_revalidate(data->inode);
        }
        return 0;
}

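/*
 * Handle a short read: if the server returned fewer bytes than asked
 * for without setting eof, advance offset/pgbase by res.count and
 * resubmit the remainder.  E.g. a 4096-byte read that returns 1024
 * bytes is restarted as a 3072-byte read at offset + 1024.
 */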
static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data)
{
        struct nfs_readargs *argp = &data->args;
        struct nfs_readres *resp = &data->res;

        if (resp->eof || resp->count == argp->count)
                goto out;

        /* This is a short read! */
        nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
        /* Has the server at least made some progress? */
        if (resp->count == 0)
                goto out;

        /* Yes, so retry the read at the end of the data */
        argp->offset += resp->count;
        argp->pgbase += resp->count;
        argp->count -= resp->count;
        nfs4_restart_rpc(task, NFS_SERVER(data->inode)->nfs_client);
        return;
out:
        nfs4_sequence_free_slot(NFS_SERVER(data->inode)->nfs_client,
                                &data->res.seq_res);
        return;
}

/*
 * Handle a read reply that fills part of a page.
 */
static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
{
        struct nfs_read_data *data = calldata;

        if (nfs_readpage_result(task, data) != 0)
                return;
        if (task->tk_status < 0)
                return;

        nfs_readpage_truncate_uninitialised_page(data);
        nfs_readpage_retry(task, data);
}

static void nfs_readpage_release_partial(void *calldata)
{
        struct nfs_read_data *data = calldata;
        struct nfs_page *req = data->req;
        struct page *page = req->wb_page;
        int status = data->task.tk_status;

        if (status < 0)
                SetPageError(page);

        if (atomic_dec_and_test(&req->wb_complete)) {
                if (!PageError(page))
                        SetPageUptodate(page);
                nfs_readpage_release(req);
        }
        nfs_readdata_release(calldata);
}

#if defined(CONFIG_NFS_V4_1)
void nfs_read_prepare(struct rpc_task *task, void *calldata)
{
        struct nfs_read_data *data = calldata;

        if (nfs4_setup_sequence(NFS_SERVER(data->inode)->nfs_client,
                                &data->args.seq_args, &data->res.seq_res,
                                0, task))
                return;
        rpc_call_start(task);
}
#endif /* CONFIG_NFS_V4_1 */

static const struct rpc_call_ops nfs_read_partial_ops = {
#if defined(CONFIG_NFS_V4_1)
        .rpc_call_prepare = nfs_read_prepare,
#endif /* CONFIG_NFS_V4_1 */
        .rpc_call_done = nfs_readpage_result_partial,
        .rpc_release = nfs_readpage_release_partial,
};

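/*
 * Mark every page that was completely filled by the reply as up to
 * date.  A partially filled final page only becomes up to date if eof
 * was reached (the rest has been zeroed) or if the read was not short;
 * otherwise nfs_readpage_retry() will fetch the missing bytes.
 */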
static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
{
        unsigned int count = data->res.count;
        unsigned int base = data->args.pgbase;
        struct page **pages;

        if (data->res.eof)
                count = data->args.count;
        if (unlikely(count == 0))
                return;
        pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
        base &= ~PAGE_CACHE_MASK;
        count += base;
        for (; count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
                SetPageUptodate(*pages);
        if (count == 0)
                return;
        /* Was this a short read? */
        if (data->res.eof || data->res.count == data->args.count)
                SetPageUptodate(*pages);
}

/*
 * This is the callback from RPC telling us whether a reply was
 * received or some error occurred (timeout or socket shutdown).
 */
static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
{
        struct nfs_read_data *data = calldata;

        if (nfs_readpage_result(task, data) != 0)
                return;
        if (task->tk_status < 0)
                return;
        /*
         * Note: nfs_readpage_retry may change the values of
         * data->args. In the multi-page case, we therefore need
         * to ensure that we call nfs_readpage_set_pages_uptodate()
         * first.
         */
        nfs_readpage_truncate_uninitialised_page(data);
        nfs_readpage_set_pages_uptodate(data);
        nfs_readpage_retry(task, data);
}

static void nfs_readpage_release_full(void *calldata)
{
        struct nfs_read_data *data = calldata;

        while (!list_empty(&data->pages)) {
                struct nfs_page *req = nfs_list_entry(data->pages.next);

                nfs_list_remove_request(req);
                nfs_readpage_release(req);
        }
        nfs_readdata_release(calldata);
}

static const struct rpc_call_ops nfs_read_full_ops = {
#if defined(CONFIG_NFS_V4_1)
        .rpc_call_prepare = nfs_read_prepare,
#endif /* CONFIG_NFS_V4_1 */
        .rpc_call_done = nfs_readpage_result_full,
        .rpc_release = nfs_readpage_release_full,
};

/*
 * Read a page over NFS.
 * We read the page synchronously in the following case:
 *  -   The error flag is set for this page. This happens only when a
 *      previous async read operation failed.
 */
int nfs_readpage(struct file *file, struct page *page)
{
        struct nfs_open_context *ctx;
        struct inode *inode = page->mapping->host;
        int             error;

        dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
                page, PAGE_CACHE_SIZE, page->index);
        nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
        nfs_add_stats(inode, NFSIOS_READPAGES, 1);

        /*
         * Try to flush any pending writes to the file..
         *
         * NOTE! Because we own the page lock, there cannot
         * be any new pending writes generated at this point
         * for this page (other pages can be written to).
         */
        error = nfs_wb_page(inode, page);
        if (error)
                goto out_unlock;
        if (PageUptodate(page))
                goto out_unlock;

        error = -ESTALE;
        if (NFS_STALE(inode))
                goto out_unlock;

        if (file == NULL) {
                error = -EBADF;
                ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
                if (ctx == NULL)
                        goto out_unlock;
        } else
                ctx = get_nfs_open_context(nfs_file_open_context(file));

        if (!IS_SYNC(inode)) {
                error = nfs_readpage_from_fscache(ctx, inode, page);
                if (error == 0)
                        goto out;
        }

        error = nfs_readpage_async(ctx, inode, page);

out:
        put_nfs_open_context(ctx);
        return error;
out_unlock:
        unlock_page(page);
        return error;
}

struct nfs_readdesc {
        struct nfs_pageio_descriptor *pgio;
        struct nfs_open_context *ctx;
};

static int
readpage_async_filler(void *data, struct page *page)
{
        struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
        struct inode *inode = page->mapping->host;
        struct nfs_page *new;
        unsigned int len;
        int error;

        len = nfs_page_length(page);
        if (len == 0)
                return nfs_return_empty_page(page);

        new = nfs_create_request(desc->ctx, inode, page, 0, len);
        if (IS_ERR(new))
                goto out_error;

        if (len < PAGE_CACHE_SIZE)
                zero_user_segment(page, len, PAGE_CACHE_SIZE);
        if (!nfs_pageio_add_request(desc->pgio, new)) {
                error = desc->pgio->pg_error;
                goto out_unlock;
        }
        return 0;
out_error:
        error = PTR_ERR(new);
        SetPageError(page);
out_unlock:
        unlock_page(page);
        return error;
}

int nfs_readpages(struct file *filp, struct address_space *mapping,
                struct list_head *pages, unsigned nr_pages)
{
        struct nfs_pageio_descriptor pgio;
        struct nfs_readdesc desc = {
                .pgio = &pgio,
        };
        struct inode *inode = mapping->host;
        struct nfs_server *server = NFS_SERVER(inode);
        size_t rsize = server->rsize;
        unsigned long npages;
        int ret = -ESTALE;

        dprintk("NFS: nfs_readpages (%s/%Ld %d)\n",
                        inode->i_sb->s_id,
                        (long long)NFS_FILEID(inode),
                        nr_pages);
        nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);

        if (NFS_STALE(inode))
                goto out;

        if (filp == NULL) {
                desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
                if (desc.ctx == NULL)
                        return -EBADF;
        } else
                desc.ctx = get_nfs_open_context(nfs_file_open_context(filp));

        /* attempt to read as many of the pages as possible from the cache
         * - this returns -ENOBUFS immediately if the cookie is negative
         */
        ret = nfs_readpages_from_fscache(desc.ctx, inode, mapping,
                                         pages, &nr_pages);
        if (ret == 0)
                goto read_complete; /* all pages were read */

        if (rsize < PAGE_CACHE_SIZE)
                nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
        else
                nfs_pageio_init(&pgio, inode, nfs_pagein_one, rsize, 0);

        ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);

        nfs_pageio_complete(&pgio);
        npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
        nfs_add_stats(inode, NFSIOS_READPAGES, npages);
read_complete:
        put_nfs_open_context(desc.ctx);
out:
        return ret;
}

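/*
 * The mempool pre-allocates MIN_POOL_READ descriptors, so
 * nfs_readdata_alloc() can keep making progress even when the page
 * allocator is under pressure and reads are needed to reclaim memory.
 */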
int __init nfs_init_readpagecache(void)
{
        nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
                                             sizeof(struct nfs_read_data),
                                             0, SLAB_HWCACHE_ALIGN,
                                             NULL);
        if (nfs_rdata_cachep == NULL)
                return -ENOMEM;

        nfs_rdata_mempool = mempool_create_slab_pool(MIN_POOL_READ,
                                                     nfs_rdata_cachep);
        if (nfs_rdata_mempool == NULL) {
                /* don't leak the slab cache if mempool setup fails */
                kmem_cache_destroy(nfs_rdata_cachep);
                return -ENOMEM;
        }

        return 0;
}

void nfs_destroy_readpagecache(void)
{
        mempool_destroy(nfs_rdata_mempool);
        kmem_cache_destroy(nfs_rdata_cachep);
}