linux/fs/cachefiles/rdwr.c
/* Storage object read/write
 *
 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public Licence
 * as published by the Free Software Foundation; either version
 * 2 of the Licence, or (at your option) any later version.
 */

#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/file.h>
#include "internal.h"

/*
 * detect wake-up events generated by the unlocking of pages in which we're
 * interested
 * - we use this to detect read completion of backing pages
 * - the caller holds the waitqueue lock
 */
static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode,
                                  int sync, void *_key)
{
        struct cachefiles_one_read *monitor =
                container_of(wait, struct cachefiles_one_read, monitor);
        struct cachefiles_object *object;
        struct wait_bit_key *key = _key;
        struct page *page = wait->private;

        ASSERT(key);

        _enter("{%lu},%u,%d,{%p,%u}",
               monitor->netfs_page->index, mode, sync,
               key->flags, key->bit_nr);

        if (key->flags != &page->flags ||
            key->bit_nr != PG_locked)
                return 0;

        _debug("--- monitor %p %lx ---", page, page->flags);

        if (!PageUptodate(page) && !PageError(page)) {
                /* unlocked, not uptodate and not erroneous? */
                _debug("page probably truncated");
        }

        /* remove from the waitqueue */
        list_del(&wait->task_list);

        /* move onto the action list and queue for FS-Cache thread pool */
        ASSERT(monitor->op);

        object = container_of(monitor->op->op.object,
                              struct cachefiles_object, fscache);

        spin_lock(&object->work_lock);
        list_add_tail(&monitor->op_link, &monitor->op->to_do);
        spin_unlock(&object->work_lock);

        fscache_enqueue_retrieval(monitor->op);
        return 0;
}
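
/*
 * Rough sequence of a monitored read as implemented in this file:
 *  1. cachefiles_read_backing_file*() installs a monitor on the backing
 *     page's wait queue and starts ->readpage()
 *  2. completion of the read unlocks the backing page, which fires
 *     cachefiles_read_waiter() above
 *  3. the waiter moves the monitor onto op->to_do and enqueues the op
 *  4. cachefiles_read_copier() drains op->to_do, copying the data into the
 *     netfs pages
 */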

/*
 * handle a probably truncated page
 * - check to see if the page is still relevant and reissue the read if
 *   possible
 * - return -EIO on error, -ENODATA if the page is gone, -EINPROGRESS if we
 *   must wait again and 0 if successful
 */
static int cachefiles_read_reissue(struct cachefiles_object *object,
                                   struct cachefiles_one_read *monitor)
{
        struct address_space *bmapping = object->backer->d_inode->i_mapping;
        struct page *backpage = monitor->back_page, *backpage2;
        int ret;

        _enter("{ino=%lx},{%lx,%lx}",
               object->backer->d_inode->i_ino,
               backpage->index, backpage->flags);

        /* skip if the page was truncated away completely */
        if (backpage->mapping != bmapping) {
                _leave(" = -ENODATA [mapping]");
                return -ENODATA;
        }

        backpage2 = find_get_page(bmapping, backpage->index);
        if (!backpage2) {
                _leave(" = -ENODATA [gone]");
                return -ENODATA;
        }

        if (backpage != backpage2) {
                put_page(backpage2);
                _leave(" = -ENODATA [different]");
                return -ENODATA;
        }

        /* the page is still there and we already have a ref on it, so we don't
         * need a second */
        put_page(backpage2);

        INIT_LIST_HEAD(&monitor->op_link);
        add_page_wait_queue(backpage, &monitor->monitor);

        if (trylock_page(backpage)) {
                ret = -EIO;
                if (PageError(backpage))
                        goto unlock_discard;
                ret = 0;
                if (PageUptodate(backpage))
                        goto unlock_discard;

                _debug("reissue read");
                ret = bmapping->a_ops->readpage(NULL, backpage);
                if (ret < 0)
                        goto unlock_discard;
        }

        /* but the page may have been read before the monitor was installed, so
         * the monitor may miss the event - so we have to ensure that we do get
         * one in such a case */
        if (trylock_page(backpage)) {
                _debug("jumpstart %p {%lx}", backpage, backpage->flags);
                unlock_page(backpage);
        }
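        /* an illustrative interleaving that the trylock above closes:
         *   t0: the read completes and unlock_page() wakes an empty queue
         *   t1: add_page_wait_queue() installs the monitor - too late
         * briefly taking and dropping the page lock re-issues the wake-up so
         * that the monitor cannot be stranded */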

        /* it'll reappear on the todo list */
        _leave(" = -EINPROGRESS");
        return -EINPROGRESS;

unlock_discard:
        unlock_page(backpage);
        spin_lock_irq(&object->work_lock);
        list_del(&monitor->op_link);
        spin_unlock_irq(&object->work_lock);
        _leave(" = %d", ret);
        return ret;
}

/*
 * copy data from backing pages to netfs pages to complete a read operation
 * - driven by FS-Cache's thread pool
 */
static void cachefiles_read_copier(struct fscache_operation *_op)
{
        struct cachefiles_one_read *monitor;
        struct cachefiles_object *object;
        struct fscache_retrieval *op;
        struct pagevec pagevec;
        int error, max;

        op = container_of(_op, struct fscache_retrieval, op);
        object = container_of(op->op.object,
                              struct cachefiles_object, fscache);

        _enter("{ino=%lu}", object->backer->d_inode->i_ino);

        pagevec_init(&pagevec, 0);

        max = 8;
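        /* the budget of eight pages bounds how long this work item can
         * monopolise an FS-Cache thread-pool worker before requeueing itself
         * (see the "maxed out" path below) */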
        spin_lock_irq(&object->work_lock);

        while (!list_empty(&op->to_do)) {
                monitor = list_entry(op->to_do.next,
                                     struct cachefiles_one_read, op_link);
                list_del(&monitor->op_link);

                spin_unlock_irq(&object->work_lock);

                _debug("- copy {%lu}", monitor->back_page->index);

        recheck:
                if (test_bit(FSCACHE_COOKIE_INVALIDATING,
                             &object->fscache.cookie->flags)) {
                        error = -ESTALE;
                } else if (PageUptodate(monitor->back_page)) {
                        copy_highpage(monitor->netfs_page, monitor->back_page);
                        fscache_mark_page_cached(monitor->op,
                                                 monitor->netfs_page);
                        error = 0;
                } else if (!PageError(monitor->back_page)) {
                        /* the page has probably been truncated */
                        error = cachefiles_read_reissue(object, monitor);
                        if (error == -EINPROGRESS)
                                goto next;
                        goto recheck;
                } else {
                        cachefiles_io_error_obj(
                                object,
                                "Readpage failed on backing file %lx",
                                (unsigned long) monitor->back_page->flags);
                        error = -EIO;
                }

                page_cache_release(monitor->back_page);

                fscache_end_io(op, monitor->netfs_page, error);
                page_cache_release(monitor->netfs_page);
                fscache_retrieval_complete(op, 1);
                fscache_put_retrieval(op);
                kfree(monitor);

        next:
                /* let the thread pool have some air occasionally */
                max--;
                if (max < 0 || need_resched()) {
                        if (!list_empty(&op->to_do))
                                fscache_enqueue_retrieval(op);
                        _leave(" [maxed out]");
                        return;
                }

                spin_lock_irq(&object->work_lock);
        }

        spin_unlock_irq(&object->work_lock);
        _leave("");
}

/*
 * read the corresponding page to the given set from the backing file
 * - an uncertain page is simply discarded, to be tried again another time
 */
static int cachefiles_read_backing_file_one(struct cachefiles_object *object,
                                            struct fscache_retrieval *op,
                                            struct page *netpage,
                                            struct pagevec *pagevec)
{
        struct cachefiles_one_read *monitor;
        struct address_space *bmapping;
        struct page *newpage, *backpage;
        int ret;

        _enter("");

        pagevec_reinit(pagevec);

        _debug("read back %p{%lu,%d}",
               netpage, netpage->index, page_count(netpage));

        monitor = kzalloc(sizeof(*monitor), cachefiles_gfp);
        if (!monitor)
                goto nomem;

        monitor->netfs_page = netpage;
        monitor->op = fscache_get_retrieval(op);

        init_waitqueue_func_entry(&monitor->monitor, cachefiles_read_waiter);

        /* attempt to get hold of the backing page */
        bmapping = object->backer->d_inode->i_mapping;
        newpage = NULL;

        for (;;) {
                backpage = find_get_page(bmapping, netpage->index);
                if (backpage)
                        goto backing_page_already_present;

                if (!newpage) {
                        newpage = __page_cache_alloc(cachefiles_gfp |
                                                     __GFP_COLD);
                        if (!newpage)
                                goto nomem_monitor;
                }

                ret = add_to_page_cache(newpage, bmapping,
                                        netpage->index, cachefiles_gfp);
                if (ret == 0)
                        goto installed_new_backing_page;
                if (ret != -EEXIST)
                        goto nomem_page;
        }

        /* we've installed a new backing page, so now we need to add it
         * to the LRU list and start it reading */
installed_new_backing_page:
        _debug("- new %p", newpage);

        backpage = newpage;
        newpage = NULL;

        page_cache_get(backpage);
        pagevec_add(pagevec, backpage);
        __pagevec_lru_add_file(pagevec);

read_backing_page:
        ret = bmapping->a_ops->readpage(NULL, backpage);
        if (ret < 0)
                goto read_error;

        /* set the monitor to transfer the data across */
monitor_backing_page:
        _debug("- monitor add");

        /* install the monitor */
        page_cache_get(monitor->netfs_page);
        page_cache_get(backpage);
        monitor->back_page = backpage;
        monitor->monitor.private = backpage;
        add_page_wait_queue(backpage, &monitor->monitor);
        monitor = NULL;

        /* but the page may have been read before the monitor was installed, so
         * the monitor may miss the event - so we have to ensure that we do get
         * one in such a case */
        if (trylock_page(backpage)) {
                _debug("jumpstart %p {%lx}", backpage, backpage->flags);
                unlock_page(backpage);
        }
        goto success;

        /* if the backing page is already present, it can be in one of
         * three states: read in progress, read failed or read okay */
backing_page_already_present:
        _debug("- present");

        if (newpage) {
                page_cache_release(newpage);
                newpage = NULL;
        }

        if (PageError(backpage))
                goto io_error;

        if (PageUptodate(backpage))
                goto backing_page_already_uptodate;

        if (!trylock_page(backpage))
                goto monitor_backing_page;
        _debug("read %p {%lx}", backpage, backpage->flags);
        goto read_backing_page;

        /* the backing page is already up to date, attach the netfs
         * page to the pagecache and LRU and copy the data across */
backing_page_already_uptodate:
        _debug("- uptodate");

        fscache_mark_page_cached(op, netpage);

        copy_highpage(netpage, backpage);
        fscache_end_io(op, netpage, 0);
        fscache_retrieval_complete(op, 1);

success:
        _debug("success");
        ret = 0;

out:
        if (backpage)
                page_cache_release(backpage);
        if (monitor) {
                fscache_put_retrieval(monitor->op);
                kfree(monitor);
        }
        _leave(" = %d", ret);
        return ret;

read_error:
        _debug("read error %d", ret);
        if (ret == -ENOMEM) {
                fscache_retrieval_complete(op, 1);
                goto out;
        }
io_error:
        cachefiles_io_error_obj(object, "Page read error on backing file");
        fscache_retrieval_complete(op, 1);
        ret = -ENOBUFS;
        goto out;

nomem_page:
        page_cache_release(newpage);
nomem_monitor:
        fscache_put_retrieval(monitor->op);
        kfree(monitor);
nomem:
        fscache_retrieval_complete(op, 1);
        _leave(" = -ENOMEM");
        return -ENOMEM;
}

/*
 * read a page from the cache or allocate a block in which to store it
 * - cache withdrawal is prevented by the caller
 * - returns -EINTR if interrupted
 * - returns -ENOMEM if we ran out of memory
 * - returns -ENOBUFS if no buffers can be made available
 * - returns -ENOBUFS if page is beyond EOF
 * - if the page is backed by a block in the cache:
 *   - a read will be started which will call the callback on completion
 *   - 0 will be returned
 * - else if the page is unbacked:
 *   - the metadata will be retained
 *   - -ENODATA will be returned
 */
int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
                                  struct page *page,
                                  gfp_t gfp)
{
        struct cachefiles_object *object;
        struct cachefiles_cache *cache;
        struct pagevec pagevec;
        struct inode *inode;
        sector_t block0, block;
        unsigned shift;
        int ret;

        object = container_of(op->op.object,
                              struct cachefiles_object, fscache);
        cache = container_of(object->fscache.cache,
                             struct cachefiles_cache, cache);

        _enter("{%p},{%lx},,,", object, page->index);

        if (!object->backer)
                goto enobufs;

        inode = object->backer->d_inode;
        ASSERT(S_ISREG(inode->i_mode));
        ASSERT(inode->i_mapping->a_ops->bmap);
        ASSERT(inode->i_mapping->a_ops->readpages);

        /* calculate the shift required to use bmap */
        if (inode->i_sb->s_blocksize > PAGE_SIZE)
                goto enobufs;

        shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits;
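        /* e.g. with 4KiB pages (PAGE_SHIFT 12) on a backing filesystem with
         * 1KiB blocks (s_blocksize_bits 10), shift is 2 and page index N
         * probes filesystem block N << 2 */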

        op->op.flags &= FSCACHE_OP_KEEP_FLAGS;
        op->op.flags |= FSCACHE_OP_ASYNC;
        op->op.processor = cachefiles_read_copier;
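        /* the op now runs asynchronously: cachefiles_read_waiter() queues
         * completed monitors on op->to_do, and the FS-Cache thread pool calls
         * cachefiles_read_copier() to transfer the data */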

        pagevec_init(&pagevec, 0);

        /* we assume the absence or presence of the first block is a good
         * enough indication for the page as a whole
         * - TODO: don't use bmap() for this as it doesn't indicate errors and
         *   so is _not_ actually good enough, but it's all we've got for the
         *   moment
         */
        block0 = page->index;
        block0 <<= shift;

        block = inode->i_mapping->a_ops->bmap(inode->i_mapping, block0);
        _debug("%llx -> %llx",
               (unsigned long long) block0,
               (unsigned long long) block);
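        /* a block number of 0 from bmap() indicates a hole: no block was ever
         * allocated here, so the cache holds no data for this page */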

        if (block) {
                /* submit the apparently valid page to the backing fs to be
                 * read from disk */
                ret = cachefiles_read_backing_file_one(object, op, page,
                                                       &pagevec);
        } else if (cachefiles_has_space(cache, 0, 1) == 0) {
                /* there's space in the cache we can use */
                fscache_mark_page_cached(op, page);
                fscache_retrieval_complete(op, 1);
                ret = -ENODATA;
        } else {
                goto enobufs;
        }

        _leave(" = %d", ret);
        return ret;

enobufs:
        fscache_retrieval_complete(op, 1);
        _leave(" = -ENOBUFS");
        return -ENOBUFS;
}

/*
 * read the corresponding pages to the given set from the backing file
 * - any uncertain pages are simply discarded, to be tried again another time
 */
static int cachefiles_read_backing_file(struct cachefiles_object *object,
                                        struct fscache_retrieval *op,
                                        struct list_head *list)
{
        struct cachefiles_one_read *monitor = NULL;
        struct address_space *bmapping = object->backer->d_inode->i_mapping;
        struct pagevec lru_pvec;
        struct page *newpage = NULL, *netpage, *_n, *backpage = NULL;
        int ret = 0;

        _enter("");

        pagevec_init(&lru_pvec, 0);

        list_for_each_entry_safe(netpage, _n, list, lru) {
                list_del(&netpage->lru);

                _debug("read back %p{%lu,%d}",
                       netpage, netpage->index, page_count(netpage));

                if (!monitor) {
                        monitor = kzalloc(sizeof(*monitor), cachefiles_gfp);
                        if (!monitor)
                                goto nomem;

                        monitor->op = fscache_get_retrieval(op);
                        init_waitqueue_func_entry(&monitor->monitor,
                                                  cachefiles_read_waiter);
                }

                for (;;) {
                        backpage = find_get_page(bmapping, netpage->index);
                        if (backpage)
                                goto backing_page_already_present;

                        if (!newpage) {
                                newpage = __page_cache_alloc(cachefiles_gfp |
                                                             __GFP_COLD);
                                if (!newpage)
                                        goto nomem;
                        }

                        ret = add_to_page_cache(newpage, bmapping,
                                                netpage->index, cachefiles_gfp);
                        if (ret == 0)
                                goto installed_new_backing_page;
                        if (ret != -EEXIST)
                                goto nomem;
                }

                /* we've installed a new backing page, so now we need to add it
                 * to the LRU list and start it reading */
        installed_new_backing_page:
                _debug("- new %p", newpage);

                backpage = newpage;
                newpage = NULL;

                page_cache_get(backpage);
                if (!pagevec_add(&lru_pvec, backpage))
                        __pagevec_lru_add_file(&lru_pvec);

        reread_backing_page:
                ret = bmapping->a_ops->readpage(NULL, backpage);
                if (ret < 0)
                        goto read_error;

                /* add the netfs page to the pagecache and LRU, and set the
                 * monitor to transfer the data across */
        monitor_backing_page:
                _debug("- monitor add");

                ret = add_to_page_cache(netpage, op->mapping, netpage->index,
                                        cachefiles_gfp);
                if (ret < 0) {
                        if (ret == -EEXIST) {
                                page_cache_release(netpage);
                                fscache_retrieval_complete(op, 1);
                                continue;
                        }
                        goto nomem;
                }

                page_cache_get(netpage);
                if (!pagevec_add(&lru_pvec, netpage))
                        __pagevec_lru_add_file(&lru_pvec);

                /* install a monitor */
                page_cache_get(netpage);
                monitor->netfs_page = netpage;

                page_cache_get(backpage);
                monitor->back_page = backpage;
                monitor->monitor.private = backpage;
                add_page_wait_queue(backpage, &monitor->monitor);
                monitor = NULL;

                /* but the page may have been read before the monitor was
                 * installed, so the monitor may miss the event - so we have to
                 * ensure that we do get one in such a case */
                if (trylock_page(backpage)) {
                        _debug("2unlock %p {%lx}", backpage, backpage->flags);
                        unlock_page(backpage);
                }

                page_cache_release(backpage);
                backpage = NULL;

                page_cache_release(netpage);
                netpage = NULL;
                continue;

                /* if the backing page is already present, it can be in one of
                 * three states: read in progress, read failed or read okay */
        backing_page_already_present:
                _debug("- present %p", backpage);

                if (PageError(backpage))
                        goto io_error;

                if (PageUptodate(backpage))
                        goto backing_page_already_uptodate;

                _debug("- not ready %p{%lx}", backpage, backpage->flags);

                if (!trylock_page(backpage))
                        goto monitor_backing_page;

                if (PageError(backpage)) {
                        _debug("error %lx", backpage->flags);
                        unlock_page(backpage);
                        goto io_error;
                }

                if (PageUptodate(backpage))
                        goto backing_page_already_uptodate_unlock;

                /* we've locked a page that's neither up to date nor erroneous,
                 * so we need to attempt to read it again */
                goto reread_backing_page;

                /* the backing page is already up to date, attach the netfs
                 * page to the pagecache and LRU and copy the data across */
        backing_page_already_uptodate_unlock:
                _debug("uptodate %lx", backpage->flags);
                unlock_page(backpage);
        backing_page_already_uptodate:
                _debug("- uptodate");

                ret = add_to_page_cache(netpage, op->mapping, netpage->index,
                                        cachefiles_gfp);
                if (ret < 0) {
                        if (ret == -EEXIST) {
                                page_cache_release(netpage);
                                fscache_retrieval_complete(op, 1);
                                continue;
                        }
                        goto nomem;
                }

                copy_highpage(netpage, backpage);

                page_cache_release(backpage);
                backpage = NULL;

                fscache_mark_page_cached(op, netpage);

                page_cache_get(netpage);
                if (!pagevec_add(&lru_pvec, netpage))
                        __pagevec_lru_add_file(&lru_pvec);

                /* the netpage is unlocked and marked up to date here */
                fscache_end_io(op, netpage, 0);
                page_cache_release(netpage);
                netpage = NULL;
                fscache_retrieval_complete(op, 1);
                continue;
        }

        netpage = NULL;

        _debug("out");

out:
        /* tidy up */
        pagevec_lru_add_file(&lru_pvec);

        if (newpage)
                page_cache_release(newpage);
        if (netpage)
                page_cache_release(netpage);
        if (backpage)
                page_cache_release(backpage);
        if (monitor) {
                fscache_put_retrieval(op);
                kfree(monitor);
        }

        list_for_each_entry_safe(netpage, _n, list, lru) {
                list_del(&netpage->lru);
                page_cache_release(netpage);
                fscache_retrieval_complete(op, 1);
        }

        _leave(" = %d", ret);
        return ret;

nomem:
        _debug("nomem");
        ret = -ENOMEM;
        goto record_page_complete;

read_error:
        _debug("read error %d", ret);
        if (ret == -ENOMEM)
                goto record_page_complete;
io_error:
        cachefiles_io_error_obj(object, "Page read error on backing file");
        ret = -ENOBUFS;
record_page_complete:
        fscache_retrieval_complete(op, 1);
        goto out;
}

/*
 * read a list of pages from the cache or allocate blocks in which to store
 * them
 */
int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
                                   struct list_head *pages,
                                   unsigned *nr_pages,
                                   gfp_t gfp)
{
        struct cachefiles_object *object;
        struct cachefiles_cache *cache;
        struct list_head backpages;
        struct pagevec pagevec;
        struct inode *inode;
        struct page *page, *_n;
        unsigned shift, nrbackpages;
        int ret, ret2, space;

        object = container_of(op->op.object,
                              struct cachefiles_object, fscache);
        cache = container_of(object->fscache.cache,
                             struct cachefiles_cache, cache);

        _enter("{OBJ%x,%d},,%d,,",
               object->fscache.debug_id, atomic_read(&op->op.usage),
               *nr_pages);

        if (!object->backer)
                goto all_enobufs;

        space = 1;
        if (cachefiles_has_space(cache, 0, *nr_pages) < 0)
                space = 0;

        inode = object->backer->d_inode;
        ASSERT(S_ISREG(inode->i_mode));
        ASSERT(inode->i_mapping->a_ops->bmap);
        ASSERT(inode->i_mapping->a_ops->readpages);

        /* calculate the shift required to use bmap */
        if (inode->i_sb->s_blocksize > PAGE_SIZE)
                goto all_enobufs;

        shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits;

        pagevec_init(&pagevec, 0);

        op->op.flags &= FSCACHE_OP_KEEP_FLAGS;
        op->op.flags |= FSCACHE_OP_ASYNC;
        op->op.processor = cachefiles_read_copier;

        INIT_LIST_HEAD(&backpages);
        nrbackpages = 0;

        ret = space ? -ENODATA : -ENOBUFS;
        list_for_each_entry_safe(page, _n, pages, lru) {
                sector_t block0, block;

                /* we assume the absence or presence of the first block is a
                 * good enough indication for the page as a whole
                 * - TODO: don't use bmap() for this as it doesn't indicate
                 *   errors and so is _not_ actually good enough, but it's all
                 *   we've got for the moment
                 */
                block0 = page->index;
                block0 <<= shift;

                block = inode->i_mapping->a_ops->bmap(inode->i_mapping,
                                                      block0);
                _debug("%llx -> %llx",
                       (unsigned long long) block0,
                       (unsigned long long) block);

                if (block) {
                        /* we have data - add it to the list to give to the
                         * backing fs */
                        list_move(&page->lru, &backpages);
                        (*nr_pages)--;
                        nrbackpages++;
                } else if (space && pagevec_add(&pagevec, page) == 0) {
                        fscache_mark_pages_cached(op, &pagevec);
                        fscache_retrieval_complete(op, 1);
                        ret = -ENODATA;
                } else {
                        fscache_retrieval_complete(op, 1);
                }
        }

        if (pagevec_count(&pagevec) > 0)
                fscache_mark_pages_cached(op, &pagevec);

        if (list_empty(pages))
                ret = 0;

        /* submit the apparently valid pages to the backing fs to be read from
         * disk */
        if (nrbackpages > 0) {
                ret2 = cachefiles_read_backing_file(object, op, &backpages);
                if (ret2 == -ENOMEM || ret2 == -EINTR)
                        ret = ret2;
        }

        _leave(" = %d [nr=%u%s]",
               ret, *nr_pages, list_empty(pages) ? " empty" : "");
        return ret;

all_enobufs:
        fscache_retrieval_complete(op, *nr_pages);
        return -ENOBUFS;
}

/*
 * allocate a block in the cache in which to store a page
 * - cache withdrawal is prevented by the caller
 * - returns -EINTR if interrupted
 * - returns -ENOMEM if we ran out of memory
 * - returns -ENOBUFS if no buffers can be made available
 * - returns -ENOBUFS if page is beyond EOF
 * - otherwise:
 *   - the metadata will be retained
 *   - 0 will be returned
 */
int cachefiles_allocate_page(struct fscache_retrieval *op,
                             struct page *page,
                             gfp_t gfp)
{
        struct cachefiles_object *object;
        struct cachefiles_cache *cache;
        int ret;

        object = container_of(op->op.object,
                              struct cachefiles_object, fscache);
        cache = container_of(object->fscache.cache,
                             struct cachefiles_cache, cache);

        _enter("%p,{%lx},", object, page->index);

        ret = cachefiles_has_space(cache, 0, 1);
        if (ret == 0)
                fscache_mark_page_cached(op, page);
        else
                ret = -ENOBUFS;

        fscache_retrieval_complete(op, 1);
        _leave(" = %d", ret);
        return ret;
}

/*
 * allocate blocks in the cache in which to store a set of pages
 * - cache withdrawal is prevented by the caller
 * - returns -EINTR if interrupted
 * - returns -ENOMEM if we ran out of memory
 * - returns -ENOBUFS if some buffers couldn't be made available
 * - returns -ENOBUFS if some pages are beyond EOF
 * - otherwise:
 *   - -ENODATA will be returned
 * - metadata will be retained for any page marked
 */
int cachefiles_allocate_pages(struct fscache_retrieval *op,
                              struct list_head *pages,
                              unsigned *nr_pages,
                              gfp_t gfp)
{
        struct cachefiles_object *object;
        struct cachefiles_cache *cache;
        struct pagevec pagevec;
        struct page *page;
        int ret;

        object = container_of(op->op.object,
                              struct cachefiles_object, fscache);
        cache = container_of(object->fscache.cache,
                             struct cachefiles_cache, cache);

        _enter("%p,,,%d,", object, *nr_pages);

        ret = cachefiles_has_space(cache, 0, *nr_pages);
        if (ret == 0) {
                pagevec_init(&pagevec, 0);

                list_for_each_entry(page, pages, lru) {
                        if (pagevec_add(&pagevec, page) == 0)
                                fscache_mark_pages_cached(op, &pagevec);
                }

                if (pagevec_count(&pagevec) > 0)
                        fscache_mark_pages_cached(op, &pagevec);
                ret = -ENODATA;
        } else {
                ret = -ENOBUFS;
        }

        fscache_retrieval_complete(op, *nr_pages);
        _leave(" = %d", ret);
        return ret;
}

/*
 * request a page be stored in the cache
 * - cache withdrawal is prevented by the caller
 * - this request may be ignored if there's no cache block available, in which
 *   case -ENOBUFS will be returned
 * - if the op is in progress, 0 will be returned
 */
int cachefiles_write_page(struct fscache_storage *op, struct page *page)
{
        struct cachefiles_object *object;
        struct cachefiles_cache *cache;
        mm_segment_t old_fs;
        struct file *file;
        struct path path;
        loff_t pos, eof;
        size_t len;
        void *data;
        int ret;

        ASSERT(op != NULL);
        ASSERT(page != NULL);

        object = container_of(op->op.object,
                              struct cachefiles_object, fscache);

        _enter("%p,%p{%lx},,,", object, page, page->index);

        if (!object->backer) {
                _leave(" = -ENOBUFS");
                return -ENOBUFS;
        }

        ASSERT(S_ISREG(object->backer->d_inode->i_mode));

        cache = container_of(object->fscache.cache,
                             struct cachefiles_cache, cache);

        /* write the page to the backing filesystem and let it store it in its
         * own time */
        path.mnt = cache->mnt;
        path.dentry = object->backer;
        file = dentry_open(&path, O_RDWR | O_LARGEFILE, cache->cache_cred);
        if (IS_ERR(file)) {
                ret = PTR_ERR(file);
        } else {
                ret = -EIO;
                if (file->f_op->write) {
                        pos = (loff_t) page->index << PAGE_SHIFT;

                        /* we mustn't write more data than we have, so we have
                         * to beware of a partial page at EOF */
                        eof = object->fscache.store_limit_l;
                        len = PAGE_SIZE;
                        if (eof & ~PAGE_MASK) {
                                ASSERTCMP(pos, <, eof);
                                if (eof - pos < PAGE_SIZE) {
                                        _debug("cut short %llx to %llx",
                                               pos, eof);
                                        len = eof - pos;
                                        ASSERTCMP(pos + len, ==, eof);
                                }
                        }
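                        /* e.g. with 4KiB pages, an illustrative
                         * store_limit_l of 0x2a00 and pos of 0x2000:
                         * eof & ~PAGE_MASK is 0xa00 and eof - pos is 0xa00,
                         * so len is cut short to 0xa00 bytes */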

                        data = kmap(page);
                        file_start_write(file);
                        old_fs = get_fs();
                        set_fs(KERNEL_DS);
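                        /* with KERNEL_DS in force, the address-limit check
                         * lets ->write() accept the kernel-mapped page buffer
                         * despite the __user cast below */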
                        ret = file->f_op->write(
                                file, (const void __user *) data, len, &pos);
                        set_fs(old_fs);
                        kunmap(page);
                        file_end_write(file);
                        if (ret != len)
                                ret = -EIO;
                }
                fput(file);
        }

        if (ret < 0) {
                if (ret == -EIO)
                        cachefiles_io_error_obj(
                                object, "Write page to backing file failed");
                ret = -ENOBUFS;
        }

        _leave(" = %d", ret);
        return ret;
}

/*
 * detach a backing block from a page
 * - cache withdrawal is prevented by the caller
 */
void cachefiles_uncache_page(struct fscache_object *_object, struct page *page)
{
        struct cachefiles_object *object;
        struct cachefiles_cache *cache;

        object = container_of(_object, struct cachefiles_object, fscache);
        cache = container_of(object->fscache.cache,
                             struct cachefiles_cache, cache);

        _enter("%p,{%lu}", object, page->index);

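        /* note: the cookie lock is assumed to be held on entry, taken by the
         * fscache caller (__fscache_uncache_page()), and is released here on
         * its behalf */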
        spin_unlock(&object->fscache.cookie->lock);
}