linux/drivers/staging/lustre/lustre/obdclass/cl_page.c
   1/*
   2 * GPL HEADER START
   3 *
   4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License version 2 only,
   8 * as published by the Free Software Foundation.
   9 *
  10 * This program is distributed in the hope that it will be useful, but
  11 * WITHOUT ANY WARRANTY; without even the implied warranty of
  12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13 * General Public License version 2 for more details (a copy is included
  14 * in the LICENSE file that accompanied this code).
  15 *
  16 * You should have received a copy of the GNU General Public License
  17 * version 2 along with this program; If not, see
  18 * http://www.gnu.org/licenses/gpl-2.0.html
  19 *
  20 * GPL HEADER END
  21 */
  22/*
  23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  24 * Use is subject to license terms.
  25 *
  26 * Copyright (c) 2011, 2015, Intel Corporation.
  27 */
  28/*
  29 * This file is part of Lustre, http://www.lustre.org/
  30 * Lustre is a trademark of Sun Microsystems, Inc.
  31 *
  32 * Client Lustre Page.
  33 *
  34 *   Author: Nikita Danilov <nikita.danilov@sun.com>
  35 *   Author: Jinshan Xiong <jinshan.xiong@intel.com>
  36 */
  37
  38#define DEBUG_SUBSYSTEM S_CLASS
  39
  40#include "../../include/linux/libcfs/libcfs.h"
  41#include "../include/obd_class.h"
  42#include "../include/obd_support.h"
  43#include <linux/list.h>
  44
  45#include "../include/cl_object.h"
  46#include "cl_internal.h"
  47
  48static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg);
  49
  50# define PASSERT(env, page, expr)                                          \
  51        do {                                                               \
  52                if (unlikely(!(expr))) {                                   \
  53                        CL_PAGE_DEBUG(D_ERROR, (env), (page), #expr "\n"); \
  54                        LASSERT(0);                                        \
  55                }                                                          \
  56        } while (0)
  57
  58# define PINVRNT(env, page, exp) \
  59        ((void)sizeof(env), (void)sizeof(page), (void)sizeof !!(exp))
  60
  61/**
  62 * Internal version of cl_page_get().
  63 *
  64 * This function can be used to obtain an initial reference to a previously
  65 * unreferenced cached object. It can be called only if concurrent page
  66 * reclamation is somehow prevented, e.g., by keeping a lock on the VM page
  67 * associated with \a page.
  68 *
  69 * Use with care! Not exported.
  70 */
  71static void cl_page_get_trust(struct cl_page *page)
  72{
  73        LASSERT(atomic_read(&page->cp_ref) > 0);
  74        atomic_inc(&page->cp_ref);
  75}
  76
  77/**
  78 * Returns a slice within a page, corresponding to the given layer in the
  79 * device stack.
  80 *
  81 * \see cl_lock_at()
  82 */
  83static const struct cl_page_slice *
  84cl_page_at_trusted(const struct cl_page *page,
  85                   const struct lu_device_type *dtype)
  86{
  87        const struct cl_page_slice *slice;
  88
  89        list_for_each_entry(slice, &page->cp_layers, cpl_linkage) {
  90                if (slice->cpl_obj->co_lu.lo_dev->ld_type == dtype)
  91                        return slice;
  92        }
  93        return NULL;
  94}
  95
  96static void cl_page_free(const struct lu_env *env, struct cl_page *page)
  97{
  98        struct cl_object *obj  = page->cp_obj;
  99
 100        PASSERT(env, page, list_empty(&page->cp_batch));
 101        PASSERT(env, page, !page->cp_owner);
 102        PASSERT(env, page, !page->cp_req);
 103        PASSERT(env, page, page->cp_state == CPS_FREEING);
 104
 105        while (!list_empty(&page->cp_layers)) {
 106                struct cl_page_slice *slice;
 107
 108                slice = list_entry(page->cp_layers.next,
 109                                   struct cl_page_slice, cpl_linkage);
 110                list_del_init(page->cp_layers.next);
 111                if (unlikely(slice->cpl_ops->cpo_fini))
 112                        slice->cpl_ops->cpo_fini(env, slice);
 113        }
 114        lu_object_ref_del_at(&obj->co_lu, &page->cp_obj_ref, "cl_page", page);
 115        cl_object_put(env, obj);
 116        lu_ref_fini(&page->cp_reference);
 117        kfree(page);
 118}
 119
 120/**
 121 * Helper function updating page state. This is the only place in the code
 122 * where cl_page::cp_state field is mutated.
 123 */
 124static inline void cl_page_state_set_trust(struct cl_page *page,
 125                                           enum cl_page_state state)
 126{
 127        /* bypass const. */
 128        *(enum cl_page_state *)&page->cp_state = state;
 129}
 130
 131struct cl_page *cl_page_alloc(const struct lu_env *env,
 132                              struct cl_object *o, pgoff_t ind,
 133                              struct page *vmpage,
 134                              enum cl_page_type type)
 135{
 136        struct cl_page    *page;
 137        struct lu_object_header *head;
 138
 139        page = kzalloc(cl_object_header(o)->coh_page_bufsize, GFP_NOFS);
 140        if (page) {
 141                int result = 0;
 142
 143                atomic_set(&page->cp_ref, 1);
 144                page->cp_obj = o;
 145                cl_object_get(o);
 146                lu_object_ref_add_at(&o->co_lu, &page->cp_obj_ref, "cl_page",
 147                                     page);
 148                page->cp_vmpage = vmpage;
 149                cl_page_state_set_trust(page, CPS_CACHED);
 150                page->cp_type = type;
 151                INIT_LIST_HEAD(&page->cp_layers);
 152                INIT_LIST_HEAD(&page->cp_batch);
 153                INIT_LIST_HEAD(&page->cp_flight);
 154                lu_ref_init(&page->cp_reference);
 155                head = o->co_lu.lo_header;
 156                list_for_each_entry(o, &head->loh_layers, co_lu.lo_linkage) {
 157                        if (o->co_ops->coo_page_init) {
 158                                result = o->co_ops->coo_page_init(env, o, page,
 159                                                                  ind);
 160                                if (result != 0) {
 161                                        cl_page_delete0(env, page);
 162                                        cl_page_free(env, page);
 163                                        page = ERR_PTR(result);
 164                                        break;
 165                                }
 166                        }
 167                }
 168        } else {
 169                page = ERR_PTR(-ENOMEM);
 170        }
 171        return page;
 172}
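
/*
 * Note on the allocation above: cl_page_alloc() allocates coh_page_bufsize
 * bytes, i.e. struct cl_page plus room for the per-layer slices that the
 * coo_page_init() methods attach via cl_page_slice_add().  The new page
 * starts with a single reference in the CPS_CACHED state; if any layer's
 * coo_page_init() fails, the partially built page is torn down through
 * cl_page_delete0() and cl_page_free(), and an ERR_PTR() is returned.
 */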
 173
 174/**
 175 * Returns a cl_page with index \a idx at the object \a o, and associated with
 176 * the VM page \a vmpage.
 177 *
 178 * This is the main entry point into the cl_page caching interface. First, the
 179 * cache is consulted: for CPT_CACHEABLE pages the VM page's ->private field is
 180 * checked for an existing cl_page, which is returned immediately if found.
 181 * Otherwise a new page is allocated. In either case a reference is acquired.
 182 *
 183 * \see cl_object_find(), cl_lock_find()
 184 */
 185struct cl_page *cl_page_find(const struct lu_env *env,
 186                             struct cl_object *o,
 187                             pgoff_t idx, struct page *vmpage,
 188                             enum cl_page_type type)
 189{
 190        struct cl_page    *page = NULL;
 191        struct cl_object_header *hdr;
 192
 193        LASSERT(type == CPT_CACHEABLE || type == CPT_TRANSIENT);
 194        might_sleep();
 195
 196        hdr = cl_object_header(o);
 197
 198        CDEBUG(D_PAGE, "%lu@"DFID" %p %lx %d\n",
 199               idx, PFID(&hdr->coh_lu.loh_fid), vmpage, vmpage->private, type);
 200        /* fast path. */
 201        if (type == CPT_CACHEABLE) {
 202                /*
 203                 * vmpage lock is used to protect the child/parent
 204                 * relationship
 205                 */
 206                KLASSERT(PageLocked(vmpage));
 207                /*
 208                 * cl_vmpage_page() can be called here without any locks as
 209                 *
 210                 *     - "vmpage" is locked (which prevents ->private from
 211                 *       concurrent updates), and
 212                 *
 213                 *     - "o" cannot be destroyed while current thread holds a
 214                 *       reference on it.
 215                 */
 216                page = cl_vmpage_page(vmpage, o);
 217
 218                if (page)
 219                        return page;
 220        }
 221
 222        /* allocate and initialize cl_page */
 223        page = cl_page_alloc(env, o, idx, vmpage, type);
 224        return page;
 225}
 226EXPORT_SYMBOL(cl_page_find);
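
/*
 * A minimal usage sketch (illustrative only, not lifted from a real caller;
 * it assumes the caller already holds a locked VM page, a reference on "obj"
 * and an initialized "env"):
 *
 *	struct cl_page *page;
 *
 *	page = cl_page_find(env, obj, idx, vmpage, CPT_CACHEABLE);
 *	if (IS_ERR(page))
 *		return PTR_ERR(page);
 *	if (cl_page_own(env, io, page) == 0) {
 *		...		operate on the page while owning it
 *		cl_page_disown(env, io, page);
 *	}
 *	cl_page_put(env, page);
 */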
 227
 228static inline int cl_page_invariant(const struct cl_page *pg)
 229{
 230        return cl_page_in_use_noref(pg);
 231}
 232
 233static void cl_page_state_set0(const struct lu_env *env,
 234                               struct cl_page *page, enum cl_page_state state)
 235{
 236        enum cl_page_state old;
 237
 238        /*
 239         * Matrix of allowed state transitions [old][new], for sanity
 240         * checking.
 241         */
 242        static const int allowed_transitions[CPS_NR][CPS_NR] = {
 243                [CPS_CACHED] = {
 244                        [CPS_CACHED]  = 0,
 245                        [CPS_OWNED]   = 1, /* io finds existing cached page */
 246                        [CPS_PAGEIN]  = 0,
 247                        [CPS_PAGEOUT] = 1, /* write-out from the cache */
 248                        [CPS_FREEING] = 1, /* eviction on the memory pressure */
 249                },
 250                [CPS_OWNED] = {
 251                        [CPS_CACHED]  = 1, /* release to the cache */
 252                        [CPS_OWNED]   = 0,
 253                        [CPS_PAGEIN]  = 1, /* start read immediately */
 254                        [CPS_PAGEOUT] = 1, /* start write immediately */
 255                        [CPS_FREEING] = 1, /* lock invalidation or truncate */
 256                },
 257                [CPS_PAGEIN] = {
 258                        [CPS_CACHED]  = 1, /* io completion */
 259                        [CPS_OWNED]   = 0,
 260                        [CPS_PAGEIN]  = 0,
 261                        [CPS_PAGEOUT] = 0,
 262                        [CPS_FREEING] = 0,
 263                },
 264                [CPS_PAGEOUT] = {
 265                        [CPS_CACHED]  = 1, /* io completion */
 266                        [CPS_OWNED]   = 0,
 267                        [CPS_PAGEIN]  = 0,
 268                        [CPS_PAGEOUT] = 0,
 269                        [CPS_FREEING] = 0,
 270                },
 271                [CPS_FREEING] = {
 272                        [CPS_CACHED]  = 0,
 273                        [CPS_OWNED]   = 0,
 274                        [CPS_PAGEIN]  = 0,
 275                        [CPS_PAGEOUT] = 0,
 276                        [CPS_FREEING] = 0,
 277                }
 278        };
 279
 280        old = page->cp_state;
 281        PASSERT(env, page, allowed_transitions[old][state]);
 282        CL_PAGE_HEADER(D_TRACE, env, page, "%d -> %d\n", old, state);
 283        PASSERT(env, page, page->cp_state == old);
 284        PASSERT(env, page, equi(state == CPS_OWNED, page->cp_owner));
 285        cl_page_state_set_trust(page, state);
 286}
 287
 288static void cl_page_state_set(const struct lu_env *env,
 289                              struct cl_page *page, enum cl_page_state state)
 290{
 291        cl_page_state_set0(env, page, state);
 292}
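
/*
 * For reference, the allowed_transitions[][] matrix above encodes the
 * following page life cycle:
 *
 *	CPS_CACHED  -> CPS_OWNED    (io finds an existing cached page)
 *	CPS_CACHED  -> CPS_PAGEOUT  (write-out directly from the cache)
 *	CPS_CACHED  -> CPS_FREEING  (eviction on memory pressure)
 *	CPS_OWNED   -> CPS_CACHED   (release back to the cache)
 *	CPS_OWNED   -> CPS_PAGEIN   (start read immediately)
 *	CPS_OWNED   -> CPS_PAGEOUT  (start write immediately)
 *	CPS_OWNED   -> CPS_FREEING  (lock invalidation or truncate)
 *	CPS_PAGEIN  -> CPS_CACHED   (io completion)
 *	CPS_PAGEOUT -> CPS_CACHED   (io completion)
 *
 * CPS_FREEING is terminal: no transition leads out of it.
 */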
 293
 294/**
 295 * Acquires an additional reference to a page.
 296 *
 297 * This can be called only by caller already possessing a reference to \a
 298 * page.
 299 *
 300 * \see cl_object_get(), cl_lock_get().
 301 */
 302void cl_page_get(struct cl_page *page)
 303{
 304        cl_page_get_trust(page);
 305}
 306EXPORT_SYMBOL(cl_page_get);
 307
 308/**
 309 * Releases a reference to a page.
 310 *
 311 * When last reference is released, page is returned to the cache, unless it
 312 * is in cl_page_state::CPS_FREEING state, in which case it is immediately
 313 * destroyed.
 314 *
 315 * \see cl_object_put(), cl_lock_put().
 316 */
 317void cl_page_put(const struct lu_env *env, struct cl_page *page)
 318{
 319        CL_PAGE_HEADER(D_TRACE, env, page, "%d\n",
 320                       atomic_read(&page->cp_ref));
 321
 322        if (atomic_dec_and_test(&page->cp_ref)) {
 323                LASSERT(page->cp_state == CPS_FREEING);
 324
 325                LASSERT(atomic_read(&page->cp_ref) == 0);
 326                PASSERT(env, page, !page->cp_owner);
 327                PASSERT(env, page, list_empty(&page->cp_batch));
 328                /*
 329                 * Page is no longer reachable by other threads. Tear
 330                 * it down.
 331                 */
 332                cl_page_free(env, page);
 333        }
 334}
 335EXPORT_SYMBOL(cl_page_put);
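
/*
 * Reference counting summary: cl_page_alloc() returns a page with
 * cp_ref == 1, cl_page_get()/cl_page_get_trust() add references, and the
 * final cl_page_put() frees the page through cl_page_free(), but only once
 * the page has reached CPS_FREEING (see the LASSERT above).
 */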
 336
 337/**
 338 * Returns a cl_page associated with a VM page, and given cl_object.
 339 */
 340struct cl_page *cl_vmpage_page(struct page *vmpage, struct cl_object *obj)
 341{
 342        struct cl_page *page;
 343
 344        KLASSERT(PageLocked(vmpage));
 345
 346        /*
 347         * NOTE: absence of races and liveness of data are guaranteed by page
 348         *       lock on a "vmpage". That works because object destruction
 349         *       proceeds bottom-to-top.
 350         */
 351
 352        page = (struct cl_page *)vmpage->private;
 353        if (page) {
 354                cl_page_get_trust(page);
 355                LASSERT(page->cp_type == CPT_CACHEABLE);
 356        }
 357        return page;
 358}
 359EXPORT_SYMBOL(cl_vmpage_page);
 360
 361const struct cl_page_slice *cl_page_at(const struct cl_page *page,
 362                                       const struct lu_device_type *dtype)
 363{
 364        return cl_page_at_trusted(page, dtype);
 365}
 366EXPORT_SYMBOL(cl_page_at);
 367
 368#define CL_PAGE_OP(opname) offsetof(struct cl_page_operations, opname)
 369
 370#define CL_PAGE_INVOKE(_env, _page, _op, _proto, ...)              \
 371({                                                                    \
 372        const struct lu_env     *__env  = (_env);                   \
 373        struct cl_page       *__page = (_page);            \
 374        const struct cl_page_slice *__scan;                          \
 375        int                      __result;                         \
 376        ptrdiff_t                  __op   = (_op);                   \
 377        int                    (*__method)_proto;                   \
 378                                                                        \
 379        __result = 0;                                              \
 380        list_for_each_entry(__scan, &__page->cp_layers, cpl_linkage) {  \
 381                __method = *(void **)((char *)__scan->cpl_ops +  __op); \
 382                if (__method) {                                         \
 383                        __result = (*__method)(__env, __scan, ## __VA_ARGS__); \
 384                        if (__result != 0)                              \
 385                                break;                                  \
 386                }                                                       \
 387        }                                                               \
 388        if (__result > 0)                                              \
 389                __result = 0;                                      \
 390        __result;                                                      \
 391})
 392
 393#define CL_PAGE_INVOKE_REVERSE(_env, _page, _op, _proto, ...)           \
 394({                                                                      \
 395        const struct lu_env        *__env  = (_env);                    \
 396        struct cl_page             *__page = (_page);                   \
 397        const struct cl_page_slice *__scan;                             \
 398        int                         __result;                           \
 399        ptrdiff_t                   __op   = (_op);                     \
 400        int                       (*__method)_proto;                    \
 401                                                                        \
 402        __result = 0;                                                   \
 403        list_for_each_entry_reverse(__scan, &__page->cp_layers,         \
 404                                        cpl_linkage) {                  \
 405                __method = *(void **)((char *)__scan->cpl_ops +  __op); \
 406                if (__method) {                                         \
 407                        __result = (*__method)(__env, __scan, ## __VA_ARGS__); \
 408                        if (__result != 0)                              \
 409                                break;                                  \
 410                }                                                       \
 411        }                                                               \
 412        if (__result > 0)                                               \
 413                __result = 0;                                           \
 414        __result;                                                       \
 415})
 416
 417#define CL_PAGE_INVOID(_env, _page, _op, _proto, ...)              \
 418do {                                                                \
 419        const struct lu_env     *__env  = (_env);                   \
 420        struct cl_page       *__page = (_page);            \
 421        const struct cl_page_slice *__scan;                          \
 422        ptrdiff_t                  __op   = (_op);                   \
 423        void                  (*__method)_proto;                    \
 424                                                                        \
 425        list_for_each_entry(__scan, &__page->cp_layers, cpl_linkage) {  \
 426                __method = *(void **)((char *)__scan->cpl_ops + __op);  \
 427                if (__method)                                           \
 428                        (*__method)(__env, __scan, ## __VA_ARGS__);     \
 429        }                                                               \
 430} while (0)
 431
 432#define CL_PAGE_INVOID_REVERSE(_env, _page, _op, _proto, ...)          \
 433do {                                                                    \
 434        const struct lu_env     *__env  = (_env);                       \
 435        struct cl_page       *__page = (_page);                \
 436        const struct cl_page_slice *__scan;                              \
 437        ptrdiff_t                  __op   = (_op);                       \
 438        void                  (*__method)_proto;                        \
 439                                                                            \
 440        list_for_each_entry_reverse(__scan, &__page->cp_layers, cpl_linkage) { \
 441                __method = *(void **)((char *)__scan->cpl_ops + __op);  \
 442                if (__method)                                           \
 443                        (*__method)(__env, __scan, ## __VA_ARGS__);     \
 444        }                                                               \
 445} while (0)
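
/*
 * The four macros above walk cl_page::cp_layers and, for every layer that
 * defines it, call the method stored at byte offset "_op" within struct
 * cl_page_operations (the offset is computed with CL_PAGE_OP()).  The
 * *_REVERSE variants walk the layers bottom-to-top.  In the value-returning
 * variants the walk stops at the first non-zero result: a negative value is
 * propagated as an error, while a positive value means "handled, stop here"
 * and is folded into 0.
 */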
 446
 447static int cl_page_invoke(const struct lu_env *env,
 448                          struct cl_io *io, struct cl_page *page, ptrdiff_t op)
 449
 450{
 451        PINVRNT(env, page, cl_object_same(page->cp_obj, io->ci_obj));
 452        return CL_PAGE_INVOKE(env, page, op,
 453                              (const struct lu_env *,
 454                               const struct cl_page_slice *, struct cl_io *),
 455                              io);
 456}
 457
 458static void cl_page_invoid(const struct lu_env *env,
 459                           struct cl_io *io, struct cl_page *page, ptrdiff_t op)
 460
 461{
 462        PINVRNT(env, page, cl_object_same(page->cp_obj, io->ci_obj));
 463        CL_PAGE_INVOID(env, page, op,
 464                       (const struct lu_env *,
 465                        const struct cl_page_slice *, struct cl_io *), io);
 466}
 467
 468static void cl_page_owner_clear(struct cl_page *page)
 469{
 470        if (page->cp_owner) {
 471                LASSERT(page->cp_owner->ci_owned_nr > 0);
 472                page->cp_owner->ci_owned_nr--;
 473                page->cp_owner = NULL;
 474        }
 475}
 476
 477static void cl_page_owner_set(struct cl_page *page)
 478{
 479        page->cp_owner->ci_owned_nr++;
 480}
 481
 482void cl_page_disown0(const struct lu_env *env,
 483                     struct cl_io *io, struct cl_page *pg)
 484{
 485        enum cl_page_state state;
 486
 487        state = pg->cp_state;
 488        PINVRNT(env, pg, state == CPS_OWNED || state == CPS_FREEING);
 489        PINVRNT(env, pg, cl_page_invariant(pg) || state == CPS_FREEING);
 490        cl_page_owner_clear(pg);
 491
 492        if (state == CPS_OWNED)
 493                cl_page_state_set(env, pg, CPS_CACHED);
 494        /*
 495         * Completion call-backs are executed in the bottom-up order, so that
 496         * uppermost layer (llite), responsible for VFS/VM interaction runs
 497         * last and can release locks safely.
 498         */
 499        CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_disown),
 500                               (const struct lu_env *,
 501                                const struct cl_page_slice *, struct cl_io *),
 502                               io);
 503}
 504
 505/**
  506 * Returns true iff the page is owned by the given io.
 507 */
 508int cl_page_is_owned(const struct cl_page *pg, const struct cl_io *io)
 509{
 510        struct cl_io *top = cl_io_top((struct cl_io *)io);
 511        LINVRNT(cl_object_same(pg->cp_obj, io->ci_obj));
 512        return pg->cp_state == CPS_OWNED && pg->cp_owner == top;
 513}
 514EXPORT_SYMBOL(cl_page_is_owned);
 515
 516/**
 517 * Try to own a page by IO.
 518 *
  519 * Waits until page is in cl_page_state::CPS_CACHED state, and then switches it
 520 * into cl_page_state::CPS_OWNED state.
 521 *
 522 * \pre  !cl_page_is_owned(pg, io)
 523 * \post result == 0 iff cl_page_is_owned(pg, io)
 524 *
 525 * \retval 0   success
 526 *
 527 * \retval -ve failure, e.g., page was destroyed (and landed in
  528 *           cl_page_state::CPS_FREEING instead of cl_page_state::CPS_CACHED),
  529 *           or page was owned by another thread, or is in IO.
 530 *
 531 * \see cl_page_disown()
 532 * \see cl_page_operations::cpo_own()
 533 * \see cl_page_own_try()
 534 * \see cl_page_own
 535 */
 536static int cl_page_own0(const struct lu_env *env, struct cl_io *io,
 537                        struct cl_page *pg, int nonblock)
 538{
 539        int result;
 540
 541        PINVRNT(env, pg, !cl_page_is_owned(pg, io));
 542
 543        io = cl_io_top(io);
 544
 545        if (pg->cp_state == CPS_FREEING) {
 546                result = -ENOENT;
 547        } else {
 548                result = CL_PAGE_INVOKE(env, pg, CL_PAGE_OP(cpo_own),
 549                                        (const struct lu_env *,
 550                                         const struct cl_page_slice *,
 551                                         struct cl_io *, int),
 552                                        io, nonblock);
 553                if (result == 0) {
 554                        PASSERT(env, pg, !pg->cp_owner);
 555                        PASSERT(env, pg, !pg->cp_req);
 556                        pg->cp_owner = cl_io_top(io);
 557                        cl_page_owner_set(pg);
 558                        if (pg->cp_state != CPS_FREEING) {
 559                                cl_page_state_set(env, pg, CPS_OWNED);
 560                        } else {
 561                                cl_page_disown0(env, io, pg);
 562                                result = -ENOENT;
 563                        }
 564                }
 565        }
 566        PINVRNT(env, pg, ergo(result == 0, cl_page_invariant(pg)));
 567        return result;
 568}
 569
 570/**
 571 * Own a page, might be blocked.
 572 *
 573 * \see cl_page_own0()
 574 */
 575int cl_page_own(const struct lu_env *env, struct cl_io *io, struct cl_page *pg)
 576{
 577        return cl_page_own0(env, io, pg, 0);
 578}
 579EXPORT_SYMBOL(cl_page_own);
 580
 581/**
 582 * Nonblock version of cl_page_own().
 583 *
 584 * \see cl_page_own0()
 585 */
 586int cl_page_own_try(const struct lu_env *env, struct cl_io *io,
 587                    struct cl_page *pg)
 588{
 589        return cl_page_own0(env, io, pg, 1);
 590}
 591EXPORT_SYMBOL(cl_page_own_try);
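
/*
 * Sketch of the non-blocking variant in use (hypothetical caller that simply
 * skips busy pages):
 *
 *	if (cl_page_own_try(env, io, pg) != 0)
 *		return 0;
 *	... operate on the owned page ...
 *	cl_page_disown(env, io, pg);
 */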
 592
 593/**
 594 * Assume page ownership.
 595 *
 596 * Called when page is already locked by the hosting VM.
 597 *
 598 * \pre !cl_page_is_owned(pg, io)
 599 * \post cl_page_is_owned(pg, io)
 600 *
 601 * \see cl_page_operations::cpo_assume()
 602 */
 603void cl_page_assume(const struct lu_env *env,
 604                    struct cl_io *io, struct cl_page *pg)
 605{
 606        PINVRNT(env, pg, cl_object_same(pg->cp_obj, io->ci_obj));
 607
 608        io = cl_io_top(io);
 609
 610        cl_page_invoid(env, io, pg, CL_PAGE_OP(cpo_assume));
 611        PASSERT(env, pg, !pg->cp_owner);
 612        pg->cp_owner = cl_io_top(io);
 613        cl_page_owner_set(pg);
 614        cl_page_state_set(env, pg, CPS_OWNED);
 615}
 616EXPORT_SYMBOL(cl_page_assume);
 617
 618/**
 619 * Releases page ownership without unlocking the page.
 620 *
 621 * Moves page into cl_page_state::CPS_CACHED without releasing a lock on the
 622 * underlying VM page (as VM is supposed to do this itself).
 623 *
 624 * \pre   cl_page_is_owned(pg, io)
 625 * \post !cl_page_is_owned(pg, io)
 626 *
 627 * \see cl_page_assume()
 628 */
 629void cl_page_unassume(const struct lu_env *env,
 630                      struct cl_io *io, struct cl_page *pg)
 631{
 632        PINVRNT(env, pg, cl_page_is_owned(pg, io));
 633        PINVRNT(env, pg, cl_page_invariant(pg));
 634
 635        io = cl_io_top(io);
 636        cl_page_owner_clear(pg);
 637        cl_page_state_set(env, pg, CPS_CACHED);
 638        CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_unassume),
 639                               (const struct lu_env *,
 640                                const struct cl_page_slice *, struct cl_io *),
 641                               io);
 642}
 643EXPORT_SYMBOL(cl_page_unassume);
 644
 645/**
 646 * Releases page ownership.
 647 *
 648 * Moves page into cl_page_state::CPS_CACHED.
 649 *
 650 * \pre   cl_page_is_owned(pg, io)
 651 * \post !cl_page_is_owned(pg, io)
 652 *
 653 * \see cl_page_own()
 654 * \see cl_page_operations::cpo_disown()
 655 */
 656void cl_page_disown(const struct lu_env *env,
 657                    struct cl_io *io, struct cl_page *pg)
 658{
 659        PINVRNT(env, pg, cl_page_is_owned(pg, io) ||
 660                pg->cp_state == CPS_FREEING);
 661
 662        io = cl_io_top(io);
 663        cl_page_disown0(env, io, pg);
 664}
 665EXPORT_SYMBOL(cl_page_disown);
 666
 667/**
 668 * Called when page is to be removed from the object, e.g., as a result of
 669 * truncate.
 670 *
 671 * Calls cl_page_operations::cpo_discard() top-to-bottom.
 672 *
 673 * \pre cl_page_is_owned(pg, io)
 674 *
 675 * \see cl_page_operations::cpo_discard()
 676 */
 677void cl_page_discard(const struct lu_env *env,
 678                     struct cl_io *io, struct cl_page *pg)
 679{
 680        PINVRNT(env, pg, cl_page_is_owned(pg, io));
 681        PINVRNT(env, pg, cl_page_invariant(pg));
 682
 683        cl_page_invoid(env, io, pg, CL_PAGE_OP(cpo_discard));
 684}
 685EXPORT_SYMBOL(cl_page_discard);
 686
 687/**
 688 * Version of cl_page_delete() that can be called for not fully constructed
  689 * pages, e.g., in an error handling cl_page_find()->cl_page_delete0()
 690 * path. Doesn't check page invariant.
 691 */
 692static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg)
 693{
 694        PASSERT(env, pg, pg->cp_state != CPS_FREEING);
 695
 696        /*
  697         * Sever all ways to obtain new pointers to @pg.
 698         */
 699        cl_page_owner_clear(pg);
 700
 701        cl_page_state_set0(env, pg, CPS_FREEING);
 702
 703        CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_delete),
 704                               (const struct lu_env *,
 705                                const struct cl_page_slice *));
 706}
 707
 708/**
 709 * Called when a decision is made to throw page out of memory.
 710 *
 711 * Notifies all layers about page destruction by calling
 712 * cl_page_operations::cpo_delete() method top-to-bottom.
 713 *
 714 * Moves page into cl_page_state::CPS_FREEING state (this is the only place
 715 * where transition to this state happens).
 716 *
  717 * Eliminates all avenues through which new references to the page can be
 718 * obtained:
 719 *
 720 *     - removes page from the radix trees,
 721 *
 722 *     - breaks linkage from VM page to cl_page.
 723 *
 724 * Once page reaches cl_page_state::CPS_FREEING, all remaining references will
 725 * drain after some time, at which point page will be recycled.
 726 *
 727 * \pre  VM page is locked
 728 * \post pg->cp_state == CPS_FREEING
 729 *
 730 * \see cl_page_operations::cpo_delete()
 731 */
 732void cl_page_delete(const struct lu_env *env, struct cl_page *pg)
 733{
 734        PINVRNT(env, pg, cl_page_invariant(pg));
 735        cl_page_delete0(env, pg);
 736}
 737EXPORT_SYMBOL(cl_page_delete);
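
/*
 * Illustrative sketch of a typical removal path (not copied from a real
 * caller; it assumes the VM page is locked and "env" is initialized):
 *
 *	struct cl_page *page = cl_vmpage_page(vmpage, obj);
 *
 *	if (page) {
 *		cl_page_delete(env, page);
 *		cl_page_put(env, page);
 *	}
 */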
 738
 739/**
 740 * Marks page up-to-date.
 741 *
 742 * Call cl_page_operations::cpo_export() through all layers top-to-bottom. The
 743 * layer responsible for VM interaction has to mark/clear page as up-to-date
  744 * according to the \a uptodate argument.
 745 *
 746 * \see cl_page_operations::cpo_export()
 747 */
 748void cl_page_export(const struct lu_env *env, struct cl_page *pg, int uptodate)
 749{
 750        PINVRNT(env, pg, cl_page_invariant(pg));
 751        CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_export),
 752                       (const struct lu_env *,
 753                        const struct cl_page_slice *, int), uptodate);
 754}
 755EXPORT_SYMBOL(cl_page_export);
 756
 757/**
 758 * Returns true, iff \a pg is VM locked in a suitable sense by the calling
 759 * thread.
 760 */
 761int cl_page_is_vmlocked(const struct lu_env *env, const struct cl_page *pg)
 762{
 763        int result;
 764        const struct cl_page_slice *slice;
 765
 766        slice = container_of(pg->cp_layers.next,
 767                             const struct cl_page_slice, cpl_linkage);
 768        PASSERT(env, pg, slice->cpl_ops->cpo_is_vmlocked);
 769        /*
 770         * Call ->cpo_is_vmlocked() directly instead of going through
 771         * CL_PAGE_INVOKE(), because cl_page_is_vmlocked() is used by
 772         * cl_page_invariant().
 773         */
 774        result = slice->cpl_ops->cpo_is_vmlocked(env, slice);
 775        PASSERT(env, pg, result == -EBUSY || result == -ENODATA);
 776        return result == -EBUSY;
 777}
 778EXPORT_SYMBOL(cl_page_is_vmlocked);
 779
 780static enum cl_page_state cl_req_type_state(enum cl_req_type crt)
 781{
 782        return crt == CRT_WRITE ? CPS_PAGEOUT : CPS_PAGEIN;
 783}
 784
 785static void cl_page_io_start(const struct lu_env *env,
 786                             struct cl_page *pg, enum cl_req_type crt)
 787{
 788        /*
 789         * Page is queued for IO, change its state.
 790         */
 791        cl_page_owner_clear(pg);
 792        cl_page_state_set(env, pg, cl_req_type_state(crt));
 793}
 794
 795/**
 796 * Prepares page for immediate transfer. cl_page_operations::cpo_prep() is
 797 * called top-to-bottom. Every layer either agrees to submit this page (by
 798 * returning 0), or requests to omit this page (by returning -EALREADY). Layer
 799 * handling interactions with the VM also has to inform VM that page is under
 800 * transfer now.
 801 */
 802int cl_page_prep(const struct lu_env *env, struct cl_io *io,
 803                 struct cl_page *pg, enum cl_req_type crt)
 804{
 805        int result;
 806
 807        PINVRNT(env, pg, cl_page_is_owned(pg, io));
 808        PINVRNT(env, pg, cl_page_invariant(pg));
 809        PINVRNT(env, pg, crt < CRT_NR);
 810
 811        /*
 812         * XXX this has to be called bottom-to-top, so that llite can set up
 813         * PG_writeback without risking other layers deciding to skip this
 814         * page.
 815         */
 816        if (crt >= CRT_NR)
 817                return -EINVAL;
 818        result = cl_page_invoke(env, io, pg, CL_PAGE_OP(io[crt].cpo_prep));
 819        if (result == 0)
 820                cl_page_io_start(env, pg, crt);
 821
 822        CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
 823        return result;
 824}
 825EXPORT_SYMBOL(cl_page_prep);
 826
 827/**
 828 * Notify layers about transfer completion.
 829 *
 830 * Invoked by transfer sub-system (which is a part of osc) to notify layers
  831 * that a transfer, of which this page is a part, has completed.
 832 *
 833 * Completion call-backs are executed in the bottom-up order, so that
 834 * uppermost layer (llite), responsible for the VFS/VM interaction runs last
 835 * and can release locks safely.
 836 *
 837 * \pre  pg->cp_state == CPS_PAGEIN || pg->cp_state == CPS_PAGEOUT
 838 * \post pg->cp_state == CPS_CACHED
 839 *
 840 * \see cl_page_operations::cpo_completion()
 841 */
 842void cl_page_completion(const struct lu_env *env,
 843                        struct cl_page *pg, enum cl_req_type crt, int ioret)
 844{
 845        struct cl_sync_io *anchor = pg->cp_sync_io;
 846
 847        PASSERT(env, pg, crt < CRT_NR);
 848        /* cl_page::cp_req already cleared by the caller (osc_completion()) */
 849        PASSERT(env, pg, !pg->cp_req);
 850        PASSERT(env, pg, pg->cp_state == cl_req_type_state(crt));
 851
 852        CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, ioret);
 853
 854        cl_page_state_set(env, pg, CPS_CACHED);
 855        if (crt >= CRT_NR)
 856                return;
 857        CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(io[crt].cpo_completion),
 858                               (const struct lu_env *,
 859                                const struct cl_page_slice *, int), ioret);
 860        if (anchor) {
 861                LASSERT(pg->cp_sync_io == anchor);
 862                pg->cp_sync_io = NULL;
 863        }
 864        /*
 865         * As page->cp_obj is pinned by a reference from page->cp_req, it is
 866         * safe to call cl_page_put() without risking object destruction in a
 867         * non-blocking context.
 868         */
 869        cl_page_put(env, pg);
 870
 871        if (anchor)
 872                cl_sync_io_note(env, anchor, ioret);
 873}
 874EXPORT_SYMBOL(cl_page_completion);
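
/*
 * Transfer life cycle as implemented in this file: cl_page_prep() (for pages
 * submitted by an owning io) or cl_page_make_ready() (for cached pages picked
 * up by the transfer engine, see below) moves the page into CPS_PAGEIN or
 * CPS_PAGEOUT through cl_page_io_start(), which also drops ownership.
 * cl_page_completion() then returns the page to CPS_CACHED, runs the
 * per-layer completion methods bottom-to-top, drops a page reference with
 * cl_page_put() and, if a cl_sync_io anchor is attached, signals it through
 * cl_sync_io_note().
 */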
 875
 876/**
 877 * Notify layers that transfer formation engine decided to yank this page from
 878 * the cache and to make it a part of a transfer.
 879 *
 880 * \pre  pg->cp_state == CPS_CACHED
 881 * \post pg->cp_state == CPS_PAGEIN || pg->cp_state == CPS_PAGEOUT
 882 *
 883 * \see cl_page_operations::cpo_make_ready()
 884 */
 885int cl_page_make_ready(const struct lu_env *env, struct cl_page *pg,
 886                       enum cl_req_type crt)
 887{
 888        int result;
 889
 890        PINVRNT(env, pg, crt < CRT_NR);
 891
 892        if (crt >= CRT_NR)
 893                return -EINVAL;
 894        result = CL_PAGE_INVOKE(env, pg, CL_PAGE_OP(io[crt].cpo_make_ready),
 895                                (const struct lu_env *,
 896                                 const struct cl_page_slice *));
 897        if (result == 0) {
 898                PASSERT(env, pg, pg->cp_state == CPS_CACHED);
 899                cl_page_io_start(env, pg, crt);
 900        }
 901        CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
 902        return result;
 903}
 904EXPORT_SYMBOL(cl_page_make_ready);
 905
 906/**
  907 * Called when a page is being written back at the kernel's initiative.
 908 *
 909 * \pre  cl_page_is_owned(pg, io)
 910 * \post ergo(result == 0, pg->cp_state == CPS_PAGEOUT)
 911 *
 912 * \see cl_page_operations::cpo_flush()
 913 */
 914int cl_page_flush(const struct lu_env *env, struct cl_io *io,
 915                  struct cl_page *pg)
 916{
 917        int result;
 918
 919        PINVRNT(env, pg, cl_page_is_owned(pg, io));
 920        PINVRNT(env, pg, cl_page_invariant(pg));
 921
 922        result = cl_page_invoke(env, io, pg, CL_PAGE_OP(cpo_flush));
 923
 924        CL_PAGE_HEADER(D_TRACE, env, pg, "%d\n", result);
 925        return result;
 926}
 927EXPORT_SYMBOL(cl_page_flush);
 928
 929/**
  930 * Checks whether the page is protected by an extent lock of at least the
  931 * required mode.
 932 *
 933 * \return the same as in cl_page_operations::cpo_is_under_lock() method.
 934 * \see cl_page_operations::cpo_is_under_lock()
 935 */
 936int cl_page_is_under_lock(const struct lu_env *env, struct cl_io *io,
 937                          struct cl_page *page, pgoff_t *max_index)
 938{
 939        int rc;
 940
 941        PINVRNT(env, page, cl_page_invariant(page));
 942
 943        rc = CL_PAGE_INVOKE_REVERSE(env, page, CL_PAGE_OP(cpo_is_under_lock),
 944                                    (const struct lu_env *,
 945                                     const struct cl_page_slice *,
 946                                      struct cl_io *, pgoff_t *),
 947                                    io, max_index);
 948        return rc;
 949}
 950EXPORT_SYMBOL(cl_page_is_under_lock);
 951
 952/**
 953 * Tells transfer engine that only part of a page is to be transmitted.
 954 *
 955 * \see cl_page_operations::cpo_clip()
 956 */
 957void cl_page_clip(const struct lu_env *env, struct cl_page *pg,
 958                  int from, int to)
 959{
 960        PINVRNT(env, pg, cl_page_invariant(pg));
 961
 962        CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", from, to);
 963        CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_clip),
 964                       (const struct lu_env *,
 965                        const struct cl_page_slice *, int, int),
 966                       from, to);
 967}
 968EXPORT_SYMBOL(cl_page_clip);
 969
 970/**
  971 * Prints a human-readable representation of \a pg through \a printer.
 972 */
 973void cl_page_header_print(const struct lu_env *env, void *cookie,
 974                          lu_printer_t printer, const struct cl_page *pg)
 975{
 976        (*printer)(env, cookie,
 977                   "page@%p[%d %p %d %d %p %p]\n",
 978                   pg, atomic_read(&pg->cp_ref), pg->cp_obj,
 979                   pg->cp_state, pg->cp_type,
 980                   pg->cp_owner, pg->cp_req);
 981}
 982EXPORT_SYMBOL(cl_page_header_print);
 983
 984/**
  985 * Prints a human-readable representation of \a pg through \a printer.
 986 */
 987void cl_page_print(const struct lu_env *env, void *cookie,
 988                   lu_printer_t printer, const struct cl_page *pg)
 989{
 990        cl_page_header_print(env, cookie, printer, pg);
 991        CL_PAGE_INVOKE(env, (struct cl_page *)pg, CL_PAGE_OP(cpo_print),
 992                       (const struct lu_env *env,
 993                        const struct cl_page_slice *slice,
 994                        void *cookie, lu_printer_t p), cookie, printer);
 995        (*printer)(env, cookie, "end page@%p\n", pg);
 996}
 997EXPORT_SYMBOL(cl_page_print);
 998
 999/**
1000 * Cancel a page which is still in a transfer.
1001 */
1002int cl_page_cancel(const struct lu_env *env, struct cl_page *page)
1003{
1004        return CL_PAGE_INVOKE(env, page, CL_PAGE_OP(cpo_cancel),
1005                              (const struct lu_env *,
1006                               const struct cl_page_slice *));
1007}
1008
1009/**
 1010 * Converts a page index into a byte offset within object \a obj.
1011 */
1012loff_t cl_offset(const struct cl_object *obj, pgoff_t idx)
1013{
1014        /*
1015         * XXX for now.
1016         */
1017        return (loff_t)idx << PAGE_SHIFT;
1018}
1019EXPORT_SYMBOL(cl_offset);
1020
1021/**
 1022 * Converts a byte offset within object \a obj into a page index.
1023 */
1024pgoff_t cl_index(const struct cl_object *obj, loff_t offset)
1025{
1026        /*
1027         * XXX for now.
1028         */
1029        return offset >> PAGE_SHIFT;
1030}
1031EXPORT_SYMBOL(cl_index);
1032
1033size_t cl_page_size(const struct cl_object *obj)
1034{
1035        return 1UL << PAGE_SHIFT;
1036}
1037EXPORT_SYMBOL(cl_page_size);
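
/*
 * Worked example for the three helpers above, assuming 4 KiB pages
 * (PAGE_SHIFT == 12):
 *
 *	cl_offset(obj, 3)    == 12288	(index 3 starts at byte 3 * 4096)
 *	cl_index(obj, 12288) == 3
 *	cl_index(obj, 12289) == 3	(offsets inside a page map to its index)
 *	cl_page_size(obj)    == 4096	(currently independent of "obj")
 */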
1038
1039/**
1040 * Adds page slice to the compound page.
1041 *
1042 * This is called by cl_object_operations::coo_page_init() methods to add a
1043 * per-layer state to the page. New state is added at the end of
1044 * cl_page::cp_layers list, that is, it is at the bottom of the stack.
1045 *
1046 * \see cl_lock_slice_add(), cl_req_slice_add(), cl_io_slice_add()
1047 */
1048void cl_page_slice_add(struct cl_page *page, struct cl_page_slice *slice,
1049                       struct cl_object *obj, pgoff_t index,
1050                       const struct cl_page_operations *ops)
1051{
1052        list_add_tail(&slice->cpl_linkage, &page->cp_layers);
1053        slice->cpl_obj  = obj;
1054        slice->cpl_index = index;
1055        slice->cpl_ops  = ops;
1056        slice->cpl_page = page;
1057}
1058EXPORT_SYMBOL(cl_page_slice_add);
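
/*
 * A hedged sketch of how a layer's coo_page_init() method might attach its
 * slice; the "foo" names are purely illustrative and not part of Lustre:
 *
 *	static int foo_page_init(const struct lu_env *env, struct cl_object *obj,
 *				 struct cl_page *page, pgoff_t index)
 *	{
 *		struct foo_page *fp = ...;	(per-layer state carved out of the
 *						 coh_page_bufsize allocation)
 *
 *		cl_page_slice_add(page, &fp->fp_cl, obj, index, &foo_page_ops);
 *		return 0;
 *	}
 */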
1059
1060/**
1061 * Allocate and initialize cl_cache, called by ll_init_sbi().
1062 */
1063struct cl_client_cache *cl_cache_init(unsigned long lru_page_max)
1064{
1065        struct cl_client_cache  *cache = NULL;
1066
1067        cache = kzalloc(sizeof(*cache), GFP_KERNEL);
1068        if (!cache)
1069                return NULL;
1070
1071        /* Initialize cache data */
1072        atomic_set(&cache->ccc_users, 1);
1073        cache->ccc_lru_max = lru_page_max;
1074        atomic_long_set(&cache->ccc_lru_left, lru_page_max);
1075        spin_lock_init(&cache->ccc_lru_lock);
1076        INIT_LIST_HEAD(&cache->ccc_lru);
1077
1078        atomic_long_set(&cache->ccc_unstable_nr, 0);
1079        init_waitqueue_head(&cache->ccc_unstable_waitq);
1080
1081        return cache;
1082}
1083EXPORT_SYMBOL(cl_cache_init);
1084
1085/**
1086 * Increase cl_cache refcount
1087 */
1088void cl_cache_incref(struct cl_client_cache *cache)
1089{
1090        atomic_inc(&cache->ccc_users);
1091}
1092EXPORT_SYMBOL(cl_cache_incref);
1093
1094/**
1095 * Decrease cl_cache refcount and free the cache if refcount=0.
1096 * Since llite, lov and osc each hold a cl_cache reference,
1097 * the free cannot race with a concurrent user. (LU-6173)
1098 */
1099void cl_cache_decref(struct cl_client_cache *cache)
1100{
1101        if (atomic_dec_and_test(&cache->ccc_users))
1102                kfree(cache);
1103}
1104EXPORT_SYMBOL(cl_cache_decref);
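
/*
 * Typical cl_client_cache life cycle, sketched from the three functions
 * above: the creator receives the initial reference from cl_cache_init(),
 * every additional user takes its own with cl_cache_incref(), and each user
 * (including the creator) drops it with cl_cache_decref(); the last put
 * frees the structure.
 *
 *	struct cl_client_cache *cache = cl_cache_init(lru_page_max);
 *
 *	if (!cache)
 *		return -ENOMEM;
 *	cl_cache_incref(cache);		(hand a reference to another user)
 *	...
 *	cl_cache_decref(cache);		(that user is done)
 *	cl_cache_decref(cache);		(creator's reference; frees the cache)
 */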
1105