linux/drivers/staging/lustre/lustre/lov/lov_lock.c
   1/*
   2 * GPL HEADER START
   3 *
   4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License version 2 only,
   8 * as published by the Free Software Foundation.
   9 *
  10 * This program is distributed in the hope that it will be useful, but
  11 * WITHOUT ANY WARRANTY; without even the implied warranty of
  12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13 * General Public License version 2 for more details (a copy is included
  14 * in the LICENSE file that accompanied this code).
  15 *
  16 * You should have received a copy of the GNU General Public License
  17 * version 2 along with this program; If not, see
  18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
  19 *
  20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  21 * CA 95054 USA or visit www.sun.com if you need additional information or
  22 * have any questions.
  23 *
  24 * GPL HEADER END
  25 */
  26/*
  27 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  28 * Use is subject to license terms.
  29 *
  30 * Copyright (c) 2011, 2012, Intel Corporation.
  31 */
  32/*
  33 * This file is part of Lustre, http://www.lustre.org/
  34 * Lustre is a trademark of Sun Microsystems, Inc.
  35 *
  36 * Implementation of cl_lock for LOV layer.
  37 *
  38 *   Author: Nikita Danilov <nikita.danilov@sun.com>
  39 */
  40
  41#define DEBUG_SUBSYSTEM S_LOV
  42
  43#include "lov_cl_internal.h"
  44
  45/** \addtogroup lov
  46 *  @{
  47 */
  48
  49static struct cl_lock_closure *lov_closure_get(const struct lu_env *env,
  50                                               struct cl_lock *parent);
  51
  52static int lov_lock_unuse(const struct lu_env *env,
  53                          const struct cl_lock_slice *slice);
  54/*****************************************************************************
  55 *
  56 * Lov lock operations.
  57 *
  58 */
  59
  60static struct lov_sublock_env *lov_sublock_env_get(const struct lu_env *env,
  61                                                   struct cl_lock *parent,
  62                                                   struct lov_lock_sub *lls)
  63{
  64        struct lov_sublock_env *subenv;
  65        struct lov_io     *lio    = lov_env_io(env);
  66        struct cl_io       *io     = lio->lis_cl.cis_io;
  67        struct lov_io_sub      *sub;
  68
  69        subenv = &lov_env_session(env)->ls_subenv;
  70
   71        /*
   72         * FIXME: We prefer to use the sub-IO's env & io to call the sublock
   73         * lock operations, because an osc lock sometimes stores control variables
   74         * in the thread's IO information (currently only the lockless information).
   75         * However, if the lock's host (object) differs from the object of the
   76         * current IO, there is no way to get the subenv and subio because they
   77         * are not initialized at all. As a temporary fix, in this case we still
   78         * borrow the parent's env to call the sublock operations.
   79         */
  80        if (!io || !cl_object_same(io->ci_obj, parent->cll_descr.cld_obj)) {
  81                subenv->lse_env = env;
  82                subenv->lse_io  = io;
  83                subenv->lse_sub = NULL;
  84        } else {
  85                sub = lov_sub_get(env, lio, lls->sub_stripe);
  86                if (!IS_ERR(sub)) {
  87                        subenv->lse_env = sub->sub_env;
  88                        subenv->lse_io  = sub->sub_io;
  89                        subenv->lse_sub = sub;
  90                } else {
  91                        subenv = (void *)sub;
  92                }
  93        }
  94        return subenv;
  95}
  96
  97static void lov_sublock_env_put(struct lov_sublock_env *subenv)
  98{
  99        if (subenv && subenv->lse_sub)
 100                lov_sub_put(subenv->lse_sub);
 101}
 102
 103static void lov_sublock_adopt(const struct lu_env *env, struct lov_lock *lck,
 104                              struct cl_lock *sublock, int idx,
 105                              struct lov_lock_link *link)
 106{
 107        struct lovsub_lock *lsl;
 108        struct cl_lock     *parent = lck->lls_cl.cls_lock;
 109        int              rc;
 110
 111        LASSERT(cl_lock_is_mutexed(parent));
 112        LASSERT(cl_lock_is_mutexed(sublock));
 113
 114        lsl = cl2sub_lock(sublock);
 115        /*
 116         * check that sub-lock doesn't have lock link to this top-lock.
 117         */
 118        LASSERT(!lov_lock_link_find(env, lck, lsl));
 119        LASSERT(idx < lck->lls_nr);
 120
 121        lck->lls_sub[idx].sub_lock = lsl;
 122        lck->lls_nr_filled++;
 123        LASSERT(lck->lls_nr_filled <= lck->lls_nr);
 124        list_add_tail(&link->lll_list, &lsl->lss_parents);
 125        link->lll_idx = idx;
 126        link->lll_super = lck;
 127        cl_lock_get(parent);
 128        lu_ref_add(&parent->cll_reference, "lov-child", sublock);
 129        lck->lls_sub[idx].sub_flags |= LSF_HELD;
 130        cl_lock_user_add(env, sublock);
 131
 132        rc = lov_sublock_modify(env, lck, lsl, &sublock->cll_descr, idx);
 133        LASSERT(rc == 0); /* there is no way this can fail, currently */
 134}
 135
 136static struct cl_lock *lov_sublock_alloc(const struct lu_env *env,
 137                                         const struct cl_io *io,
 138                                         struct lov_lock *lck,
 139                                         int idx, struct lov_lock_link **out)
 140{
 141        struct cl_lock       *sublock;
 142        struct cl_lock       *parent;
 143        struct lov_lock_link *link;
 144
 145        LASSERT(idx < lck->lls_nr);
 146
 147        link = kmem_cache_zalloc(lov_lock_link_kmem, GFP_NOFS);
 148        if (link) {
 149                struct lov_sublock_env *subenv;
 150                struct lov_lock_sub  *lls;
 151                struct cl_lock_descr *descr;
 152
 153                parent = lck->lls_cl.cls_lock;
 154                lls    = &lck->lls_sub[idx];
 155                descr  = &lls->sub_got;
 156
 157                subenv = lov_sublock_env_get(env, parent, lls);
 158                if (!IS_ERR(subenv)) {
 159                        /* CAVEAT: Don't try to add a field to lov_lock_sub
 160                         * to remember the subio: a lock can be cached, but
 161                         * an IO cannot, so a sublock might be referenced
 162                         * from different IO contexts.
 163                         * -jay
 164                         */
 165
 166                        sublock = cl_lock_hold(subenv->lse_env, subenv->lse_io,
 167                                               descr, "lov-parent", parent);
 168                        lov_sublock_env_put(subenv);
 169                } else {
 170                        /* an error occurred */
 171                        sublock = (void *)subenv;
 172                }
 173
 174                if (!IS_ERR(sublock))
 175                        *out = link;
 176                else
 177                        kmem_cache_free(lov_lock_link_kmem, link);
 178        } else
 179                sublock = ERR_PTR(-ENOMEM);
 180        return sublock;
 181}
 182
 183static void lov_sublock_unlock(const struct lu_env *env,
 184                               struct lovsub_lock *lsl,
 185                               struct cl_lock_closure *closure,
 186                               struct lov_sublock_env *subenv)
 187{
 188        lov_sublock_env_put(subenv);
 189        lsl->lss_active = NULL;
 190        cl_lock_disclosure(env, closure);
 191}
 192
 193static int lov_sublock_lock(const struct lu_env *env,
 194                            struct lov_lock *lck,
 195                            struct lov_lock_sub *lls,
 196                            struct cl_lock_closure *closure,
 197                            struct lov_sublock_env **lsep)
 198{
 199        struct lovsub_lock *sublock;
 200        struct cl_lock     *child;
 201        int              result = 0;
 202
 203        LASSERT(list_empty(&closure->clc_list));
 204
 205        sublock = lls->sub_lock;
 206        child = sublock->lss_cl.cls_lock;
 207        result = cl_lock_closure_build(env, child, closure);
 208        if (result == 0) {
 209                struct cl_lock *parent = closure->clc_origin;
 210
 211                LASSERT(cl_lock_is_mutexed(child));
 212                sublock->lss_active = parent;
 213
 214                if (unlikely((child->cll_state == CLS_FREEING) ||
 215                             (child->cll_flags & CLF_CANCELLED))) {
 216                        struct lov_lock_link *link;
 217                        /*
 218                         * We could race with lock deletion, which temporarily
 219                         * puts the lock into the freeing state; see bug 19080.
 220                         */
 221                        LASSERT(!(lls->sub_flags & LSF_HELD));
 222
 223                        link = lov_lock_link_find(env, lck, sublock);
 224                        LASSERT(link);
 225                        lov_lock_unlink(env, link, sublock);
 226                        lov_sublock_unlock(env, sublock, closure, NULL);
 227                        lck->lls_cancel_race = 1;
 228                        result = CLO_REPEAT;
 229                } else if (lsep) {
 230                        struct lov_sublock_env *subenv;
 231
 232                        subenv = lov_sublock_env_get(env, parent, lls);
 233                        if (IS_ERR(subenv)) {
 234                                lov_sublock_unlock(env, sublock,
 235                                                   closure, NULL);
 236                                result = PTR_ERR(subenv);
 237                        } else {
 238                                *lsep = subenv;
 239                        }
 240                }
 241        }
 242        return result;
 243}
 244
 245/**
 246 * Updates the result of a top-lock operation from a result of sub-lock
 247 * sub-operations. Top-operations like lov_lock_{enqueue,use,unuse}() iterate
 248 * over sub-locks and lov_subresult() is used to calculate return value of a
 249 * top-operation. To this end, possible return values of sub-operations are
 250 * ordered as
 251 *
 252 *     - 0           success
 253 *     - CLO_WAIT    wait for event
 254 *     - CLO_REPEAT  repeat top-operation
 255 *     - -ve         fundamental error
 256 *
 257 * Top-level return code can only go down through this list. CLO_REPEAT
 258 * overwrites CLO_WAIT, because lock mutex was released and sleeping condition
 259 * has to be rechecked by the upper layer.
 260 */
 261static int lov_subresult(int result, int rc)
 262{
 263        int result_rank;
 264        int rc_rank;
 265
 266        LASSERTF(result <= 0 || result == CLO_REPEAT || result == CLO_WAIT,
 267                 "result = %d\n", result);
 268        LASSERTF(rc <= 0 || rc == CLO_REPEAT || rc == CLO_WAIT,
 269                 "rc = %d\n", rc);
 270        CLASSERT(CLO_WAIT < CLO_REPEAT);
 271
 272        /* calculate ranks in the ordering above */
 273        result_rank = result < 0 ? 1 + CLO_REPEAT : result;
 274        rc_rank = rc < 0 ? 1 + CLO_REPEAT : rc;
 275
 276        if (result_rank < rc_rank)
 277                result = rc;
 278        return result;
 279}
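
/*
 * Editor-added illustration (not part of the original source): the ranking
 * above means the "worst" sub-result wins, ordered 0, then CLO_WAIT, then
 * CLO_REPEAT, then any negative errno.  Assuming an arbitrary sequence of
 * sub-operation outcomes:
 *
 *      int result = 0;
 *      result = lov_subresult(result, CLO_WAIT);      now CLO_WAIT
 *      result = lov_subresult(result, 0);             stays CLO_WAIT
 *      result = lov_subresult(result, CLO_REPEAT);    now CLO_REPEAT
 *      result = lov_subresult(result, -EIO);          now -EIO
 */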
 280
 281/**
 282 * Creates sub-locks for a given lov_lock for the first time.
 283 *
 284 * Goes through all sub-objects of top-object, and creates sub-locks on every
 285 * sub-object intersecting with top-lock extent. This is complicated by the
 286 * fact that top-lock (that is being created) can be accessed concurrently
 287 * through already created sub-locks (possibly shared with other top-locks).
 288 */
 289static int lov_lock_sub_init(const struct lu_env *env,
 290                             struct lov_lock *lck, const struct cl_io *io)
 291{
 292        int result = 0;
 293        int i;
 294        int nr;
 295        u64 start;
 296        u64 end;
 297        u64 file_start;
 298        u64 file_end;
 299
 300        struct lov_object       *loo    = cl2lov(lck->lls_cl.cls_obj);
 301        struct lov_layout_raid0 *r0     = lov_r0(loo);
 302        struct cl_lock    *parent = lck->lls_cl.cls_lock;
 303
 304        lck->lls_orig = parent->cll_descr;
 305        file_start = cl_offset(lov2cl(loo), parent->cll_descr.cld_start);
 306        file_end   = cl_offset(lov2cl(loo), parent->cll_descr.cld_end + 1) - 1;
 307
 308        for (i = 0, nr = 0; i < r0->lo_nr; i++) {
 309                /*
 310                 * XXX for wide striping a smarter algorithm is desirable,
 311                 * breaking out of the loop early.
 312                 */
 313                if (likely(r0->lo_sub[i]) &&
 314                    lov_stripe_intersects(loo->lo_lsm, i,
 315                                          file_start, file_end, &start, &end))
 316                        nr++;
 317        }
 318        LASSERT(nr > 0);
 319        lck->lls_sub = libcfs_kvzalloc(nr * sizeof(lck->lls_sub[0]), GFP_NOFS);
 320        if (!lck->lls_sub)
 321                return -ENOMEM;
 322
 323        lck->lls_nr = nr;
 324        /*
 325         * First, fill in sub-lock descriptions in
 326         * lck->lls_sub[].sub_descr. They are used by lov_sublock_alloc()
 327         * (called below in this function, and by lov_lock_enqueue()) to
 328         * create sub-locks. At this moment, no other thread can access
 329         * top-lock.
 330         */
 331        for (i = 0, nr = 0; i < r0->lo_nr; ++i) {
 332                if (likely(r0->lo_sub[i]) &&
 333                    lov_stripe_intersects(loo->lo_lsm, i,
 334                                          file_start, file_end, &start, &end)) {
 335                        struct cl_lock_descr *descr;
 336
 337                        descr = &lck->lls_sub[nr].sub_descr;
 338
 339                        LASSERT(!descr->cld_obj);
 340                        descr->cld_obj   = lovsub2cl(r0->lo_sub[i]);
 341                        descr->cld_start = cl_index(descr->cld_obj, start);
 342                        descr->cld_end   = cl_index(descr->cld_obj, end);
 343                        descr->cld_mode  = parent->cll_descr.cld_mode;
 344                        descr->cld_gid   = parent->cll_descr.cld_gid;
 345                        descr->cld_enq_flags   = parent->cll_descr.cld_enq_flags;
 346                        /* XXX has no effect */
 347                        lck->lls_sub[nr].sub_got = *descr;
 348                        lck->lls_sub[nr].sub_stripe = i;
 349                        nr++;
 350                }
 351        }
 352        LASSERT(nr == lck->lls_nr);
 353
 354        /*
 355         * Some sub-locks can be missing at this point. This is not a problem,
 356         * because enqueue will create them anyway. The main duty of this function
 357         * is to fill in sub-lock descriptions in a race-free manner.
 358         */
 359        return result;
 360}
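
/*
 * Editor-added illustration (not part of the original source) of the mapping
 * done above, assuming a plain RAID0 layout with two stripes, a 1 MiB stripe
 * size, 4 KiB pages, and a top-lock covering pages [0, 511] (file bytes
 * [0, 2 MiB - 1]):
 *
 *      file_start = 0, file_end = 2 MiB - 1;
 *      stripe 0: lov_stripe_intersects() reports stripe-local bytes
 *                [0, 1 MiB - 1], so its sub_descr covers sub-object
 *                pages [0, 255];
 *      stripe 1: likewise stripe-local bytes [0, 1 MiB - 1], pages [0, 255];
 *      nr = 2, so lck->lls_sub[] ends up with one entry per intersecting
 *      stripe.
 */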
 361
 362static int lov_sublock_release(const struct lu_env *env, struct lov_lock *lck,
 363                               int i, int deluser, int rc)
 364{
 365        struct cl_lock *parent = lck->lls_cl.cls_lock;
 366
 367        LASSERT(cl_lock_is_mutexed(parent));
 368
 369        if (lck->lls_sub[i].sub_flags & LSF_HELD) {
 370                struct cl_lock    *sublock;
 371                int dying;
 372
 373                sublock = lck->lls_sub[i].sub_lock->lss_cl.cls_lock;
 374                LASSERT(cl_lock_is_mutexed(sublock));
 375
 376                lck->lls_sub[i].sub_flags &= ~LSF_HELD;
 377                if (deluser)
 378                        cl_lock_user_del(env, sublock);
 379                /*
 380                 * If the last hold is released, and cancellation is pending
 381                 * for a sub-lock, release the parent mutex to avoid keeping it
 382                 * while the sub-lock is being paged out.
 383                 */
 384                dying = (sublock->cll_descr.cld_mode == CLM_PHANTOM ||
 385                         sublock->cll_descr.cld_mode == CLM_GROUP ||
 386                         (sublock->cll_flags & (CLF_CANCELPEND|CLF_DOOMED))) &&
 387                        sublock->cll_holds == 1;
 388                if (dying)
 389                        cl_lock_mutex_put(env, parent);
 390                cl_lock_unhold(env, sublock, "lov-parent", parent);
 391                if (dying) {
 392                        cl_lock_mutex_get(env, parent);
 393                        rc = lov_subresult(rc, CLO_REPEAT);
 394                }
 395                /*
 396                 * From now on lck->lls_sub[i].sub_lock is a "weak" pointer,
 397                 * not backed by a reference on a
 398                 * sub-lock. lovsub_lock_delete() will clear
 399                 * lck->lls_sub[i].sub_lock under semaphores, just before
 400                 * sub-lock is destroyed.
 401                 */
 402        }
 403        return rc;
 404}
 405
 406static void lov_sublock_hold(const struct lu_env *env, struct lov_lock *lck,
 407                             int i)
 408{
 409        struct cl_lock *parent = lck->lls_cl.cls_lock;
 410
 411        LASSERT(cl_lock_is_mutexed(parent));
 412
 413        if (!(lck->lls_sub[i].sub_flags & LSF_HELD)) {
 414                struct cl_lock *sublock;
 415
 416                sublock = lck->lls_sub[i].sub_lock->lss_cl.cls_lock;
 417                LASSERT(cl_lock_is_mutexed(sublock));
 418                LASSERT(sublock->cll_state != CLS_FREEING);
 419
 420                lck->lls_sub[i].sub_flags |= LSF_HELD;
 421
 422                cl_lock_get_trust(sublock);
 423                cl_lock_hold_add(env, sublock, "lov-parent", parent);
 424                cl_lock_user_add(env, sublock);
 425                cl_lock_put(env, sublock);
 426        }
 427}
 428
 429static void lov_lock_fini(const struct lu_env *env,
 430                          struct cl_lock_slice *slice)
 431{
 432        struct lov_lock *lck;
 433        int i;
 434
 435        lck = cl2lov_lock(slice);
 436        LASSERT(lck->lls_nr_filled == 0);
 437        if (lck->lls_sub) {
 438                for (i = 0; i < lck->lls_nr; ++i)
 439                        /*
 440                         * No sub-lock exists at this point, as a sub-lock holds
 441                         * a reference on its parent.
 442                         */
 443                        LASSERT(!lck->lls_sub[i].sub_lock);
 444                kvfree(lck->lls_sub);
 445        }
 446        kmem_cache_free(lov_lock_kmem, lck);
 447}
 448
 449static int lov_lock_enqueue_wait(const struct lu_env *env,
 450                                 struct lov_lock *lck,
 451                                 struct cl_lock *sublock)
 452{
 453        struct cl_lock *lock = lck->lls_cl.cls_lock;
 454        int          result;
 455
 456        LASSERT(cl_lock_is_mutexed(lock));
 457
 458        cl_lock_mutex_put(env, lock);
 459        result = cl_lock_enqueue_wait(env, sublock, 0);
 460        cl_lock_mutex_get(env, lock);
 461        return result ?: CLO_REPEAT;
 462}
 463
 464/**
 465 * Tries to advance a state machine of a given sub-lock toward enqueuing of
 466 * the top-lock.
 467 *
 468 * \retval 0 if state-transition can proceed
 469 * \retval -ve otherwise.
 470 */
 471static int lov_lock_enqueue_one(const struct lu_env *env, struct lov_lock *lck,
 472                                struct cl_lock *sublock,
 473                                struct cl_io *io, __u32 enqflags, int last)
 474{
 475        int result;
 476
 477        /* first, try to enqueue a sub-lock ... */
 478        result = cl_enqueue_try(env, sublock, io, enqflags);
 479        if ((sublock->cll_state == CLS_ENQUEUED) && !(enqflags & CEF_AGL)) {
 480                /* if it is enqueued, try to `wait' on it---maybe it's already
 481                 * granted
 482                 */
 483                result = cl_wait_try(env, sublock);
 484                if (result == CLO_REENQUEUED)
 485                        result = CLO_WAIT;
 486        }
 487        /*
 488         * If CEF_ASYNC flag is set, then all sub-locks can be enqueued in
 489         * parallel, otherwise---enqueue has to wait until sub-lock is granted
 490         * before proceeding to the next one.
 491         */
 492        if ((result == CLO_WAIT) && (sublock->cll_state <= CLS_HELD) &&
 493            (enqflags & CEF_ASYNC) && (!last || (enqflags & CEF_AGL)))
 494                result = 0;
 495        return result;
 496}
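
/*
 * Editor-added note (not part of the original source): the final check above
 * is what lets CEF_ASYNC enqueues proceed in parallel.  A sketch, assuming
 * the sub-lock state is still at or below CLS_HELD:
 *
 *      rc == CLO_WAIT, CEF_ASYNC set, not the last stripe
 *              -> rc becomes 0, so lov_lock_enqueue() moves on to the next
 *                 stripe without waiting for this one;
 *      rc == CLO_WAIT, CEF_ASYNC set, last stripe, CEF_AGL not set
 *              -> rc stays CLO_WAIT and the caller waits as usual.
 */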
 497
 498/**
 499 * Helper function for lov_lock_enqueue() that creates a missing sub-lock.
 500 */
 501static int lov_sublock_fill(const struct lu_env *env, struct cl_lock *parent,
 502                            struct cl_io *io, struct lov_lock *lck, int idx)
 503{
 504        struct lov_lock_link *link = NULL;
 505        struct cl_lock       *sublock;
 506        int                result;
 507
 508        LASSERT(parent->cll_depth == 1);
 509        cl_lock_mutex_put(env, parent);
 510        sublock = lov_sublock_alloc(env, io, lck, idx, &link);
 511        if (!IS_ERR(sublock))
 512                cl_lock_mutex_get(env, sublock);
 513        cl_lock_mutex_get(env, parent);
 514
 515        if (!IS_ERR(sublock)) {
 516                cl_lock_get_trust(sublock);
 517                if (parent->cll_state == CLS_QUEUING &&
 518                    !lck->lls_sub[idx].sub_lock) {
 519                        lov_sublock_adopt(env, lck, sublock, idx, link);
 520                } else {
 521                        kmem_cache_free(lov_lock_link_kmem, link);
 522                        /* another thread allocated the sub-lock, or enqueue
 523                         * is no longer in progress
 524                         */
 525                        cl_lock_mutex_put(env, parent);
 526                        cl_lock_unhold(env, sublock, "lov-parent", parent);
 527                        cl_lock_mutex_get(env, parent);
 528                }
 529                cl_lock_mutex_put(env, sublock);
 530                cl_lock_put(env, sublock);
 531                result = CLO_REPEAT;
 532        } else
 533                result = PTR_ERR(sublock);
 534        return result;
 535}
 536
 537/**
 538 * Implementation of cl_lock_operations::clo_enqueue() for lov layer. This
 539 * function is rather subtle, as it enqueues top-lock (i.e., advances top-lock
 540 * state machine from CLS_QUEUING to CLS_ENQUEUED states) by juggling sub-lock
 541 * state machines in the face of sub-locks being shared (by multiple top-locks),
 542 * and concurrent sub-lock cancellations.
 543 */
 544static int lov_lock_enqueue(const struct lu_env *env,
 545                            const struct cl_lock_slice *slice,
 546                            struct cl_io *io, __u32 enqflags)
 547{
 548        struct cl_lock   *lock    = slice->cls_lock;
 549        struct lov_lock *lck     = cl2lov_lock(slice);
 550        struct cl_lock_closure *closure = lov_closure_get(env, lock);
 551        int i;
 552        int result;
 553        enum cl_lock_state minstate;
 554
 555        for (result = 0, minstate = CLS_FREEING, i = 0; i < lck->lls_nr; ++i) {
 556                int rc;
 557                struct lovsub_lock     *sub;
 558                struct lov_lock_sub    *lls;
 559                struct cl_lock   *sublock;
 560                struct lov_sublock_env *subenv;
 561
 562                if (lock->cll_state != CLS_QUEUING) {
 563                        /*
 564                         * Lock might have left QUEUING state if previous
 565                         * iteration released its mutex. Stop enqueuing in this
 566                         * case and let the upper layer decide what to do.
 567                         */
 568                        LASSERT(i > 0 && result != 0);
 569                        break;
 570                }
 571
 572                lls = &lck->lls_sub[i];
 573                sub = lls->sub_lock;
 574                /*
 575                 * Sub-lock might have been canceled, while top-lock was
 576                 * cached.
 577                 */
 578                if (!sub) {
 579                        result = lov_sublock_fill(env, lock, io, lck, i);
 580                        /* lov_sublock_fill() released @lock mutex,
 581                         * restart.
 582                         */
 583                        break;
 584                }
 585                sublock = sub->lss_cl.cls_lock;
 586                rc = lov_sublock_lock(env, lck, lls, closure, &subenv);
 587                if (rc == 0) {
 588                        lov_sublock_hold(env, lck, i);
 589                        rc = lov_lock_enqueue_one(subenv->lse_env, lck, sublock,
 590                                                  subenv->lse_io, enqflags,
 591                                                  i == lck->lls_nr - 1);
 592                        minstate = min(minstate, sublock->cll_state);
 593                        if (rc == CLO_WAIT) {
 594                                switch (sublock->cll_state) {
 595                                case CLS_QUEUING:
 596                                        /* take recursive mutex; the lock is
 597                                         * released in lov_lock_enqueue_wait().
 598                                         */
 599                                        cl_lock_mutex_get(env, sublock);
 600                                        lov_sublock_unlock(env, sub, closure,
 601                                                           subenv);
 602                                        rc = lov_lock_enqueue_wait(env, lck,
 603                                                                   sublock);
 604                                        break;
 605                                case CLS_CACHED:
 606                                        cl_lock_get(sublock);
 607                                        /* take recursive mutex of sublock */
 608                                        cl_lock_mutex_get(env, sublock);
 609                                        /* need to release all locks in closure
 610                                         * otherwise it may deadlock. LU-2683.
 611                                         */
 612                                        lov_sublock_unlock(env, sub, closure,
 613                                                           subenv);
 614                                        /* sublock and parent are held. */
 615                                        rc = lov_sublock_release(env, lck, i,
 616                                                                 1, rc);
 617                                        cl_lock_mutex_put(env, sublock);
 618                                        cl_lock_put(env, sublock);
 619                                        break;
 620                                default:
 621                                        lov_sublock_unlock(env, sub, closure,
 622                                                           subenv);
 623                                        break;
 624                                }
 625                        } else {
 626                                LASSERT(!sublock->cll_conflict);
 627                                lov_sublock_unlock(env, sub, closure, subenv);
 628                        }
 629                }
 630                result = lov_subresult(result, rc);
 631                if (result != 0)
 632                        break;
 633        }
 634        cl_lock_closure_fini(closure);
 635        return result ?: minstate >= CLS_ENQUEUED ? 0 : CLO_WAIT;
 636}
 637
 638static int lov_lock_unuse(const struct lu_env *env,
 639                          const struct cl_lock_slice *slice)
 640{
 641        struct lov_lock *lck     = cl2lov_lock(slice);
 642        struct cl_lock_closure *closure = lov_closure_get(env, slice->cls_lock);
 643        int i;
 644        int result;
 645
 646        for (result = 0, i = 0; i < lck->lls_nr; ++i) {
 647                int rc;
 648                struct lovsub_lock     *sub;
 649                struct cl_lock   *sublock;
 650                struct lov_lock_sub    *lls;
 651                struct lov_sublock_env *subenv;
 652
 653                /* top-lock state cannot change concurrently, because the single
 654                 * thread (the one that released the last hold) carries the
 655                 * unlocking through to completion.
 656                 */
 657                LASSERT(slice->cls_lock->cll_state == CLS_INTRANSIT);
 658                lls = &lck->lls_sub[i];
 659                sub = lls->sub_lock;
 660                if (!sub)
 661                        continue;
 662
 663                sublock = sub->lss_cl.cls_lock;
 664                rc = lov_sublock_lock(env, lck, lls, closure, &subenv);
 665                if (rc == 0) {
 666                        if (lls->sub_flags & LSF_HELD) {
 667                                LASSERT(sublock->cll_state == CLS_HELD ||
 668                                        sublock->cll_state == CLS_ENQUEUED);
 669                                rc = cl_unuse_try(subenv->lse_env, sublock);
 670                                rc = lov_sublock_release(env, lck, i, 0, rc);
 671                        }
 672                        lov_sublock_unlock(env, sub, closure, subenv);
 673                }
 674                result = lov_subresult(result, rc);
 675        }
 676
 677        if (result == 0 && lck->lls_cancel_race) {
 678                lck->lls_cancel_race = 0;
 679                result = -ESTALE;
 680        }
 681        cl_lock_closure_fini(closure);
 682        return result;
 683}
 684
 685static void lov_lock_cancel(const struct lu_env *env,
 686                            const struct cl_lock_slice *slice)
 687{
 688        struct lov_lock *lck     = cl2lov_lock(slice);
 689        struct cl_lock_closure *closure = lov_closure_get(env, slice->cls_lock);
 690        int i;
 691        int result;
 692
 693        for (result = 0, i = 0; i < lck->lls_nr; ++i) {
 694                int rc;
 695                struct lovsub_lock     *sub;
 696                struct cl_lock   *sublock;
 697                struct lov_lock_sub    *lls;
 698                struct lov_sublock_env *subenv;
 699
 700                /* top-lock state cannot change concurrently, because the single
 701                 * thread (the one that released the last hold) carries the
 702                 * unlocking through to completion.
 703                 */
 704                lls = &lck->lls_sub[i];
 705                sub = lls->sub_lock;
 706                if (!sub)
 707                        continue;
 708
 709                sublock = sub->lss_cl.cls_lock;
 710                rc = lov_sublock_lock(env, lck, lls, closure, &subenv);
 711                if (rc == 0) {
 712                        if (!(lls->sub_flags & LSF_HELD)) {
 713                                lov_sublock_unlock(env, sub, closure, subenv);
 714                                continue;
 715                        }
 716
 717                        switch (sublock->cll_state) {
 718                        case CLS_HELD:
 719                                rc = cl_unuse_try(subenv->lse_env, sublock);
 720                                lov_sublock_release(env, lck, i, 0, 0);
 721                                break;
 722                        default:
 723                                lov_sublock_release(env, lck, i, 1, 0);
 724                                break;
 725                        }
 726                        lov_sublock_unlock(env, sub, closure, subenv);
 727                }
 728
 729                if (rc == CLO_REPEAT) {
 730                        --i;
 731                        continue;
 732                }
 733
 734                result = lov_subresult(result, rc);
 735        }
 736
 737        if (result)
 738                CL_LOCK_DEBUG(D_ERROR, env, slice->cls_lock,
 739                              "lov_lock_cancel fails with %d.\n", result);
 740
 741        cl_lock_closure_fini(closure);
 742}
 743
 744static int lov_lock_wait(const struct lu_env *env,
 745                         const struct cl_lock_slice *slice)
 746{
 747        struct lov_lock *lck     = cl2lov_lock(slice);
 748        struct cl_lock_closure *closure = lov_closure_get(env, slice->cls_lock);
 749        enum cl_lock_state      minstate;
 750        int                  reenqueued;
 751        int                  result;
 752        int                  i;
 753
 754again:
 755        for (result = 0, minstate = CLS_FREEING, i = 0, reenqueued = 0;
 756             i < lck->lls_nr; ++i) {
 757                int rc;
 758                struct lovsub_lock     *sub;
 759                struct cl_lock   *sublock;
 760                struct lov_lock_sub    *lls;
 761                struct lov_sublock_env *subenv;
 762
 763                lls = &lck->lls_sub[i];
 764                sub = lls->sub_lock;
 765                sublock = sub->lss_cl.cls_lock;
 766                rc = lov_sublock_lock(env, lck, lls, closure, &subenv);
 767                if (rc == 0) {
 768                        LASSERT(sublock->cll_state >= CLS_ENQUEUED);
 769                        if (sublock->cll_state < CLS_HELD)
 770                                rc = cl_wait_try(env, sublock);
 771
 772                        minstate = min(minstate, sublock->cll_state);
 773                        lov_sublock_unlock(env, sub, closure, subenv);
 774                }
 775                if (rc == CLO_REENQUEUED) {
 776                        reenqueued++;
 777                        rc = 0;
 778                }
 779                result = lov_subresult(result, rc);
 780                if (result != 0)
 781                        break;
 782        }
 783        /* Each sub-lock can be re-enqueued only once, so this will not
 784         * loop forever.
 785         */
 786        if (result == 0 && reenqueued != 0)
 787                goto again;
 788        cl_lock_closure_fini(closure);
 789        return result ?: minstate >= CLS_HELD ? 0 : CLO_WAIT;
 790}
 791
 792static int lov_lock_use(const struct lu_env *env,
 793                        const struct cl_lock_slice *slice)
 794{
 795        struct lov_lock *lck     = cl2lov_lock(slice);
 796        struct cl_lock_closure *closure = lov_closure_get(env, slice->cls_lock);
 797        int                  result;
 798        int                  i;
 799
 800        LASSERT(slice->cls_lock->cll_state == CLS_INTRANSIT);
 801
 802        for (result = 0, i = 0; i < lck->lls_nr; ++i) {
 803                int rc;
 804                struct lovsub_lock     *sub;
 805                struct cl_lock   *sublock;
 806                struct lov_lock_sub    *lls;
 807                struct lov_sublock_env *subenv;
 808
 809                LASSERT(slice->cls_lock->cll_state == CLS_INTRANSIT);
 810
 811                lls = &lck->lls_sub[i];
 812                sub = lls->sub_lock;
 813                if (!sub) {
 814                        /*
 815                         * Sub-lock might have been canceled, while top-lock was
 816                         * cached.
 817                         */
 818                        result = -ESTALE;
 819                        break;
 820                }
 821
 822                sublock = sub->lss_cl.cls_lock;
 823                rc = lov_sublock_lock(env, lck, lls, closure, &subenv);
 824                if (rc == 0) {
 825                        LASSERT(sublock->cll_state != CLS_FREEING);
 826                        lov_sublock_hold(env, lck, i);
 827                        if (sublock->cll_state == CLS_CACHED) {
 828                                rc = cl_use_try(subenv->lse_env, sublock, 0);
 829                                if (rc != 0)
 830                                        rc = lov_sublock_release(env, lck,
 831                                                                 i, 1, rc);
 832                        } else if (sublock->cll_state == CLS_NEW) {
 833                                /* Sub-lock might have been canceled, while
 834                                 * top-lock was cached.
 835                                 */
 836                                result = -ESTALE;
 837                                lov_sublock_release(env, lck, i, 1, result);
 838                        }
 839                        lov_sublock_unlock(env, sub, closure, subenv);
 840                }
 841                result = lov_subresult(result, rc);
 842                if (result != 0)
 843                        break;
 844        }
 845
 846        if (lck->lls_cancel_race) {
 847                /*
 848                 * If unlocking happened at the same time, the sub-lock state
 849                 * should be FREEING and lov_sublock_lock() should return
 850                 * CLO_REPEAT. In this case, return -ESTALE so that the upper
 851                 * layer resets the lock state to NEW.
 852                 */
 853                lck->lls_cancel_race = 0;
 854                LASSERT(result != 0);
 855                result = -ESTALE;
 856        }
 857        cl_lock_closure_fini(closure);
 858        return result;
 859}
 860
 861/**
 862 * Check if the extent region \a descr is covered by \a child against the
 863 * specific \a stripe.
 864 */
 865static int lov_lock_stripe_is_matching(const struct lu_env *env,
 866                                       struct lov_object *lov, int stripe,
 867                                       const struct cl_lock_descr *child,
 868                                       const struct cl_lock_descr *descr)
 869{
 870        struct lov_stripe_md *lsm = lov->lo_lsm;
 871        u64 start;
 872        u64 end;
 873        int result;
 874
 875        if (lov_r0(lov)->lo_nr == 1)
 876                return cl_lock_ext_match(child, descr);
 877
 878        /*
 879         * For a multi-stripe object:
 880         * - make sure the descr only covers the child's stripe, and
 881         * - check whether the extent matches.
 882         */
 883        start = cl_offset(&lov->lo_cl, descr->cld_start);
 884        end   = cl_offset(&lov->lo_cl, descr->cld_end + 1) - 1;
 885        result = 0;
 886        /* glimpse should work on an object with a LOV EA hole. */
 887        if (end - start <= lsm->lsm_stripe_size) {
 888                int idx;
 889
 890                idx = lov_stripe_number(lsm, start);
 891                if (idx == stripe ||
 892                    unlikely(!lov_r0(lov)->lo_sub[idx])) {
 893                        idx = lov_stripe_number(lsm, end);
 894                        if (idx == stripe ||
 895                            unlikely(!lov_r0(lov)->lo_sub[idx]))
 896                                result = 1;
 897                }
 898        }
 899
 900        if (result != 0) {
 901                struct cl_lock_descr *subd = &lov_env_info(env)->lti_ldescr;
 902                u64 sub_start;
 903                u64 sub_end;
 904
 905                subd->cld_obj  = NULL;   /* don't need sub object at all */
 906                subd->cld_mode = descr->cld_mode;
 907                subd->cld_gid  = descr->cld_gid;
 908                result = lov_stripe_intersects(lsm, stripe, start, end,
 909                                               &sub_start, &sub_end);
 910                LASSERT(result);
 911                subd->cld_start = cl_index(child->cld_obj, sub_start);
 912                subd->cld_end   = cl_index(child->cld_obj, sub_end);
 913                result = cl_lock_ext_match(child, subd);
 914        }
 915        return result;
 916}
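
/*
 * Editor-added illustration (not part of the original source), assuming a
 * two-stripe object with a 1 MiB stripe size: a descr covering file bytes
 * [0, 512 KiB - 1] spans less than one stripe, and both of its ends map to
 * stripe 0 via lov_stripe_number().  For stripe == 0 the extent is converted
 * to the stripe-local range [0, 512 KiB - 1] and matched against the child
 * with cl_lock_ext_match(); for stripe == 1 the function simply returns 0.
 */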
 917
 918/**
 919 * An implementation of cl_lock_operations::clo_fits_into() method.
 920 *
 921 * Checks whether a lock (given by \a slice) is suitable for \a
 922 * io. Multi-stripe locks can be used only for "quick" io, like truncate, or
 923 * O_APPEND write.
 924 *
 925 * \see ccc_lock_fits_into().
 926 */
 927static int lov_lock_fits_into(const struct lu_env *env,
 928                              const struct cl_lock_slice *slice,
 929                              const struct cl_lock_descr *need,
 930                              const struct cl_io *io)
 931{
 932        struct lov_lock   *lov = cl2lov_lock(slice);
 933        struct lov_object *obj = cl2lov(slice->cls_obj);
 934        int result;
 935
 936        LASSERT(cl_object_same(need->cld_obj, slice->cls_obj));
 937        LASSERT(lov->lls_nr > 0);
 938
 939        /* For the top-lock, the enqueue flags must match; otherwise we run
 940         * into problems if a sublock is missing and has to be re-enqueued.
 941         */
 942        if (need->cld_enq_flags != lov->lls_orig.cld_enq_flags)
 943                return 0;
 944
 945        if (need->cld_mode == CLM_GROUP)
 946                /*
 947                 * always allow matching a group lock.
 948                 */
 949                result = cl_lock_ext_match(&lov->lls_orig, need);
 950        else if (lov->lls_nr == 1) {
 951                struct cl_lock_descr *got = &lov->lls_sub[0].sub_got;
 952
 953                result = lov_lock_stripe_is_matching(env,
 954                                                     cl2lov(slice->cls_obj),
 955                                                     lov->lls_sub[0].sub_stripe,
 956                                                     got, need);
 957        } else if (io->ci_type != CIT_SETATTR && io->ci_type != CIT_MISC &&
 958                   !cl_io_is_append(io) && need->cld_mode != CLM_PHANTOM)
 959                /*
 960                 * Multi-stripe locks are only suitable for `quick' IO and for
 961                 * glimpse.
 962                 */
 963                result = 0;
 964        else
 965                /*
 966                 * Most general case: multi-stripe existing lock, and
 967                 * (potentially) multi-stripe @need lock. Check that @need is
 968                 * covered by @lov's sub-locks.
 969                 *
 970                 * For now, ignore lock expansions made by the server, and
 971                 * match against original lock extent.
 972                 */
 973                result = cl_lock_ext_match(&lov->lls_orig, need);
 974        CDEBUG(D_DLMTRACE, DDESCR"/"DDESCR" %d %d/%d: %d\n",
 975               PDESCR(&lov->lls_orig), PDESCR(&lov->lls_sub[0].sub_got),
 976               lov->lls_sub[0].sub_stripe, lov->lls_nr, lov_r0(obj)->lo_nr,
 977               result);
 978        return result;
 979}
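
/*
 * Editor-added example (not part of the original source): assuming matching
 * enqueue flags, a cached two-stripe lock whose lls_orig covers the requested
 * extent fits a CIT_SETATTR (truncate) or O_APPEND write io, since those take
 * the final cl_lock_ext_match() branch above; an ordinary write io with a
 * non-group, non-phantom mode instead gets 0 from the multi-stripe check, so
 * the cached lock is not reused for it.
 */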
 980
 981void lov_lock_unlink(const struct lu_env *env,
 982                     struct lov_lock_link *link, struct lovsub_lock *sub)
 983{
 984        struct lov_lock *lck    = link->lll_super;
 985        struct cl_lock  *parent = lck->lls_cl.cls_lock;
 986
 987        LASSERT(cl_lock_is_mutexed(parent));
 988        LASSERT(cl_lock_is_mutexed(sub->lss_cl.cls_lock));
 989
 990        list_del_init(&link->lll_list);
 991        LASSERT(lck->lls_sub[link->lll_idx].sub_lock == sub);
 992        /* yank this sub-lock from parent's array */
 993        lck->lls_sub[link->lll_idx].sub_lock = NULL;
 994        LASSERT(lck->lls_nr_filled > 0);
 995        lck->lls_nr_filled--;
 996        lu_ref_del(&parent->cll_reference, "lov-child", sub->lss_cl.cls_lock);
 997        cl_lock_put(env, parent);
 998        kmem_cache_free(lov_lock_link_kmem, link);
 999}
1000
1001struct lov_lock_link *lov_lock_link_find(const struct lu_env *env,
1002                                         struct lov_lock *lck,
1003                                         struct lovsub_lock *sub)
1004{
1005        struct lov_lock_link *scan;
1006
1007        LASSERT(cl_lock_is_mutexed(sub->lss_cl.cls_lock));
1008
1009        list_for_each_entry(scan, &sub->lss_parents, lll_list) {
1010                if (scan->lll_super == lck)
1011                        return scan;
1012        }
1013        return NULL;
1014}
1015
1016/**
1017 * An implementation of cl_lock_operations::clo_delete() method. This is
1018 * invoked for "top-to-bottom" delete, when lock destruction starts from the
1019 * top-lock, e.g., as a result of inode destruction.
1020 *
1021 * Unlinks top-lock from all its sub-locks. Sub-locks are not deleted there:
1022 * this is done separately elsewhere:
1023 *
1024 *     - for inode destruction, lov_object_delete() calls cl_object_kill() for
1025 *       each sub-object, purging its locks;
1026 *
1027 *     - in other cases (e.g., a fatal error with a top-lock) sub-locks are
1028 *       left in the cache.
1029 */
1030static void lov_lock_delete(const struct lu_env *env,
1031                            const struct cl_lock_slice *slice)
1032{
1033        struct lov_lock *lck     = cl2lov_lock(slice);
1034        struct cl_lock_closure *closure = lov_closure_get(env, slice->cls_lock);
1035        struct lov_lock_link   *link;
1036        int                  rc;
1037        int                  i;
1038
1039        LASSERT(slice->cls_lock->cll_state == CLS_FREEING);
1040
1041        for (i = 0; i < lck->lls_nr; ++i) {
1042                struct lov_lock_sub *lls = &lck->lls_sub[i];
1043                struct lovsub_lock  *lsl = lls->sub_lock;
1044
1045                if (!lsl) /* already removed */
1046                        continue;
1047
1048                rc = lov_sublock_lock(env, lck, lls, closure, NULL);
1049                if (rc == CLO_REPEAT) {
1050                        --i;
1051                        continue;
1052                }
1053
1054                LASSERT(rc == 0);
1055                LASSERT(lsl->lss_cl.cls_lock->cll_state < CLS_FREEING);
1056
1057                if (lls->sub_flags & LSF_HELD)
1058                        lov_sublock_release(env, lck, i, 1, 0);
1059
1060                link = lov_lock_link_find(env, lck, lsl);
1061                LASSERT(link);
1062                lov_lock_unlink(env, link, lsl);
1063                LASSERT(!lck->lls_sub[i].sub_lock);
1064
1065                lov_sublock_unlock(env, lsl, closure, NULL);
1066        }
1067
1068        cl_lock_closure_fini(closure);
1069}
1070
1071static int lov_lock_print(const struct lu_env *env, void *cookie,
1072                          lu_printer_t p, const struct cl_lock_slice *slice)
1073{
1074        struct lov_lock *lck = cl2lov_lock(slice);
1075        int           i;
1076
1077        (*p)(env, cookie, "%d\n", lck->lls_nr);
1078        for (i = 0; i < lck->lls_nr; ++i) {
1079                struct lov_lock_sub *sub;
1080
1081                sub = &lck->lls_sub[i];
1082                (*p)(env, cookie, "    %d %x: ", i, sub->sub_flags);
1083                if (sub->sub_lock)
1084                        cl_lock_print(env, cookie, p,
1085                                      sub->sub_lock->lss_cl.cls_lock);
1086                else
1087                        (*p)(env, cookie, "---\n");
1088        }
1089        return 0;
1090}
1091
1092static const struct cl_lock_operations lov_lock_ops = {
1093        .clo_fini      = lov_lock_fini,
1094        .clo_enqueue   = lov_lock_enqueue,
1095        .clo_wait      = lov_lock_wait,
1096        .clo_use       = lov_lock_use,
1097        .clo_unuse     = lov_lock_unuse,
1098        .clo_cancel    = lov_lock_cancel,
1099        .clo_fits_into = lov_lock_fits_into,
1100        .clo_delete    = lov_lock_delete,
1101        .clo_print     = lov_lock_print
1102};
1103
1104int lov_lock_init_raid0(const struct lu_env *env, struct cl_object *obj,
1105                        struct cl_lock *lock, const struct cl_io *io)
1106{
1107        struct lov_lock *lck;
1108        int result;
1109
1110        lck = kmem_cache_zalloc(lov_lock_kmem, GFP_NOFS);
1111        if (lck) {
1112                cl_lock_slice_add(lock, &lck->lls_cl, obj, &lov_lock_ops);
1113                result = lov_lock_sub_init(env, lck, io);
1114        } else
1115                result = -ENOMEM;
1116        return result;
1117}
1118
1119static void lov_empty_lock_fini(const struct lu_env *env,
1120                                struct cl_lock_slice *slice)
1121{
1122        struct lov_lock *lck = cl2lov_lock(slice);
1123
1124        kmem_cache_free(lov_lock_kmem, lck);
1125}
1126
1127static int lov_empty_lock_print(const struct lu_env *env, void *cookie,
1128                                lu_printer_t p,
1129                                const struct cl_lock_slice *slice)
1130{
1131        (*p)(env, cookie, "empty\n");
1132        return 0;
1133}
1134
1135/* XXX: more methods will be added later. */
1136static const struct cl_lock_operations lov_empty_lock_ops = {
1137        .clo_fini  = lov_empty_lock_fini,
1138        .clo_print = lov_empty_lock_print
1139};
1140
1141int lov_lock_init_empty(const struct lu_env *env, struct cl_object *obj,
1142                        struct cl_lock *lock, const struct cl_io *io)
1143{
1144        struct lov_lock *lck;
1145        int result = -ENOMEM;
1146
1147        lck = kmem_cache_zalloc(lov_lock_kmem, GFP_NOFS);
1148        if (lck) {
1149                cl_lock_slice_add(lock, &lck->lls_cl, obj, &lov_empty_lock_ops);
1150                lck->lls_orig = lock->cll_descr;
1151                result = 0;
1152        }
1153        return result;
1154}
1155
1156static struct cl_lock_closure *lov_closure_get(const struct lu_env *env,
1157                                               struct cl_lock *parent)
1158{
1159        struct cl_lock_closure *closure;
1160
1161        closure = &lov_env_info(env)->lti_closure;
1162        LASSERT(list_empty(&closure->clc_list));
1163        cl_lock_closure_init(env, closure, parent, 1);
1164        return closure;
1165}
1166
1167/** @} lov */
1168