linux/fs/ocfs2/dlm/dlmast.c
<<
>>
Prefs
   1/* -*- mode: c; c-basic-offset: 8; -*-
   2 * vim: noexpandtab sw=8 ts=8 sts=0:
   3 *
   4 * dlmast.c
   5 *
   6 * AST and BAST functionality for local and remote nodes
   7 *
   8 * Copyright (C) 2004 Oracle.  All rights reserved.
   9 *
  10 * This program is free software; you can redistribute it and/or
  11 * modify it under the terms of the GNU General Public
  12 * License as published by the Free Software Foundation; either
  13 * version 2 of the License, or (at your option) any later version.
  14 *
  15 * This program is distributed in the hope that it will be useful,
  16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18 * General Public License for more details.
  19 *
  20 * You should have received a copy of the GNU General Public
  21 * License along with this program; if not, write to the
  22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  23 * Boston, MA 02111-1307, USA.
  24 *
  25 */
  26
  27
  28#include <linux/module.h>
  29#include <linux/fs.h>
  30#include <linux/types.h>
  31#include <linux/highmem.h>
  32#include <linux/init.h>
  33#include <linux/sysctl.h>
  34#include <linux/random.h>
  35#include <linux/blkdev.h>
  36#include <linux/socket.h>
  37#include <linux/inet.h>
  38#include <linux/spinlock.h>
  39
  40
  41#include "cluster/heartbeat.h"
  42#include "cluster/nodemanager.h"
  43#include "cluster/tcp.h"
  44
  45#include "dlmapi.h"
  46#include "dlmcommon.h"
  47
  48#define MLOG_MASK_PREFIX ML_DLM
  49#include "cluster/masklog.h"
  50
  51static void dlm_update_lvb(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
  52                           struct dlm_lock *lock);
  53static int dlm_should_cancel_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
  54
  55/* Should be called as an ast gets queued to see if the new
  56 * lock level will obsolete a pending bast.
  57 * For example, if dlm_thread queued a bast for an EX lock that
  58 * was blocking another EX, but before sending the bast the
  59 * lock owner downconverted to NL, the bast is now obsolete.
  60 * Only the ast should be sent.
  61 * This is needed because the lock and convert paths can queue
  62 * asts out-of-band (not waiting for dlm_thread) in order to
  63 * allow for LKM_NOQUEUE to get immediate responses. */
  64static int dlm_should_cancel_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
  65{
  66        assert_spin_locked(&dlm->ast_lock);
  67        assert_spin_locked(&lock->spinlock);
  68
  69        if (lock->ml.highest_blocked == LKM_IVMODE)
  70                return 0;
  71        BUG_ON(lock->ml.highest_blocked == LKM_NLMODE);
  72
  73        if (lock->bast_pending &&
  74            list_empty(&lock->bast_list))
  75                /* old bast already sent, ok */
  76                return 0;
  77
  78        if (lock->ml.type == LKM_EXMODE)
  79                /* EX blocks anything left, any bast still valid */
  80                return 0;
  81        else if (lock->ml.type == LKM_NLMODE)
  82                /* NL blocks nothing, no reason to send any bast, cancel it */
  83                return 1;
  84        else if (lock->ml.highest_blocked != LKM_EXMODE)
  85                /* PR only blocks EX */
  86                return 1;
  87
  88        return 0;
  89}
  90
  91void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
  92{
  93        struct dlm_lock_resource *res;
  94
  95        BUG_ON(!dlm);
  96        BUG_ON(!lock);
  97
  98        res = lock->lockres;
  99
 100        assert_spin_locked(&dlm->ast_lock);
 101
 102        if (!list_empty(&lock->ast_list)) {
 103                mlog(ML_ERROR, "%s: res %.*s, lock %u:%llu, "
 104                     "AST list not empty, pending %d, newlevel %d\n",
 105                     dlm->name, res->lockname.len, res->lockname.name,
 106                     dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
 107                     dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
 108                     lock->ast_pending, lock->ml.type);
 109                BUG();
 110        }
 111        if (lock->ast_pending)
 112                mlog(0, "%s: res %.*s, lock %u:%llu, AST getting flushed\n",
 113                     dlm->name, res->lockname.len, res->lockname.name,
 114                     dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
 115                     dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)));
 116
 117        /* putting lock on list, add a ref */
 118        dlm_lock_get(lock);
 119        spin_lock(&lock->spinlock);
 120
 121        /* check to see if this ast obsoletes the bast */
 122        if (dlm_should_cancel_bast(dlm, lock)) {
 123                mlog(0, "%s: res %.*s, lock %u:%llu, Cancelling BAST\n",
 124                     dlm->name, res->lockname.len, res->lockname.name,
 125                     dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
 126                     dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)));
 127                lock->bast_pending = 0;
 128                list_del_init(&lock->bast_list);
 129                lock->ml.highest_blocked = LKM_IVMODE;
 130                /* removing lock from list, remove a ref.  guaranteed
 131                 * this won't be the last ref because of the get above,
 132                 * so res->spinlock will not be taken here */
 133                dlm_lock_put(lock);
 134                /* free up the reserved bast that we are cancelling.
 135                 * guaranteed that this will not be the last reserved
 136                 * ast because *both* an ast and a bast were reserved
 137                 * to get to this point.  the res->spinlock will not be
 138                 * taken here */
 139                dlm_lockres_release_ast(dlm, res);
 140        }
 141        list_add_tail(&lock->ast_list, &dlm->pending_asts);
 142        lock->ast_pending = 1;
 143        spin_unlock(&lock->spinlock);
 144}
 145
 146void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
 147{
 148        BUG_ON(!dlm);
 149        BUG_ON(!lock);
 150
 151        spin_lock(&dlm->ast_lock);
 152        __dlm_queue_ast(dlm, lock);
 153        spin_unlock(&dlm->ast_lock);
 154}
 155
 156
 157void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
 158{
 159        struct dlm_lock_resource *res;
 160
 161        BUG_ON(!dlm);
 162        BUG_ON(!lock);
 163
 164        assert_spin_locked(&dlm->ast_lock);
 165
 166        res = lock->lockres;
 167
 168        BUG_ON(!list_empty(&lock->bast_list));
 169        if (lock->bast_pending)
 170                mlog(0, "%s: res %.*s, lock %u:%llu, BAST getting flushed\n",
 171                     dlm->name, res->lockname.len, res->lockname.name,
 172                     dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
 173                     dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)));
 174
 175        /* putting lock on list, add a ref */
 176        dlm_lock_get(lock);
 177        spin_lock(&lock->spinlock);
 178        list_add_tail(&lock->bast_list, &dlm->pending_basts);
 179        lock->bast_pending = 1;
 180        spin_unlock(&lock->spinlock);
 181}
 182
 183void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
 184{
 185        BUG_ON(!dlm);
 186        BUG_ON(!lock);
 187
 188        spin_lock(&dlm->ast_lock);
 189        __dlm_queue_bast(dlm, lock);
 190        spin_unlock(&dlm->ast_lock);
 191}
 192
 193static void dlm_update_lvb(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
 194                           struct dlm_lock *lock)
 195{
 196        struct dlm_lockstatus *lksb = lock->lksb;
 197        BUG_ON(!lksb);
 198
 199        /* only updates if this node masters the lockres */
 200        spin_lock(&res->spinlock);
 201        if (res->owner == dlm->node_num) {
 202                /* check the lksb flags for the direction */
 203                if (lksb->flags & DLM_LKSB_GET_LVB) {
 204                        mlog(0, "getting lvb from lockres for %s node\n",
 205                                  lock->ml.node == dlm->node_num ? "master" :
 206                                  "remote");
 207                        memcpy(lksb->lvb, res->lvb, DLM_LVB_LEN);
 208                }
 209                /* Do nothing for lvb put requests - they should be done in
 210                 * place when the lock is downconverted - otherwise we risk
 211                 * racing gets and puts which could result in old lvb data
 212                 * being propagated. We leave the put flag set and clear it
 213                 * here. In the future we might want to clear it at the time
 214                 * the put is actually done.
 215                 */
 216        }
 217        spin_unlock(&res->spinlock);
 218
 219        /* reset any lvb flags on the lksb */
 220        lksb->flags &= ~(DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB);
 221}
 222
 223void dlm_do_local_ast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
 224                      struct dlm_lock *lock)
 225{
 226        dlm_astlockfunc_t *fn;
 227        struct dlm_lockstatus *lksb;
 228
 229        mlog(0, "%s: res %.*s, lock %u:%llu, Local AST\n", dlm->name,
 230             res->lockname.len, res->lockname.name,
 231             dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
 232             dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)));
 233
 234        lksb = lock->lksb;
 235        fn = lock->ast;
 236        BUG_ON(lock->ml.node != dlm->node_num);
 237
 238        dlm_update_lvb(dlm, res, lock);
 239        (*fn)(lock->astdata);
 240}
 241
 242
 243int dlm_do_remote_ast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
 244                      struct dlm_lock *lock)
 245{
 246        int ret;
 247        struct dlm_lockstatus *lksb;
 248        int lksbflags;
 249
 250        mlog(0, "%s: res %.*s, lock %u:%llu, Remote AST\n", dlm->name,
 251             res->lockname.len, res->lockname.name,
 252             dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
 253             dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)));
 254
 255        lksb = lock->lksb;
 256        BUG_ON(lock->ml.node == dlm->node_num);
 257
 258        lksbflags = lksb->flags;
 259        dlm_update_lvb(dlm, res, lock);
 260
 261        /* lock request came from another node
 262         * go do the ast over there */
 263        ret = dlm_send_proxy_ast(dlm, res, lock, lksbflags);
 264        return ret;
 265}
 266
 267void dlm_do_local_bast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
 268                       struct dlm_lock *lock, int blocked_type)
 269{
 270        dlm_bastlockfunc_t *fn = lock->bast;
 271
 272        BUG_ON(lock->ml.node != dlm->node_num);
 273
 274        mlog(0, "%s: res %.*s, lock %u:%llu, Local BAST, blocked %d\n",
 275             dlm->name, res->lockname.len, res->lockname.name,
 276             dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
 277             dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
 278             blocked_type);
 279
 280        (*fn)(lock->astdata, blocked_type);
 281}
 282
 283
 284
 285int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
 286                          void **ret_data)
 287{
 288        int ret;
 289        unsigned int locklen;
 290        struct dlm_ctxt *dlm = data;
 291        struct dlm_lock_resource *res = NULL;
 292        struct dlm_lock *lock = NULL;
 293        struct dlm_proxy_ast *past = (struct dlm_proxy_ast *) msg->buf;
 294        char *name;
 295        struct list_head *head = NULL;
 296        __be64 cookie;
 297        u32 flags;
 298        u8 node;
 299
 300        if (!dlm_grab(dlm)) {
 301                dlm_error(DLM_REJECTED);
 302                return DLM_REJECTED;
 303        }
 304
 305        mlog_bug_on_msg(!dlm_domain_fully_joined(dlm),
 306                        "Domain %s not fully joined!\n", dlm->name);
 307
 308        name = past->name;
 309        locklen = past->namelen;
 310        cookie = past->cookie;
 311        flags = be32_to_cpu(past->flags);
 312        node = past->node_idx;
 313
 314        if (locklen > DLM_LOCKID_NAME_MAX) {
 315                ret = DLM_IVBUFLEN;
 316                mlog(ML_ERROR, "Invalid name length (%d) in proxy ast "
 317                     "handler!\n", locklen);
 318                goto leave;
 319        }
 320
 321        if ((flags & (LKM_PUT_LVB|LKM_GET_LVB)) ==
 322             (LKM_PUT_LVB|LKM_GET_LVB)) {
 323                mlog(ML_ERROR, "Both PUT and GET lvb specified, (0x%x)\n",
 324                     flags);
 325                ret = DLM_BADARGS;
 326                goto leave;
 327        }
 328
 329        mlog(0, "lvb: %s\n", flags & LKM_PUT_LVB ? "put lvb" :
 330                  (flags & LKM_GET_LVB ? "get lvb" : "none"));
 331
 332        mlog(0, "type=%d, blocked_type=%d\n", past->type, past->blocked_type);
 333
 334        if (past->type != DLM_AST &&
 335            past->type != DLM_BAST) {
 336                mlog(ML_ERROR, "Unknown ast type! %d, cookie=%u:%llu"
 337                     "name=%.*s, node=%u\n", past->type,
 338                     dlm_get_lock_cookie_node(be64_to_cpu(cookie)),
 339                     dlm_get_lock_cookie_seq(be64_to_cpu(cookie)),
 340                     locklen, name, node);
 341                ret = DLM_IVLOCKID;
 342                goto leave;
 343        }
 344
 345        res = dlm_lookup_lockres(dlm, name, locklen);
 346        if (!res) {
 347                mlog(0, "Got %sast for unknown lockres! cookie=%u:%llu, "
 348                     "name=%.*s, node=%u\n", (past->type == DLM_AST ? "" : "b"),
 349                     dlm_get_lock_cookie_node(be64_to_cpu(cookie)),
 350                     dlm_get_lock_cookie_seq(be64_to_cpu(cookie)),
 351                     locklen, name, node);
 352                ret = DLM_IVLOCKID;
 353                goto leave;
 354        }
 355
 356        /* cannot get a proxy ast message if this node owns it */
 357        BUG_ON(res->owner == dlm->node_num);
 358
 359        mlog(0, "%s: res %.*s\n", dlm->name, res->lockname.len,
 360             res->lockname.name);
 361
 362        spin_lock(&res->spinlock);
 363        if (res->state & DLM_LOCK_RES_RECOVERING) {
 364                mlog(0, "Responding with DLM_RECOVERING!\n");
 365                ret = DLM_RECOVERING;
 366                goto unlock_out;
 367        }
 368        if (res->state & DLM_LOCK_RES_MIGRATING) {
 369                mlog(0, "Responding with DLM_MIGRATING!\n");
 370                ret = DLM_MIGRATING;
 371                goto unlock_out;
 372        }
 373        /* try convert queue for both ast/bast */
 374        head = &res->converting;
 375        lock = NULL;
 376        list_for_each_entry(lock, head, list) {
 377                if (lock->ml.cookie == cookie)
 378                        goto do_ast;
 379        }
 380
 381        /* if not on convert, try blocked for ast, granted for bast */
 382        if (past->type == DLM_AST)
 383                head = &res->blocked;
 384        else
 385                head = &res->granted;
 386
 387        list_for_each_entry(lock, head, list) {
 388                /* if lock is found but unlock is pending ignore the bast */
 389                if (lock->ml.cookie == cookie) {
 390                        if (lock->unlock_pending)
 391                                break;
 392                        goto do_ast;
 393                }
 394        }
 395
 396        mlog(0, "Got %sast for unknown lock! cookie=%u:%llu, name=%.*s, "
 397             "node=%u\n", past->type == DLM_AST ? "" : "b",
 398             dlm_get_lock_cookie_node(be64_to_cpu(cookie)),
 399             dlm_get_lock_cookie_seq(be64_to_cpu(cookie)),
 400             locklen, name, node);
 401
 402        ret = DLM_NORMAL;
 403unlock_out:
 404        spin_unlock(&res->spinlock);
 405        goto leave;
 406
 407do_ast:
 408        ret = DLM_NORMAL;
 409        if (past->type == DLM_AST) {
 410                /* do not alter lock refcount.  switching lists. */
 411                list_move_tail(&lock->list, &res->granted);
 412                mlog(0, "%s: res %.*s, lock %u:%llu, Granted type %d => %d\n",
 413                     dlm->name, res->lockname.len, res->lockname.name,
 414                     dlm_get_lock_cookie_node(be64_to_cpu(cookie)),
 415                     dlm_get_lock_cookie_seq(be64_to_cpu(cookie)),
 416                     lock->ml.type, lock->ml.convert_type);
 417
 418                if (lock->ml.convert_type != LKM_IVMODE) {
 419                        lock->ml.type = lock->ml.convert_type;
 420                        lock->ml.convert_type = LKM_IVMODE;
 421                } else {
 422                        // should already be there....
 423                }
 424
 425                lock->lksb->status = DLM_NORMAL;
 426
 427                /* if we requested the lvb, fetch it into our lksb now */
 428                if (flags & LKM_GET_LVB) {
 429                        BUG_ON(!(lock->lksb->flags & DLM_LKSB_GET_LVB));
 430                        memcpy(lock->lksb->lvb, past->lvb, DLM_LVB_LEN);
 431                }
 432        }
 433        spin_unlock(&res->spinlock);
 434
 435        if (past->type == DLM_AST)
 436                dlm_do_local_ast(dlm, res, lock);
 437        else
 438                dlm_do_local_bast(dlm, res, lock, past->blocked_type);
 439
 440leave:
 441        if (res)
 442                dlm_lockres_put(res);
 443
 444        dlm_put(dlm);
 445        return ret;
 446}
 447
 448
 449
 450int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
 451                           struct dlm_lock *lock, int msg_type,
 452                           int blocked_type, int flags)
 453{
 454        int ret = 0;
 455        struct dlm_proxy_ast past;
 456        struct kvec vec[2];
 457        size_t veclen = 1;
 458        int status;
 459
 460        mlog(0, "%s: res %.*s, to %u, type %d, blocked_type %d\n", dlm->name,
 461             res->lockname.len, res->lockname.name, lock->ml.node, msg_type,
 462             blocked_type);
 463
 464        memset(&past, 0, sizeof(struct dlm_proxy_ast));
 465        past.node_idx = dlm->node_num;
 466        past.type = msg_type;
 467        past.blocked_type = blocked_type;
 468        past.namelen = res->lockname.len;
 469        memcpy(past.name, res->lockname.name, past.namelen);
 470        past.cookie = lock->ml.cookie;
 471
 472        vec[0].iov_len = sizeof(struct dlm_proxy_ast);
 473        vec[0].iov_base = &past;
 474        if (flags & DLM_LKSB_GET_LVB) {
 475                be32_add_cpu(&past.flags, LKM_GET_LVB);
 476                vec[1].iov_len = DLM_LVB_LEN;
 477                vec[1].iov_base = lock->lksb->lvb;
 478                veclen++;
 479        }
 480
 481        ret = o2net_send_message_vec(DLM_PROXY_AST_MSG, dlm->key, vec, veclen,
 482                                     lock->ml.node, &status);
 483        if (ret < 0)
 484                mlog(ML_ERROR, "%s: res %.*s, error %d send AST to node %u\n",
 485                     dlm->name, res->lockname.len, res->lockname.name, ret,
 486                     lock->ml.node);
 487        else {
 488                if (status == DLM_RECOVERING) {
 489                        mlog(ML_ERROR, "sent AST to node %u, it thinks this "
 490                             "node is dead!\n", lock->ml.node);
 491                        BUG();
 492                } else if (status == DLM_MIGRATING) {
 493                        mlog(ML_ERROR, "sent AST to node %u, it returned "
 494                             "DLM_MIGRATING!\n", lock->ml.node);
 495                        BUG();
 496                } else if (status != DLM_NORMAL && status != DLM_IVLOCKID) {
 497                        mlog(ML_ERROR, "AST to node %u returned %d!\n",
 498                             lock->ml.node, status);
 499                        /* ignore it */
 500                }
 501                ret = 0;
 502        }
 503        return ret;
 504}
 505