linux/fs/dlm/plock.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) 2005-2008 Red Hat, Inc.  All rights reserved.
   3 *
   4 * This copyrighted material is made available to anyone wishing to use,
   5 * modify, copy, or redistribute it subject to the terms and conditions
   6 * of the GNU General Public License version 2.
   7 */
   8
   9#include <linux/fs.h>
  10#include <linux/miscdevice.h>
  11#include <linux/poll.h>
  12#include <linux/dlm.h>
  13#include <linux/dlm_plock.h>
  14
  15#include "dlm_internal.h"
  16#include "lockspace.h"
  17
  18static spinlock_t ops_lock;
  19static struct list_head send_list;
  20static struct list_head recv_list;
  21static wait_queue_head_t send_wq;
  22static wait_queue_head_t recv_wq;
  23
  24struct plock_op {
  25        struct list_head list;
  26        int done;
  27        struct dlm_plock_info info;
  28};
  29
  30struct plock_xop {
  31        struct plock_op xop;
  32        void *callback;
  33        void *fl;
  34        void *file;
  35        struct file_lock flc;
  36};
  37
  38
  39static inline void set_version(struct dlm_plock_info *info)
  40{
  41        info->version[0] = DLM_PLOCK_VERSION_MAJOR;
  42        info->version[1] = DLM_PLOCK_VERSION_MINOR;
  43        info->version[2] = DLM_PLOCK_VERSION_PATCH;
  44}
  45
  46static int check_version(struct dlm_plock_info *info)
  47{
  48        if ((DLM_PLOCK_VERSION_MAJOR != info->version[0]) ||
  49            (DLM_PLOCK_VERSION_MINOR < info->version[1])) {
  50                log_print("plock device version mismatch: "
  51                          "kernel (%u.%u.%u), user (%u.%u.%u)",
  52                          DLM_PLOCK_VERSION_MAJOR,
  53                          DLM_PLOCK_VERSION_MINOR,
  54                          DLM_PLOCK_VERSION_PATCH,
  55                          info->version[0],
  56                          info->version[1],
  57                          info->version[2]);
  58                return -EINVAL;
  59        }
  60        return 0;
  61}
  62
  63static void send_op(struct plock_op *op)
  64{
  65        set_version(&op->info);
  66        INIT_LIST_HEAD(&op->list);
  67        spin_lock(&ops_lock);
  68        list_add_tail(&op->list, &send_list);
  69        spin_unlock(&ops_lock);
  70        wake_up(&send_wq);
  71}
  72
  73int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
  74                   int cmd, struct file_lock *fl)
  75{
  76        struct dlm_ls *ls;
  77        struct plock_op *op;
  78        struct plock_xop *xop;
  79        int rv;
  80
  81        ls = dlm_find_lockspace_local(lockspace);
  82        if (!ls)
  83                return -EINVAL;
  84
  85        xop = kzalloc(sizeof(*xop), GFP_KERNEL);
  86        if (!xop) {
  87                rv = -ENOMEM;
  88                goto out;
  89        }
  90
  91        op = &xop->xop;
  92        op->info.optype         = DLM_PLOCK_OP_LOCK;
  93        op->info.pid            = fl->fl_pid;
  94        op->info.ex             = (fl->fl_type == F_WRLCK);
  95        op->info.wait           = IS_SETLKW(cmd);
  96        op->info.fsid           = ls->ls_global_id;
  97        op->info.number         = number;
  98        op->info.start          = fl->fl_start;
  99        op->info.end            = fl->fl_end;
 100        if (fl->fl_lmops && fl->fl_lmops->fl_grant) {
 101                /* fl_owner is lockd which doesn't distinguish
 102                   processes on the nfs client */
 103                op->info.owner  = (__u64) fl->fl_pid;
 104                xop->callback   = fl->fl_lmops->fl_grant;
 105                locks_init_lock(&xop->flc);
 106                locks_copy_lock(&xop->flc, fl);
 107                xop->fl         = fl;
 108                xop->file       = file;
 109        } else {
 110                op->info.owner  = (__u64)(long) fl->fl_owner;
 111                xop->callback   = NULL;
 112        }
 113
 114        send_op(op);
 115
 116        if (xop->callback == NULL)
 117                wait_event(recv_wq, (op->done != 0));
 118        else {
 119                rv = FILE_LOCK_DEFERRED;
 120                goto out;
 121        }
 122
 123        spin_lock(&ops_lock);
 124        if (!list_empty(&op->list)) {
 125                log_error(ls, "dlm_posix_lock: op on list %llx",
 126                          (unsigned long long)number);
 127                list_del(&op->list);
 128        }
 129        spin_unlock(&ops_lock);
 130
 131        rv = op->info.rv;
 132
 133        if (!rv) {
 134                if (posix_lock_file_wait(file, fl) < 0)
 135                        log_error(ls, "dlm_posix_lock: vfs lock error %llx",
 136                                  (unsigned long long)number);
 137        }
 138
 139        kfree(xop);
 140out:
 141        dlm_put_lockspace(ls);
 142        return rv;
 143}
 144EXPORT_SYMBOL_GPL(dlm_posix_lock);
 145
 146/* Returns failure iff a succesful lock operation should be canceled */
 147static int dlm_plock_callback(struct plock_op *op)
 148{
 149        struct file *file;
 150        struct file_lock *fl;
 151        struct file_lock *flc;
 152        int (*notify)(void *, void *, int) = NULL;
 153        struct plock_xop *xop = (struct plock_xop *)op;
 154        int rv = 0;
 155
 156        spin_lock(&ops_lock);
 157        if (!list_empty(&op->list)) {
 158                log_print("dlm_plock_callback: op on list %llx",
 159                          (unsigned long long)op->info.number);
 160                list_del(&op->list);
 161        }
 162        spin_unlock(&ops_lock);
 163
 164        /* check if the following 2 are still valid or make a copy */
 165        file = xop->file;
 166        flc = &xop->flc;
 167        fl = xop->fl;
 168        notify = xop->callback;
 169
 170        if (op->info.rv) {
 171                notify(fl, NULL, op->info.rv);
 172                goto out;
 173        }
 174
 175        /* got fs lock; bookkeep locally as well: */
 176        flc->fl_flags &= ~FL_SLEEP;
 177        if (posix_lock_file(file, flc, NULL)) {
 178                /*
 179                 * This can only happen in the case of kmalloc() failure.
 180                 * The filesystem's own lock is the authoritative lock,
 181                 * so a failure to get the lock locally is not a disaster.
 182                 * As long as the fs cannot reliably cancel locks (especially
 183                 * in a low-memory situation), we're better off ignoring
 184                 * this failure than trying to recover.
 185                 */
 186                log_print("dlm_plock_callback: vfs lock error %llx file %p fl %p",
 187                          (unsigned long long)op->info.number, file, fl);
 188        }
 189
 190        rv = notify(fl, NULL, 0);
 191        if (rv) {
 192                /* XXX: We need to cancel the fs lock here: */
 193                log_print("dlm_plock_callback: lock granted after lock request "
 194                          "failed; dangling lock!\n");
 195                goto out;
 196        }
 197
 198out:
 199        kfree(xop);
 200        return rv;
 201}
 202
 203int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
 204                     struct file_lock *fl)
 205{
 206        struct dlm_ls *ls;
 207        struct plock_op *op;
 208        int rv;
 209
 210        ls = dlm_find_lockspace_local(lockspace);
 211        if (!ls)
 212                return -EINVAL;
 213
 214        op = kzalloc(sizeof(*op), GFP_KERNEL);
 215        if (!op) {
 216                rv = -ENOMEM;
 217                goto out;
 218        }
 219
 220        if (posix_lock_file_wait(file, fl) < 0)
 221                log_error(ls, "dlm_posix_unlock: vfs unlock error %llx",
 222                          (unsigned long long)number);
 223
 224        op->info.optype         = DLM_PLOCK_OP_UNLOCK;
 225        op->info.pid            = fl->fl_pid;
 226        op->info.fsid           = ls->ls_global_id;
 227        op->info.number         = number;
 228        op->info.start          = fl->fl_start;
 229        op->info.end            = fl->fl_end;
 230        if (fl->fl_lmops && fl->fl_lmops->fl_grant)
 231                op->info.owner  = (__u64) fl->fl_pid;
 232        else
 233                op->info.owner  = (__u64)(long) fl->fl_owner;
 234
 235        send_op(op);
 236        wait_event(recv_wq, (op->done != 0));
 237
 238        spin_lock(&ops_lock);
 239        if (!list_empty(&op->list)) {
 240                log_error(ls, "dlm_posix_unlock: op on list %llx",
 241                          (unsigned long long)number);
 242                list_del(&op->list);
 243        }
 244        spin_unlock(&ops_lock);
 245
 246        rv = op->info.rv;
 247
 248        if (rv == -ENOENT)
 249                rv = 0;
 250
 251        kfree(op);
 252out:
 253        dlm_put_lockspace(ls);
 254        return rv;
 255}
 256EXPORT_SYMBOL_GPL(dlm_posix_unlock);
 257
 258int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file,
 259                  struct file_lock *fl)
 260{
 261        struct dlm_ls *ls;
 262        struct plock_op *op;
 263        int rv;
 264
 265        ls = dlm_find_lockspace_local(lockspace);
 266        if (!ls)
 267                return -EINVAL;
 268
 269        op = kzalloc(sizeof(*op), GFP_KERNEL);
 270        if (!op) {
 271                rv = -ENOMEM;
 272                goto out;
 273        }
 274
 275        op->info.optype         = DLM_PLOCK_OP_GET;
 276        op->info.pid            = fl->fl_pid;
 277        op->info.ex             = (fl->fl_type == F_WRLCK);
 278        op->info.fsid           = ls->ls_global_id;
 279        op->info.number         = number;
 280        op->info.start          = fl->fl_start;
 281        op->info.end            = fl->fl_end;
 282        if (fl->fl_lmops && fl->fl_lmops->fl_grant)
 283                op->info.owner  = (__u64) fl->fl_pid;
 284        else
 285                op->info.owner  = (__u64)(long) fl->fl_owner;
 286
 287        send_op(op);
 288        wait_event(recv_wq, (op->done != 0));
 289
 290        spin_lock(&ops_lock);
 291        if (!list_empty(&op->list)) {
 292                log_error(ls, "dlm_posix_get: op on list %llx",
 293                          (unsigned long long)number);
 294                list_del(&op->list);
 295        }
 296        spin_unlock(&ops_lock);
 297
 298        /* info.rv from userspace is 1 for conflict, 0 for no-conflict,
 299           -ENOENT if there are no locks on the file */
 300
 301        rv = op->info.rv;
 302
 303        fl->fl_type = F_UNLCK;
 304        if (rv == -ENOENT)
 305                rv = 0;
 306        else if (rv > 0) {
 307                locks_init_lock(fl);
 308                fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK;
 309                fl->fl_flags = FL_POSIX;
 310                fl->fl_pid = op->info.pid;
 311                fl->fl_start = op->info.start;
 312                fl->fl_end = op->info.end;
 313                rv = 0;
 314        }
 315
 316        kfree(op);
 317out:
 318        dlm_put_lockspace(ls);
 319        return rv;
 320}
 321EXPORT_SYMBOL_GPL(dlm_posix_get);
 322
 323/* a read copies out one plock request from the send list */
 324static ssize_t dev_read(struct file *file, char __user *u, size_t count,
 325                        loff_t *ppos)
 326{
 327        struct dlm_plock_info info;
 328        struct plock_op *op = NULL;
 329
 330        if (count < sizeof(info))
 331                return -EINVAL;
 332
 333        spin_lock(&ops_lock);
 334        if (!list_empty(&send_list)) {
 335                op = list_entry(send_list.next, struct plock_op, list);
 336                list_move(&op->list, &recv_list);
 337                memcpy(&info, &op->info, sizeof(info));
 338        }
 339        spin_unlock(&ops_lock);
 340
 341        if (!op)
 342                return -EAGAIN;
 343
 344        if (copy_to_user(u, &info, sizeof(info)))
 345                return -EFAULT;
 346        return sizeof(info);
 347}
 348
 349/* a write copies in one plock result that should match a plock_op
 350   on the recv list */
 351static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
 352                         loff_t *ppos)
 353{
 354        struct dlm_plock_info info;
 355        struct plock_op *op;
 356        int found = 0, do_callback = 0;
 357
 358        if (count != sizeof(info))
 359                return -EINVAL;
 360
 361        if (copy_from_user(&info, u, sizeof(info)))
 362                return -EFAULT;
 363
 364        if (check_version(&info))
 365                return -EINVAL;
 366
 367        spin_lock(&ops_lock);
 368        list_for_each_entry(op, &recv_list, list) {
 369                if (op->info.fsid == info.fsid &&
 370                    op->info.number == info.number &&
 371                    op->info.owner == info.owner) {
 372                        struct plock_xop *xop = (struct plock_xop *)op;
 373                        list_del_init(&op->list);
 374                        memcpy(&op->info, &info, sizeof(info));
 375                        if (xop->callback)
 376                                do_callback = 1;
 377                        else
 378                                op->done = 1;
 379                        found = 1;
 380                        break;
 381                }
 382        }
 383        spin_unlock(&ops_lock);
 384
 385        if (found) {
 386                if (do_callback)
 387                        dlm_plock_callback(op);
 388                else
 389                        wake_up(&recv_wq);
 390        } else
 391                log_print("dev_write no op %x %llx", info.fsid,
 392                          (unsigned long long)info.number);
 393        return count;
 394}
 395
 396static unsigned int dev_poll(struct file *file, poll_table *wait)
 397{
 398        unsigned int mask = 0;
 399
 400        poll_wait(file, &send_wq, wait);
 401
 402        spin_lock(&ops_lock);
 403        if (!list_empty(&send_list))
 404                mask = POLLIN | POLLRDNORM;
 405        spin_unlock(&ops_lock);
 406
 407        return mask;
 408}
 409
 410static const struct file_operations dev_fops = {
 411        .read    = dev_read,
 412        .write   = dev_write,
 413        .poll    = dev_poll,
 414        .owner   = THIS_MODULE
 415};
 416
 417static struct miscdevice plock_dev_misc = {
 418        .minor = MISC_DYNAMIC_MINOR,
 419        .name = DLM_PLOCK_MISC_NAME,
 420        .fops = &dev_fops
 421};
 422
 423int dlm_plock_init(void)
 424{
 425        int rv;
 426
 427        spin_lock_init(&ops_lock);
 428        INIT_LIST_HEAD(&send_list);
 429        INIT_LIST_HEAD(&recv_list);
 430        init_waitqueue_head(&send_wq);
 431        init_waitqueue_head(&recv_wq);
 432
 433        rv = misc_register(&plock_dev_misc);
 434        if (rv)
 435                log_print("dlm_plock_init: misc_register failed %d", rv);
 436        return rv;
 437}
 438
 439void dlm_plock_exit(void)
 440{
 441        if (misc_deregister(&plock_dev_misc) < 0)
 442                log_print("dlm_plock_exit: misc_deregister failed");
 443}
 444
 445