linux/fs/ocfs2/stack_user.c
<<
>>
Prefs
   1/* -*- mode: c; c-basic-offset: 8; -*-
   2 * vim: noexpandtab sw=8 ts=8 sts=0:
   3 *
   4 * stack_user.c
   5 *
   6 * Code which interfaces ocfs2 with fs/dlm and a userspace stack.
   7 *
   8 * Copyright (C) 2007 Oracle.  All rights reserved.
   9 *
  10 * This program is free software; you can redistribute it and/or
  11 * modify it under the terms of the GNU General Public
  12 * License as published by the Free Software Foundation, version 2.
  13 *
  14 * This program is distributed in the hope that it will be useful,
  15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17 * General Public License for more details.
  18 */
  19
  20#include <linux/module.h>
  21#include <linux/fs.h>
  22#include <linux/miscdevice.h>
  23#include <linux/mutex.h>
  24#include <linux/slab.h>
  25#include <linux/reboot.h>
  26#include <asm/uaccess.h>
  27
  28#include "stackglue.h"
  29
  30#include <linux/dlm_plock.h>
  31
  32/*
  33 * The control protocol starts with a handshake.  Until the handshake
  34 * is complete, the control device will fail all write(2)s.
  35 *
  36 * The handshake is simple.  First, the client reads until EOF.  Each line
  37 * of output is a supported protocol tag.  All protocol tags are a single
  38 * character followed by a two hex digit version number.  Currently the
   39 * only thing supported is T01, for "Text-based version 0x01".  Next, the
  40 * client writes the version they would like to use, including the newline.
  41 * Thus, the protocol tag is 'T01\n'.  If the version tag written is
  42 * unknown, -EINVAL is returned.  Once the negotiation is complete, the
  43 * client can start sending messages.
  44 *
  45 * The T01 protocol has three messages.  First is the "SETN" message.
  46 * It has the following syntax:
  47 *
  48 *  SETN<space><8-char-hex-nodenum><newline>
  49 *
  50 * This is 14 characters.
  51 *
  52 * The "SETN" message must be the first message following the protocol.
  53 * It tells ocfs2_control the local node number.
  54 *
  55 * Next comes the "SETV" message.  It has the following syntax:
  56 *
  57 *  SETV<space><2-char-hex-major><space><2-char-hex-minor><newline>
  58 *
  59 * This is 11 characters.
  60 *
  61 * The "SETV" message sets the filesystem locking protocol version as
  62 * negotiated by the client.  The client negotiates based on the maximum
  63 * version advertised in /sys/fs/ocfs2/max_locking_protocol.  The major
  64 * number from the "SETV" message must match
  65 * ocfs2_user_plugin.sp_max_proto.pv_major, and the minor number
   66 * must be less than or equal to ...sp_max_proto.pv_minor.
  67 *
  68 * Once this information has been set, mounts will be allowed.  From this
  69 * point on, the "DOWN" message can be sent for node down notification.
  70 * It has the following syntax:
  71 *
  72 *  DOWN<space><32-char-cap-hex-uuid><space><8-char-hex-nodenum><newline>
  73 *
  74 * eg:
  75 *
  76 *  DOWN 632A924FDD844190BDA93C0DF6B94899 00000001\n
  77 *
  78 * This is 47 characters.
  79 */
  80
  81/*
  82 * Whether or not the client has done the handshake.
  83 * For now, we have just one protocol version.
  84 */
  85#define OCFS2_CONTROL_PROTO                     "T01\n"
  86#define OCFS2_CONTROL_PROTO_LEN                 4
  87
  88/* Handshake states */
  89#define OCFS2_CONTROL_HANDSHAKE_INVALID         (0)
  90#define OCFS2_CONTROL_HANDSHAKE_READ            (1)
  91#define OCFS2_CONTROL_HANDSHAKE_PROTOCOL        (2)
  92#define OCFS2_CONTROL_HANDSHAKE_VALID           (3)
  93
  94/* Messages */
  95#define OCFS2_CONTROL_MESSAGE_OP_LEN            4
  96#define OCFS2_CONTROL_MESSAGE_SETNODE_OP        "SETN"
  97#define OCFS2_CONTROL_MESSAGE_SETNODE_TOTAL_LEN 14
  98#define OCFS2_CONTROL_MESSAGE_SETVERSION_OP     "SETV"
  99#define OCFS2_CONTROL_MESSAGE_SETVERSION_TOTAL_LEN      11
 100#define OCFS2_CONTROL_MESSAGE_DOWN_OP           "DOWN"
 101#define OCFS2_CONTROL_MESSAGE_DOWN_TOTAL_LEN    47
 102#define OCFS2_TEXT_UUID_LEN                     32
 103#define OCFS2_CONTROL_MESSAGE_VERNUM_LEN        2
 104#define OCFS2_CONTROL_MESSAGE_NODENUM_LEN       8
 105
 106/*
 107 * ocfs2_live_connection is refcounted because the filesystem and
 108 * miscdevice sides can detach in different order.  Let's just be safe.
 109 */
 110struct ocfs2_live_connection {
 111        struct list_head                oc_list;
 112        struct ocfs2_cluster_connection *oc_conn;
 113};
 114
 115struct ocfs2_control_private {
 116        struct list_head op_list;
 117        int op_state;
 118        int op_this_node;
 119        struct ocfs2_protocol_version op_proto;
 120};
 121
 122/* SETN<space><8-char-hex-nodenum><newline> */
 123struct ocfs2_control_message_setn {
 124        char    tag[OCFS2_CONTROL_MESSAGE_OP_LEN];
 125        char    space;
 126        char    nodestr[OCFS2_CONTROL_MESSAGE_NODENUM_LEN];
 127        char    newline;
 128};
 129
 130/* SETV<space><2-char-hex-major><space><2-char-hex-minor><newline> */
 131struct ocfs2_control_message_setv {
 132        char    tag[OCFS2_CONTROL_MESSAGE_OP_LEN];
 133        char    space1;
 134        char    major[OCFS2_CONTROL_MESSAGE_VERNUM_LEN];
 135        char    space2;
 136        char    minor[OCFS2_CONTROL_MESSAGE_VERNUM_LEN];
 137        char    newline;
 138};
 139
 140/* DOWN<space><32-char-cap-hex-uuid><space><8-char-hex-nodenum><newline> */
 141struct ocfs2_control_message_down {
 142        char    tag[OCFS2_CONTROL_MESSAGE_OP_LEN];
 143        char    space1;
 144        char    uuid[OCFS2_TEXT_UUID_LEN];
 145        char    space2;
 146        char    nodestr[OCFS2_CONTROL_MESSAGE_NODENUM_LEN];
 147        char    newline;
 148};
 149
 150union ocfs2_control_message {
 151        char                                    tag[OCFS2_CONTROL_MESSAGE_OP_LEN];
 152        struct ocfs2_control_message_setn       u_setn;
 153        struct ocfs2_control_message_setv       u_setv;
 154        struct ocfs2_control_message_down       u_down;
 155};
 156
 157static struct ocfs2_stack_plugin ocfs2_user_plugin;
 158
 159static atomic_t ocfs2_control_opened;
 160static int ocfs2_control_this_node = -1;
 161static struct ocfs2_protocol_version running_proto;
 162
 163static LIST_HEAD(ocfs2_live_connection_list);
 164static LIST_HEAD(ocfs2_control_private_list);
 165static DEFINE_MUTEX(ocfs2_control_lock);
 166
 167static inline void ocfs2_control_set_handshake_state(struct file *file,
 168                                                     int state)
 169{
 170        struct ocfs2_control_private *p = file->private_data;
 171        p->op_state = state;
 172}
 173
 174static inline int ocfs2_control_get_handshake_state(struct file *file)
 175{
 176        struct ocfs2_control_private *p = file->private_data;
 177        return p->op_state;
 178}
 179
 180static struct ocfs2_live_connection *ocfs2_connection_find(const char *name)
 181{
 182        size_t len = strlen(name);
 183        struct ocfs2_live_connection *c;
 184
 185        BUG_ON(!mutex_is_locked(&ocfs2_control_lock));
 186
 187        list_for_each_entry(c, &ocfs2_live_connection_list, oc_list) {
 188                if ((c->oc_conn->cc_namelen == len) &&
 189                    !strncmp(c->oc_conn->cc_name, name, len))
 190                        return c;
 191        }
 192
 193        return NULL;
 194}
 195
 196/*
 197 * ocfs2_live_connection structures are created underneath the ocfs2
 198 * mount path.  Since the VFS prevents multiple calls to
 199 * fill_super(), we can't get dupes here.
 200 */
 201static int ocfs2_live_connection_new(struct ocfs2_cluster_connection *conn,
 202                                     struct ocfs2_live_connection **c_ret)
 203{
 204        int rc = 0;
 205        struct ocfs2_live_connection *c;
 206
 207        c = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL);
 208        if (!c)
 209                return -ENOMEM;
 210
 211        mutex_lock(&ocfs2_control_lock);
 212        c->oc_conn = conn;
 213
 214        if (atomic_read(&ocfs2_control_opened))
 215                list_add(&c->oc_list, &ocfs2_live_connection_list);
 216        else {
 217                printk(KERN_ERR
 218                       "ocfs2: Userspace control daemon is not present\n");
 219                rc = -ESRCH;
 220        }
 221
 222        mutex_unlock(&ocfs2_control_lock);
 223
 224        if (!rc)
 225                *c_ret = c;
 226        else
 227                kfree(c);
 228
 229        return rc;
 230}
 231
 232/*
 233 * This function disconnects the cluster connection from ocfs2_control.
 234 * Afterwards, userspace can't affect the cluster connection.
 235 */
 236static void ocfs2_live_connection_drop(struct ocfs2_live_connection *c)
 237{
 238        mutex_lock(&ocfs2_control_lock);
 239        list_del_init(&c->oc_list);
 240        c->oc_conn = NULL;
 241        mutex_unlock(&ocfs2_control_lock);
 242
 243        kfree(c);
 244}
 245
 246static int ocfs2_control_cfu(void *target, size_t target_len,
 247                             const char __user *buf, size_t count)
 248{
 249        /* The T01 expects write(2) calls to have exactly one command */
 250        if ((count != target_len) ||
 251            (count > sizeof(union ocfs2_control_message)))
 252                return -EINVAL;
 253
 254        if (copy_from_user(target, buf, target_len))
 255                return -EFAULT;
 256
 257        return 0;
 258}
 259
 260static ssize_t ocfs2_control_validate_protocol(struct file *file,
 261                                               const char __user *buf,
 262                                               size_t count)
 263{
 264        ssize_t ret;
 265        char kbuf[OCFS2_CONTROL_PROTO_LEN];
 266
 267        ret = ocfs2_control_cfu(kbuf, OCFS2_CONTROL_PROTO_LEN,
 268                                buf, count);
 269        if (ret)
 270                return ret;
 271
 272        if (strncmp(kbuf, OCFS2_CONTROL_PROTO, OCFS2_CONTROL_PROTO_LEN))
 273                return -EINVAL;
 274
 275        ocfs2_control_set_handshake_state(file,
 276                                          OCFS2_CONTROL_HANDSHAKE_PROTOCOL);
 277
 278        return count;
 279}
 280
 281static void ocfs2_control_send_down(const char *uuid,
 282                                    int nodenum)
 283{
 284        struct ocfs2_live_connection *c;
 285
 286        mutex_lock(&ocfs2_control_lock);
 287
 288        c = ocfs2_connection_find(uuid);
 289        if (c) {
 290                BUG_ON(c->oc_conn == NULL);
 291                c->oc_conn->cc_recovery_handler(nodenum,
 292                                                c->oc_conn->cc_recovery_data);
 293        }
 294
 295        mutex_unlock(&ocfs2_control_lock);
 296}
 297
 298/*
 299 * Called whenever configuration elements are sent to /dev/ocfs2_control.
 300 * If all configuration elements are present, try to set the global
 301 * values.  If there is a problem, return an error.  Skip any missing
 302 * elements, and only bump ocfs2_control_opened when we have all elements
 303 * and are successful.
 304 */
 305static int ocfs2_control_install_private(struct file *file)
 306{
 307        int rc = 0;
 308        int set_p = 1;
 309        struct ocfs2_control_private *p = file->private_data;
 310
 311        BUG_ON(p->op_state != OCFS2_CONTROL_HANDSHAKE_PROTOCOL);
 312
 313        mutex_lock(&ocfs2_control_lock);
 314
 315        if (p->op_this_node < 0) {
 316                set_p = 0;
 317        } else if ((ocfs2_control_this_node >= 0) &&
 318                   (ocfs2_control_this_node != p->op_this_node)) {
 319                rc = -EINVAL;
 320                goto out_unlock;
 321        }
 322
 323        if (!p->op_proto.pv_major) {
 324                set_p = 0;
 325        } else if (!list_empty(&ocfs2_live_connection_list) &&
 326                   ((running_proto.pv_major != p->op_proto.pv_major) ||
 327                    (running_proto.pv_minor != p->op_proto.pv_minor))) {
 328                rc = -EINVAL;
 329                goto out_unlock;
 330        }
 331
 332        if (set_p) {
 333                ocfs2_control_this_node = p->op_this_node;
 334                running_proto.pv_major = p->op_proto.pv_major;
 335                running_proto.pv_minor = p->op_proto.pv_minor;
 336        }
 337
 338out_unlock:
 339        mutex_unlock(&ocfs2_control_lock);
 340
 341        if (!rc && set_p) {
 342                /* We set the global values successfully */
 343                atomic_inc(&ocfs2_control_opened);
 344                ocfs2_control_set_handshake_state(file,
 345                                        OCFS2_CONTROL_HANDSHAKE_VALID);
 346        }
 347
 348        return rc;
 349}
 350
 351static int ocfs2_control_get_this_node(void)
 352{
 353        int rc;
 354
 355        mutex_lock(&ocfs2_control_lock);
 356        if (ocfs2_control_this_node < 0)
 357                rc = -EINVAL;
 358        else
 359                rc = ocfs2_control_this_node;
 360        mutex_unlock(&ocfs2_control_lock);
 361
 362        return rc;
 363}
 364
 365static int ocfs2_control_do_setnode_msg(struct file *file,
 366                                        struct ocfs2_control_message_setn *msg)
 367{
 368        long nodenum;
 369        char *ptr = NULL;
 370        struct ocfs2_control_private *p = file->private_data;
 371
 372        if (ocfs2_control_get_handshake_state(file) !=
 373            OCFS2_CONTROL_HANDSHAKE_PROTOCOL)
 374                return -EINVAL;
 375
 376        if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_SETNODE_OP,
 377                    OCFS2_CONTROL_MESSAGE_OP_LEN))
 378                return -EINVAL;
 379
 380        if ((msg->space != ' ') || (msg->newline != '\n'))
 381                return -EINVAL;
 382        msg->space = msg->newline = '\0';
 383
 384        nodenum = simple_strtol(msg->nodestr, &ptr, 16);
 385        if (!ptr || *ptr)
 386                return -EINVAL;
 387
 388        if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) ||
 389            (nodenum > INT_MAX) || (nodenum < 0))
 390                return -ERANGE;
 391        p->op_this_node = nodenum;
 392
 393        return ocfs2_control_install_private(file);
 394}
 395
 396static int ocfs2_control_do_setversion_msg(struct file *file,
 397                                           struct ocfs2_control_message_setv *msg)
 398 {
 399        long major, minor;
 400        char *ptr = NULL;
 401        struct ocfs2_control_private *p = file->private_data;
 402        struct ocfs2_protocol_version *max =
 403                &ocfs2_user_plugin.sp_max_proto;
 404
 405        if (ocfs2_control_get_handshake_state(file) !=
 406            OCFS2_CONTROL_HANDSHAKE_PROTOCOL)
 407                return -EINVAL;
 408
 409        if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_SETVERSION_OP,
 410                    OCFS2_CONTROL_MESSAGE_OP_LEN))
 411                return -EINVAL;
 412
 413        if ((msg->space1 != ' ') || (msg->space2 != ' ') ||
 414            (msg->newline != '\n'))
 415                return -EINVAL;
 416        msg->space1 = msg->space2 = msg->newline = '\0';
 417
 418        major = simple_strtol(msg->major, &ptr, 16);
 419        if (!ptr || *ptr)
 420                return -EINVAL;
 421        minor = simple_strtol(msg->minor, &ptr, 16);
 422        if (!ptr || *ptr)
 423                return -EINVAL;
 424
 425        /*
 426         * The major must be between 1 and 255, inclusive.  The minor
 427         * must be between 0 and 255, inclusive.  The version passed in
 428         * must be within the maximum version supported by the filesystem.
 429         */
 430        if ((major == LONG_MIN) || (major == LONG_MAX) ||
 431            (major > (u8)-1) || (major < 1))
 432                return -ERANGE;
 433        if ((minor == LONG_MIN) || (minor == LONG_MAX) ||
 434            (minor > (u8)-1) || (minor < 0))
 435                return -ERANGE;
 436        if ((major != max->pv_major) ||
 437            (minor > max->pv_minor))
 438                return -EINVAL;
 439
 440        p->op_proto.pv_major = major;
 441        p->op_proto.pv_minor = minor;
 442
 443        return ocfs2_control_install_private(file);
 444}
 445
 446static int ocfs2_control_do_down_msg(struct file *file,
 447                                     struct ocfs2_control_message_down *msg)
 448{
 449        long nodenum;
 450        char *p = NULL;
 451
 452        if (ocfs2_control_get_handshake_state(file) !=
 453            OCFS2_CONTROL_HANDSHAKE_VALID)
 454                return -EINVAL;
 455
 456        if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_DOWN_OP,
 457                    OCFS2_CONTROL_MESSAGE_OP_LEN))
 458                return -EINVAL;
 459
 460        if ((msg->space1 != ' ') || (msg->space2 != ' ') ||
 461            (msg->newline != '\n'))
 462                return -EINVAL;
 463        msg->space1 = msg->space2 = msg->newline = '\0';
 464
 465        nodenum = simple_strtol(msg->nodestr, &p, 16);
 466        if (!p || *p)
 467                return -EINVAL;
 468
 469        if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) ||
 470            (nodenum > INT_MAX) || (nodenum < 0))
 471                return -ERANGE;
 472
 473        ocfs2_control_send_down(msg->uuid, nodenum);
 474
 475        return 0;
 476}
 477
 478static ssize_t ocfs2_control_message(struct file *file,
 479                                     const char __user *buf,
 480                                     size_t count)
 481{
 482        ssize_t ret;
 483        union ocfs2_control_message msg;
 484
 485        /* Try to catch padding issues */
 486        WARN_ON(offsetof(struct ocfs2_control_message_down, uuid) !=
 487                (sizeof(msg.u_down.tag) + sizeof(msg.u_down.space1)));
 488
 489        memset(&msg, 0, sizeof(union ocfs2_control_message));
 490        ret = ocfs2_control_cfu(&msg, count, buf, count);
 491        if (ret)
 492                goto out;
 493
 494        if ((count == OCFS2_CONTROL_MESSAGE_SETNODE_TOTAL_LEN) &&
 495            !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_SETNODE_OP,
 496                     OCFS2_CONTROL_MESSAGE_OP_LEN))
 497                ret = ocfs2_control_do_setnode_msg(file, &msg.u_setn);
 498        else if ((count == OCFS2_CONTROL_MESSAGE_SETVERSION_TOTAL_LEN) &&
 499                 !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_SETVERSION_OP,
 500                          OCFS2_CONTROL_MESSAGE_OP_LEN))
 501                ret = ocfs2_control_do_setversion_msg(file, &msg.u_setv);
 502        else if ((count == OCFS2_CONTROL_MESSAGE_DOWN_TOTAL_LEN) &&
 503                 !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_DOWN_OP,
 504                          OCFS2_CONTROL_MESSAGE_OP_LEN))
 505                ret = ocfs2_control_do_down_msg(file, &msg.u_down);
 506        else
 507                ret = -EINVAL;
 508
 509out:
 510        return ret ? ret : count;
 511}
 512
 513static ssize_t ocfs2_control_write(struct file *file,
 514                                   const char __user *buf,
 515                                   size_t count,
 516                                   loff_t *ppos)
 517{
 518        ssize_t ret;
 519
 520        switch (ocfs2_control_get_handshake_state(file)) {
 521                case OCFS2_CONTROL_HANDSHAKE_INVALID:
 522                        ret = -EINVAL;
 523                        break;
 524
 525                case OCFS2_CONTROL_HANDSHAKE_READ:
 526                        ret = ocfs2_control_validate_protocol(file, buf,
 527                                                              count);
 528                        break;
 529
 530                case OCFS2_CONTROL_HANDSHAKE_PROTOCOL:
 531                case OCFS2_CONTROL_HANDSHAKE_VALID:
 532                        ret = ocfs2_control_message(file, buf, count);
 533                        break;
 534
 535                default:
 536                        BUG();
 537                        ret = -EIO;
 538                        break;
 539        }
 540
 541        return ret;
 542}
 543
 544/*
 545 * This is a naive version.  If we ever have a new protocol, we'll expand
 546 * it.  Probably using seq_file.
 547 */
 548static ssize_t ocfs2_control_read(struct file *file,
 549                                  char __user *buf,
 550                                  size_t count,
 551                                  loff_t *ppos)
 552{
 553        ssize_t ret;
 554
 555        ret = simple_read_from_buffer(buf, count, ppos,
 556                        OCFS2_CONTROL_PROTO, OCFS2_CONTROL_PROTO_LEN);
 557
 558        /* Have we read the whole protocol list? */
 559        if (ret > 0 && *ppos >= OCFS2_CONTROL_PROTO_LEN)
 560                ocfs2_control_set_handshake_state(file,
 561                                                  OCFS2_CONTROL_HANDSHAKE_READ);
 562
 563        return ret;
 564}
 565
 566static int ocfs2_control_release(struct inode *inode, struct file *file)
 567{
 568        struct ocfs2_control_private *p = file->private_data;
 569
 570        mutex_lock(&ocfs2_control_lock);
 571
 572        if (ocfs2_control_get_handshake_state(file) !=
 573            OCFS2_CONTROL_HANDSHAKE_VALID)
 574                goto out;
 575
 576        if (atomic_dec_and_test(&ocfs2_control_opened)) {
 577                if (!list_empty(&ocfs2_live_connection_list)) {
 578                        /* XXX: Do bad things! */
 579                        printk(KERN_ERR
 580                               "ocfs2: Unexpected release of ocfs2_control!\n"
 581                               "       Loss of cluster connection requires "
 582                               "an emergency restart!\n");
 583                        emergency_restart();
 584                }
 585                /*
 586                 * Last valid close clears the node number and resets
 587                 * the locking protocol version
 588                 */
 589                ocfs2_control_this_node = -1;
 590                running_proto.pv_major = 0;
 591                running_proto.pv_major = 0;
 592        }
 593
 594out:
 595        list_del_init(&p->op_list);
 596        file->private_data = NULL;
 597
 598        mutex_unlock(&ocfs2_control_lock);
 599
 600        kfree(p);
 601
 602        return 0;
 603}
 604
 605static int ocfs2_control_open(struct inode *inode, struct file *file)
 606{
 607        struct ocfs2_control_private *p;
 608
 609        p = kzalloc(sizeof(struct ocfs2_control_private), GFP_KERNEL);
 610        if (!p)
 611                return -ENOMEM;
 612        p->op_this_node = -1;
 613
 614        mutex_lock(&ocfs2_control_lock);
 615        file->private_data = p;
 616        list_add(&p->op_list, &ocfs2_control_private_list);
 617        mutex_unlock(&ocfs2_control_lock);
 618
 619        return 0;
 620}
 621
 622static const struct file_operations ocfs2_control_fops = {
 623        .open    = ocfs2_control_open,
 624        .release = ocfs2_control_release,
 625        .read    = ocfs2_control_read,
 626        .write   = ocfs2_control_write,
 627        .owner   = THIS_MODULE,
 628        .llseek  = default_llseek,
 629};
 630
 631static struct miscdevice ocfs2_control_device = {
 632        .minor          = MISC_DYNAMIC_MINOR,
 633        .name           = "ocfs2_control",
 634        .fops           = &ocfs2_control_fops,
 635};
 636
 637static int ocfs2_control_init(void)
 638{
 639        int rc;
 640
 641        atomic_set(&ocfs2_control_opened, 0);
 642
 643        rc = misc_register(&ocfs2_control_device);
 644        if (rc)
 645                printk(KERN_ERR
 646                       "ocfs2: Unable to register ocfs2_control device "
 647                       "(errno %d)\n",
 648                       -rc);
 649
 650        return rc;
 651}
 652
 653static void ocfs2_control_exit(void)
 654{
 655        int rc;
 656
 657        rc = misc_deregister(&ocfs2_control_device);
 658        if (rc)
 659                printk(KERN_ERR
 660                       "ocfs2: Unable to deregister ocfs2_control device "
 661                       "(errno %d)\n",
 662                       -rc);
 663}
 664
 665static void fsdlm_lock_ast_wrapper(void *astarg)
 666{
 667        struct ocfs2_dlm_lksb *lksb = astarg;
 668        int status = lksb->lksb_fsdlm.sb_status;
 669
 670        /*
 671         * For now we're punting on the issue of other non-standard errors
 672         * where we can't tell if the unlock_ast or lock_ast should be called.
 673         * The main "other error" that's possible is EINVAL which means the
 674         * function was called with invalid args, which shouldn't be possible
 675         * since the caller here is under our control.  Other non-standard
 676         * errors probably fall into the same category, or otherwise are fatal
 677         * which means we can't carry on anyway.
 678         */
 679
 680        if (status == -DLM_EUNLOCK || status == -DLM_ECANCEL)
 681                lksb->lksb_conn->cc_proto->lp_unlock_ast(lksb, 0);
 682        else
 683                lksb->lksb_conn->cc_proto->lp_lock_ast(lksb);
 684}
 685
 686static void fsdlm_blocking_ast_wrapper(void *astarg, int level)
 687{
 688        struct ocfs2_dlm_lksb *lksb = astarg;
 689
 690        lksb->lksb_conn->cc_proto->lp_blocking_ast(lksb, level);
 691}
 692
 693static int user_dlm_lock(struct ocfs2_cluster_connection *conn,
 694                         int mode,
 695                         struct ocfs2_dlm_lksb *lksb,
 696                         u32 flags,
 697                         void *name,
 698                         unsigned int namelen)
 699{
 700        int ret;
 701
 702        if (!lksb->lksb_fsdlm.sb_lvbptr)
 703                lksb->lksb_fsdlm.sb_lvbptr = (char *)lksb +
 704                                             sizeof(struct dlm_lksb);
 705
 706        ret = dlm_lock(conn->cc_lockspace, mode, &lksb->lksb_fsdlm,
 707                       flags|DLM_LKF_NODLCKWT, name, namelen, 0,
 708                       fsdlm_lock_ast_wrapper, lksb,
 709                       fsdlm_blocking_ast_wrapper);
 710        return ret;
 711}
 712
 713static int user_dlm_unlock(struct ocfs2_cluster_connection *conn,
 714                           struct ocfs2_dlm_lksb *lksb,
 715                           u32 flags)
 716{
 717        int ret;
 718
 719        ret = dlm_unlock(conn->cc_lockspace, lksb->lksb_fsdlm.sb_lkid,
 720                         flags, &lksb->lksb_fsdlm, lksb);
 721        return ret;
 722}
 723
 724static int user_dlm_lock_status(struct ocfs2_dlm_lksb *lksb)
 725{
 726        return lksb->lksb_fsdlm.sb_status;
 727}
 728
 729static int user_dlm_lvb_valid(struct ocfs2_dlm_lksb *lksb)
 730{
 731        int invalid = lksb->lksb_fsdlm.sb_flags & DLM_SBF_VALNOTVALID;
 732
 733        return !invalid;
 734}
 735
 736static void *user_dlm_lvb(struct ocfs2_dlm_lksb *lksb)
 737{
 738        if (!lksb->lksb_fsdlm.sb_lvbptr)
 739                lksb->lksb_fsdlm.sb_lvbptr = (char *)lksb +
 740                                             sizeof(struct dlm_lksb);
 741        return (void *)(lksb->lksb_fsdlm.sb_lvbptr);
 742}
 743
 744static void user_dlm_dump_lksb(struct ocfs2_dlm_lksb *lksb)
 745{
 746}
 747
 748static int user_plock(struct ocfs2_cluster_connection *conn,
 749                      u64 ino,
 750                      struct file *file,
 751                      int cmd,
 752                      struct file_lock *fl)
 753{
 754        /*
 755         * This more or less just demuxes the plock request into any
 756         * one of three dlm calls.
 757         *
 758         * Internally, fs/dlm will pass these to a misc device, which
 759         * a userspace daemon will read and write to.
 760         *
 761         * For now, cancel requests (which happen internally only),
 762         * are turned into unlocks. Most of this function taken from
 763         * gfs2_lock.
 764         */
 765
 766        if (cmd == F_CANCELLK) {
 767                cmd = F_SETLK;
 768                fl->fl_type = F_UNLCK;
 769        }
 770
 771        if (IS_GETLK(cmd))
 772                return dlm_posix_get(conn->cc_lockspace, ino, file, fl);
 773        else if (fl->fl_type == F_UNLCK)
 774                return dlm_posix_unlock(conn->cc_lockspace, ino, file, fl);
 775        else
 776                return dlm_posix_lock(conn->cc_lockspace, ino, file, cmd, fl);
 777}
 778
 779/*
 780 * Compare a requested locking protocol version against the current one.
 781 *
 782 * If the major numbers are different, they are incompatible.
 783 * If the current minor is greater than the request, they are incompatible.
 784 * If the current minor is less than or equal to the request, they are
 785 * compatible, and the requester should run at the current minor version.
 786 */
 787static int fs_protocol_compare(struct ocfs2_protocol_version *existing,
 788                               struct ocfs2_protocol_version *request)
 789{
 790        if (existing->pv_major != request->pv_major)
 791                return 1;
 792
 793        if (existing->pv_minor > request->pv_minor)
 794                return 1;
 795
 796        if (existing->pv_minor < request->pv_minor)
 797                request->pv_minor = existing->pv_minor;
 798
 799        return 0;
 800}
 801
 802static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
 803{
 804        dlm_lockspace_t *fsdlm;
 805        struct ocfs2_live_connection *uninitialized_var(control);
 806        int rc = 0;
 807
 808        BUG_ON(conn == NULL);
 809
 810        rc = ocfs2_live_connection_new(conn, &control);
 811        if (rc)
 812                goto out;
 813
 814        /*
 815         * running_proto must have been set before we allowed any mounts
 816         * to proceed.
 817         */
 818        if (fs_protocol_compare(&running_proto, &conn->cc_version)) {
 819                printk(KERN_ERR
 820                       "Unable to mount with fs locking protocol version "
 821                       "%u.%u because the userspace control daemon has "
 822                       "negotiated %u.%u\n",
 823                       conn->cc_version.pv_major, conn->cc_version.pv_minor,
 824                       running_proto.pv_major, running_proto.pv_minor);
 825                rc = -EPROTO;
 826                ocfs2_live_connection_drop(control);
 827                goto out;
 828        }
 829
 830        rc = dlm_new_lockspace(conn->cc_name, NULL, DLM_LSFL_FS, DLM_LVB_LEN,
 831                               NULL, NULL, NULL, &fsdlm);
 832        if (rc) {
 833                ocfs2_live_connection_drop(control);
 834                goto out;
 835        }
 836
 837        conn->cc_private = control;
 838        conn->cc_lockspace = fsdlm;
 839out:
 840        return rc;
 841}
 842
 843static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn)
 844{
 845        dlm_release_lockspace(conn->cc_lockspace, 2);
 846        conn->cc_lockspace = NULL;
 847        ocfs2_live_connection_drop(conn->cc_private);
 848        conn->cc_private = NULL;
 849        return 0;
 850}
 851
 852static int user_cluster_this_node(unsigned int *this_node)
 853{
 854        int rc;
 855
 856        rc = ocfs2_control_get_this_node();
 857        if (rc < 0)
 858                return rc;
 859
 860        *this_node = rc;
 861        return 0;
 862}
 863
 864static struct ocfs2_stack_operations ocfs2_user_plugin_ops = {
 865        .connect        = user_cluster_connect,
 866        .disconnect     = user_cluster_disconnect,
 867        .this_node      = user_cluster_this_node,
 868        .dlm_lock       = user_dlm_lock,
 869        .dlm_unlock     = user_dlm_unlock,
 870        .lock_status    = user_dlm_lock_status,
 871        .lvb_valid      = user_dlm_lvb_valid,
 872        .lock_lvb       = user_dlm_lvb,
 873        .plock          = user_plock,
 874        .dump_lksb      = user_dlm_dump_lksb,
 875};
 876
 877static struct ocfs2_stack_plugin ocfs2_user_plugin = {
 878        .sp_name        = "user",
 879        .sp_ops         = &ocfs2_user_plugin_ops,
 880        .sp_owner       = THIS_MODULE,
 881};
 882
 883
 884static int __init ocfs2_user_plugin_init(void)
 885{
 886        int rc;
 887
 888        rc = ocfs2_control_init();
 889        if (!rc) {
 890                rc = ocfs2_stack_glue_register(&ocfs2_user_plugin);
 891                if (rc)
 892                        ocfs2_control_exit();
 893        }
 894
 895        return rc;
 896}
 897
 898static void __exit ocfs2_user_plugin_exit(void)
 899{
 900        ocfs2_stack_glue_unregister(&ocfs2_user_plugin);
 901        ocfs2_control_exit();
 902}
 903
 904MODULE_AUTHOR("Oracle");
 905MODULE_DESCRIPTION("ocfs2 driver for userspace cluster stacks");
 906MODULE_LICENSE("GPL");
 907module_init(ocfs2_user_plugin_init);
 908module_exit(ocfs2_user_plugin_exit);
 909