linux/fs/ceph/locks.c
<<
>>
Prefs
   1#include <linux/ceph/ceph_debug.h>
   2
   3#include <linux/file.h>
   4#include <linux/namei.h>
   5#include <linux/random.h>
   6
   7#include "super.h"
   8#include "mds_client.h"
   9#include <linux/ceph/pagelist.h>
  10
  11static u64 lock_secret;
  12static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
  13                                         struct ceph_mds_request *req);
  14
  15static inline u64 secure_addr(void *addr)
  16{
  17        u64 v = lock_secret ^ (u64)(unsigned long)addr;
  18        /*
  19         * Set the most significant bit, so that MDS knows the 'owner'
  20         * is sufficient to identify the owner of lock. (old code uses
  21         * both 'owner' and 'pid')
  22         */
  23        v |= (1ULL << 63);
  24        return v;
  25}
  26
  27void __init ceph_flock_init(void)
  28{
  29        get_random_bytes(&lock_secret, sizeof(lock_secret));
  30}
  31
  32/**
  33 * Implement fcntl and flock locking functions.
  34 */
  35static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
  36                             int cmd, u8 wait, struct file_lock *fl)
  37{
  38        struct inode *inode = file_inode(file);
  39        struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
  40        struct ceph_mds_request *req;
  41        int err;
  42        u64 length = 0;
  43        u64 owner;
  44
  45        if (operation != CEPH_MDS_OP_SETFILELOCK || cmd == CEPH_LOCK_UNLOCK)
  46                wait = 0;
  47
  48        req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS);
  49        if (IS_ERR(req))
  50                return PTR_ERR(req);
  51        req->r_inode = inode;
  52        ihold(inode);
  53        req->r_num_caps = 1;
  54
  55        /* mds requires start and length rather than start and end */
  56        if (LLONG_MAX == fl->fl_end)
  57                length = 0;
  58        else
  59                length = fl->fl_end - fl->fl_start + 1;
  60
  61        owner = secure_addr(fl->fl_owner);
  62
  63        dout("ceph_lock_message: rule: %d, op: %d, owner: %llx, pid: %llu, "
  64             "start: %llu, length: %llu, wait: %d, type: %d", (int)lock_type,
  65             (int)operation, owner, (u64)fl->fl_pid, fl->fl_start, length,
  66             wait, fl->fl_type);
  67
  68        req->r_args.filelock_change.rule = lock_type;
  69        req->r_args.filelock_change.type = cmd;
  70        req->r_args.filelock_change.owner = cpu_to_le64(owner);
  71        req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid);
  72        req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start);
  73        req->r_args.filelock_change.length = cpu_to_le64(length);
  74        req->r_args.filelock_change.wait = wait;
  75
  76        if (wait)
  77                req->r_wait_for_completion = ceph_lock_wait_for_completion;
  78
  79        err = ceph_mdsc_do_request(mdsc, inode, req);
  80
  81        if (operation == CEPH_MDS_OP_GETFILELOCK) {
  82                fl->fl_pid = le64_to_cpu(req->r_reply_info.filelock_reply->pid);
  83                if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type)
  84                        fl->fl_type = F_RDLCK;
  85                else if (CEPH_LOCK_EXCL == req->r_reply_info.filelock_reply->type)
  86                        fl->fl_type = F_WRLCK;
  87                else
  88                        fl->fl_type = F_UNLCK;
  89
  90                fl->fl_start = le64_to_cpu(req->r_reply_info.filelock_reply->start);
  91                length = le64_to_cpu(req->r_reply_info.filelock_reply->start) +
  92                                                 le64_to_cpu(req->r_reply_info.filelock_reply->length);
  93                if (length >= 1)
  94                        fl->fl_end = length -1;
  95                else
  96                        fl->fl_end = 0;
  97
  98        }
  99        ceph_mdsc_put_request(req);
 100        dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
 101             "length: %llu, wait: %d, type: %d, err code %d", (int)lock_type,
 102             (int)operation, (u64)fl->fl_pid, fl->fl_start,
 103             length, wait, fl->fl_type, err);
 104        return err;
 105}
 106
 107static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
 108                                         struct ceph_mds_request *req)
 109{
 110        struct ceph_mds_request *intr_req;
 111        struct inode *inode = req->r_inode;
 112        int err, lock_type;
 113
 114        BUG_ON(req->r_op != CEPH_MDS_OP_SETFILELOCK);
 115        if (req->r_args.filelock_change.rule == CEPH_LOCK_FCNTL)
 116                lock_type = CEPH_LOCK_FCNTL_INTR;
 117        else if (req->r_args.filelock_change.rule == CEPH_LOCK_FLOCK)
 118                lock_type = CEPH_LOCK_FLOCK_INTR;
 119        else
 120                BUG_ON(1);
 121        BUG_ON(req->r_args.filelock_change.type == CEPH_LOCK_UNLOCK);
 122
 123        err = wait_for_completion_interruptible(&req->r_completion);
 124        if (!err)
 125                return 0;
 126
 127        dout("ceph_lock_wait_for_completion: request %llu was interrupted\n",
 128             req->r_tid);
 129
 130        intr_req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETFILELOCK,
 131                                            USE_AUTH_MDS);
 132        if (IS_ERR(intr_req))
 133                return PTR_ERR(intr_req);
 134
 135        intr_req->r_inode = inode;
 136        ihold(inode);
 137        intr_req->r_num_caps = 1;
 138
 139        intr_req->r_args.filelock_change = req->r_args.filelock_change;
 140        intr_req->r_args.filelock_change.rule = lock_type;
 141        intr_req->r_args.filelock_change.type = CEPH_LOCK_UNLOCK;
 142
 143        err = ceph_mdsc_do_request(mdsc, inode, intr_req);
 144        ceph_mdsc_put_request(intr_req);
 145
 146        if (err && err != -ERESTARTSYS)
 147                return err;
 148
 149        wait_for_completion(&req->r_completion);
 150        return 0;
 151}
 152
 153/**
 154 * Attempt to set an fcntl lock.
 155 * For now, this just goes away to the server. Later it may be more awesome.
 156 */
 157int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
 158{
 159        u8 lock_cmd;
 160        int err;
 161        u8 wait = 0;
 162        u16 op = CEPH_MDS_OP_SETFILELOCK;
 163
 164        if (!(fl->fl_flags & FL_POSIX))
 165                return -ENOLCK;
 166        /* No mandatory locks */
 167        if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK)
 168                return -ENOLCK;
 169
 170        dout("ceph_lock, fl_owner: %p", fl->fl_owner);
 171
 172        /* set wait bit as appropriate, then make command as Ceph expects it*/
 173        if (IS_GETLK(cmd))
 174                op = CEPH_MDS_OP_GETFILELOCK;
 175        else if (IS_SETLKW(cmd))
 176                wait = 1;
 177
 178        if (F_RDLCK == fl->fl_type)
 179                lock_cmd = CEPH_LOCK_SHARED;
 180        else if (F_WRLCK == fl->fl_type)
 181                lock_cmd = CEPH_LOCK_EXCL;
 182        else
 183                lock_cmd = CEPH_LOCK_UNLOCK;
 184
 185        err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl);
 186        if (!err) {
 187                if (op != CEPH_MDS_OP_GETFILELOCK) {
 188                        dout("mds locked, locking locally");
 189                        err = posix_lock_file(file, fl, NULL);
 190                        if (err && (CEPH_MDS_OP_SETFILELOCK == op)) {
 191                                /* undo! This should only happen if
 192                                 * the kernel detects local
 193                                 * deadlock. */
 194                                ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
 195                                                  CEPH_LOCK_UNLOCK, 0, fl);
 196                                dout("got %d on posix_lock_file, undid lock",
 197                                     err);
 198                        }
 199                }
 200        }
 201        return err;
 202}
 203
 204int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
 205{
 206        u8 lock_cmd;
 207        int err;
 208        u8 wait = 0;
 209
 210        if (!(fl->fl_flags & FL_FLOCK))
 211                return -ENOLCK;
 212        /* No mandatory locks */
 213        if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK)
 214                return -ENOLCK;
 215
 216        dout("ceph_flock, fl_file: %p", fl->fl_file);
 217
 218        if (IS_SETLKW(cmd))
 219                wait = 1;
 220
 221        if (F_RDLCK == fl->fl_type)
 222                lock_cmd = CEPH_LOCK_SHARED;
 223        else if (F_WRLCK == fl->fl_type)
 224                lock_cmd = CEPH_LOCK_EXCL;
 225        else
 226                lock_cmd = CEPH_LOCK_UNLOCK;
 227
 228        err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK,
 229                                file, lock_cmd, wait, fl);
 230        if (!err) {
 231                err = flock_lock_file_wait(file, fl);
 232                if (err) {
 233                        ceph_lock_message(CEPH_LOCK_FLOCK,
 234                                          CEPH_MDS_OP_SETFILELOCK,
 235                                          file, CEPH_LOCK_UNLOCK, 0, fl);
 236                        dout("got %d on flock_lock_file_wait, undid lock", err);
 237                }
 238        }
 239        return err;
 240}
 241
 242/*
 243 * Fills in the passed counter variables, so you can prepare pagelist metadata
 244 * before calling ceph_encode_locks.
 245 */
 246void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count)
 247{
 248        struct file_lock *lock;
 249        struct file_lock_context *ctx;
 250
 251        *fcntl_count = 0;
 252        *flock_count = 0;
 253
 254        ctx = inode->i_flctx;
 255        if (ctx) {
 256                spin_lock(&ctx->flc_lock);
 257                list_for_each_entry(lock, &ctx->flc_posix, fl_list)
 258                        ++(*fcntl_count);
 259                list_for_each_entry(lock, &ctx->flc_flock, fl_list)
 260                        ++(*flock_count);
 261                spin_unlock(&ctx->flc_lock);
 262        }
 263        dout("counted %d flock locks and %d fcntl locks",
 264             *flock_count, *fcntl_count);
 265}
 266
 267/**
 268 * Encode the flock and fcntl locks for the given inode into the ceph_filelock
 269 * array. Must be called with inode->i_lock already held.
 270 * If we encounter more of a specific lock type than expected, return -ENOSPC.
 271 */
 272int ceph_encode_locks_to_buffer(struct inode *inode,
 273                                struct ceph_filelock *flocks,
 274                                int num_fcntl_locks, int num_flock_locks)
 275{
 276        struct file_lock *lock;
 277        struct file_lock_context *ctx = inode->i_flctx;
 278        int err = 0;
 279        int seen_fcntl = 0;
 280        int seen_flock = 0;
 281        int l = 0;
 282
 283        dout("encoding %d flock and %d fcntl locks", num_flock_locks,
 284             num_fcntl_locks);
 285
 286        if (!ctx)
 287                return 0;
 288
 289        spin_lock(&ctx->flc_lock);
 290        list_for_each_entry(lock, &ctx->flc_flock, fl_list) {
 291                ++seen_fcntl;
 292                if (seen_fcntl > num_fcntl_locks) {
 293                        err = -ENOSPC;
 294                        goto fail;
 295                }
 296                err = lock_to_ceph_filelock(lock, &flocks[l]);
 297                if (err)
 298                        goto fail;
 299                ++l;
 300        }
 301        list_for_each_entry(lock, &ctx->flc_flock, fl_list) {
 302                ++seen_flock;
 303                if (seen_flock > num_flock_locks) {
 304                        err = -ENOSPC;
 305                        goto fail;
 306                }
 307                err = lock_to_ceph_filelock(lock, &flocks[l]);
 308                if (err)
 309                        goto fail;
 310                ++l;
 311        }
 312fail:
 313        spin_unlock(&ctx->flc_lock);
 314        return err;
 315}
 316
 317/**
 318 * Copy the encoded flock and fcntl locks into the pagelist.
 319 * Format is: #fcntl locks, sequential fcntl locks, #flock locks,
 320 * sequential flock locks.
 321 * Returns zero on success.
 322 */
 323int ceph_locks_to_pagelist(struct ceph_filelock *flocks,
 324                           struct ceph_pagelist *pagelist,
 325                           int num_fcntl_locks, int num_flock_locks)
 326{
 327        int err = 0;
 328        __le32 nlocks;
 329
 330        nlocks = cpu_to_le32(num_fcntl_locks);
 331        err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks));
 332        if (err)
 333                goto out_fail;
 334
 335        err = ceph_pagelist_append(pagelist, flocks,
 336                                   num_fcntl_locks * sizeof(*flocks));
 337        if (err)
 338                goto out_fail;
 339
 340        nlocks = cpu_to_le32(num_flock_locks);
 341        err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks));
 342        if (err)
 343                goto out_fail;
 344
 345        err = ceph_pagelist_append(pagelist,
 346                                   &flocks[num_fcntl_locks],
 347                                   num_flock_locks * sizeof(*flocks));
 348out_fail:
 349        return err;
 350}
 351
 352/*
 353 * Given a pointer to a lock, convert it to a ceph filelock
 354 */
 355int lock_to_ceph_filelock(struct file_lock *lock,
 356                          struct ceph_filelock *cephlock)
 357{
 358        int err = 0;
 359        cephlock->start = cpu_to_le64(lock->fl_start);
 360        cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1);
 361        cephlock->client = cpu_to_le64(0);
 362        cephlock->pid = cpu_to_le64((u64)lock->fl_pid);
 363        cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner));
 364
 365        switch (lock->fl_type) {
 366        case F_RDLCK:
 367                cephlock->type = CEPH_LOCK_SHARED;
 368                break;
 369        case F_WRLCK:
 370                cephlock->type = CEPH_LOCK_EXCL;
 371                break;
 372        case F_UNLCK:
 373                cephlock->type = CEPH_LOCK_UNLOCK;
 374                break;
 375        default:
 376                dout("Have unknown lock type %d", lock->fl_type);
 377                err = -EINVAL;
 378        }
 379
 380        return err;
 381}
 382