1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28#include <linux/module.h>
29#include <linux/fs.h>
30#include <linux/types.h>
31#include <linux/slab.h>
32#include <linux/highmem.h>
33#include <linux/init.h>
34#include <linux/sysctl.h>
35#include <linux/random.h>
36#include <linux/blkdev.h>
37#include <linux/socket.h>
38#include <linux/inet.h>
39#include <linux/spinlock.h>
40#include <linux/delay.h>
41
42
43#include "cluster/heartbeat.h"
44#include "cluster/nodemanager.h"
45#include "cluster/tcp.h"
46
47#include "dlmapi.h"
48#include "dlmcommon.h"
49
50#include "dlmconvert.h"
51
52#define MLOG_MASK_PREFIX ML_DLM
53#include "cluster/masklog.h"
54
55static struct kmem_cache *dlm_lock_cache;
56
57static DEFINE_SPINLOCK(dlm_cookie_lock);
58static u64 dlm_next_cookie = 1;
59
60static enum dlm_status dlm_send_remote_lock_request(struct dlm_ctxt *dlm,
61 struct dlm_lock_resource *res,
62 struct dlm_lock *lock, int flags);
63static void dlm_init_lock(struct dlm_lock *newlock, int type,
64 u8 node, u64 cookie);
65static void dlm_lock_release(struct kref *kref);
66static void dlm_lock_detach_lockres(struct dlm_lock *lock);
67
68int dlm_init_lock_cache(void)
69{
70 dlm_lock_cache = kmem_cache_create("o2dlm_lock",
71 sizeof(struct dlm_lock),
72 0, SLAB_HWCACHE_ALIGN, NULL);
73 if (dlm_lock_cache == NULL)
74 return -ENOMEM;
75 return 0;
76}
77
78void dlm_destroy_lock_cache(void)
79{
80 kmem_cache_destroy(dlm_lock_cache);
81}
82
83
84
85
86
87
88
89
90static int dlm_can_grant_new_lock(struct dlm_lock_resource *res,
91 struct dlm_lock *lock)
92{
93 struct dlm_lock *tmplock;
94
95 list_for_each_entry(tmplock, &res->granted, list) {
96 if (!dlm_lock_compatible(tmplock->ml.type, lock->ml.type))
97 return 0;
98 }
99
100 list_for_each_entry(tmplock, &res->converting, list) {
101 if (!dlm_lock_compatible(tmplock->ml.type, lock->ml.type))
102 return 0;
103 if (!dlm_lock_compatible(tmplock->ml.convert_type,
104 lock->ml.type))
105 return 0;
106 }
107
108 return 1;
109}
110
111
112
113
114
115
116
117
118static enum dlm_status dlmlock_master(struct dlm_ctxt *dlm,
119 struct dlm_lock_resource *res,
120 struct dlm_lock *lock, int flags)
121{
122 int call_ast = 0, kick_thread = 0;
123 enum dlm_status status = DLM_NORMAL;
124
125 mlog(0, "type=%d\n", lock->ml.type);
126
127 spin_lock(&res->spinlock);
128
129
130 status = __dlm_lockres_state_to_status(res);
131 if (status != DLM_NORMAL &&
132 lock->ml.node != dlm->node_num) {
133
134 spin_unlock(&res->spinlock);
135 dlm_error(status);
136 return status;
137 }
138 __dlm_wait_on_lockres(res);
139 __dlm_lockres_reserve_ast(res);
140
141 if (dlm_can_grant_new_lock(res, lock)) {
142 mlog(0, "I can grant this lock right away\n");
143
144 lock->lksb->status = DLM_NORMAL;
145 status = DLM_NORMAL;
146 dlm_lock_get(lock);
147 list_add_tail(&lock->list, &res->granted);
148
149
150
151
152
153
154 if (!dlm_is_recovery_lock(res->lockname.name,
155 res->lockname.len)) {
156 kick_thread = 1;
157 call_ast = 1;
158 } else {
159 mlog(0, "%s: returning DLM_NORMAL to "
160 "node %u for reco lock\n", dlm->name,
161 lock->ml.node);
162 }
163 } else {
164
165
166 if (flags & LKM_NOQUEUE) {
167 status = DLM_NOTQUEUED;
168 if (dlm_is_recovery_lock(res->lockname.name,
169 res->lockname.len)) {
170 mlog(0, "%s: returning NOTQUEUED to "
171 "node %u for reco lock\n", dlm->name,
172 lock->ml.node);
173 }
174 } else {
175 status = DLM_NORMAL;
176 dlm_lock_get(lock);
177 list_add_tail(&lock->list, &res->blocked);
178 kick_thread = 1;
179 }
180 }
181
182 spin_unlock(&res->spinlock);
183 wake_up(&res->wq);
184
185
186 if (call_ast)
187 dlm_queue_ast(dlm, lock);
188 else
189 dlm_lockres_release_ast(dlm, res);
190
191 dlm_lockres_calc_usage(dlm, res);
192 if (kick_thread)
193 dlm_kick_thread(dlm, res);
194
195 return status;
196}
197
198void dlm_revert_pending_lock(struct dlm_lock_resource *res,
199 struct dlm_lock *lock)
200{
201
202 list_del_init(&lock->list);
203 lock->lksb->flags &= ~DLM_LKSB_GET_LVB;
204}
205
206
207
208
209
210
211
212
213
214static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm,
215 struct dlm_lock_resource *res,
216 struct dlm_lock *lock, int flags)
217{
218 enum dlm_status status = DLM_DENIED;
219 int lockres_changed = 1;
220
221 mlog(0, "type=%d, lockres %.*s, flags = 0x%x\n",
222 lock->ml.type, res->lockname.len,
223 res->lockname.name, flags);
224
225
226
227
228
229 spin_lock(&res->spinlock);
230 __dlm_wait_on_lockres(res);
231 if (res->owner == dlm->node_num) {
232 spin_unlock(&res->spinlock);
233 return DLM_RECOVERING;
234 }
235 res->state |= DLM_LOCK_RES_IN_PROGRESS;
236
237
238 dlm_lock_get(lock);
239 list_add_tail(&lock->list, &res->blocked);
240 lock->lock_pending = 1;
241 spin_unlock(&res->spinlock);
242
243
244
245 status = dlm_send_remote_lock_request(dlm, res, lock, flags);
246
247 spin_lock(&res->spinlock);
248 res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
249 lock->lock_pending = 0;
250 if (status != DLM_NORMAL) {
251 if (status == DLM_RECOVERING &&
252 dlm_is_recovery_lock(res->lockname.name,
253 res->lockname.len)) {
254
255
256
257 mlog(0, "%s: recovery lock was owned by "
258 "dead node %u, remaster it now.\n",
259 dlm->name, res->owner);
260 } else if (status != DLM_NOTQUEUED) {
261
262
263
264
265
266
267 lockres_changed = 0;
268 dlm_error(status);
269 }
270 dlm_revert_pending_lock(res, lock);
271 dlm_lock_put(lock);
272 } else if (dlm_is_recovery_lock(res->lockname.name,
273 res->lockname.len)) {
274
275
276
277
278 mlog(0, "%s: $RECOVERY lock for this node (%u) is "
279 "mastered by %u; got lock, manually granting (no ast)\n",
280 dlm->name, dlm->node_num, res->owner);
281 list_move_tail(&lock->list, &res->granted);
282 }
283 spin_unlock(&res->spinlock);
284
285 if (lockres_changed)
286 dlm_lockres_calc_usage(dlm, res);
287
288 wake_up(&res->wq);
289 return status;
290}
291
292
293
294
295
296
297
298
299
300static enum dlm_status dlm_send_remote_lock_request(struct dlm_ctxt *dlm,
301 struct dlm_lock_resource *res,
302 struct dlm_lock *lock, int flags)
303{
304 struct dlm_create_lock create;
305 int tmpret, status = 0;
306 enum dlm_status ret;
307
308 memset(&create, 0, sizeof(create));
309 create.node_idx = dlm->node_num;
310 create.requested_type = lock->ml.type;
311 create.cookie = lock->ml.cookie;
312 create.namelen = res->lockname.len;
313 create.flags = cpu_to_be32(flags);
314 memcpy(create.name, res->lockname.name, create.namelen);
315
316 tmpret = o2net_send_message(DLM_CREATE_LOCK_MSG, dlm->key, &create,
317 sizeof(create), res->owner, &status);
318 if (tmpret >= 0) {
319 ret = status;
320 if (ret == DLM_REJECTED) {
321 mlog(ML_ERROR, "%s: res %.*s, Stale lockres no longer "
322 "owned by node %u. That node is coming back up "
323 "currently.\n", dlm->name, create.namelen,
324 create.name, res->owner);
325 dlm_print_one_lock_resource(res);
326 BUG();
327 }
328 } else {
329 mlog(ML_ERROR, "%s: res %.*s, Error %d send CREATE LOCK to "
330 "node %u\n", dlm->name, create.namelen, create.name,
331 tmpret, res->owner);
332 if (dlm_is_host_down(tmpret))
333 ret = DLM_RECOVERING;
334 else
335 ret = dlm_err_to_dlm_status(tmpret);
336 }
337
338 return ret;
339}
340
341void dlm_lock_get(struct dlm_lock *lock)
342{
343 kref_get(&lock->lock_refs);
344}
345
346void dlm_lock_put(struct dlm_lock *lock)
347{
348 kref_put(&lock->lock_refs, dlm_lock_release);
349}
350
351static void dlm_lock_release(struct kref *kref)
352{
353 struct dlm_lock *lock;
354
355 lock = container_of(kref, struct dlm_lock, lock_refs);
356
357 BUG_ON(!list_empty(&lock->list));
358 BUG_ON(!list_empty(&lock->ast_list));
359 BUG_ON(!list_empty(&lock->bast_list));
360 BUG_ON(lock->ast_pending);
361 BUG_ON(lock->bast_pending);
362
363 dlm_lock_detach_lockres(lock);
364
365 if (lock->lksb_kernel_allocated) {
366 mlog(0, "freeing kernel-allocated lksb\n");
367 kfree(lock->lksb);
368 }
369 kmem_cache_free(dlm_lock_cache, lock);
370}
371
372
373void dlm_lock_attach_lockres(struct dlm_lock *lock,
374 struct dlm_lock_resource *res)
375{
376 dlm_lockres_get(res);
377 lock->lockres = res;
378}
379
380
381static void dlm_lock_detach_lockres(struct dlm_lock *lock)
382{
383 struct dlm_lock_resource *res;
384
385 res = lock->lockres;
386 if (res) {
387 lock->lockres = NULL;
388 mlog(0, "removing lock's lockres reference\n");
389 dlm_lockres_put(res);
390 }
391}
392
393static void dlm_init_lock(struct dlm_lock *newlock, int type,
394 u8 node, u64 cookie)
395{
396 INIT_LIST_HEAD(&newlock->list);
397 INIT_LIST_HEAD(&newlock->ast_list);
398 INIT_LIST_HEAD(&newlock->bast_list);
399 spin_lock_init(&newlock->spinlock);
400 newlock->ml.type = type;
401 newlock->ml.convert_type = LKM_IVMODE;
402 newlock->ml.highest_blocked = LKM_IVMODE;
403 newlock->ml.node = node;
404 newlock->ml.pad1 = 0;
405 newlock->ml.list = 0;
406 newlock->ml.flags = 0;
407 newlock->ast = NULL;
408 newlock->bast = NULL;
409 newlock->astdata = NULL;
410 newlock->ml.cookie = cpu_to_be64(cookie);
411 newlock->ast_pending = 0;
412 newlock->bast_pending = 0;
413 newlock->convert_pending = 0;
414 newlock->lock_pending = 0;
415 newlock->unlock_pending = 0;
416 newlock->cancel_pending = 0;
417 newlock->lksb_kernel_allocated = 0;
418
419 kref_init(&newlock->lock_refs);
420}
421
422struct dlm_lock * dlm_new_lock(int type, u8 node, u64 cookie,
423 struct dlm_lockstatus *lksb)
424{
425 struct dlm_lock *lock;
426 int kernel_allocated = 0;
427
428 lock = kmem_cache_zalloc(dlm_lock_cache, GFP_NOFS);
429 if (!lock)
430 return NULL;
431
432 if (!lksb) {
433
434 lksb = kzalloc(sizeof(*lksb), GFP_NOFS);
435 if (!lksb) {
436 kmem_cache_free(dlm_lock_cache, lock);
437 return NULL;
438 }
439 kernel_allocated = 1;
440 }
441
442 dlm_init_lock(lock, type, node, cookie);
443 if (kernel_allocated)
444 lock->lksb_kernel_allocated = 1;
445 lock->lksb = lksb;
446 lksb->lockid = lock;
447 return lock;
448}
449
450
451
452
453
454
455
456
457int dlm_create_lock_handler(struct o2net_msg *msg, u32 len, void *data,
458 void **ret_data)
459{
460 struct dlm_ctxt *dlm = data;
461 struct dlm_create_lock *create = (struct dlm_create_lock *)msg->buf;
462 struct dlm_lock_resource *res = NULL;
463 struct dlm_lock *newlock = NULL;
464 struct dlm_lockstatus *lksb = NULL;
465 enum dlm_status status = DLM_NORMAL;
466 char *name;
467 unsigned int namelen;
468
469 BUG_ON(!dlm);
470
471 if (!dlm_grab(dlm))
472 return DLM_REJECTED;
473
474 name = create->name;
475 namelen = create->namelen;
476 status = DLM_REJECTED;
477 if (!dlm_domain_fully_joined(dlm)) {
478 mlog(ML_ERROR, "Domain %s not fully joined, but node %u is "
479 "sending a create_lock message for lock %.*s!\n",
480 dlm->name, create->node_idx, namelen, name);
481 dlm_error(status);
482 goto leave;
483 }
484
485 status = DLM_IVBUFLEN;
486 if (namelen > DLM_LOCKID_NAME_MAX) {
487 dlm_error(status);
488 goto leave;
489 }
490
491 status = DLM_SYSERR;
492 newlock = dlm_new_lock(create->requested_type,
493 create->node_idx,
494 be64_to_cpu(create->cookie), NULL);
495 if (!newlock) {
496 dlm_error(status);
497 goto leave;
498 }
499
500 lksb = newlock->lksb;
501
502 if (be32_to_cpu(create->flags) & LKM_GET_LVB) {
503 lksb->flags |= DLM_LKSB_GET_LVB;
504 mlog(0, "set DLM_LKSB_GET_LVB flag\n");
505 }
506
507 status = DLM_IVLOCKID;
508 res = dlm_lookup_lockres(dlm, name, namelen);
509 if (!res) {
510 dlm_error(status);
511 goto leave;
512 }
513
514 spin_lock(&res->spinlock);
515 status = __dlm_lockres_state_to_status(res);
516 spin_unlock(&res->spinlock);
517
518 if (status != DLM_NORMAL) {
519 mlog(0, "lockres recovering/migrating/in-progress\n");
520 goto leave;
521 }
522
523 dlm_lock_attach_lockres(newlock, res);
524
525 status = dlmlock_master(dlm, res, newlock, be32_to_cpu(create->flags));
526leave:
527 if (status != DLM_NORMAL)
528 if (newlock)
529 dlm_lock_put(newlock);
530
531 if (res)
532 dlm_lockres_put(res);
533
534 dlm_put(dlm);
535
536 return status;
537}
538
539
540
541static inline void dlm_get_next_cookie(u8 node_num, u64 *cookie)
542{
543 u64 tmpnode = node_num;
544
545
546 tmpnode <<= 56;
547
548 spin_lock(&dlm_cookie_lock);
549 *cookie = (dlm_next_cookie | tmpnode);
550 if (++dlm_next_cookie & 0xff00000000000000ull) {
551 mlog(0, "This node's cookie will now wrap!\n");
552 dlm_next_cookie = 1;
553 }
554 spin_unlock(&dlm_cookie_lock);
555}
556
557enum dlm_status dlmlock(struct dlm_ctxt *dlm, int mode,
558 struct dlm_lockstatus *lksb, int flags,
559 const char *name, int namelen, dlm_astlockfunc_t *ast,
560 void *data, dlm_bastlockfunc_t *bast)
561{
562 enum dlm_status status;
563 struct dlm_lock_resource *res = NULL;
564 struct dlm_lock *lock = NULL;
565 int convert = 0, recovery = 0;
566
567
568
569
570 if (!lksb) {
571 dlm_error(DLM_BADARGS);
572 return DLM_BADARGS;
573 }
574
575 status = DLM_BADPARAM;
576 if (mode != LKM_EXMODE && mode != LKM_PRMODE && mode != LKM_NLMODE) {
577 dlm_error(status);
578 goto error;
579 }
580
581 if (flags & ~LKM_VALID_FLAGS) {
582 dlm_error(status);
583 goto error;
584 }
585
586 convert = (flags & LKM_CONVERT);
587 recovery = (flags & LKM_RECOVERY);
588
589 if (recovery &&
590 (!dlm_is_recovery_lock(name, namelen) || convert) ) {
591 dlm_error(status);
592 goto error;
593 }
594 if (convert && (flags & LKM_LOCAL)) {
595 mlog(ML_ERROR, "strange LOCAL convert request!\n");
596 goto error;
597 }
598
599 if (convert) {
600
601
602
603 lock = lksb->lockid;
604 if (!lock) {
605 mlog(ML_ERROR, "NULL lock pointer in convert "
606 "request\n");
607 goto error;
608 }
609
610 res = lock->lockres;
611 if (!res) {
612 mlog(ML_ERROR, "NULL lockres pointer in convert "
613 "request\n");
614 goto error;
615 }
616 dlm_lockres_get(res);
617
618
619
620
621
622
623 if (lock->lksb != lksb || lock->ast != ast ||
624 lock->bast != bast || lock->astdata != data) {
625 status = DLM_BADARGS;
626 mlog(ML_ERROR, "new args: lksb=%p, ast=%p, bast=%p, "
627 "astdata=%p\n", lksb, ast, bast, data);
628 mlog(ML_ERROR, "orig args: lksb=%p, ast=%p, bast=%p, "
629 "astdata=%p\n", lock->lksb, lock->ast,
630 lock->bast, lock->astdata);
631 goto error;
632 }
633retry_convert:
634 dlm_wait_for_recovery(dlm);
635
636 if (res->owner == dlm->node_num)
637 status = dlmconvert_master(dlm, res, lock, flags, mode);
638 else
639 status = dlmconvert_remote(dlm, res, lock, flags, mode);
640 if (status == DLM_RECOVERING || status == DLM_MIGRATING ||
641 status == DLM_FORWARD) {
642
643
644
645
646 mlog(0, "retrying convert with migration/recovery/"
647 "in-progress\n");
648 msleep(100);
649 goto retry_convert;
650 }
651 } else {
652 u64 tmpcookie;
653
654
655 status = DLM_BADARGS;
656 if (!name) {
657 dlm_error(status);
658 goto error;
659 }
660
661 status = DLM_IVBUFLEN;
662 if (namelen > DLM_LOCKID_NAME_MAX || namelen < 1) {
663 dlm_error(status);
664 goto error;
665 }
666
667 dlm_get_next_cookie(dlm->node_num, &tmpcookie);
668 lock = dlm_new_lock(mode, dlm->node_num, tmpcookie, lksb);
669 if (!lock) {
670 dlm_error(status);
671 goto error;
672 }
673
674 if (!recovery)
675 dlm_wait_for_recovery(dlm);
676
677
678 res = dlm_get_lock_resource(dlm, name, namelen, flags);
679 if (!res) {
680 status = DLM_IVLOCKID;
681 dlm_error(status);
682 goto error;
683 }
684
685 mlog(0, "type=%d, flags = 0x%x\n", mode, flags);
686 mlog(0, "creating lock: lock=%p res=%p\n", lock, res);
687
688 dlm_lock_attach_lockres(lock, res);
689 lock->ast = ast;
690 lock->bast = bast;
691 lock->astdata = data;
692
693retry_lock:
694 if (flags & LKM_VALBLK) {
695 mlog(0, "LKM_VALBLK passed by caller\n");
696
697
698
699 if (mode < LKM_PRMODE)
700 flags &= ~LKM_VALBLK;
701 else {
702 flags |= LKM_GET_LVB;
703 lock->lksb->flags |= DLM_LKSB_GET_LVB;
704 }
705 }
706
707 if (res->owner == dlm->node_num)
708 status = dlmlock_master(dlm, res, lock, flags);
709 else
710 status = dlmlock_remote(dlm, res, lock, flags);
711
712 if (status == DLM_RECOVERING || status == DLM_MIGRATING ||
713 status == DLM_FORWARD) {
714 msleep(100);
715 if (recovery) {
716 if (status != DLM_RECOVERING)
717 goto retry_lock;
718
719
720
721 dlm_wait_for_node_death(dlm, res->owner,
722 DLM_NODE_DEATH_WAIT_MAX);
723 } else {
724 dlm_wait_for_recovery(dlm);
725 goto retry_lock;
726 }
727 }
728
729
730 spin_lock(&res->spinlock);
731 dlm_lockres_drop_inflight_ref(dlm, res);
732 spin_unlock(&res->spinlock);
733
734 dlm_lockres_calc_usage(dlm, res);
735 dlm_kick_thread(dlm, res);
736
737 if (status != DLM_NORMAL) {
738 lock->lksb->flags &= ~DLM_LKSB_GET_LVB;
739 if (status != DLM_NOTQUEUED)
740 dlm_error(status);
741 goto error;
742 }
743 }
744
745error:
746 if (status != DLM_NORMAL) {
747 if (lock && !convert)
748 dlm_lock_put(lock);
749
750 lksb->status = status;
751 }
752
753
754
755 if (res)
756 dlm_lockres_put(res);
757
758 return status;
759}
760EXPORT_SYMBOL_GPL(dlmlock);
761