// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2009 Red Hat, Inc.  All rights reserved.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/fs.h>
#include <linux/dlm.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/delay.h>
#include <linux/gfs2_ondisk.h>
#include <linux/sched/signal.h>

#include "incore.h"
#include "glock.h"
#include "glops.h"
#include "recovery.h"
#include "util.h"
#include "sys.h"
#include "trace_gfs2.h"

/**
 * gfs2_update_stats - Update time based stats
 * @s: The stats to update (local or global)
 * @index: The index inside @s
 * @sample: New data to include
 *
 * @delta is the difference between the current rtt sample and the
 * running average srtt. We add 1/8 of that to the srtt in order to
 * update the current srtt estimate. The variance estimate is a bit
 * more complicated: we subtract the current variance estimate from
 * the abs value of @delta and add 1/4 of that to the running total,
 * which is equivalent to 3/4 of the current variance estimate plus
 * 1/4 of abs(@delta) - the same smoothing that TCP uses for its
 * srtt/rttvar estimates.
 *
 * Note that @index points at the array entry containing the smoothed
 * mean value, and the variance estimate always lives in the following
 * entry. All times are in units of integer nanoseconds.
 */
static inline void gfs2_update_stats(struct gfs2_lkstats *s, unsigned index,
				     s64 sample)
{
	s64 delta = sample - s->stats[index];

	s->stats[index] += (delta >> 3);
	index++;
	s->stats[index] += ((abs(delta) - s->stats[index]) >> 2);
}
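
/*
 * Worked example of the update above (values invented for
 * illustration): with a smoothed mean of 8000ns, a variance estimate
 * of 2000ns and a new 16000ns sample, delta = 8000, so the mean
 * becomes 8000 + (8000 >> 3) = 9000 and the variance estimate becomes
 * 2000 + ((8000 - 2000) >> 2) = 3500.
 */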

/**
 * gfs2_update_reply_times - Update locking statistics
 * @gl: The glock to update
 *
 * This assumes that gl->gl_dstamp has been set earlier.
 *
 * The rtt (lock round trip time) is an estimate of the time
 * taken to perform a dlm lock request. We update it on each
 * reply from the dlm.
 *
 * The blocking flag is set on the glock for all dlm requests
 * which may potentially block due to lock requests from other nodes.
 * DLM requests where the current lock state is exclusive, the
 * requested target state is null (or unlocked) or where the TRY or
 * TRY_1CB flags are set are classified as non-blocking. All
 * other DLM requests are counted as (potentially) blocking.
 */
static inline void gfs2_update_reply_times(struct gfs2_glock *gl)
{
	struct gfs2_pcpu_lkstats *lks;
	const unsigned gltype = gl->gl_name.ln_type;
	unsigned index = test_bit(GLF_BLOCKING, &gl->gl_flags) ?
			 GFS2_LKS_SRTTB : GFS2_LKS_SRTT;
	s64 rtt;

	preempt_disable();
	rtt = ktime_to_ns(ktime_sub(ktime_get_real(), gl->gl_dstamp));
	lks = this_cpu_ptr(gl->gl_name.ln_sbd->sd_lkstats);
	gfs2_update_stats(&gl->gl_stats, index, rtt);		/* Local */
	gfs2_update_stats(&lks->lkstats[gltype], index, rtt);	/* Global */
	preempt_enable();

	trace_gfs2_glock_lock_time(gl, rtt);
}

/**
 * gfs2_update_request_times - Update locking statistics
 * @gl: The glock to update
 *
 * The irt (lock inter-request time) measures the average time
 * between requests to the dlm. It is updated immediately before
 * each dlm call.
 */
static inline void gfs2_update_request_times(struct gfs2_glock *gl)
{
	struct gfs2_pcpu_lkstats *lks;
	const unsigned gltype = gl->gl_name.ln_type;
	ktime_t dstamp;
	s64 irt;

	preempt_disable();
	dstamp = gl->gl_dstamp;
	gl->gl_dstamp = ktime_get_real();
	irt = ktime_to_ns(ktime_sub(gl->gl_dstamp, dstamp));
	lks = this_cpu_ptr(gl->gl_name.ln_sbd->sd_lkstats);
	gfs2_update_stats(&gl->gl_stats, GFS2_LKS_SIRT, irt);		/* Local */
	gfs2_update_stats(&lks->lkstats[gltype], GFS2_LKS_SIRT, irt);	/* Global */
	preempt_enable();
}
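
/*
 * Summary of the stats touched here and below (see the gfs2_lkstats
 * definition in incore.h for the full list): SRTT/SRTTB hold the
 * smoothed dlm round-trip time for non-blocking and blocking requests
 * respectively, each followed by its variance entry; SIRT holds the
 * smoothed inter-request time; DCOUNT counts dlm requests.
 */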

static void gdlm_ast(void *arg)
{
	struct gfs2_glock *gl = arg;
	unsigned ret = gl->gl_state;

	gfs2_update_reply_times(gl);
	BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED);

	if ((gl->gl_lksb.sb_flags & DLM_SBF_VALNOTVALID) && gl->gl_lksb.sb_lvbptr)
		memset(gl->gl_lksb.sb_lvbptr, 0, GDLM_LVB_SIZE);

	switch (gl->gl_lksb.sb_status) {
	case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */
		if (gl->gl_ops->go_free)
			gl->gl_ops->go_free(gl);
		gfs2_glock_free(gl);
		return;
	case -DLM_ECANCEL: /* Cancel while getting lock */
		ret |= LM_OUT_CANCELED;
		goto out;
	case -EAGAIN: /* Try lock fails */
	case -EDEADLK: /* Deadlock detected */
		goto out;
	case -ETIMEDOUT: /* Canceled due to timeout */
		ret |= LM_OUT_ERROR;
		goto out;
	case 0: /* Success */
		break;
	default:
		BUG();
	}

	ret = gl->gl_req;
	if (gl->gl_lksb.sb_flags & DLM_SBF_ALTMODE) {
		if (gl->gl_req == LM_ST_SHARED)
			ret = LM_ST_DEFERRED;
		else if (gl->gl_req == LM_ST_DEFERRED)
			ret = LM_ST_SHARED;
		else
			BUG();
	}

	set_bit(GLF_INITIAL, &gl->gl_flags);
	gfs2_glock_complete(gl, ret);
	return;
out:
	if (!test_bit(GLF_INITIAL, &gl->gl_flags))
		gl->gl_lksb.sb_lkid = 0;
	gfs2_glock_complete(gl, ret);
}

static void gdlm_bast(void *arg, int mode)
{
	struct gfs2_glock *gl = arg;

	switch (mode) {
	case DLM_LOCK_EX:
		gfs2_glock_cb(gl, LM_ST_UNLOCKED);
		break;
	case DLM_LOCK_CW:
		gfs2_glock_cb(gl, LM_ST_DEFERRED);
		break;
	case DLM_LOCK_PR:
		gfs2_glock_cb(gl, LM_ST_SHARED);
		break;
	default:
		fs_err(gl->gl_name.ln_sbd, "unknown bast mode %d\n", mode);
		BUG();
	}
}

/* convert gfs lock-state to dlm lock-mode */

static int make_mode(struct gfs2_sbd *sdp, const unsigned int lmstate)
{
	switch (lmstate) {
	case LM_ST_UNLOCKED:
		return DLM_LOCK_NL;
	case LM_ST_EXCLUSIVE:
		return DLM_LOCK_EX;
	case LM_ST_DEFERRED:
		return DLM_LOCK_CW;
	case LM_ST_SHARED:
		return DLM_LOCK_PR;
	}
	fs_err(sdp, "unknown LM state %d\n", lmstate);
	BUG();
	return -1;
}

static u32 make_flags(struct gfs2_glock *gl, const unsigned int gfs_flags,
		      const int req)
{
	u32 lkf = 0;

	if (gl->gl_lksb.sb_lvbptr)
		lkf |= DLM_LKF_VALBLK;

	if (gfs_flags & LM_FLAG_TRY)
		lkf |= DLM_LKF_NOQUEUE;

	if (gfs_flags & LM_FLAG_TRY_1CB) {
		lkf |= DLM_LKF_NOQUEUE;
		lkf |= DLM_LKF_NOQUEUEBAST;
	}

	if (gfs_flags & LM_FLAG_PRIORITY) {
		lkf |= DLM_LKF_NOORDER;
		lkf |= DLM_LKF_HEADQUE;
	}

	if (gfs_flags & LM_FLAG_ANY) {
		if (req == DLM_LOCK_PR)
			lkf |= DLM_LKF_ALTCW;
		else if (req == DLM_LOCK_CW)
			lkf |= DLM_LKF_ALTPR;
		else
			BUG();
	}

	if (gl->gl_lksb.sb_lkid != 0) {
		lkf |= DLM_LKF_CONVERT;
		if (test_bit(GLF_BLOCKING, &gl->gl_flags))
			lkf |= DLM_LKF_QUECVT;
	}

	return lkf;
}
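
/*
 * In summary (for illustration): TRY maps to NOQUEUE, TRY_1CB
 * additionally suppresses blocking callbacks on other nodes via
 * NOQUEUEBAST, PRIORITY puts the request at the head of the queue,
 * and ANY lets the dlm grant the alternate compatible mode via
 * ALTPR/ALTCW. A request on a glock that already holds a dlm lock
 * (sb_lkid != 0) becomes a conversion, queued behind other converting
 * requests (QUECVT) when it may block.
 */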

static void gfs2_reverse_hex(char *c, u64 value)
{
	*c = '0';
	while (value) {
		*c-- = hex_asc[value & 0x0f];
		value >>= 4;
	}
}
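
/*
 * gdlm_lock() below uses this to build fixed-width, space padded
 * resource names of the form "%8x%16x" (lock type, lock number).
 * For example (made-up values), ln_type = 5 and ln_number = 0x23
 * produce the 24-character name "       5              23".
 */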

static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state,
		     unsigned int flags)
{
	struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct;
	int req;
	u32 lkf;
	char strname[GDLM_STRNAME_BYTES] = "";

	req = make_mode(gl->gl_name.ln_sbd, req_state);
	lkf = make_flags(gl, flags, req);
	gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT);
	gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
	if (gl->gl_lksb.sb_lkid) {
		gfs2_update_request_times(gl);
	} else {
		memset(strname, ' ', GDLM_STRNAME_BYTES - 1);
		strname[GDLM_STRNAME_BYTES - 1] = '\0';
		gfs2_reverse_hex(strname + 7, gl->gl_name.ln_type);
		gfs2_reverse_hex(strname + 23, gl->gl_name.ln_number);
		gl->gl_dstamp = ktime_get_real();
	}

	/*
	 * Submit the actual lock request.
	 */

	return dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, strname,
			GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast);
}

static void gdlm_put_lock(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	int lvb_needs_unlock = 0;
	int error;

	if (gl->gl_lksb.sb_lkid == 0) {
		gfs2_glock_free(gl);
		return;
	}

	clear_bit(GLF_BLOCKING, &gl->gl_flags);
	gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT);
	gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
	gfs2_update_request_times(gl);

	/* don't want to skip dlm_unlock writing the lvb when lock has one */

	if (gl->gl_lksb.sb_lvbptr && (gl->gl_state == LM_ST_EXCLUSIVE))
		lvb_needs_unlock = 1;

	if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) &&
	    !lvb_needs_unlock) {
		gfs2_glock_free(gl);
		return;
	}

	error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK,
			   NULL, gl);
	if (error) {
		fs_err(sdp, "gdlm_unlock %x,%llx err=%d\n",
		       gl->gl_name.ln_type,
		       (unsigned long long)gl->gl_name.ln_number, error);
		return;
	}
}

static void gdlm_cancel(struct gfs2_glock *gl)
{
	struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct;
	dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_CANCEL, NULL, gl);
}

/*
 * dlm/gfs2 recovery coordination using dlm_recover callbacks
 * (condensed overview of the scheme implemented below)
 *
 *  0. gfs2 checks for another cluster node withdraw, needing journal replay
 *  1. dlm_controld sees lockspace members change
 *  2. dlm_controld blocks dlm-kernel locking activity
 *  3. gfs2 is notified (recover_prep) and sets BLOCK_LOCKS
 *  4. dlm tells gfs2 which slots/jids have failed (recover_slot)
 *  5. dlm performs its own lock recovery and unblocks locking
 *  6. dlm notifies gfs2 when done (recover_done with a new generation)
 *  7. gfs2_control updates the control_lock lvb with the new generation
 *     and sets a jid bit for each journal needing recovery
 *  8. all nodes see the jid bits and call gfs2_recover_set() for them
 *  9. gfs2_recover recovers the journals and reports the results back
 *     (gdlm_recovery_result)
 * 10. gfs2_control clears the jid bits of successfully recovered journals
 * 11. when no jid bits remain set, gfs2_control clears BLOCK_LOCKS and
 *     normal locking resumes (gfs2_glock_thaw)
 *
 * If new failures occur while earlier ones are still being recovered,
 * recover_prep/recover_done run again with a newer generation and the
 * cycle restarts; the block/start generation comparisons in
 * gfs2_control_func() detect this.
 *
 * The first node to mount the fs is special: it holds mounted_lock in
 * EX (later mounters hold it in PR) and must recover all journals
 * itself before other nodes may mount; see control_mount() and
 * control_first_done().
 *
 * The control_lock lvb begins with a little-endian 32-bit generation
 * number; a bitmap of journal ids needing recovery starts at byte
 * JID_BITMAP_OFFSET.
 */

#define JID_BITMAP_OFFSET 8 /* 4 byte generation number + 4 byte unused */

static void control_lvb_read(struct lm_lockstruct *ls, uint32_t *lvb_gen,
			     char *lvb_bits)
{
	__le32 gen;
	memcpy(lvb_bits, ls->ls_control_lvb, GDLM_LVB_SIZE);
	memcpy(&gen, lvb_bits, sizeof(__le32));
	*lvb_gen = le32_to_cpu(gen);
}

static void control_lvb_write(struct lm_lockstruct *ls, uint32_t lvb_gen,
			      char *lvb_bits)
{
	__le32 gen;
	memcpy(ls->ls_control_lvb, lvb_bits, GDLM_LVB_SIZE);
	gen = cpu_to_le32(lvb_gen);
	memcpy(ls->ls_control_lvb, &gen, sizeof(__le32));
}

static int all_jid_bits_clear(char *lvb)
{
	return !memchr_inv(lvb + JID_BITMAP_OFFSET, 0,
			   GDLM_LVB_SIZE - JID_BITMAP_OFFSET);
}
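
/*
 * Illustration of the lvb layout implied above (made-up values): with
 * generation 7 and journals 0 and 3 needing recovery, the lvb holds
 * 07 00 00 00 at offset 0 (le32 generation) and 0x09 at offset
 * JID_BITMAP_OFFSET (bits 0 and 3, little-endian bit numbering).
 */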

static void sync_wait_cb(void *arg)
{
	struct lm_lockstruct *ls = arg;
	complete(&ls->ls_sync_wait);
}

static int sync_unlock(struct gfs2_sbd *sdp, struct dlm_lksb *lksb, char *name)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	int error;

	error = dlm_unlock(ls->ls_dlm, lksb->sb_lkid, 0, lksb, ls);
	if (error) {
		fs_err(sdp, "%s lkid %x error %d\n",
		       name, lksb->sb_lkid, error);
		return error;
	}

	wait_for_completion(&ls->ls_sync_wait);

	if (lksb->sb_status != -DLM_EUNLOCK) {
		fs_err(sdp, "%s lkid %x status %d\n",
		       name, lksb->sb_lkid, lksb->sb_status);
		return -1;
	}
	return 0;
}

static int sync_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags,
		     unsigned int num, struct dlm_lksb *lksb, char *name)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	char strname[GDLM_STRNAME_BYTES];
	int error, status;

	memset(strname, 0, GDLM_STRNAME_BYTES);
	snprintf(strname, GDLM_STRNAME_BYTES, "%8x%16x", LM_TYPE_NONDISK, num);

	error = dlm_lock(ls->ls_dlm, mode, lksb, flags,
			 strname, GDLM_STRNAME_BYTES - 1,
			 0, sync_wait_cb, ls, NULL);
	if (error) {
		fs_err(sdp, "%s lkid %x flags %x mode %d error %d\n",
		       name, lksb->sb_lkid, flags, mode, error);
		return error;
	}

	wait_for_completion(&ls->ls_sync_wait);

	status = lksb->sb_status;

	if (status && status != -EAGAIN) {
		fs_err(sdp, "%s lkid %x flags %x mode %d status %d\n",
		       name, lksb->sb_lkid, flags, mode, status);
	}

	return status;
}

static int mounted_unlock(struct gfs2_sbd *sdp)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	return sync_unlock(sdp, &ls->ls_mounted_lksb, "mounted_lock");
}

static int mounted_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	return sync_lock(sdp, mode, flags, GFS2_MOUNTED_LOCK,
			 &ls->ls_mounted_lksb, "mounted_lock");
}

static int control_unlock(struct gfs2_sbd *sdp)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	return sync_unlock(sdp, &ls->ls_control_lksb, "control_lock");
}

static int control_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	return sync_lock(sdp, mode, flags, GFS2_CONTROL_LOCK,
			 &ls->ls_control_lksb, "control_lock");
}

/**
 * remote_withdraw - react to a node withdrawing from the file system
 * @sdp: The superblock
 */
static void remote_withdraw(struct gfs2_sbd *sdp)
{
	struct gfs2_jdesc *jd;
	int ret = 0, count = 0;

	list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
		if (jd->jd_jid == sdp->sd_lockstruct.ls_jid)
			continue;
		ret = gfs2_recover_journal(jd, true);
		if (ret)
			break;
		count++;
	}

	fs_err(sdp, "Journals checked: %d, ret = %d.\n", count, ret);
}

static void gfs2_control_func(struct work_struct *work)
{
	struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_control_work.work);
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	uint32_t block_gen, start_gen, lvb_gen, flags;
	int recover_set = 0;
	int write_lvb = 0;
	int recover_size;
	int i, error;

	/* First check for other nodes that may have done a withdraw. */
	if (test_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags)) {
		remote_withdraw(sdp);
		clear_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags);
		return;
	}

	spin_lock(&ls->ls_recover_spin);
	/*
	 * No MOUNT_DONE means we're still mounting; control_mount()
	 * will set this flag, after which we are fully joined to the
	 * cluster.
	 *
	 * FIRST_MOUNT means this node is doing first mounter recovery,
	 * for which recovery control is handled by
	 * control_mount()/control_first_done(), not this routine.
	 */
	if (!test_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags) ||
	    test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) {
		spin_unlock(&ls->ls_recover_spin);
		return;
	}
	block_gen = ls->ls_recover_block;
	start_gen = ls->ls_recover_start;
	spin_unlock(&ls->ls_recover_spin);

	/*
	 * Equal block_gen and start_gen implies we are between
	 * recover_prep and recover_done callbacks, which means
	 * dlm recovery is in progress and dlm locking is blocked.
	 * There is no point doing any work until recover_done.
	 */

	if (block_gen == start_gen)
		return;

	/*
	 * Propagate recover_submit[] and recover_result[] to lvb:
	 * dlm_recoverd adds to recover_submit[] jids needing recovery
	 * gfs2_recover adds to recover_result[] journal recovery results
	 *
	 * set lvb bit for jids in recover_submit[] if the lvb has not
	 * yet been updated for the generation of the failure
	 *
	 * clear lvb bit for jids in recover_result[] if the result of
	 * the journal recovery is SUCCESS
	 */

	error = control_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_VALBLK);
	if (error) {
		fs_err(sdp, "control lock EX error %d\n", error);
		return;
	}

	control_lvb_read(ls, &lvb_gen, ls->ls_lvb_bits);

	spin_lock(&ls->ls_recover_spin);
	if (block_gen != ls->ls_recover_block ||
	    start_gen != ls->ls_recover_start) {
		fs_info(sdp, "recover generation %u block1 %u %u\n",
			start_gen, block_gen, ls->ls_recover_block);
		spin_unlock(&ls->ls_recover_spin);
		control_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT);
		return;
	}

	recover_size = ls->ls_recover_size;

	if (lvb_gen <= start_gen) {
		/*
		 * Clear lvb bits for jids we've successfully recovered.
		 * Because all nodes attempt to recover failed journals,
		 * a journal can be recovered multiple times successfully
		 * in succession.  Only the first will really do recovery,
		 * the others find it clean, but still report a successful
		 * recovery.  So, another node may have already recovered
		 * the jid and cleared the lvb bit for it.
		 */
		for (i = 0; i < recover_size; i++) {
			if (ls->ls_recover_result[i] != LM_RD_SUCCESS)
				continue;

			ls->ls_recover_result[i] = 0;

			if (!test_bit_le(i, ls->ls_lvb_bits + JID_BITMAP_OFFSET))
				continue;

			__clear_bit_le(i, ls->ls_lvb_bits + JID_BITMAP_OFFSET);
			write_lvb = 1;
		}
	}

	if (lvb_gen == start_gen) {
		/*
		 * Failed slots before start_gen are already set in lvb.
		 */
		for (i = 0; i < recover_size; i++) {
			if (!ls->ls_recover_submit[i])
				continue;
			if (ls->ls_recover_submit[i] < lvb_gen)
				ls->ls_recover_submit[i] = 0;
		}
	} else if (lvb_gen < start_gen) {
		/*
		 * Failed slots before start_gen are not yet set in lvb.
		 */
		for (i = 0; i < recover_size; i++) {
			if (!ls->ls_recover_submit[i])
				continue;
			if (ls->ls_recover_submit[i] < start_gen) {
				ls->ls_recover_submit[i] = 0;
				__set_bit_le(i, ls->ls_lvb_bits + JID_BITMAP_OFFSET);
			}
		}
		/* even if there are no bits to set, we need to write the
		   latest generation to the lvb */
		write_lvb = 1;
	} else {
		/*
		 * we should be getting a recover_done() for lvb_gen soon
		 */
	}
	spin_unlock(&ls->ls_recover_spin);

	if (write_lvb) {
		control_lvb_write(ls, start_gen, ls->ls_lvb_bits);
		flags = DLM_LKF_CONVERT | DLM_LKF_VALBLK;
	} else {
		flags = DLM_LKF_CONVERT;
	}

	error = control_lock(sdp, DLM_LOCK_NL, flags);
	if (error) {
		fs_err(sdp, "control lock NL error %d\n", error);
		return;
	}

	/*
	 * Everyone will see jid bits set in the lvb, run gfs2_recover_set(),
	 * and clear a jid bit in the lvb if the recovery is a success.
	 * Eventually all journals will be recovered, all jid bits will
	 * be cleared and everyone will clear BLOCK_LOCKS.
	 */

	for (i = 0; i < recover_size; i++) {
		if (test_bit_le(i, ls->ls_lvb_bits + JID_BITMAP_OFFSET)) {
			fs_info(sdp, "recover generation %u jid %d\n",
				start_gen, i);
			gfs2_recover_set(sdp, i);
			recover_set++;
		}
	}
	if (recover_set)
		return;

	/*
	 * No more jid bits set in lvb, all recovery is done, unblock locks
	 * (unless a new recover_prep callback has occurred blocking locks
	 * again while working above)
	 */

	spin_lock(&ls->ls_recover_spin);
	if (ls->ls_recover_block == block_gen &&
	    ls->ls_recover_start == start_gen) {
		clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
		spin_unlock(&ls->ls_recover_spin);
		fs_info(sdp, "recover generation %u done\n", start_gen);
		gfs2_glock_thaw(sdp);
	} else {
		fs_info(sdp, "recover generation %u block2 %u %u\n",
			start_gen, block_gen, ls->ls_recover_block);
		spin_unlock(&ls->ls_recover_spin);
	}
}
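
/*
 * Worked example of the generation flow above (made-up numbers): a
 * node holding jid 3 fails while the lockspace is at generation 5.
 * recover_prep() sets ls_recover_block = ls_recover_start = 5,
 * recover_slot() records ls_recover_submit[3] = 5, and recover_done()
 * sets ls_recover_start = 6. gfs2_control_func() then sees
 * lvb_gen (5) < start_gen (6), sets jid bit 3 and writes generation 6
 * to the lvb. Every node sees the bit and tries to recover journal 3;
 * the first successful result clears the bit, and once no bits remain
 * BLOCK_LOCKS is cleared.
 */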

static int control_mount(struct gfs2_sbd *sdp)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	uint32_t start_gen, block_gen, mount_gen, lvb_gen;
	int mounted_mode;
	int retries = 0;
	int error;

	memset(&ls->ls_mounted_lksb, 0, sizeof(struct dlm_lksb));
	memset(&ls->ls_control_lksb, 0, sizeof(struct dlm_lksb));
	memset(&ls->ls_control_lvb, 0, GDLM_LVB_SIZE);
	ls->ls_control_lksb.sb_lvbptr = ls->ls_control_lvb;
	init_completion(&ls->ls_sync_wait);

	set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);

	error = control_lock(sdp, DLM_LOCK_NL, DLM_LKF_VALBLK);
	if (error) {
		fs_err(sdp, "control_mount control_lock NL error %d\n", error);
		return error;
	}

	error = mounted_lock(sdp, DLM_LOCK_NL, 0);
	if (error) {
		fs_err(sdp, "control_mount mounted_lock NL error %d\n", error);
		control_unlock(sdp);
		return error;
	}
	mounted_mode = DLM_LOCK_NL;

restart:
	if (retries++ && signal_pending(current)) {
		error = -EINTR;
		goto fail;
	}

	/*
	 * We always start with both locks in NL. control_lock is
	 * demoted to NL below so we don't need to do it here.
	 */

	if (mounted_mode != DLM_LOCK_NL) {
		error = mounted_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT);
		if (error)
			goto fail;
		mounted_mode = DLM_LOCK_NL;
	}

	/*
	 * Other nodes need to do some work in dlm recovery and gfs2_control
	 * before the recover_done and control_lock will be ready for us
	 * below. A delay here is not required but often avoids having to
	 * retry.
	 */

	msleep_interruptible(500);

	/*
	 * Acquire control_lock in EX and mounted_lock in either EX or PR.
	 * control_lock lvb keeps track of any pending journal recoveries.
	 * mounted_lock indicates if any other nodes have the fs mounted.
	 */

	error = control_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE|DLM_LKF_VALBLK);
	if (error == -EAGAIN) {
		goto restart;
	} else if (error) {
		fs_err(sdp, "control_mount control_lock EX error %d\n", error);
		goto fail;
	}

	/*
	 * If we're a spectator, we don't want to take the lock in EX because
	 * we cannot do the first-mount responsibility it implies: recovery.
	 */
	if (sdp->sd_args.ar_spectator)
		goto locks_done;

	error = mounted_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE);
	if (!error) {
		mounted_mode = DLM_LOCK_EX;
		goto locks_done;
	} else if (error != -EAGAIN) {
		fs_err(sdp, "control_mount mounted_lock EX error %d\n", error);
		goto fail;
	}

	error = mounted_lock(sdp, DLM_LOCK_PR, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE);
	if (!error) {
		mounted_mode = DLM_LOCK_PR;
		goto locks_done;
	} else {
		/* not even -EAGAIN should happen here */
		fs_err(sdp, "control_mount mounted_lock PR error %d\n", error);
		goto fail;
	}

locks_done:
	/*
	 * If we got both locks above in EX, then we're the first mounter.
	 * If not, then we need to wait for the control_lock lvb to be
	 * updated by other mounted nodes.
	 *
	 * If mounted_mode is PR, some other node is the first mounter and
	 * is responsible for recovery; below we just wait until the lvb
	 * shows that all journal recovery is done and that our own mount
	 * generation has been accounted for.
	 */

	control_lvb_read(ls, &lvb_gen, ls->ls_lvb_bits);

	if (lvb_gen == 0xFFFFFFFF) {
		/* special value to force mount attempts to fail */
		fs_err(sdp, "control_mount control_lock disabled\n");
		error = -EINVAL;
		goto fail;
	}

	if (mounted_mode == DLM_LOCK_EX) {
		/* first mounter, keep both EX while doing first recovery */
		spin_lock(&ls->ls_recover_spin);
		clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
		set_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags);
		set_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags);
		spin_unlock(&ls->ls_recover_spin);
		fs_info(sdp, "first mounter control generation %u\n", lvb_gen);
		return 0;
	}

	error = control_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT);
	if (error)
		goto fail;

	/*
	 * We are not first mounter; we need to wait for the control_lock
	 * lvb generation to be >= the generation from our first
	 * recover_done and for all jid bits to be clear (no pending
	 * journal recoveries).
	 */

	if (!all_jid_bits_clear(ls->ls_lvb_bits)) {
		/* journals need recovery, wait until all are clear */
		fs_info(sdp, "control_mount wait for journal recovery\n");
		goto restart;
	}

	spin_lock(&ls->ls_recover_spin);
	block_gen = ls->ls_recover_block;
	start_gen = ls->ls_recover_start;
	mount_gen = ls->ls_recover_mount;

	if (lvb_gen < mount_gen) {
		/* wait for mounted nodes to update control_lock lvb to our
		   generation, which might include new recovery bits set */
		if (sdp->sd_args.ar_spectator) {
			fs_info(sdp, "Recovery is required. Waiting for a "
				"non-spectator to mount.\n");
			msleep_interruptible(1000);
		} else {
			fs_info(sdp, "control_mount wait1 block %u start %u "
				"mount %u lvb %u flags %lx\n", block_gen,
				start_gen, mount_gen, lvb_gen,
				ls->ls_recover_flags);
		}
		spin_unlock(&ls->ls_recover_spin);
		goto restart;
	}

	if (lvb_gen != start_gen) {
		/* wait for mounted nodes to update control_lock lvb to the
		   latest recovery generation */
		fs_info(sdp, "control_mount wait2 block %u start %u mount %u "
			"lvb %u flags %lx\n", block_gen, start_gen, mount_gen,
			lvb_gen, ls->ls_recover_flags);
		spin_unlock(&ls->ls_recover_spin);
		goto restart;
	}

	if (block_gen == start_gen) {
		/* dlm recovery in progress, wait for it to finish */
		fs_info(sdp, "control_mount wait3 block %u start %u mount %u "
			"lvb %u flags %lx\n", block_gen, start_gen, mount_gen,
			lvb_gen, ls->ls_recover_flags);
		spin_unlock(&ls->ls_recover_spin);
		goto restart;
	}

	clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
	set_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags);
	memset(ls->ls_recover_submit, 0, ls->ls_recover_size*sizeof(uint32_t));
	memset(ls->ls_recover_result, 0, ls->ls_recover_size*sizeof(uint32_t));
	spin_unlock(&ls->ls_recover_spin);
	return 0;

fail:
	mounted_unlock(sdp);
	control_unlock(sdp);
	return error;
}

static int control_first_done(struct gfs2_sbd *sdp)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	uint32_t start_gen, block_gen;
	int error;

restart:
	spin_lock(&ls->ls_recover_spin);
	start_gen = ls->ls_recover_start;
	block_gen = ls->ls_recover_block;

	if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags) ||
	    !test_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags) ||
	    !test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) {
		/* sanity */
		fs_err(sdp, "control_first_done start %u block %u flags %lx\n",
		       start_gen, block_gen, ls->ls_recover_flags);
		spin_unlock(&ls->ls_recover_spin);
		control_unlock(sdp);
		return -1;
	}

	if (start_gen == block_gen) {
		/*
		 * Wait for the end of a dlm recovery cycle to switch from
		 * first mounter recovery.  We can ignore any recover_slot
		 * callbacks between the recover_prep and next recover_done
		 * because we are still the first mounter and any failed
		 * nodes have not fully mounted, so they don't need recovery.
		 */
		spin_unlock(&ls->ls_recover_spin);
		fs_info(sdp, "control_first_done wait gen %u\n", start_gen);

		wait_on_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY,
			    TASK_UNINTERRUPTIBLE);
		goto restart;
	}

	clear_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags);
	set_bit(DFL_FIRST_MOUNT_DONE, &ls->ls_recover_flags);
	memset(ls->ls_recover_submit, 0, ls->ls_recover_size*sizeof(uint32_t));
	memset(ls->ls_recover_result, 0, ls->ls_recover_size*sizeof(uint32_t));
	spin_unlock(&ls->ls_recover_spin);

	memset(ls->ls_lvb_bits, 0, GDLM_LVB_SIZE);
	control_lvb_write(ls, start_gen, ls->ls_lvb_bits);

	error = mounted_lock(sdp, DLM_LOCK_PR, DLM_LKF_CONVERT);
	if (error)
		fs_err(sdp, "control_first_done mounted PR error %d\n", error);

	error = control_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT|DLM_LKF_VALBLK);
	if (error)
		fs_err(sdp, "control_first_done control NL error %d\n", error);

	return error;
}

/*
 * Expand static jid arrays, if necessary, to the max slot number the
 * dlm reports (in increments of RECOVER_SIZE_INC).  Note that dlm
 * slot numbers start at 1, gfs2 jids start at 0, so jid = slot - 1.
 */

#define RECOVER_SIZE_INC 16

static int set_recover_size(struct gfs2_sbd *sdp, struct dlm_slot *slots,
			    int num_slots)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	uint32_t *submit = NULL;
	uint32_t *result = NULL;
	uint32_t old_size, new_size;
	int i, max_jid;

	if (!ls->ls_lvb_bits) {
		ls->ls_lvb_bits = kzalloc(GDLM_LVB_SIZE, GFP_NOFS);
		if (!ls->ls_lvb_bits)
			return -ENOMEM;
	}

	max_jid = 0;
	for (i = 0; i < num_slots; i++) {
		if (max_jid < slots[i].slot - 1)
			max_jid = slots[i].slot - 1;
	}

	old_size = ls->ls_recover_size;

	if (old_size >= max_jid + 1)
		return 0;

	new_size = old_size + RECOVER_SIZE_INC;

	submit = kcalloc(new_size, sizeof(uint32_t), GFP_NOFS);
	result = kcalloc(new_size, sizeof(uint32_t), GFP_NOFS);
	if (!submit || !result) {
		kfree(submit);
		kfree(result);
		return -ENOMEM;
	}

	spin_lock(&ls->ls_recover_spin);
	memcpy(submit, ls->ls_recover_submit, old_size * sizeof(uint32_t));
	memcpy(result, ls->ls_recover_result, old_size * sizeof(uint32_t));
	kfree(ls->ls_recover_submit);
	kfree(ls->ls_recover_result);
	ls->ls_recover_submit = submit;
	ls->ls_recover_result = result;
	ls->ls_recover_size = new_size;
	spin_unlock(&ls->ls_recover_spin);
	return 0;
}
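
/*
 * Example of the sizing above (made-up slots): slots 1, 2 and 5 give
 * max_jid = 4, so on a fresh mount (old_size = 0) the arrays grow to
 * RECOVER_SIZE_INC (16) entries, enough for jids 0..15.
 */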

static void free_recover_size(struct lm_lockstruct *ls)
{
	kfree(ls->ls_lvb_bits);
	kfree(ls->ls_recover_submit);
	kfree(ls->ls_recover_result);
	ls->ls_recover_submit = NULL;
	ls->ls_recover_result = NULL;
	ls->ls_recover_size = 0;
	ls->ls_lvb_bits = NULL;
}

/* dlm calls before it does lock recovery */

static void gdlm_recover_prep(void *arg)
{
	struct gfs2_sbd *sdp = arg;
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;

	if (gfs2_withdrawn(sdp)) {
		fs_err(sdp, "recover_prep ignored due to withdraw.\n");
		return;
	}
	spin_lock(&ls->ls_recover_spin);
	ls->ls_recover_block = ls->ls_recover_start;
	set_bit(DFL_DLM_RECOVERY, &ls->ls_recover_flags);

	if (!test_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags) ||
	    test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) {
		spin_unlock(&ls->ls_recover_spin);
		return;
	}
	set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
	spin_unlock(&ls->ls_recover_spin);
}

/* dlm calls after recover_prep has been completed on all lockspace
   members; identifies slot/jid of failed member */

static void gdlm_recover_slot(void *arg, struct dlm_slot *slot)
{
	struct gfs2_sbd *sdp = arg;
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	int jid = slot->slot - 1;

	if (gfs2_withdrawn(sdp)) {
		fs_err(sdp, "recover_slot jid %d ignored due to withdraw.\n",
		       jid);
		return;
	}
	spin_lock(&ls->ls_recover_spin);
	if (ls->ls_recover_size < jid + 1) {
		fs_err(sdp, "recover_slot jid %d gen %u short size %d\n",
		       jid, ls->ls_recover_block, ls->ls_recover_size);
		spin_unlock(&ls->ls_recover_spin);
		return;
	}

	if (ls->ls_recover_submit[jid]) {
		fs_info(sdp, "recover_slot jid %d gen %u prev %u\n",
			jid, ls->ls_recover_block, ls->ls_recover_submit[jid]);
	}
	ls->ls_recover_submit[jid] = ls->ls_recover_block;
	spin_unlock(&ls->ls_recover_spin);
}

/* dlm calls after recover_slot and after it completes lock recovery */

static void gdlm_recover_done(void *arg, struct dlm_slot *slots, int num_slots,
			      int our_slot, uint32_t generation)
{
	struct gfs2_sbd *sdp = arg;
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;

	if (gfs2_withdrawn(sdp)) {
		fs_err(sdp, "recover_done ignored due to withdraw.\n");
		return;
	}

	/* ensure the ls jid arrays are large enough */
	set_recover_size(sdp, slots, num_slots);

	spin_lock(&ls->ls_recover_spin);
	ls->ls_recover_start = generation;

	if (!ls->ls_recover_mount) {
		ls->ls_recover_mount = generation;
		ls->ls_jid = our_slot - 1;
	}

	if (!test_bit(DFL_UNMOUNT, &ls->ls_recover_flags))
		queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work, 0);

	clear_bit(DFL_DLM_RECOVERY, &ls->ls_recover_flags);
	smp_mb__after_atomic();
	wake_up_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY);
	spin_unlock(&ls->ls_recover_spin);
}

/* gfs2_recover thread has a journal recovery result */

static void gdlm_recovery_result(struct gfs2_sbd *sdp, unsigned int jid,
				 unsigned int result)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;

	if (gfs2_withdrawn(sdp)) {
		fs_err(sdp, "recovery_result jid %d ignored due to withdraw.\n",
		       jid);
		return;
	}
	if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags))
		return;

	/* don't care about the recovery of own journal during mount */
	if (jid == ls->ls_jid)
		return;

	spin_lock(&ls->ls_recover_spin);
	if (test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) {
		spin_unlock(&ls->ls_recover_spin);
		return;
	}
	if (ls->ls_recover_size < jid + 1) {
		fs_err(sdp, "recovery_result jid %d short size %d\n",
		       jid, ls->ls_recover_size);
		spin_unlock(&ls->ls_recover_spin);
		return;
	}

	fs_info(sdp, "recover jid %d result %s\n", jid,
		result == LM_RD_GAVEUP ? "busy" : "success");

	ls->ls_recover_result[jid] = result;

	/*
	 * GAVEUP means the journal was busy (e.g. another node was
	 * recovering it); requeue the control work to process the
	 * result, with a short delay in that case to avoid spinning.
	 */
	if (!test_bit(DFL_UNMOUNT, &ls->ls_recover_flags))
		queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work,
				   result == LM_RD_GAVEUP ? HZ : 0);
	spin_unlock(&ls->ls_recover_spin);
}

static const struct dlm_lockspace_ops gdlm_lockspace_ops = {
	.recover_prep = gdlm_recover_prep,
	.recover_slot = gdlm_recover_slot,
	.recover_done = gdlm_recover_done,
};

static int gdlm_mount(struct gfs2_sbd *sdp, const char *table)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	char cluster[GFS2_LOCKNAME_LEN];
	const char *fsname;
	uint32_t flags;
	int error, ops_result;

	/*
	 * initialize everything we need up front, because recovery
	 * callbacks can occur as soon as the lockspace is joined below
	 */

	INIT_DELAYED_WORK(&sdp->sd_control_work, gfs2_control_func);
	spin_lock_init(&ls->ls_recover_spin);
	ls->ls_recover_flags = 0;
	ls->ls_recover_mount = 0;
	ls->ls_recover_start = 0;
	ls->ls_recover_block = 0;
	ls->ls_recover_size = 0;
	ls->ls_recover_submit = NULL;
	ls->ls_recover_result = NULL;
	ls->ls_lvb_bits = NULL;

	error = set_recover_size(sdp, NULL, 0);
	if (error)
		goto fail;

	/*
	 * split the table name, "cluster:fs", into the cluster and
	 * filesystem names used for the lockspace
	 */

	fsname = strchr(table, ':');
	if (!fsname) {
		fs_info(sdp, "no fsname found\n");
		error = -EINVAL;
		goto fail_free;
	}
	memset(cluster, 0, sizeof(cluster));
	memcpy(cluster, table, strlen(table) - strlen(fsname));
	fsname++;

	flags = DLM_LSFL_FS | DLM_LSFL_NEWEXCL;

	/*
	 * create/join the lockspace
	 */

	error = dlm_new_lockspace(fsname, cluster, flags, GDLM_LVB_SIZE,
				  &gdlm_lockspace_ops, sdp, &ops_result,
				  &ls->ls_dlm);
	if (error) {
		fs_err(sdp, "dlm_new_lockspace error %d\n", error);
		goto fail_free;
	}

	if (ops_result < 0) {
		/*
		 * dlm does not support ops callbacks,
		 * old dlm_controld/gfs_controld are used, try without ops.
		 */
		fs_info(sdp, "dlm lockspace ops not used\n");
		free_recover_size(ls);
		set_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags);
		return 0;
	}

	if (!test_bit(SDF_NOJOURNALID, &sdp->sd_flags)) {
		fs_err(sdp, "dlm lockspace ops disallow jid preset\n");
		error = -EINVAL;
		goto fail_release;
	}

	/*
	 * control_mount() uses control_lock to determine first mounter,
	 * and for later mounts, waits for any recoveries to be cleared.
	 */

	error = control_mount(sdp);
	if (error) {
		fs_err(sdp, "mount control error %d\n", error);
		goto fail_release;
	}

	ls->ls_first = !!test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags);
	clear_bit(SDF_NOJOURNALID, &sdp->sd_flags);
	smp_mb__after_atomic();
	wake_up_bit(&sdp->sd_flags, SDF_NOJOURNALID);
	return 0;

fail_release:
	dlm_release_lockspace(ls->ls_dlm, 2);
fail_free:
	free_recover_size(ls);
fail:
	return error;
}

static void gdlm_first_done(struct gfs2_sbd *sdp)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	int error;

	if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags))
		return;

	error = control_first_done(sdp);
	if (error)
		fs_err(sdp, "mount first_done error %d\n", error);
}

static void gdlm_unmount(struct gfs2_sbd *sdp)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;

	if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags))
		goto release;

	/* wait for gfs2_control_wq to be done with this mount */

	spin_lock(&ls->ls_recover_spin);
	set_bit(DFL_UNMOUNT, &ls->ls_recover_flags);
	spin_unlock(&ls->ls_recover_spin);
	flush_delayed_work(&sdp->sd_control_work);

	/* mounted_lock and control_lock will be purged in dlm recovery */
release:
	if (ls->ls_dlm) {
		dlm_release_lockspace(ls->ls_dlm, 2);
		ls->ls_dlm = NULL;
	}

	free_recover_size(ls);
}

static const match_table_t dlm_tokens = {
	{ Opt_jid, "jid=%d"},
	{ Opt_id, "id=%d"},
	{ Opt_first, "first=%d"},
	{ Opt_nodir, "nodir=%d"},
	{ Opt_err, NULL },
};

const struct lm_lockops gfs2_dlm_ops = {
	.lm_proto_name = "lock_dlm",
	.lm_mount = gdlm_mount,
	.lm_first_done = gdlm_first_done,
	.lm_recovery_result = gdlm_recovery_result,
	.lm_unmount = gdlm_unmount,
	.lm_put_lock = gdlm_put_lock,
	.lm_lock = gdlm_lock,
	.lm_cancel = gdlm_cancel,
	.lm_tokens = &dlm_tokens,
};