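/*
 * drbd_bitmap.c
 *
 * DRBD's in-memory bitmap of out-of-sync blocks: tracks which blocks
 * still need to be resynchronized with the peer.
 */
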
#include <linux/bitops.h>
#include <linux/vmalloc.h>
#include <linux/string.h>
#include <linux/drbd.h>
#include <linux/slab.h>
#include <asm/kmap_types.h>

#include "drbd_int.h"

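/*
 * Layout conventions, as implied by the code below:
 *  - one bit of the bitmap covers BM_BLOCK_SIZE (4 KiB) of storage;
 *  - on disk the bitmap is always little endian; conversions are done
 *    with cpu_to_lel() wherever whole words are touched;
 *  - the bitmap lives in individually allocated pages (bm_pages[]), and
 *    per-page metadata (page index, IO lock, writeout flags) is packed
 *    into page_private() of each page.
 */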
struct drbd_bitmap {
	struct page **bm_pages;
	spinlock_t bm_lock;

	unsigned long bm_set;	/* number of currently set bits */
	unsigned long bm_bits;	/* total number of valid bits */
	size_t bm_words;	/* size of the bitmap in longs */
	size_t bm_number_of_pages;
	sector_t bm_dev_capacity;	/* device capacity covered by this bitmap */
	struct mutex bm_change;	/* serializes resize operations */

	wait_queue_head_t bm_io_wait;	/* used to serialize per-page IO */

	enum bm_flag bm_flags;

	/* debugging aid: who locked the bitmap, and why */
	char *bm_why;
	struct task_struct *bm_task;
};

#define bm_print_lock_info(m) __bm_print_lock_info(m, __func__)
static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func)
{
	struct drbd_bitmap *b = mdev->bitmap;
	if (!__ratelimit(&drbd_ratelimit_state))
		return;
	dev_err(DEV, "FIXME %s in %s, bitmap locked for '%s' by %s\n",
		current == mdev->receiver.task ? "receiver" :
		current == mdev->asender.task ? "asender" :
		current == mdev->worker.task ? "worker" : current->comm,
		func, b->bm_why ?: "?",
		b->bm_task == mdev->receiver.task ? "receiver" :
		b->bm_task == mdev->asender.task ? "asender" :
		b->bm_task == mdev->worker.task ? "worker" : "?");
}

void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags)
{
	struct drbd_bitmap *b = mdev->bitmap;
	int trylock_failed;

	if (!b) {
		dev_err(DEV, "FIXME no bitmap in drbd_bm_lock!?\n");
		return;
	}

	trylock_failed = !mutex_trylock(&b->bm_change);

	if (trylock_failed) {
		dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n",
			 current == mdev->receiver.task ? "receiver" :
			 current == mdev->asender.task ? "asender" :
			 current == mdev->worker.task ? "worker" : current->comm,
			 why, b->bm_why ?: "?",
			 b->bm_task == mdev->receiver.task ? "receiver" :
			 b->bm_task == mdev->asender.task ? "asender" :
			 b->bm_task == mdev->worker.task ? "worker" : "?");
		mutex_lock(&b->bm_change);
	}
	if (BM_LOCKED_MASK & b->bm_flags)
		dev_err(DEV, "FIXME bitmap already locked in bm_lock\n");
	b->bm_flags |= flags & BM_LOCKED_MASK;

	b->bm_why = why;
	b->bm_task = current;
}

void drbd_bm_unlock(struct drbd_conf *mdev)
{
	struct drbd_bitmap *b = mdev->bitmap;
	if (!b) {
		dev_err(DEV, "FIXME no bitmap in drbd_bm_unlock!?\n");
		return;
	}

	if (!(BM_LOCKED_MASK & mdev->bitmap->bm_flags))
		dev_err(DEV, "FIXME bitmap not locked in bm_unlock\n");

	b->bm_flags &= ~BM_LOCKED_MASK;
	b->bm_why = NULL;
	b->bm_task = NULL;
	mutex_unlock(&b->bm_change);
}

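/* We store some "meta" info about our pages in page->private.
 * The low 24 bits hold the page's index within the bitmap; at one bit
 * per 4 KiB of storage and 4 KiB pages, that covers about 2 PiB of
 * backing storage.  The high bits are used as per-page flags, defined
 * below. */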
#define BM_PAGE_IDX_MASK	((1UL<<24)-1)

/* this page is currently read in, or written back */
#define BM_PAGE_IO_LOCK		31

/* if there has been an IO error for this page */
#define BM_PAGE_IO_ERROR	30

/* this page is "dirty", and needs to be written out */
#define BM_PAGE_NEED_WRITEOUT	29

/* this page was changed by clearing bits only; such pages can be
 * written out lazily (see drbd_bm_write_lazy()) */
#define BM_PAGE_LAZY_WRITEOUT	28

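/* bm_store_page_idx() uses a plain, non-atomic assignment.  It is only
 * called directly after allocating the page, before anybody else can
 * see it.  All other modifications of page_private() must use atomic
 * bit operations, as bitmap changes may happen from several contexts
 * concurrently. */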
static void bm_store_page_idx(struct page *page, unsigned long idx)
{
	BUG_ON(0 != (idx & ~BM_PAGE_IDX_MASK));
	page_private(page) |= idx;
}

static unsigned long bm_page_to_idx(struct page *page)
{
	return page_private(page) & BM_PAGE_IDX_MASK;
}

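/* As it is very unlikely that the same page is under IO from more than
 * one context, we can get away with a bit per page and one wait queue
 * per bitmap. */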
static void bm_page_lock_io(struct drbd_conf *mdev, int page_nr)
{
	struct drbd_bitmap *b = mdev->bitmap;
	void *addr = &page_private(b->bm_pages[page_nr]);
	wait_event(b->bm_io_wait, !test_and_set_bit(BM_PAGE_IO_LOCK, addr));
}

static void bm_page_unlock_io(struct drbd_conf *mdev, int page_nr)
{
	struct drbd_bitmap *b = mdev->bitmap;
	void *addr = &page_private(b->bm_pages[page_nr]);
	clear_bit(BM_PAGE_IO_LOCK, addr);
	smp_mb__after_clear_bit();
	wake_up(&mdev->bitmap->bm_io_wait);
}

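/* set _before_ submit_io, so it may be reset due to being changed
 * while this page is in flight... will get submitted later again */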
static void bm_set_page_unchanged(struct page *page)
{
	clear_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page));
	clear_bit(BM_PAGE_LAZY_WRITEOUT, &page_private(page));
}

static void bm_set_page_need_writeout(struct page *page)
{
	set_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page));
}

static int bm_test_page_unchanged(struct page *page)
{
	volatile const unsigned long *addr = &page_private(page);
	return (*addr & ((1UL<<BM_PAGE_NEED_WRITEOUT)|(1UL<<BM_PAGE_LAZY_WRITEOUT))) == 0;
}

static void bm_set_page_io_err(struct page *page)
{
	set_bit(BM_PAGE_IO_ERROR, &page_private(page));
}

static void bm_clear_page_io_err(struct page *page)
{
	clear_bit(BM_PAGE_IO_ERROR, &page_private(page));
}

static void bm_set_page_lazy_writeout(struct page *page)
{
	set_bit(BM_PAGE_LAZY_WRITEOUT, &page_private(page));
}

static int bm_test_page_lazy_writeout(struct page *page)
{
	return test_bit(BM_PAGE_LAZY_WRITEOUT, &page_private(page));
}

static unsigned int bm_word_to_page_idx(struct drbd_bitmap *b, unsigned long long_nr)
{
	/* page_nr = (long_nr * sizeof(long)) >> PAGE_SHIFT; */
	unsigned int page_nr = long_nr >> (PAGE_SHIFT - LN2_BPL + 3);
	BUG_ON(page_nr >= b->bm_number_of_pages);
	return page_nr;
}

static unsigned int bm_bit_to_page_idx(struct drbd_bitmap *b, u64 bitnr)
{
	/* page_nr = (bitnr / 8) >> PAGE_SHIFT; */
	unsigned int page_nr = bitnr >> (PAGE_SHIFT + 3);
	BUG_ON(page_nr >= b->bm_number_of_pages);
	return page_nr;
}

static unsigned long *__bm_map_pidx(struct drbd_bitmap *b, unsigned int idx, const enum km_type km)
{
	struct page *page = b->bm_pages[idx];
	return (unsigned long *) kmap_atomic(page, km);
}

static unsigned long *bm_map_pidx(struct drbd_bitmap *b, unsigned int idx)
{
	return __bm_map_pidx(b, idx, KM_IRQ1);
}

static void __bm_unmap(unsigned long *p_addr, const enum km_type km)
{
	kunmap_atomic(p_addr, km);
}

static void bm_unmap(unsigned long *p_addr)
{
	__bm_unmap(p_addr, KM_IRQ1);
}

/* long word offset of _bitmap_ sector */
#define S2W(s)	((s)<<(BM_EXT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL))

/* word offset from the start of the bitmap to the word containing it,
 * i.e. modulo longs-per-page; "& (LWPP - 1)" rather than "% LWPP", so
 * we do not rely on the compiler turning the modulo into a mask */
#define MLPP(X) ((X) & ((PAGE_SIZE/sizeof(long))-1))

/* Long Words Per Page */
#define LWPP (PAGE_SIZE/sizeof(long))

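/*
 * The bitmap pages themselves are allocated one by one with
 * alloc_page(); only the page _pointer_ array is kmalloc'ed, falling
 * back to vmalloc if it grows beyond what kmalloc can serve.
 */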
static void bm_free_pages(struct page **pages, unsigned long number)
{
	unsigned long i;
	if (!pages)
		return;

	for (i = 0; i < number; i++) {
		if (!pages[i]) {
			printk(KERN_ALERT "drbd: bm_free_pages tried to free "
			       "a NULL pointer; i=%lu n=%lu\n",
			       i, number);
			continue;
		}
		__free_page(pages[i]);
		pages[i] = NULL;
	}
}

static void bm_vk_free(void *ptr, int v)
{
	if (v)
		vfree(ptr);
	else
		kfree(ptr);
}

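/*
 * "have" and "want" are NUMBER OF PAGES.
 * Returns the (possibly reallocated) page pointer array, or NULL on
 * allocation failure; the old array is left untouched in that case.
 */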
static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
{
	struct page **old_pages = b->bm_pages;
	struct page **new_pages, *page;
	unsigned int i, bytes, vmalloced = 0;
	unsigned long have = b->bm_number_of_pages;

	BUG_ON(have == 0 && old_pages != NULL);
	BUG_ON(have != 0 && old_pages == NULL);

	if (have == want)
		return old_pages;

	/* Trying kmalloc first, falling back to vmalloc.
	 * GFP_KERNEL is ok, as this is done when a lower level disk is
	 * "attached" to the drbd.  We are not in the IO path here, not
	 * even in the IO path of the peer. */
	bytes = sizeof(struct page *)*want;
	new_pages = kzalloc(bytes, GFP_KERNEL);
	if (!new_pages) {
		new_pages = vzalloc(bytes);
		if (!new_pages)
			return NULL;
		vmalloced = 1;
	}

	if (want >= have) {
		for (i = 0; i < have; i++)
			new_pages[i] = old_pages[i];
		for (; i < want; i++) {
			page = alloc_page(GFP_HIGHUSER);
			if (!page) {
				bm_free_pages(new_pages + have, i - have);
				bm_vk_free(new_pages, vmalloced);
				return NULL;
			}
			/* we want to know which page it is from the
			 * endio handlers */
			bm_store_page_idx(page, i);
			new_pages[i] = page;
		}
	} else {
		for (i = 0; i < want; i++)
			new_pages[i] = old_pages[i];
		/* NOT HERE, we are outside the spinlock!
		bm_free_pages(old_pages + want, have - want);
		*/
	}

	if (vmalloced)
		b->bm_flags |= BM_P_VMALLOCED;
	else
		b->bm_flags &= ~BM_P_VMALLOCED;

	return new_pages;
}

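/*
 * called on driver init only. TODO call when a device is created.
 * allocates the drbd_bitmap, and stores it in mdev->bitmap.
 */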
int drbd_bm_init(struct drbd_conf *mdev)
{
	struct drbd_bitmap *b = mdev->bitmap;
	WARN_ON(b != NULL);
	b = kzalloc(sizeof(struct drbd_bitmap), GFP_KERNEL);
	if (!b)
		return -ENOMEM;
	spin_lock_init(&b->bm_lock);
	mutex_init(&b->bm_change);
	init_waitqueue_head(&b->bm_io_wait);

	mdev->bitmap = b;

	return 0;
}

sector_t drbd_bm_capacity(struct drbd_conf *mdev)
{
	ERR_IF(!mdev->bitmap) return 0;
	return mdev->bitmap->bm_dev_capacity;
}

/* called on driver unload or ldev release */
void drbd_bm_cleanup(struct drbd_conf *mdev)
{
	ERR_IF (!mdev->bitmap) return;
	bm_free_pages(mdev->bitmap->bm_pages, mdev->bitmap->bm_number_of_pages);
	bm_vk_free(mdev->bitmap->bm_pages, (BM_P_VMALLOCED & mdev->bitmap->bm_flags));
	kfree(mdev->bitmap);
	mdev->bitmap = NULL;
}

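/*
 * since (b->bm_bits % BITS_PER_LONG) != 0,
 * this masks out the remaining bits.
 * Returns the number of bits cleared.
 */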
#define BITS_PER_PAGE		(1UL << (PAGE_SHIFT + 3))
#define BITS_PER_PAGE_MASK	(BITS_PER_PAGE - 1)
#define BITS_PER_LONG_MASK	(BITS_PER_LONG - 1)
static int bm_clear_surplus(struct drbd_bitmap *b)
{
	unsigned long mask;
	unsigned long *p_addr, *bm;
	int tmp;
	int cleared = 0;

	/* number of bits modulo bits per page */
	tmp = (b->bm_bits & BITS_PER_PAGE_MASK);
	/* mask the used bits of the word containing the last bit */
	mask = (1UL << (tmp & BITS_PER_LONG_MASK)) - 1;
	/* bitmap is always stored little endian,
	 * on disk and in core memory alike */
	mask = cpu_to_lel(mask);

	p_addr = bm_map_pidx(b, b->bm_number_of_pages - 1);
	bm = p_addr + (tmp/BITS_PER_LONG);
	if (mask) {
		/* If mask != 0, we are not exactly aligned, so bm now points
		 * to the long containing the last bit.
		 * If mask == 0, bm already points to the word immediately
		 * after the last (long word aligned) bit. */
		cleared = hweight_long(*bm & ~mask);
		*bm &= mask;
		bm++;
	}

	if (BITS_PER_LONG == 32 && ((bm - p_addr) & 1) == 1) {
		/* on a 32bit arch, we may need to zero out
		 * a padding long to align with a 64bit remote */
		cleared += hweight_long(*bm);
		*bm = 0;
	}
	bm_unmap(p_addr);
	return cleared;
}

static void bm_set_surplus(struct drbd_bitmap *b)
{
	unsigned long mask;
	unsigned long *p_addr, *bm;
	int tmp;

	/* number of bits modulo bits per page */
	tmp = (b->bm_bits & BITS_PER_PAGE_MASK);
	/* mask the used bits of the word containing the last bit */
	mask = (1UL << (tmp & BITS_PER_LONG_MASK)) - 1;
	/* bitmap is always stored little endian,
	 * on disk and in core memory alike */
	mask = cpu_to_lel(mask);

	p_addr = bm_map_pidx(b, b->bm_number_of_pages - 1);
	bm = p_addr + (tmp/BITS_PER_LONG);
	if (mask) {
		/* If mask != 0, bm points to the long containing the last
		 * bit; set all bits beyond it.  If mask == 0, bm already
		 * points to the first surplus word. */
		*bm |= ~mask;
		bm++;
	}

	if (BITS_PER_LONG == 32 && ((bm - p_addr) & 1) == 1) {
		/* on a 32bit arch, we may need to fill
		 * a padding long to align with a 64bit remote */
		*bm = ~0UL;
	}
	bm_unmap(p_addr);
}

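/* you better not modify the bitmap while this is running,
 * or its results will be stale */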
static unsigned long bm_count_bits(struct drbd_bitmap *b)
{
	unsigned long *p_addr;
	unsigned long bits = 0;
	unsigned long mask = (1UL << (b->bm_bits & BITS_PER_LONG_MASK)) - 1;
	int idx, i, last_word;

	/* all but last page */
	for (idx = 0; idx < b->bm_number_of_pages - 1; idx++) {
		p_addr = __bm_map_pidx(b, idx, KM_USER0);
		for (i = 0; i < LWPP; i++)
			bits += hweight_long(p_addr[i]);
		__bm_unmap(p_addr, KM_USER0);
		cond_resched();
	}
	/* last (or only) page */
	last_word = ((b->bm_bits - 1) & BITS_PER_PAGE_MASK) >> LN2_BPL;
	p_addr = __bm_map_pidx(b, idx, KM_USER0);
	for (i = 0; i < last_word; i++)
		bits += hweight_long(p_addr[i]);
	p_addr[last_word] &= cpu_to_lel(mask);
	bits += hweight_long(p_addr[last_word]);
	/* 32bit arch, may have an unused padding long */
	if (BITS_PER_LONG == 32 && (last_word & 1) == 0)
		p_addr[last_word+1] = 0;
	__bm_unmap(p_addr, KM_USER0);
	return bits;
}

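/* offset and len in long words. */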
static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len)
{
	unsigned long *p_addr, *bm;
	unsigned int idx;
	size_t do_now, end;

	end = offset + len;

	if (end > b->bm_words) {
		printk(KERN_ALERT "drbd: bm_memset end > bm_words\n");
		return;
	}

	while (offset < end) {
		do_now = min_t(size_t, ALIGN(offset + 1, LWPP), end) - offset;
		idx = bm_word_to_page_idx(b, offset);
		p_addr = bm_map_pidx(b, idx);
		bm = p_addr + MLPP(offset);
		if (bm + do_now > p_addr + LWPP) {
			printk(KERN_ALERT "drbd: BUG BUG BUG! p_addr:%p bm:%p do_now:%d\n",
			       p_addr, bm, (int)do_now);
		} else
			memset(bm, c, do_now * sizeof(long));
		bm_unmap(p_addr);
		bm_set_page_need_writeout(b->bm_pages[idx]);
		offset += do_now;
	}
}

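/*
 * make sure the bitmap has enough room for the attached storage,
 * if necessary, resize.
 * called whenever we may have changed the device size.
 * returns -ENOMEM if we could not allocate enough memory, 0 on success.
 * In case this is actually a resize, we copy the old bitmap pages to the new.
 * Otherwise, the bitmap is initialized to all bits set.
 */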
int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits)
{
	struct drbd_bitmap *b = mdev->bitmap;
	unsigned long bits, words, owords, obits;
	unsigned long want, have, onpages;
	struct page **npages, **opages = NULL;
	int err = 0, growing;
	int opages_vmalloced;

	ERR_IF(!b) return -ENOMEM;

	drbd_bm_lock(mdev, "resize", BM_LOCKED_MASK);

	dev_info(DEV, "drbd_bm_resize called with capacity == %llu\n",
			(unsigned long long)capacity);

	if (capacity == b->bm_dev_capacity)
		goto out;

	opages_vmalloced = (BM_P_VMALLOCED & b->bm_flags);

	if (capacity == 0) {
		spin_lock_irq(&b->bm_lock);
		opages = b->bm_pages;
		onpages = b->bm_number_of_pages;
		owords = b->bm_words;
		b->bm_pages = NULL;
		b->bm_number_of_pages =
		b->bm_set =
		b->bm_bits =
		b->bm_words =
		b->bm_dev_capacity = 0;
		spin_unlock_irq(&b->bm_lock);
		bm_free_pages(opages, onpages);
		bm_vk_free(opages, opages_vmalloced);
		goto out;
	}
	bits = BM_SECT_TO_BIT(ALIGN(capacity, BM_SECT_PER_BIT));

	/* if we would use
	   words = ALIGN(bits,BITS_PER_LONG) >> LN2_BPL;
	   a 32bit host could present the wrong number of words
	   to a 64bit host.
	*/
	words = ALIGN(bits, 64) >> LN2_BPL;

	if (get_ldev(mdev)) {
		u64 bits_on_disk = ((u64)mdev->ldev->md.md_size_sect-MD_BM_OFFSET) << 12;
		put_ldev(mdev);
		if (bits > bits_on_disk) {
			dev_info(DEV, "bits = %lu\n", bits);
			dev_info(DEV, "bits_on_disk = %llu\n", bits_on_disk);
			err = -ENOSPC;
			goto out;
		}
	}

	want = ALIGN(words*sizeof(long), PAGE_SIZE) >> PAGE_SHIFT;
	have = b->bm_number_of_pages;
	if (want == have) {
		D_ASSERT(b->bm_pages != NULL);
		npages = b->bm_pages;
	} else {
		if (drbd_insert_fault(mdev, DRBD_FAULT_BM_ALLOC))
			npages = NULL;
		else
			npages = bm_realloc_pages(b, want);
	}

	if (!npages) {
		err = -ENOMEM;
		goto out;
	}

	spin_lock_irq(&b->bm_lock);
	opages = b->bm_pages;
	owords = b->bm_words;
	obits = b->bm_bits;

	growing = bits > obits;
	if (opages && growing && set_new_bits)
		bm_set_surplus(b);

	b->bm_pages = npages;
	b->bm_number_of_pages = want;
	b->bm_bits = bits;
	b->bm_words = words;
	b->bm_dev_capacity = capacity;

	if (growing) {
		if (set_new_bits) {
			bm_memset(b, owords, 0xff, words-owords);
			b->bm_set += bits - obits;
		} else
			bm_memset(b, owords, 0x00, words-owords);
	}

	if (want < have) {
		/* implicit: (opages != NULL) && (opages != npages) */
		bm_free_pages(opages + want, have - want);
	}

	(void)bm_clear_surplus(b);

	spin_unlock_irq(&b->bm_lock);
	if (opages != npages)
		bm_vk_free(opages, opages_vmalloced);
	if (!growing)
		b->bm_set = bm_count_bits(b);
	dev_info(DEV, "resync bitmap: bits=%lu words=%lu pages=%lu\n", bits, words, want);

 out:
	drbd_bm_unlock(mdev);
	return err;
}

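/* inherently racy:
 * if not protected by other means, return value may be out of date when
 * leaving this function...
 * we still need to lock it, since it is important that this returns
 * bm_set == 0 precisely.
 *
 * maybe bm_set should be atomic_t ?
 */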
unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev)
{
	struct drbd_bitmap *b = mdev->bitmap;
	unsigned long s;
	unsigned long flags;

	ERR_IF(!b) return 0;
	ERR_IF(!b->bm_pages) return 0;

	spin_lock_irqsave(&b->bm_lock, flags);
	s = b->bm_set;
	spin_unlock_irqrestore(&b->bm_lock, flags);

	return s;
}

unsigned long drbd_bm_total_weight(struct drbd_conf *mdev)
{
	unsigned long s;
	/* if I don't have a disk, I don't know about out-of-sync bits */
	if (!get_ldev_if_state(mdev, D_NEGOTIATING))
		return 0;
	s = _drbd_bm_total_weight(mdev);
	put_ldev(mdev);
	return s;
}

size_t drbd_bm_words(struct drbd_conf *mdev)
{
	struct drbd_bitmap *b = mdev->bitmap;
	ERR_IF(!b) return 0;
	ERR_IF(!b->bm_pages) return 0;

	return b->bm_words;
}

unsigned long drbd_bm_bits(struct drbd_conf *mdev)
{
	struct drbd_bitmap *b = mdev->bitmap;
	ERR_IF(!b) return 0;

	return b->bm_bits;
}

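/* merge number words from buffer into the bitmap starting at offset.
 * buffer[i] is expected to be little endian unsigned long.
 * bitmap must be locked by drbd_bm_lock.
 */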
void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number,
			unsigned long *buffer)
{
	struct drbd_bitmap *b = mdev->bitmap;
	unsigned long *p_addr, *bm;
	unsigned long word, bits;
	unsigned int idx;
	size_t end, do_now;

	end = offset + number;

	ERR_IF(!b) return;
	ERR_IF(!b->bm_pages) return;
	if (number == 0)
		return;
	WARN_ON(offset >= b->bm_words);
	WARN_ON(end > b->bm_words);

	spin_lock_irq(&b->bm_lock);
	while (offset < end) {
		do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset;
		idx = bm_word_to_page_idx(b, offset);
		p_addr = bm_map_pidx(b, idx);
		bm = p_addr + MLPP(offset);
		offset += do_now;
		while (do_now--) {
			bits = hweight_long(*bm);
			word = *bm | *buffer++;
			*bm++ = word;
			b->bm_set += hweight_long(word) - bits;
		}
		bm_unmap(p_addr);
		bm_set_page_need_writeout(b->bm_pages[idx]);
	}
	/* with 32bit <-> 64bit cross-platform connect
	 * this is only correct for current usage,
	 * where we _know_ that we are 64 bit aligned,
	 * and know that this function is used in this way, too...
	 */
	if (end == b->bm_words)
		b->bm_set -= bm_clear_surplus(b);
	spin_unlock_irq(&b->bm_lock);
}

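/* copy number words from the bitmap starting at offset into the buffer.
 * buffer[i] will be little endian unsigned long.
 */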
void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number,
		     unsigned long *buffer)
{
	struct drbd_bitmap *b = mdev->bitmap;
	unsigned long *p_addr, *bm;
	size_t end, do_now;

	end = offset + number;

	ERR_IF(!b) return;
	ERR_IF(!b->bm_pages) return;

	spin_lock_irq(&b->bm_lock);
	if ((offset >= b->bm_words) ||
	    (end > b->bm_words) ||
	    (number <= 0))
		dev_err(DEV, "offset=%lu number=%lu bm_words=%lu\n",
			(unsigned long) offset,
			(unsigned long) number,
			(unsigned long) b->bm_words);
	else {
		while (offset < end) {
			do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset;
			p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, offset));
			bm = p_addr + MLPP(offset);
			offset += do_now;
			while (do_now--)
				*buffer++ = *bm++;
			bm_unmap(p_addr);
		}
	}
	spin_unlock_irq(&b->bm_lock);
}

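/* set all bits in the bitmap */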
void drbd_bm_set_all(struct drbd_conf *mdev)
{
	struct drbd_bitmap *b = mdev->bitmap;
	ERR_IF(!b) return;
	ERR_IF(!b->bm_pages) return;

	spin_lock_irq(&b->bm_lock);
	bm_memset(b, 0, 0xff, b->bm_words);
	(void)bm_clear_surplus(b);
	b->bm_set = b->bm_bits;
	spin_unlock_irq(&b->bm_lock);
}

/* clear all bits in the bitmap */
void drbd_bm_clear_all(struct drbd_conf *mdev)
{
	struct drbd_bitmap *b = mdev->bitmap;
	ERR_IF(!b) return;
	ERR_IF(!b->bm_pages) return;

	spin_lock_irq(&b->bm_lock);
	bm_memset(b, 0, 0, b->bm_words);
	b->bm_set = 0;
	spin_unlock_irq(&b->bm_lock);
}

struct bm_aio_ctx {
	struct drbd_conf *mdev;
	atomic_t in_flight;
	struct completion done;
	unsigned flags;
#define BM_AIO_COPY_PAGES	1
	int error;
};

/* bv_page may be a copy, or may be the original */
static void bm_async_io_complete(struct bio *bio, int error)
{
	struct bm_aio_ctx *ctx = bio->bi_private;
	struct drbd_conf *mdev = ctx->mdev;
	struct drbd_bitmap *b = mdev->bitmap;
	unsigned int idx = bm_page_to_idx(bio->bi_io_vec[0].bv_page);
	int uptodate = bio_flagged(bio, BIO_UPTODATE);

	/* strange behavior of some lower level drivers...
	 * fail the request by clearing the uptodate flag,
	 * but do not return any error?!
	 * do we want to WARN() on this? */
	if (!error && !uptodate)
		error = -EIO;

	if ((ctx->flags & BM_AIO_COPY_PAGES) == 0 &&
	    !bm_test_page_unchanged(b->bm_pages[idx]))
		dev_warn(DEV, "bitmap page idx %u changed during IO!\n", idx);

	if (error) {
		/* ctx->error keeps the last non-zero error code,
		 * in case error codes differ between pages */
		ctx->error = error;
		bm_set_page_io_err(b->bm_pages[idx]);
		/* Not identical to on disk version of it.
		 * Is BM_PAGE_IO_ERROR enough? */
		if (__ratelimit(&drbd_ratelimit_state))
			dev_err(DEV, "IO ERROR %d on bitmap page idx %u\n",
				error, idx);
	} else {
		bm_clear_page_io_err(b->bm_pages[idx]);
		dynamic_dev_dbg(DEV, "bitmap page idx %u completed\n", idx);
	}

	bm_page_unlock_io(mdev, idx);

	/* FIXME give back to page pool */
	if (ctx->flags & BM_AIO_COPY_PAGES)
		put_page(bio->bi_io_vec[0].bv_page);

	bio_put(bio);

	if (atomic_dec_and_test(&ctx->in_flight))
		complete(&ctx->done);
}

static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must_hold(local)
{
	/* we are process context. we always get a bio */
	struct bio *bio = bio_alloc(GFP_KERNEL, 1);
	struct drbd_conf *mdev = ctx->mdev;
	struct drbd_bitmap *b = mdev->bitmap;
	struct page *page;
	unsigned int len;

	sector_t on_disk_sector =
		mdev->ldev->md.md_offset + mdev->ldev->md.bm_offset;
	on_disk_sector += ((sector_t)page_nr) << (PAGE_SHIFT-9);

	/* this might happen with very small
	 * flexible external meta data device,
	 * or with PAGE_SIZE > 4k */
	len = min_t(unsigned int, PAGE_SIZE,
		(drbd_md_last_sector(mdev->ldev) - on_disk_sector + 1)<<9);

	/* serialize IO on this page */
	bm_page_lock_io(mdev, page_nr);
	/* before memcpy and submit,
	 * so it can be redirtied any time */
	bm_set_page_unchanged(b->bm_pages[page_nr]);

	if (ctx->flags & BM_AIO_COPY_PAGES) {
		/* full-page copy, so the page cannot change under the IO */
		void *src, *dest;
		page = alloc_page(__GFP_HIGHMEM|__GFP_WAIT);
		dest = kmap_atomic(page, KM_USER0);
		src = kmap_atomic(b->bm_pages[page_nr], KM_USER1);
		memcpy(dest, src, PAGE_SIZE);
		kunmap_atomic(src, KM_USER1);
		kunmap_atomic(dest, KM_USER0);
		bm_store_page_idx(page, page_nr);
	} else
		page = b->bm_pages[page_nr];

	bio->bi_bdev = mdev->ldev->md_bdev;
	bio->bi_sector = on_disk_sector;
	bio_add_page(bio, page, len, 0);
	bio->bi_private = ctx;
	bio->bi_end_io = bm_async_io_complete;

	if (drbd_insert_fault(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) {
		bio->bi_rw |= rw;
		bio_endio(bio, -EIO);
	} else {
		submit_bio(rw, bio);
		/* this should not count as user activity and cause the
		 * resync to throttle -- see drbd_rs_should_slow_down(). */
		atomic_add(len >> 9, &mdev->rs_sect_ev);
	}
}

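/*
 * bm_rw: read/write the whole bitmap from/to its on disk location.
 */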
static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_idx) __must_hold(local)
{
	struct bm_aio_ctx ctx = {
		.mdev = mdev,
		.in_flight = ATOMIC_INIT(1),
		.done = COMPLETION_INITIALIZER_ONSTACK(ctx.done),
		.flags = lazy_writeout_upper_idx ? BM_AIO_COPY_PAGES : 0,
	};
	struct drbd_bitmap *b = mdev->bitmap;
	int num_pages, i, count = 0;
	unsigned long now;
	char ppb[10];
	int err = 0;

	/*
	 * We are protected against bitmap disappearing/resizing by holding an
	 * ldev reference (caller must have called get_ldev()).
	 * For read/write, we are protected against changes to the bitmap by
	 * the bitmap lock (see drbd_bitmap_io).
	 * For lazy writeout, we don't care for ongoing changes to the bitmap,
	 * as we submit copies of pages anyways.
	 */
	if (!ctx.flags)
		WARN_ON(!(BM_LOCKED_MASK & b->bm_flags));

	num_pages = b->bm_number_of_pages;

	now = jiffies;

	/* let the layers below us try to merge these bios... */
	for (i = 0; i < num_pages; i++) {
		/* ignore completely unchanged pages */
		if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx)
			break;
		if (rw & WRITE) {
			if (bm_test_page_unchanged(b->bm_pages[i])) {
				dynamic_dev_dbg(DEV, "skipped bm write for idx %u\n", i);
				continue;
			}
			/* during lazy writeout,
			 * ignore those pages not marked for lazy writeout. */
			if (lazy_writeout_upper_idx &&
			    !bm_test_page_lazy_writeout(b->bm_pages[i])) {
				dynamic_dev_dbg(DEV, "skipped bm lazy write for idx %u\n", i);
				continue;
			}
		}
		atomic_inc(&ctx.in_flight);
		bm_page_io_async(&ctx, i, rw);
		++count;
		cond_resched();
	}

	/*
	 * We initialize ctx.in_flight to one to make sure bm_async_io_complete
	 * will not set ctx.done early, and decrement / test it here.  If there
	 * are still some bios in flight, we need to wait for them here.
	 */
	if (!atomic_dec_and_test(&ctx.in_flight))
		wait_for_completion(&ctx.done);
	dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n",
			rw == WRITE ? "WRITE" : "READ",
			count, jiffies - now);

	if (ctx.error) {
		dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n");
		drbd_chk_io_error(mdev, 1, true);
		err = -EIO; /* ctx.error ? */
	}

	now = jiffies;
	if (rw == WRITE) {
		drbd_md_flush(mdev);
	} else /* rw == READ */ {
		b->bm_set = bm_count_bits(b);
		dev_info(DEV, "recounting of set bits took additional %lu jiffies\n",
				jiffies - now);
	}
	now = b->bm_set;

	dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
	     ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now);

	return err;
}

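/**
 * drbd_bm_read() - Read the whole bitmap from its on disk location.
 * @mdev:	DRBD device.
 */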
int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local)
{
	return bm_rw(mdev, READ, 0);
}

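/**
 * drbd_bm_write() - Write the whole bitmap to its on disk location.
 * @mdev:	DRBD device.
 *
 * Will only write pages that have changed since last IO.
 */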
int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local)
{
	return bm_rw(mdev, WRITE, 0);
}

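/**
 * drbd_bm_write_lazy() - Write bitmap pages 0 to @upper_idx-1, if they have changed.
 * @mdev:	DRBD device.
 * @upper_idx:	0: write all changed pages;
 *		+ve: page index to stop scanning for changed pages
 */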
int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(local)
{
	return bm_rw(mdev, WRITE, upper_idx);
}

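/**
 * drbd_bm_write_page() - Writes a PAGE_SIZE aligned piece of bitmap
 * @mdev:	DRBD device.
 * @idx:	bitmap page index
 *
 * We don't want to special case on logical_block_size of the backend device,
 * so we submit PAGE_SIZE aligned pieces.
 * Note that on "most" systems, PAGE_SIZE is 4096.
 *
 * In case this becomes an issue on systems with larger PAGE_SIZE,
 * we may want to change this again to write 4k aligned 4k pieces.
 */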
int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local)
{
	struct bm_aio_ctx ctx = {
		.mdev = mdev,
		.in_flight = ATOMIC_INIT(1),
		.done = COMPLETION_INITIALIZER_ONSTACK(ctx.done),
		.flags = BM_AIO_COPY_PAGES,
	};

	if (bm_test_page_unchanged(mdev->bitmap->bm_pages[idx])) {
		dynamic_dev_dbg(DEV, "skipped bm page write for idx %u\n", idx);
		return 0;
	}

	bm_page_io_async(&ctx, idx, WRITE_SYNC);
	wait_for_completion(&ctx.done);

	if (ctx.error)
		drbd_chk_io_error(mdev, 1, true);
		/* that should force detach, so the in memory bitmap will be
		 * gone in a moment as well. */

	mdev->bm_writ_cnt++;
	return ctx.error;
}

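/* NOTE
 * find_first_bit returns int, we return unsigned long.
 * For this to work on 32bit arch with bitnumbers > (1<<32),
 * we'd need to return u64, and get a whole lot of other places
 * fixed where we still use unsigned long.
 *
 * this returns a bit number, NOT a sector!
 */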
static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo,
	const int find_zero_bit, const enum km_type km)
{
	struct drbd_bitmap *b = mdev->bitmap;
	unsigned long *p_addr;
	unsigned long bit_offset;
	unsigned i;

	if (bm_fo > b->bm_bits) {
		dev_err(DEV, "bm_fo=%lu bm_bits=%lu\n", bm_fo, b->bm_bits);
		bm_fo = DRBD_END_OF_BITMAP;
	} else {
		while (bm_fo < b->bm_bits) {
			/* bit offset of the first bit in the page */
			bit_offset = bm_fo & ~BITS_PER_PAGE_MASK;
			p_addr = __bm_map_pidx(b, bm_bit_to_page_idx(b, bm_fo), km);

			if (find_zero_bit)
				i = find_next_zero_bit_le(p_addr,
						PAGE_SIZE*8, bm_fo & BITS_PER_PAGE_MASK);
			else
				i = find_next_bit_le(p_addr,
						PAGE_SIZE*8, bm_fo & BITS_PER_PAGE_MASK);

			__bm_unmap(p_addr, km);
			if (i < PAGE_SIZE*8) {
				bm_fo = bit_offset + i;
				if (bm_fo >= b->bm_bits)
					break;
				goto found;
			}
			bm_fo = bit_offset + PAGE_SIZE*8;
		}
		bm_fo = DRBD_END_OF_BITMAP;
	}
 found:
	return bm_fo;
}

static unsigned long bm_find_next(struct drbd_conf *mdev,
		unsigned long bm_fo, const int find_zero_bit)
{
	struct drbd_bitmap *b = mdev->bitmap;
	unsigned long i = DRBD_END_OF_BITMAP;

	ERR_IF(!b) return i;
	ERR_IF(!b->bm_pages) return i;

	spin_lock_irq(&b->bm_lock);
	if (BM_DONT_TEST & b->bm_flags)
		bm_print_lock_info(mdev);

	i = __bm_find_next(mdev, bm_fo, find_zero_bit, KM_IRQ1);

	spin_unlock_irq(&b->bm_lock);
	return i;
}

unsigned long drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo)
{
	return bm_find_next(mdev, bm_fo, 0);
}

#if 0
/* not yet needed for anything. */
unsigned long drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo)
{
	return bm_find_next(mdev, bm_fo, 1);
}
#endif

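/* does not spin_lock_irqsave.
 * you must take drbd_bm_lock() first */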
unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo)
{
	/* WARN_ON(!(BM_DONT_SET & mdev->b->bm_flags)); */
	return __bm_find_next(mdev, bm_fo, 0, KM_USER1);
}

unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo)
{
	/* WARN_ON(!(BM_DONT_SET & mdev->b->bm_flags)); */
	return __bm_find_next(mdev, bm_fo, 1, KM_USER1);
}

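/* returns number of bits actually changed.
 * for val != 0, we change 0 -> 1, return code positive
 * for val == 0, we change 1 -> 0, return code negative
 * wanted bitnr is expected to be in the range [0; bm_bits).
 * Must hold the bitmap lock already. */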
static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
	unsigned long e, int val)
{
	struct drbd_bitmap *b = mdev->bitmap;
	unsigned long *p_addr = NULL;
	unsigned long bitnr;
	unsigned int last_page_nr = -1U;
	int c = 0;
	int changed_total = 0;

	if (e >= b->bm_bits) {
		dev_err(DEV, "ASSERT FAILED: bit_s=%lu bit_e=%lu bm_bits=%lu\n",
				s, e, b->bm_bits);
		e = b->bm_bits ? b->bm_bits - 1 : 0;
	}
	for (bitnr = s; bitnr <= e; bitnr++) {
		unsigned int page_nr = bm_bit_to_page_idx(b, bitnr);
		if (page_nr != last_page_nr) {
			if (p_addr)
				__bm_unmap(p_addr, KM_IRQ1);
			if (c < 0)
				bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]);
			else if (c > 0)
				bm_set_page_need_writeout(b->bm_pages[last_page_nr]);
			changed_total += c;
			c = 0;
			p_addr = __bm_map_pidx(b, page_nr, KM_IRQ1);
			last_page_nr = page_nr;
		}
		if (val)
			c += (0 == __test_and_set_bit_le(bitnr & BITS_PER_PAGE_MASK, p_addr));
		else
			c -= (0 != __test_and_clear_bit_le(bitnr & BITS_PER_PAGE_MASK, p_addr));
	}
	if (p_addr)
		__bm_unmap(p_addr, KM_IRQ1);
	if (c < 0)
		bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]);
	else if (c > 0)
		bm_set_page_need_writeout(b->bm_pages[last_page_nr]);
	changed_total += c;
	b->bm_set += changed_total;
	return changed_total;
}

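/* returns number of bits actually changed.
 * for val != 0, we change 0 -> 1, return code positive
 * for val == 0, we change 1 -> 0, return code negative
 * wants bitnr, not sector */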
static int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
	const unsigned long e, int val)
{
	unsigned long flags;
	struct drbd_bitmap *b = mdev->bitmap;
	int c = 0;

	ERR_IF(!b) return 1;
	ERR_IF(!b->bm_pages) return 0;

	spin_lock_irqsave(&b->bm_lock, flags);
	if ((val ? BM_DONT_SET : BM_DONT_CLEAR) & b->bm_flags)
		bm_print_lock_info(mdev);

	c = __bm_change_bits_to(mdev, s, e, val);

	spin_unlock_irqrestore(&b->bm_lock, flags);
	return c;
}

/* returns number of bits changed 0 -> 1 */
int drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e)
{
	return bm_change_bits_to(mdev, s, e, 1);
}

/* returns number of bits changed 1 -> 0 */
int drbd_bm_clear_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e)
{
	return -bm_change_bits_to(mdev, s, e, 0);
}

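/* sets all bits in full words,
 * from first_word up to, but not including, last_word */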
static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b,
		int page_nr, int first_word, int last_word)
{
	int i;
	int bits;
	unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr], KM_IRQ1);
	for (i = first_word; i < last_word; i++) {
		bits = hweight_long(paddr[i]);
		paddr[i] = ~0UL;
		b->bm_set += BITS_PER_LONG - bits;
	}
	kunmap_atomic(paddr, KM_IRQ1);
}

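/* Same thing as drbd_bm_set_bits,
 * but more efficient for a large bit range.
 * You must first drbd_bm_lock().
 * Can be called to set the whole bitmap in one go.
 * Sets bits from s to e _inclusive_. */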
void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e)
{
	/* First set_bit from the first bit (s)
	 * up to the next long boundary (sl),
	 * then assign full words up to the last long boundary (el),
	 * then set_bit up to and including the last bit (e).
	 *
	 * Do not use memset, because we must account for changes,
	 * so we need to loop over the words with hweight() anyway.
	 */
	struct drbd_bitmap *b = mdev->bitmap;
	unsigned long sl = ALIGN(s,BITS_PER_LONG);
	unsigned long el = (e+1) & ~((unsigned long)BITS_PER_LONG-1);
	int first_page;
	int last_page;
	int page_nr;
	int first_word;
	int last_word;

	if (e - s <= 3*BITS_PER_LONG) {
		/* don't bother; el and sl may even be wrong. */
		spin_lock_irq(&b->bm_lock);
		__bm_change_bits_to(mdev, s, e, 1);
		spin_unlock_irq(&b->bm_lock);
		return;
	}

	/* difference is large enough that s and e are certainly in different pages */

	spin_lock_irq(&b->bm_lock);

	/* bits up to the next long boundary */
	if (sl)
		__bm_change_bits_to(mdev, s, sl-1, 1);

	first_page = sl >> (3 + PAGE_SHIFT);
	last_page = el >> (3 + PAGE_SHIFT);

	/* MLPP: modulo longs per page */
	/* LWPP: long words per page */
	first_word = MLPP(sl >> LN2_BPL);
	last_word = LWPP;

	/* first and full pages, unless first page == last page */
	for (page_nr = first_page; page_nr < last_page; page_nr++) {
		bm_set_full_words_within_one_page(mdev->bitmap, page_nr, first_word, last_word);
		spin_unlock_irq(&b->bm_lock);
		cond_resched();
		first_word = 0;
		spin_lock_irq(&b->bm_lock);
	}

	/* last page (respectively only page, for first page == last page) */
	last_word = MLPP(el >> LN2_BPL);
	bm_set_full_words_within_one_page(mdev->bitmap, last_page, first_word, last_word);

	/* possibly trailing bits.
	 * example: (e & 63) == 63, el will be e+1.
	 * if that even was the very last bit,
	 * it would trigger an assert in __bm_change_bits_to()
	 */
	if (el <= e)
		__bm_change_bits_to(mdev, el, e, 1);
	spin_unlock_irq(&b->bm_lock);
}

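/* returns bit state
 * wants bitnr, NOT sector.
 * inherently racy... area needs to be locked by other means
 *  1 ... bit set
 *  0 ... bit not set
 * -1 ... first bit out of bounds
 */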
int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr)
{
	unsigned long flags;
	struct drbd_bitmap *b = mdev->bitmap;
	unsigned long *p_addr;
	int i;

	ERR_IF(!b) return 0;
	ERR_IF(!b->bm_pages) return 0;

	spin_lock_irqsave(&b->bm_lock, flags);
	if (BM_DONT_TEST & b->bm_flags)
		bm_print_lock_info(mdev);
	if (bitnr < b->bm_bits) {
		p_addr = bm_map_pidx(b, bm_bit_to_page_idx(b, bitnr));
		i = test_bit_le(bitnr & BITS_PER_PAGE_MASK, p_addr) ? 1 : 0;
		bm_unmap(p_addr);
	} else if (bitnr == b->bm_bits) {
		i = -1;
	} else {
		dev_err(DEV, "bitnr=%lu > bm_bits=%lu\n", bitnr, b->bm_bits);
		i = 0;
	}

	spin_unlock_irqrestore(&b->bm_lock, flags);
	return i;
}

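/* returns number of bits set in the range [s, e] */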
int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e)
{
	unsigned long flags;
	struct drbd_bitmap *b = mdev->bitmap;
	unsigned long *p_addr = NULL;
	unsigned long bitnr;
	unsigned int page_nr = -1U;
	int c = 0;

	/* If this is called without a bitmap, that is a bug.  But just to be
	 * robust in case we screwed up elsewhere, in that case pretend there
	 * was one dirty bit in the requested area, so we won't try to do a
	 * local read there (no bitmap probably implies no disk) */
	ERR_IF(!b) return 1;
	ERR_IF(!b->bm_pages) return 1;

	spin_lock_irqsave(&b->bm_lock, flags);
	if (BM_DONT_TEST & b->bm_flags)
		bm_print_lock_info(mdev);
	for (bitnr = s; bitnr <= e; bitnr++) {
		unsigned int idx = bm_bit_to_page_idx(b, bitnr);
		if (page_nr != idx) {
			page_nr = idx;
			if (p_addr)
				bm_unmap(p_addr);
			p_addr = bm_map_pidx(b, idx);
		}
		ERR_IF (bitnr >= b->bm_bits) {
			dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits);
		} else {
			c += (0 != test_bit_le(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr));
		}
	}
	if (p_addr)
		bm_unmap(p_addr);
	spin_unlock_irqrestore(&b->bm_lock, flags);
	return c;
}

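/* inherently racy...
 * return value may be already out-of-date when this function returns.
 * but the general usage is that this is only used during a cstate when bits
 * are only cleared, not set, and we typically only care for the case when
 * the return value is zero, or we already "locked" this "bitmap extent" by
 * other means.
 *
 * enr is bm-extent number, since we chose to name one sector (512 bytes)
 * worth of the bitmap spread over all the extents of the bitmap a "bm-extent".
 */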
int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr)
{
	struct drbd_bitmap *b = mdev->bitmap;
	int count, s, e;
	unsigned long flags;
	unsigned long *p_addr, *bm;

	ERR_IF(!b) return 0;
	ERR_IF(!b->bm_pages) return 0;

	spin_lock_irqsave(&b->bm_lock, flags);
	if (BM_DONT_TEST & b->bm_flags)
		bm_print_lock_info(mdev);

	s = S2W(enr);
	e = min((size_t)S2W(enr+1), b->bm_words);
	count = 0;
	if (s < b->bm_words) {
		int n = e-s;
		p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s));
		bm = p_addr + MLPP(s);
		while (n--)
			count += hweight_long(*bm++);
		bm_unmap(p_addr);
	} else {
		dev_err(DEV, "start offset (%d) too large in drbd_bm_e_weight\n", s);
	}
	spin_unlock_irqrestore(&b->bm_lock, flags);
	return count;
}

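/* Set all bits covered by the AL-extent al_enr.
 * Returns number of bits changed. */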
unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr)
{
	struct drbd_bitmap *b = mdev->bitmap;
	unsigned long *p_addr, *bm;
	unsigned long weight;
	unsigned long s, e;
	int count, i, do_now;
	ERR_IF(!b) return 0;
	ERR_IF(!b->bm_pages) return 0;

	spin_lock_irq(&b->bm_lock);
	if (BM_DONT_SET & b->bm_flags)
		bm_print_lock_info(mdev);
	weight = b->bm_set;

	s = al_enr * BM_WORDS_PER_AL_EXT;
	e = min_t(size_t, s + BM_WORDS_PER_AL_EXT, b->bm_words);
	/* assert that s and e are on the same page */
	D_ASSERT((e-1) >> (PAGE_SHIFT - LN2_BPL + 3)
	      ==  s    >> (PAGE_SHIFT - LN2_BPL + 3));
	count = 0;
	if (s < b->bm_words) {
		i = do_now = e-s;
		p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s));
		bm = p_addr + MLPP(s);
		while (i--) {
			count += hweight_long(*bm);
			*bm = -1UL;
			bm++;
		}
		bm_unmap(p_addr);
		b->bm_set += do_now*BITS_PER_LONG - count;
		if (e == b->bm_words)
			b->bm_set -= bm_clear_surplus(b);
	} else {
		dev_err(DEV, "start offset (%lu) too large in drbd_bm_ALe_set_all\n", s);
	}
	weight = b->bm_set - weight;
	spin_unlock_irq(&b->bm_lock);
	return weight;
}