#include <linux/dm-dirty-log.h>
#include <linux/dm-region-hash.h>

#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#include "dm.h"

#define DM_MSG_PREFIX "region hash"
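
/*
 * The region hash carves the target's address space into fixed-size
 * regions whose in-sync state is tracked by a dm_dirty_log.  A region is
 * DM_RH_CLEAN, DM_RH_DIRTY, DM_RH_NOSYNC or DM_RH_RECOVERING.
 *
 * Besides living in the hash table, a region may sit on one of these
 * lists:
 *
 *   clean_regions: in-sync regions with no pending I/O; candidates for
 *   having their dirty log bit cleared and being reaped from the hash.
 *
 *   quiesced_regions: regions with no pending I/O that are ready to be
 *   handed out by dm_rh_recovery_start() for resynchronisation.
 *
 *   recovered_regions / failed_recovered_regions: regions whose recovery
 *   completed (successfully or not), waiting for dm_rh_update_states().
 *
 * Two locks are used: 'hash_lock' (an rwlock) protects the hash table
 * itself, while 'region_lock' (a spinlock) protects the lists above and
 * each region's 'state' and 'list' fields.
 */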
struct dm_region_hash {
	uint32_t region_size;
	unsigned region_shift;

	/* holds persistent region state */
	struct dm_dirty_log *log;

	/* hash table */
	rwlock_t hash_lock;
	mempool_t *region_pool;
	unsigned mask;
	unsigned nr_buckets;
	unsigned prime;
	unsigned shift;
	struct list_head *buckets;

	/* maximum number of regions recovered in parallel */
	unsigned max_recovery;

	spinlock_t region_lock;
	atomic_t recovery_in_flight;
	struct semaphore recovery_count;
	struct list_head clean_regions;
	struct list_head quiesced_regions;
	struct list_head recovered_regions;
	struct list_head failed_recovered_regions;

	/*
	 * If there was a flush failure no regions can be marked clean.
	 */
	int flush_failure;

	void *context;
	sector_t target_begin;

	/* Callback function to schedule bio writes */
	void (*dispatch_bios)(void *context, struct bio_list *bios);

	/* Callback function to wake up the caller's worker thread */
	void (*wakeup_workers)(void *context);

	/* Callback function to wake up waiters on recovery completion */
	void (*wakeup_all_recovery_waiters)(void *context);
};

struct dm_region {
	struct dm_region_hash *rh;	/* back-pointer to the owning hash */
	region_t key;
	int state;

	struct list_head hash_list;
	struct list_head list;

	atomic_t pending;
	struct bio_list delayed_bios;
};
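
/*
 * Conversion fns
 */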
static region_t dm_rh_sector_to_region(struct dm_region_hash *rh, sector_t sector)
{
	return sector >> rh->region_shift;
}

sector_t dm_rh_region_to_sector(struct dm_region_hash *rh, region_t region)
{
	return region << rh->region_shift;
}
EXPORT_SYMBOL_GPL(dm_rh_region_to_sector);

region_t dm_rh_bio_to_region(struct dm_region_hash *rh, struct bio *bio)
{
	return dm_rh_sector_to_region(rh, bio->bi_iter.bi_sector -
				      rh->target_begin);
}
EXPORT_SYMBOL_GPL(dm_rh_bio_to_region);

void *dm_rh_region_context(struct dm_region *reg)
{
	return reg->rh->context;
}
EXPORT_SYMBOL_GPL(dm_rh_region_context);

region_t dm_rh_get_region_key(struct dm_region *reg)
{
	return reg->key;
}
EXPORT_SYMBOL_GPL(dm_rh_get_region_key);

sector_t dm_rh_get_region_size(struct dm_region_hash *rh)
{
	return rh->region_size;
}
EXPORT_SYMBOL_GPL(dm_rh_get_region_size);

#define RH_HASH_MULT 2654435387U
#define RH_HASH_SHIFT 12

#define MIN_REGIONS 64
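
/*
 * dm_region_hash_create
 *
 * Allocate a region hash that tracks @nr_regions regions of @region_size
 * sectors, starting at @target_begin, using @log as the dirty log.  The
 * dispatch_bios/wakeup_workers/wakeup_all_recovery_waiters callbacks are
 * invoked with @context as their argument; at most @max_recovery regions
 * are recovered in parallel.  Returns an ERR_PTR() on allocation failure.
 */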
struct dm_region_hash *dm_region_hash_create(
		void *context, void (*dispatch_bios)(void *context,
						     struct bio_list *bios),
		void (*wakeup_workers)(void *context),
		void (*wakeup_all_recovery_waiters)(void *context),
		sector_t target_begin, unsigned max_recovery,
		struct dm_dirty_log *log, uint32_t region_size,
		region_t nr_regions)
{
	struct dm_region_hash *rh;
	unsigned nr_buckets, max_buckets;
	size_t i;

	/*
	 * Calculate a suitable number of buckets for our hash
	 * table.
	 */
	max_buckets = nr_regions >> 6;
	for (nr_buckets = 128u; nr_buckets < max_buckets; nr_buckets <<= 1)
		;
	nr_buckets >>= 1;

	rh = kmalloc(sizeof(*rh), GFP_KERNEL);
	if (!rh) {
		DMERR("unable to allocate region hash memory");
		return ERR_PTR(-ENOMEM);
	}

	rh->context = context;
	rh->dispatch_bios = dispatch_bios;
	rh->wakeup_workers = wakeup_workers;
	rh->wakeup_all_recovery_waiters = wakeup_all_recovery_waiters;
	rh->target_begin = target_begin;
	rh->max_recovery = max_recovery;
	rh->log = log;
	rh->region_size = region_size;
	rh->region_shift = ffs(region_size) - 1;
	rwlock_init(&rh->hash_lock);
	rh->mask = nr_buckets - 1;
	rh->nr_buckets = nr_buckets;

	rh->shift = RH_HASH_SHIFT;
	rh->prime = RH_HASH_MULT;

	rh->buckets = vmalloc(nr_buckets * sizeof(*rh->buckets));
	if (!rh->buckets) {
		DMERR("unable to allocate region hash bucket memory");
		kfree(rh);
		return ERR_PTR(-ENOMEM);
	}

	for (i = 0; i < nr_buckets; i++)
		INIT_LIST_HEAD(rh->buckets + i);

	spin_lock_init(&rh->region_lock);
	sema_init(&rh->recovery_count, 0);
	atomic_set(&rh->recovery_in_flight, 0);
	INIT_LIST_HEAD(&rh->clean_regions);
	INIT_LIST_HEAD(&rh->quiesced_regions);
	INIT_LIST_HEAD(&rh->recovered_regions);
	INIT_LIST_HEAD(&rh->failed_recovered_regions);
	rh->flush_failure = 0;

	rh->region_pool = mempool_create_kmalloc_pool(MIN_REGIONS,
						      sizeof(struct dm_region));
	if (!rh->region_pool) {
		vfree(rh->buckets);
		kfree(rh);
		rh = ERR_PTR(-ENOMEM);
	}

	return rh;
}
EXPORT_SYMBOL_GPL(dm_region_hash_create);

void dm_region_hash_destroy(struct dm_region_hash *rh)
{
	unsigned h;
	struct dm_region *reg, *nreg;

	BUG_ON(!list_empty(&rh->quiesced_regions));
	for (h = 0; h < rh->nr_buckets; h++) {
		list_for_each_entry_safe(reg, nreg, rh->buckets + h,
					 hash_list) {
			BUG_ON(atomic_read(&reg->pending));
			mempool_free(reg, rh->region_pool);
		}
	}

	if (rh->log)
		dm_dirty_log_destroy(rh->log);

	if (rh->region_pool)
		mempool_destroy(rh->region_pool);

	vfree(rh->buckets);
	kfree(rh);
}
EXPORT_SYMBOL_GPL(dm_region_hash_destroy);

struct dm_dirty_log *dm_rh_dirty_log(struct dm_region_hash *rh)
{
	return rh->log;
}
EXPORT_SYMBOL_GPL(dm_rh_dirty_log);
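
/*
 * Multiplicative hash of a region number into the bucket array.
 */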
static unsigned rh_hash(struct dm_region_hash *rh, region_t region)
{
	return (unsigned) ((region * rh->prime) >> rh->shift) & rh->mask;
}

static struct dm_region *__rh_lookup(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *reg;
	struct list_head *bucket = rh->buckets + rh_hash(rh, region);

	list_for_each_entry(reg, bucket, hash_list)
		if (reg->key == region)
			return reg;

	return NULL;
}

static void __rh_insert(struct dm_region_hash *rh, struct dm_region *reg)
{
	list_add(&reg->hash_list, rh->buckets + rh_hash(rh, reg->key));
}
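
/*
 * Allocate and insert a new region.  Called without hash_lock held; the
 * write lock is taken here, and a region already in the hash wins if we
 * raced with another allocation.
 */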
static struct dm_region *__rh_alloc(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *reg, *nreg;

	nreg = mempool_alloc(rh->region_pool, GFP_ATOMIC);
	if (unlikely(!nreg))
		nreg = kmalloc(sizeof(*nreg), GFP_NOIO | __GFP_NOFAIL);

	nreg->state = rh->log->type->in_sync(rh->log, region, 1) ?
		      DM_RH_CLEAN : DM_RH_NOSYNC;
	nreg->rh = rh;
	nreg->key = region;
	INIT_LIST_HEAD(&nreg->list);
	atomic_set(&nreg->pending, 0);
	bio_list_init(&nreg->delayed_bios);

	write_lock_irq(&rh->hash_lock);
	reg = __rh_lookup(rh, region);
	if (reg)
		/* We lost the race. */
		mempool_free(nreg, rh->region_pool);
	else {
		__rh_insert(rh, nreg);
		if (nreg->state == DM_RH_CLEAN) {
			spin_lock(&rh->region_lock);
			list_add(&nreg->list, &rh->clean_regions);
			spin_unlock(&rh->region_lock);
		}

		reg = nreg;
	}
	write_unlock_irq(&rh->hash_lock);

	return reg;
}
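
/*
 * Find a region, allocating it if necessary.  Must be called with the hash
 * read lock held; the lock is dropped and reacquired around allocation.
 */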
static struct dm_region *__rh_find(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *reg;

	reg = __rh_lookup(rh, region);
	if (!reg) {
		read_unlock(&rh->hash_lock);
		reg = __rh_alloc(rh, region);
		read_lock(&rh->hash_lock);
	}

	return reg;
}
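
/*
 * Return the state of a region.  If it is not in the hash, ask the dirty
 * log; @may_block controls whether the log lookup is allowed to block.
 */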
int dm_rh_get_state(struct dm_region_hash *rh, region_t region, int may_block)
{
	int r;
	struct dm_region *reg;

	read_lock(&rh->hash_lock);
	reg = __rh_lookup(rh, region);
	read_unlock(&rh->hash_lock);

	if (reg)
		return reg->state;

	/*
	 * The region wasn't in the hash, so we fall back to the
	 * dirty log.
	 */
	r = rh->log->type->in_sync(rh->log, region, may_block);

	/*
	 * Any error from the dirty log (eg. -EWOULDBLOCK) gets
	 * taken as a DM_RH_NOSYNC
	 */
	return r == 1 ? DM_RH_CLEAN : DM_RH_NOSYNC;
}
EXPORT_SYMBOL_GPL(dm_rh_get_state);

static void complete_resync_work(struct dm_region *reg, int success)
{
	struct dm_region_hash *rh = reg->rh;

	rh->log->type->set_region_sync(rh->log, reg->key, success);

	/*
	 * Dispatch the delayed bios for this region before waking any
	 * recovery waiters, so that by the time recovery_in_flight
	 * reaches zero all delayed I/O has been handed back to the
	 * caller.
	 */
	rh->dispatch_bios(rh->context, &reg->delayed_bios);
	if (atomic_dec_and_test(&rh->recovery_in_flight))
		rh->wakeup_all_recovery_waiters(rh->context);
	up(&rh->recovery_count);
}
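
/*
 * dm_rh_mark_nosync
 *
 * Mark a region out-of-sync after a partial write failure: the bio can
 * still be ended successfully, but the region must not be marked clean,
 * so its state is forced to DM_RH_NOSYNC.
 *
 * This function is _not_ safe in interrupt context!
 */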
void dm_rh_mark_nosync(struct dm_region_hash *rh, struct bio *bio)
{
	unsigned long flags;
	struct dm_dirty_log *log = rh->log;
	struct dm_region *reg;
	region_t region = dm_rh_bio_to_region(rh, bio);
	int recovering = 0;

	if (bio->bi_rw & REQ_FLUSH) {
		rh->flush_failure = 1;
		return;
	}

	if (bio->bi_rw & REQ_DISCARD)
		return;

	/* We must inform the log that the sync count has changed. */
	log->type->set_region_sync(log, region, 0);

	read_lock(&rh->hash_lock);
	reg = __rh_find(rh, region);
	read_unlock(&rh->hash_lock);

	/* region hash entry should exist because write was in-flight */
	BUG_ON(!reg);
	BUG_ON(!list_empty(&reg->list));

	spin_lock_irqsave(&rh->region_lock, flags);
	/*
	 * Possible cases:
	 *   1) DM_RH_DIRTY
	 *   2) DM_RH_NOSYNC: was dirty, other preceding writes failed
	 *   3) DM_RH_RECOVERING: flushing pending writes
	 *      Either case, the region should not be connected to any list.
	 */
	recovering = (reg->state == DM_RH_RECOVERING);
	reg->state = DM_RH_NOSYNC;
	BUG_ON(!list_empty(&reg->list));
	spin_unlock_irqrestore(&rh->region_lock, flags);

	if (recovering)
		complete_resync_work(reg, 0);
}
EXPORT_SYMBOL_GPL(dm_rh_mark_nosync);
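
/*
 * Reap regions whose final state is known: splice the clean, recovered and
 * failed-recovered lists out from under the locks, update the dirty log
 * accordingly, dispatch any delayed bios, return the region structures to
 * the mempool and flush the log.
 */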
void dm_rh_update_states(struct dm_region_hash *rh, int errors_handled)
{
	struct dm_region *reg, *next;

	LIST_HEAD(clean);
	LIST_HEAD(recovered);
	LIST_HEAD(failed_recovered);

	/*
	 * Quickly grab the lists and remove any regions from the hash.
	 */
	write_lock_irq(&rh->hash_lock);
	spin_lock(&rh->region_lock);
	if (!list_empty(&rh->clean_regions)) {
		list_splice_init(&rh->clean_regions, &clean);

		list_for_each_entry(reg, &clean, list)
			list_del(&reg->hash_list);
	}

	if (!list_empty(&rh->recovered_regions)) {
		list_splice_init(&rh->recovered_regions, &recovered);

		list_for_each_entry(reg, &recovered, list)
			list_del(&reg->hash_list);
	}

	if (!list_empty(&rh->failed_recovered_regions)) {
		list_splice_init(&rh->failed_recovered_regions,
				 &failed_recovered);

		list_for_each_entry(reg, &failed_recovered, list)
			list_del(&reg->hash_list);
	}

	spin_unlock(&rh->region_lock);
	write_unlock_irq(&rh->hash_lock);

	/*
	 * All the regions on the recovered and clean lists have
	 * now been pulled out of the hash and the per-state lists,
	 * so no more locking is needed while processing them.
	 */
	list_for_each_entry_safe(reg, next, &recovered, list) {
		rh->log->type->clear_region(rh->log, reg->key);
		complete_resync_work(reg, 1);
		mempool_free(reg, rh->region_pool);
	}

	list_for_each_entry_safe(reg, next, &failed_recovered, list) {
		complete_resync_work(reg, errors_handled ? 0 : 1);
		mempool_free(reg, rh->region_pool);
	}

	list_for_each_entry_safe(reg, next, &clean, list) {
		rh->log->type->clear_region(rh->log, reg->key);
		mempool_free(reg, rh->region_pool);
	}

	rh->log->type->flush(rh->log);
}
EXPORT_SYMBOL_GPL(dm_rh_update_states);
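
/*
 * Account a pending write against a region, marking the region (and its
 * dirty log bit) dirty if it was clean.
 */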
static void rh_inc(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *reg;

	read_lock(&rh->hash_lock);
	reg = __rh_find(rh, region);

	spin_lock_irq(&rh->region_lock);
	atomic_inc(&reg->pending);

	if (reg->state == DM_RH_CLEAN) {
		reg->state = DM_RH_DIRTY;
		list_del_init(&reg->list);	/* take off the clean list */
		spin_unlock_irq(&rh->region_lock);

		rh->log->type->mark_region(rh->log, reg->key);
	} else
		spin_unlock_irq(&rh->region_lock);

	read_unlock(&rh->hash_lock);
}

void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios)
{
	struct bio *bio;

	for (bio = bios->head; bio; bio = bio->bi_next) {
		if (bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))
			continue;
		rh_inc(rh, dm_rh_bio_to_region(rh, bio));
	}
}
EXPORT_SYMBOL_GPL(dm_rh_inc_pending);
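
/*
 * Drop a pending-I/O reference on a region.  When the last reference goes
 * away the region moves to the list matching its state (quiesced if it is
 * being recovered, clean if it was dirty), unless a flush failure forces
 * it to stay out of sync.
 */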
void dm_rh_dec(struct dm_region_hash *rh, region_t region)
{
	unsigned long flags;
	struct dm_region *reg;
	int should_wake = 0;

	read_lock(&rh->hash_lock);
	reg = __rh_lookup(rh, region);
	read_unlock(&rh->hash_lock);

	spin_lock_irqsave(&rh->region_lock, flags);
	if (atomic_dec_and_test(&reg->pending)) {
		/*
		 * There is no pending I/O for this region.
		 * We can move the region to the list matching its next action.
		 * At this point, the region is not yet connected to any list.
		 *
		 * If the state is DM_RH_NOSYNC, the region should be kept off
		 * the clean list.
		 * The hash entry for DM_RH_NOSYNC will remain in memory
		 * until the region is recovered or the map is reloaded.
		 */

		/* do nothing for DM_RH_NOSYNC */
		if (unlikely(rh->flush_failure)) {
			/*
			 * If a write flush failed some time ago, we
			 * don't know whether or not this write made it
			 * to the disk, so we must resync the device.
			 */
			reg->state = DM_RH_NOSYNC;
		} else if (reg->state == DM_RH_RECOVERING) {
			list_add_tail(&reg->list, &rh->quiesced_regions);
		} else if (reg->state == DM_RH_DIRTY) {
			reg->state = DM_RH_CLEAN;
			list_add(&reg->list, &rh->clean_regions);
		}
		should_wake = 1;
	}
	spin_unlock_irqrestore(&rh->region_lock, flags);

	if (should_wake)
		rh->wakeup_workers(rh->context);
}
EXPORT_SYMBOL_GPL(dm_rh_dec);
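
/*
 * Starts quiescing a region in preparation for recovery.
 */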
static int __rh_recovery_prepare(struct dm_region_hash *rh)
{
	int r;
	region_t region;
	struct dm_region *reg;

	/*
	 * Ask the dirty log what's next.
	 */
	r = rh->log->type->get_resync_work(rh->log, &region);
	if (r <= 0)
		return r;

	/*
	 * Get this region, and start it quiescing by setting the
	 * recovering flag.
	 */
	read_lock(&rh->hash_lock);
	reg = __rh_find(rh, region);
	read_unlock(&rh->hash_lock);

	spin_lock_irq(&rh->region_lock);
	reg->state = DM_RH_RECOVERING;

	/* Already quiesced ? */
	if (atomic_read(&reg->pending))
		list_del_init(&reg->list);
	else
		list_move(&reg->list, &rh->quiesced_regions);

	spin_unlock_irq(&rh->region_lock);

	return 1;
}
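
/*
 * Mark as many regions as max_recovery allows as recovering, moving
 * already-idle ones straight onto the quiesced list; one recovery_count
 * slot is consumed per region.
 */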
void dm_rh_recovery_prepare(struct dm_region_hash *rh)
{
	/* Extra reference to avoid race with dm_rh_stop_recovery */
	atomic_inc(&rh->recovery_in_flight);

	while (!down_trylock(&rh->recovery_count)) {
		atomic_inc(&rh->recovery_in_flight);
		if (__rh_recovery_prepare(rh) <= 0) {
			atomic_dec(&rh->recovery_in_flight);
			up(&rh->recovery_count);
			break;
		}
	}

	/* Drop the extra reference */
	if (atomic_dec_and_test(&rh->recovery_in_flight))
		rh->wakeup_all_recovery_waiters(rh->context);
}
EXPORT_SYMBOL_GPL(dm_rh_recovery_prepare);
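
/*
 * Returns any quiesced regions.
 */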
struct dm_region *dm_rh_recovery_start(struct dm_region_hash *rh)
{
	struct dm_region *reg = NULL;

	spin_lock_irq(&rh->region_lock);
	if (!list_empty(&rh->quiesced_regions)) {
		reg = list_entry(rh->quiesced_regions.next,
				 struct dm_region, list);
		list_del_init(&reg->list);
	}
	spin_unlock_irq(&rh->region_lock);

	return reg;
}
EXPORT_SYMBOL_GPL(dm_rh_recovery_start);
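
/*
 * Report the outcome of recovering a region; it is queued for the worker
 * to finish off in dm_rh_update_states().
 */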
void dm_rh_recovery_end(struct dm_region *reg, int success)
{
	struct dm_region_hash *rh = reg->rh;

	spin_lock_irq(&rh->region_lock);
	if (success)
		list_add(&reg->list, &reg->rh->recovered_regions);
	else
		list_add(&reg->list, &reg->rh->failed_recovered_regions);

	spin_unlock_irq(&rh->region_lock);

	rh->wakeup_workers(rh->context);
}
EXPORT_SYMBOL_GPL(dm_rh_recovery_end);

/* Return recovery in flight count. */
int dm_rh_recovery_in_flight(struct dm_region_hash *rh)
{
	return atomic_read(&rh->recovery_in_flight);
}
EXPORT_SYMBOL_GPL(dm_rh_recovery_in_flight);

int dm_rh_flush(struct dm_region_hash *rh)
{
	return rh->log->type->flush(rh->log);
}
EXPORT_SYMBOL_GPL(dm_rh_flush);
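
/*
 * Delay a bio on its region's delayed_bios list; it will be reissued once
 * the region has been recovered.
 */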
void dm_rh_delay(struct dm_region_hash *rh, struct bio *bio)
{
	struct dm_region *reg;

	read_lock(&rh->hash_lock);
	reg = __rh_find(rh, dm_rh_bio_to_region(rh, bio));
	bio_list_add(&reg->delayed_bios, bio);
	read_unlock(&rh->hash_lock);
}
EXPORT_SYMBOL_GPL(dm_rh_delay);
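
/*
 * Block until all in-flight recoveries have drained by claiming every
 * recovery_count slot.
 */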
void dm_rh_stop_recovery(struct dm_region_hash *rh)
{
	int i;

	/* wait for any recovering regions */
	for (i = 0; i < rh->max_recovery; i++)
		down(&rh->recovery_count);
}
EXPORT_SYMBOL_GPL(dm_rh_stop_recovery);

void dm_rh_start_recovery(struct dm_region_hash *rh)
{
	int i;

	for (i = 0; i < rh->max_recovery; i++)
		up(&rh->recovery_count);

	rh->wakeup_workers(rh->context);
}
EXPORT_SYMBOL_GPL(dm_rh_start_recovery);

MODULE_DESCRIPTION(DM_NAME " region hash");
MODULE_AUTHOR("Joe Thornber/Heinz Mauelshagen <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");