/*
 * Copyright (C) 2003 Sistina Software Limited.
 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */
#include <linux/dm-dirty-log.h>
#include <linux/dm-region-hash.h>

#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#include "dm.h"

#define DM_MSG_PREFIX	"region hash"

/*-----------------------------------------------------------------
 * Region hash
 *
 * A target (e.g. a mirror) splits itself up into discrete regions,
 * each of which tracks its own synchronisation state: clean, dirty,
 * nosync or recovering.  Only regions with outstanding state need
 * to live in the hash.
 *
 * Besides sitting in the hash table, a region may be on one of
 * several lists:
 *
 *   clean_regions: regions with no io pending that are in sync;
 *   dm_rh_update_states() clears them in the dirty log and removes
 *   them from the hash.
 *
 *   quiesced_regions: regions that have quiesced, ready for
 *   recovery; dm_rh_recovery_start() hands them to the caller's
 *   recovery code.
 *
 *   recovered_regions / failed_recovered_regions: regions whose
 *   recovery has completed, successfully or not;
 *   dm_rh_update_states() dispatches any delayed io, ups the
 *   recovery_count and removes them from the hash.
 *
 * Two locks protect the structure:
 *
 *   'hash_lock' is a rwlock protecting just the hash table.  It is
 *   never taken for write from interrupt context, so irqs only need
 *   to be disabled around write locks.
 *
 *   'region_lock' is an ordinary spinlock protecting the lists
 *   above together with the 'state', 'list' and 'delayed_bios'
 *   fields of the regions.  It is taken from irq context, so all
 *   other users must disable local irqs.
 *---------------------------------------------------------------*/
struct dm_region_hash {
	uint32_t region_size;
	unsigned region_shift;

	/* holds persistent region state */
	struct dm_dirty_log *log;

	/* hash table */
	rwlock_t hash_lock;
	mempool_t *region_pool;
	unsigned mask;
	unsigned nr_buckets;
	unsigned prime;
	unsigned shift;
	struct list_head *buckets;

	unsigned max_recovery; /* Max # of regions to recover in parallel */

	spinlock_t region_lock;
	atomic_t recovery_in_flight;
	struct semaphore recovery_count;
	struct list_head clean_regions;
	struct list_head quiesced_regions;
	struct list_head recovered_regions;
	struct list_head failed_recovered_regions;

	/*
	 * If there was a flush failure no regions can be marked clean.
	 */
	int flush_failure;

	void *context;
	sector_t target_begin;

	/* Callback function to schedule bios writes */
	void (*dispatch_bios)(void *context, struct bio_list *bios);

	/* Callback function to wakeup callers worker thread. */
	void (*wakeup_workers)(void *context);

	/* Callback function to wakeup callers recovery waiters. */
	void (*wakeup_all_recovery_waiters)(void *context);
};

struct dm_region {
	struct dm_region_hash *rh;
	region_t key;
	int state;

	struct list_head hash_list;
	struct list_head list;

	atomic_t pending;
	struct bio_list delayed_bios;
};

/*
 * Conversions between sectors, bios and regions.
 */
static region_t dm_rh_sector_to_region(struct dm_region_hash *rh, sector_t sector)
{
	return sector >> rh->region_shift;
}

sector_t dm_rh_region_to_sector(struct dm_region_hash *rh, region_t region)
{
	return region << rh->region_shift;
}
EXPORT_SYMBOL_GPL(dm_rh_region_to_sector);

region_t dm_rh_bio_to_region(struct dm_region_hash *rh, struct bio *bio)
{
	return dm_rh_sector_to_region(rh, bio->bi_sector - rh->target_begin);
}
EXPORT_SYMBOL_GPL(dm_rh_bio_to_region);

void *dm_rh_region_context(struct dm_region *reg)
{
	return reg->rh->context;
}
EXPORT_SYMBOL_GPL(dm_rh_region_context);

region_t dm_rh_get_region_key(struct dm_region *reg)
{
	return reg->key;
}
EXPORT_SYMBOL_GPL(dm_rh_get_region_key);

sector_t dm_rh_get_region_size(struct dm_region_hash *rh)
{
	return rh->region_size;
}
EXPORT_SYMBOL_GPL(dm_rh_get_region_size);


/*
 * Constants for the multiplicative hash used by rh_hash().
 */
#define RH_HASH_MULT 2654435387U
#define RH_HASH_SHIFT 12

#define MIN_REGIONS 64
struct dm_region_hash *dm_region_hash_create(
		void *context, void (*dispatch_bios)(void *context,
						     struct bio_list *bios),
		void (*wakeup_workers)(void *context),
		void (*wakeup_all_recovery_waiters)(void *context),
		sector_t target_begin, unsigned max_recovery,
		struct dm_dirty_log *log, uint32_t region_size,
		region_t nr_regions)
{
	struct dm_region_hash *rh;
	unsigned nr_buckets, max_buckets;
	size_t i;

	/*
	 * Calculate a suitable number of buckets for our hash
	 * table.
	 */
	max_buckets = nr_regions >> 6;
	for (nr_buckets = 128u; nr_buckets < max_buckets; nr_buckets <<= 1)
		;
	nr_buckets >>= 1;

	rh = kmalloc(sizeof(*rh), GFP_KERNEL);
	if (!rh) {
		DMERR("unable to allocate region hash memory");
		return ERR_PTR(-ENOMEM);
	}

	rh->context = context;
	rh->dispatch_bios = dispatch_bios;
	rh->wakeup_workers = wakeup_workers;
	rh->wakeup_all_recovery_waiters = wakeup_all_recovery_waiters;
	rh->target_begin = target_begin;
	rh->max_recovery = max_recovery;
	rh->log = log;
	rh->region_size = region_size;
	rh->region_shift = ffs(region_size) - 1;
	rwlock_init(&rh->hash_lock);
	rh->mask = nr_buckets - 1;
	rh->nr_buckets = nr_buckets;

	rh->shift = RH_HASH_SHIFT;
	rh->prime = RH_HASH_MULT;

	rh->buckets = vmalloc(nr_buckets * sizeof(*rh->buckets));
	if (!rh->buckets) {
		DMERR("unable to allocate region hash bucket memory");
		kfree(rh);
		return ERR_PTR(-ENOMEM);
	}

	for (i = 0; i < nr_buckets; i++)
		INIT_LIST_HEAD(rh->buckets + i);

	spin_lock_init(&rh->region_lock);
	sema_init(&rh->recovery_count, 0);
	atomic_set(&rh->recovery_in_flight, 0);
	INIT_LIST_HEAD(&rh->clean_regions);
	INIT_LIST_HEAD(&rh->quiesced_regions);
	INIT_LIST_HEAD(&rh->recovered_regions);
	INIT_LIST_HEAD(&rh->failed_recovered_regions);
	rh->flush_failure = 0;

	rh->region_pool = mempool_create_kmalloc_pool(MIN_REGIONS,
						      sizeof(struct dm_region));
	if (!rh->region_pool) {
		vfree(rh->buckets);
		kfree(rh);
		rh = ERR_PTR(-ENOMEM);
	}

	return rh;
}
EXPORT_SYMBOL_GPL(dm_region_hash_create);

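/*
 * Rough usage sketch (illustrative only, not lifted from any in-tree
 * target): a mirror-like target typically drives this API from its map
 * function, endio path and worker thread roughly as follows.  Names
 * such as "ms", MAX_RECOVERY and do_writes() are hypothetical.
 *
 *	rh = dm_region_hash_create(ms, dispatch_bios, wakeup, wakeup_all,
 *				   ti->begin, MAX_RECOVERY, log,
 *				   region_size, nr_regions);
 *
 *	// before issuing a batch of writes:
 *	dm_rh_inc_pending(rh, &writes);
 *	dm_rh_flush(rh);			// commit dirty log changes
 *	do_writes(ms, &writes);
 *
 *	// from the write endio path, per bio:
 *	dm_rh_dec(rh, dm_rh_bio_to_region(rh, bio));
 *
 *	// periodically, from the worker:
 *	dm_rh_update_states(rh, errors_handled);
 *	dm_rh_recovery_prepare(rh);
 *	while ((reg = dm_rh_recovery_start(rh)))
 *		recover(ms, reg);		// ends in dm_rh_recovery_end()
 *
 *	dm_region_hash_destroy(rh);
 */
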
void dm_region_hash_destroy(struct dm_region_hash *rh)
{
	unsigned h;
	struct dm_region *reg, *nreg;

	BUG_ON(!list_empty(&rh->quiesced_regions));
	for (h = 0; h < rh->nr_buckets; h++) {
		list_for_each_entry_safe(reg, nreg, rh->buckets + h,
					 hash_list) {
			BUG_ON(atomic_read(&reg->pending));
			mempool_free(reg, rh->region_pool);
		}
	}

	if (rh->log)
		dm_dirty_log_destroy(rh->log);

	if (rh->region_pool)
		mempool_destroy(rh->region_pool);

	vfree(rh->buckets);
	kfree(rh);
}
EXPORT_SYMBOL_GPL(dm_region_hash_destroy);

struct dm_dirty_log *dm_rh_dirty_log(struct dm_region_hash *rh)
{
	return rh->log;
}
EXPORT_SYMBOL_GPL(dm_rh_dirty_log);

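/*
 * Internal hash-table helpers.  rh_hash() is a multiplicative hash over
 * the region number; __rh_lookup() and __rh_insert() expect the caller
 * to hold rh->hash_lock.
 */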
static unsigned rh_hash(struct dm_region_hash *rh, region_t region)
{
	return (unsigned) ((region * rh->prime) >> rh->shift) & rh->mask;
}

static struct dm_region *__rh_lookup(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *reg;
	struct list_head *bucket = rh->buckets + rh_hash(rh, region);

	list_for_each_entry(reg, bucket, hash_list)
		if (reg->key == region)
			return reg;

	return NULL;
}

static void __rh_insert(struct dm_region_hash *rh, struct dm_region *reg)
{
	list_add(&reg->hash_list, rh->buckets + rh_hash(rh, reg->key));
}

static struct dm_region *__rh_alloc(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *reg, *nreg;

	nreg = mempool_alloc(rh->region_pool, GFP_ATOMIC);
	if (unlikely(!nreg))
		nreg = kmalloc(sizeof(*nreg), GFP_NOIO | __GFP_NOFAIL);

	nreg->state = rh->log->type->in_sync(rh->log, region, 1) ?
		      DM_RH_CLEAN : DM_RH_NOSYNC;
	nreg->rh = rh;
	nreg->key = region;
	INIT_LIST_HEAD(&nreg->list);
	atomic_set(&nreg->pending, 0);
	bio_list_init(&nreg->delayed_bios);

	write_lock_irq(&rh->hash_lock);
	reg = __rh_lookup(rh, region);
	if (reg)
		/* We lost the race. */
		mempool_free(nreg, rh->region_pool);
	else {
		__rh_insert(rh, nreg);
		if (nreg->state == DM_RH_CLEAN) {
			spin_lock(&rh->region_lock);
			list_add(&nreg->list, &rh->clean_regions);
			spin_unlock(&rh->region_lock);
		}

		reg = nreg;
	}
	write_unlock_irq(&rh->hash_lock);

	return reg;
}

static struct dm_region *__rh_find(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *reg;

	reg = __rh_lookup(rh, region);
	if (!reg) {
		read_unlock(&rh->hash_lock);
		reg = __rh_alloc(rh, region);
		read_lock(&rh->hash_lock);
	}

	return reg;
}

int dm_rh_get_state(struct dm_region_hash *rh, region_t region, int may_block)
{
	int r;
	struct dm_region *reg;

	read_lock(&rh->hash_lock);
	reg = __rh_lookup(rh, region);
	read_unlock(&rh->hash_lock);

	if (reg)
		return reg->state;

	/*
	 * The region wasn't in the hash, so we fall back to the
	 * dirty log.
	 */
	r = rh->log->type->in_sync(rh->log, region, may_block);

	/*
	 * Any error from the dirty log (eg. -EWOULDBLOCK) gets
	 * taken as a DM_RH_NOSYNC
	 */
	return r == 1 ? DM_RH_CLEAN : DM_RH_NOSYNC;
}
EXPORT_SYMBOL_GPL(dm_rh_get_state);

static void complete_resync_work(struct dm_region *reg, int success)
{
	struct dm_region_hash *rh = reg->rh;

	rh->log->type->set_region_sync(rh->log, reg->key, success);

	/*
	 * Dispatch the delayed bios before waking any recovery waiters.
	 * This matters during suspend: waiters must only see recovery
	 * as complete once the delayed io has actually been handed back
	 * to the caller's worker, otherwise a bio could end up queued
	 * to a workqueue that is about to be flushed and torn down.
	 */
	rh->dispatch_bios(rh->context, &reg->delayed_bios);
	if (atomic_dec_and_test(&rh->recovery_in_flight))
		rh->wakeup_all_recovery_waiters(rh->context);
	up(&rh->recovery_count);
}

/*
 * dm_rh_mark_nosync
 * @rh
 * @bio
 *
 * The bio was written on some mirror(s) but failed on other mirror(s).
 * We can successfully endio the bio but should avoid the region being
 * marked clean by setting the state DM_RH_NOSYNC.
 *
 * This function is _not_ safe in interrupt context!
 */
void dm_rh_mark_nosync(struct dm_region_hash *rh, struct bio *bio)
{
	unsigned long flags;
	struct dm_dirty_log *log = rh->log;
	struct dm_region *reg;
	region_t region = dm_rh_bio_to_region(rh, bio);
	int recovering = 0;

	if (bio->bi_rw & REQ_FLUSH) {
		rh->flush_failure = 1;
		return;
	}

	/* We must inform the log that the sync count has changed. */
	log->type->set_region_sync(log, region, 0);

	read_lock(&rh->hash_lock);
	reg = __rh_find(rh, region);
	read_unlock(&rh->hash_lock);

	/* region hash entry should exist because write was in-flight */
	BUG_ON(!reg);
	BUG_ON(!list_empty(&reg->list));

	spin_lock_irqsave(&rh->region_lock, flags);
	/*
	 * Possible cases:
	 *   1) DM_RH_DIRTY
	 *   2) DM_RH_NOSYNC: was dirty, other preceding writes failed
	 *   3) DM_RH_RECOVERING: flushing pending writes
	 * Either way, the region should not be connected to any list.
	 */
	recovering = (reg->state == DM_RH_RECOVERING);
	reg->state = DM_RH_NOSYNC;
	BUG_ON(!list_empty(&reg->list));
	spin_unlock_irqrestore(&rh->region_lock, flags);

	if (recovering)
		complete_resync_work(reg, 0);
}
EXPORT_SYMBOL_GPL(dm_rh_mark_nosync);

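/*
 * dm_rh_update_states
 * @errors_handled: if zero, regions whose recovery failed are still
 *		    reported to the dirty log as in-sync.
 *
 * Splices out the clean, recovered and failed_recovered lists, removes
 * those regions from the hash, updates the dirty log accordingly,
 * dispatches any delayed bios via complete_resync_work() and finally
 * flushes the log.
 */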
void dm_rh_update_states(struct dm_region_hash *rh, int errors_handled)
{
	struct dm_region *reg, *next;

	LIST_HEAD(clean);
	LIST_HEAD(recovered);
	LIST_HEAD(failed_recovered);

	/*
	 * Quickly grab the lists and remove any regions from the hash.
	 */
	write_lock_irq(&rh->hash_lock);
	spin_lock(&rh->region_lock);
	if (!list_empty(&rh->clean_regions)) {
		list_splice_init(&rh->clean_regions, &clean);

		list_for_each_entry(reg, &clean, list)
			list_del(&reg->hash_list);
	}

	if (!list_empty(&rh->recovered_regions)) {
		list_splice_init(&rh->recovered_regions, &recovered);

		list_for_each_entry(reg, &recovered, list)
			list_del(&reg->hash_list);
	}

	if (!list_empty(&rh->failed_recovered_regions)) {
		list_splice_init(&rh->failed_recovered_regions,
				 &failed_recovered);

		list_for_each_entry(reg, &failed_recovered, list)
			list_del(&reg->hash_list);
	}

	spin_unlock(&rh->region_lock);
	write_unlock_irq(&rh->hash_lock);

	/*
	 * All the regions on the recovered and clean lists have
	 * now been pulled out of the system, so no need to do
	 * any more locking.
	 */
	list_for_each_entry_safe(reg, next, &recovered, list) {
		rh->log->type->clear_region(rh->log, reg->key);
		complete_resync_work(reg, 1);
		mempool_free(reg, rh->region_pool);
	}

	list_for_each_entry_safe(reg, next, &failed_recovered, list) {
		complete_resync_work(reg, errors_handled ? 0 : 1);
		mempool_free(reg, rh->region_pool);
	}

	list_for_each_entry_safe(reg, next, &clean, list) {
		rh->log->type->clear_region(rh->log, reg->key);
		mempool_free(reg, rh->region_pool);
	}

	rh->log->type->flush(rh->log);
}
EXPORT_SYMBOL_GPL(dm_rh_update_states);

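/*
 * rh_inc() accounts a write in flight against a region: it raises the
 * pending count and, if the region was clean, switches it to dirty and
 * marks it in the dirty log, all while still holding the hash lock for
 * read so the region cannot be reclaimed underneath us.
 */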
static void rh_inc(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *reg;

	read_lock(&rh->hash_lock);
	reg = __rh_find(rh, region);

	spin_lock_irq(&rh->region_lock);
	atomic_inc(&reg->pending);

	if (reg->state == DM_RH_CLEAN) {
		reg->state = DM_RH_DIRTY;
		list_del_init(&reg->list);	/* take off the clean list */
		spin_unlock_irq(&rh->region_lock);

		rh->log->type->mark_region(rh->log, reg->key);
	} else
		spin_unlock_irq(&rh->region_lock);

	read_unlock(&rh->hash_lock);
}

void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios)
{
	struct bio *bio;

	for (bio = bios->head; bio; bio = bio->bi_next) {
		if (bio->bi_rw & REQ_FLUSH)
			continue;
		rh_inc(rh, dm_rh_bio_to_region(rh, bio));
	}
}
EXPORT_SYMBOL_GPL(dm_rh_inc_pending);

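/*
 * dm_rh_dec() drops the pending count taken by rh_inc().  When the last
 * write to a region completes, the region either becomes clean again,
 * moves to the quiesced list if it was waiting for recovery, or is
 * forced to nosync after a failed flush; the caller's worker is then
 * woken to process the state change.
 */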
void dm_rh_dec(struct dm_region_hash *rh, region_t region)
{
	unsigned long flags;
	struct dm_region *reg;
	int should_wake = 0;

	read_lock(&rh->hash_lock);
	reg = __rh_lookup(rh, region);
	read_unlock(&rh->hash_lock);

	spin_lock_irqsave(&rh->region_lock, flags);
	if (atomic_dec_and_test(&reg->pending)) {
		/*
		 * There is no pending I/O for this region, so we can
		 * move it to the list matching its next action.  At
		 * this point the region is not connected to any list.
		 *
		 * A DM_RH_NOSYNC region is kept off the clean list; its
		 * hash entry stays in memory until the region is
		 * recovered or the map is reloaded.
		 */

		/* do nothing for DM_RH_NOSYNC */
		if (unlikely(rh->flush_failure)) {
			/*
			 * If a write flush failed some time ago, we
			 * don't know whether or not this write made it
			 * to the disk, so we must resync the device.
			 */
			reg->state = DM_RH_NOSYNC;
		} else if (reg->state == DM_RH_RECOVERING) {
			list_add_tail(&reg->list, &rh->quiesced_regions);
		} else if (reg->state == DM_RH_DIRTY) {
			reg->state = DM_RH_CLEAN;
			list_add(&reg->list, &rh->clean_regions);
		}
		should_wake = 1;
	}
	spin_unlock_irqrestore(&rh->region_lock, flags);

	if (should_wake)
		rh->wakeup_workers(rh->context);
}
EXPORT_SYMBOL_GPL(dm_rh_dec);

/*
 * Pull the next region needing recovery from the dirty log and start
 * quiescing it.
 */
static int __rh_recovery_prepare(struct dm_region_hash *rh)
{
	int r;
	region_t region;
	struct dm_region *reg;

	/*
	 * Ask the dirty log what's next.
	 */
	r = rh->log->type->get_resync_work(rh->log, &region);
	if (r <= 0)
		return r;

	/*
	 * Get this region, and start it quiescing by setting the
	 * recovering flag.
	 */
	read_lock(&rh->hash_lock);
	reg = __rh_find(rh, region);
	read_unlock(&rh->hash_lock);

	spin_lock_irq(&rh->region_lock);
	reg->state = DM_RH_RECOVERING;

	/* Already quiesced ? */
	if (atomic_read(&reg->pending))
		list_del_init(&reg->list);
	else
		list_move(&reg->list, &rh->quiesced_regions);

	spin_unlock_irq(&rh->region_lock);

	return 1;
}

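/*
 * dm_rh_recovery_prepare() claims recovery slots from the
 * recovery_count semaphore (at most max_recovery are available) and
 * prepares one region per slot.  An extra recovery_in_flight reference
 * is held across the loop so waiters are only woken once preparation
 * has genuinely finished.
 */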
void dm_rh_recovery_prepare(struct dm_region_hash *rh)
{
	/* Extra reference to avoid race with dm_rh_stop_recovery */
	atomic_inc(&rh->recovery_in_flight);

	while (!down_trylock(&rh->recovery_count)) {
		atomic_inc(&rh->recovery_in_flight);
		if (__rh_recovery_prepare(rh) <= 0) {
			atomic_dec(&rh->recovery_in_flight);
			up(&rh->recovery_count);
			break;
		}
	}

	/* Drop the extra reference. */
	if (atomic_dec_and_test(&rh->recovery_in_flight))
		rh->wakeup_all_recovery_waiters(rh->context);
}
EXPORT_SYMBOL_GPL(dm_rh_recovery_prepare);

/*
 * Returns any quiesced regions.
 */
struct dm_region *dm_rh_recovery_start(struct dm_region_hash *rh)
{
	struct dm_region *reg = NULL;

	spin_lock_irq(&rh->region_lock);
	if (!list_empty(&rh->quiesced_regions)) {
		reg = list_entry(rh->quiesced_regions.next,
				 struct dm_region, list);
		list_del_init(&reg->list);
	}
	spin_unlock_irq(&rh->region_lock);

	return reg;
}
EXPORT_SYMBOL_GPL(dm_rh_recovery_start);

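/*
 * dm_rh_recovery_end() is called by the recovery code once copying of a
 * region has finished; the region is queued on the recovered or
 * failed_recovered list for dm_rh_update_states() to fold back in.
 */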
void dm_rh_recovery_end(struct dm_region *reg, int success)
{
	struct dm_region_hash *rh = reg->rh;

	spin_lock_irq(&rh->region_lock);
	if (success)
		list_add(&reg->list, &reg->rh->recovered_regions);
	else
		list_add(&reg->list, &reg->rh->failed_recovered_regions);

	spin_unlock_irq(&rh->region_lock);

	rh->wakeup_workers(rh->context);
}
EXPORT_SYMBOL_GPL(dm_rh_recovery_end);

/* Return recovery in flight count. */
int dm_rh_recovery_in_flight(struct dm_region_hash *rh)
{
	return atomic_read(&rh->recovery_in_flight);
}
EXPORT_SYMBOL_GPL(dm_rh_recovery_in_flight);

int dm_rh_flush(struct dm_region_hash *rh)
{
	return rh->log->type->flush(rh->log);
}
EXPORT_SYMBOL_GPL(dm_rh_flush);

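/*
 * dm_rh_delay() parks a bio on its region's delayed_bios list; the bios
 * are handed back to the caller by complete_resync_work() once recovery
 * of that region has finished.
 */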
void dm_rh_delay(struct dm_region_hash *rh, struct bio *bio)
{
	struct dm_region *reg;

	read_lock(&rh->hash_lock);
	reg = __rh_find(rh, dm_rh_bio_to_region(rh, bio));
	bio_list_add(&reg->delayed_bios, bio);
	read_unlock(&rh->hash_lock);
}
EXPORT_SYMBOL_GPL(dm_rh_delay);

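/*
 * dm_rh_stop_recovery() acquires all max_recovery slots of the
 * recovery_count semaphore, waiting for in-flight recoveries to finish
 * and preventing new ones from starting; dm_rh_start_recovery()
 * releases them again and kicks the worker.  A target would typically
 * wrap these around suspend/resume.
 */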
void dm_rh_stop_recovery(struct dm_region_hash *rh)
{
	int i;

	/* wait for any recovering regions */
	for (i = 0; i < rh->max_recovery; i++)
		down(&rh->recovery_count);
}
EXPORT_SYMBOL_GPL(dm_rh_stop_recovery);

void dm_rh_start_recovery(struct dm_region_hash *rh)
{
	int i;

	for (i = 0; i < rh->max_recovery; i++)
		up(&rh->recovery_count);

	rh->wakeup_workers(rh->context);
}
EXPORT_SYMBOL_GPL(dm_rh_start_recovery);

MODULE_DESCRIPTION(DM_NAME " region hash");
MODULE_AUTHOR("Joe Thornber/Heinz Mauelshagen <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");