/*
 * Copyright (C) 2003 Sistina Software Limited.
 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include <linux/dm-dirty-log.h>
#include <linux/dm-region-hash.h>

#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#include "dm.h"

#define DM_MSG_PREFIX "region hash"

/*-----------------------------------------------------------------
 * Region hash
 *
 * The mirror splits itself up into discrete regions.  Each
 * region can be in one of three states: clean, dirty,
 * nosync.  There is no need to put clean regions in the hash.
 *
 * In addition to being present in the hash table a region _may_
 * be present on one of three lists.
 *
 *   clean_regions: Regions on this list have no io pending to
 *   them, they are in sync, we are no longer interested in them,
 *   they are dull.  dm_rh_update_states() will remove them from
 *   the hash table.
 *
 *   quiesced_regions: These regions have been spun down, ready
 *   for recovery.  dm_rh_recovery_start() will remove regions from
 *   this list and hand them to the caller, which will schedule the
 *   recovery io.
 *
 *   recovered_regions: Regions that the recovery code has
 *   successfully recovered.  dm_rh_update_states() will now schedule
 *   any delayed io, up the recovery_count, and remove the region
 *   from the hash.
 *
 * There are 2 locks:
 *   A rw spin lock 'hash_lock' protects just the hash table,
 *   this is never held in write mode from interrupt context,
 *   which means that we only have to disable irqs when
 *   doing a write lock.
 *
 *   An ordinary spin lock 'region_lock' that protects the three
 *   lists in the region_hash, with the 'state', 'list' and
 *   'delayed_bios' fields of the regions.  This is used from irq
 *   context, so all other uses will have to suspend local irqs.
 *---------------------------------------------------------------*/
struct dm_region_hash {
	uint32_t region_size;
	unsigned region_shift;

	/* holds persistent region state */
	struct dm_dirty_log *log;

	/* hash table */
	rwlock_t hash_lock;
	mempool_t *region_pool;
	unsigned mask;
	unsigned nr_buckets;
	unsigned prime;
	unsigned shift;
	struct list_head *buckets;

	unsigned max_recovery; /* Max # of regions to recover in parallel */

	spinlock_t region_lock;
	atomic_t recovery_in_flight;
	struct semaphore recovery_count;
	struct list_head clean_regions;
	struct list_head quiesced_regions;
	struct list_head recovered_regions;
	struct list_head failed_recovered_regions;

	/*
	 * If there was a flush failure no regions can be marked clean.
	 */
	int flush_failure;

	void *context;
	sector_t target_begin;

	/* Callback function to schedule bios writes */
	void (*dispatch_bios)(void *context, struct bio_list *bios);

	/* Callback function to wakeup callers worker thread. */
	void (*wakeup_workers)(void *context);

	/* Callback function to wakeup callers recovery waiters. */
	void (*wakeup_all_recovery_waiters)(void *context);
};

struct dm_region {
	struct dm_region_hash *rh;	/* FIXME: can we get rid of this ? */
	region_t key;
	int state;

	struct list_head hash_list;
	struct list_head list;

	atomic_t pending;
	struct bio_list delayed_bios;
};
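
/*
 * Hedged summary of the region lifecycle, as implemented below (the
 * DM_RH_* states themselves come from dm-region-hash.h): a region is
 * born DM_RH_CLEAN or DM_RH_NOSYNC depending on the dirty log, moves
 * DM_RH_CLEAN -> DM_RH_DIRTY when its first write goes pending
 * (rh_inc), DM_RH_DIRTY -> DM_RH_CLEAN when the last write drains
 * (dm_rh_dec), -> DM_RH_RECOVERING when the log hands it out as
 * resync work, and -> DM_RH_NOSYNC when a write or recovery fails.
 */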

/*
 * Conversion fns
 */
static region_t dm_rh_sector_to_region(struct dm_region_hash *rh, sector_t sector)
{
	return sector >> rh->region_shift;
}

sector_t dm_rh_region_to_sector(struct dm_region_hash *rh, region_t region)
{
	return region << rh->region_shift;
}
EXPORT_SYMBOL_GPL(dm_rh_region_to_sector);

region_t dm_rh_bio_to_region(struct dm_region_hash *rh, struct bio *bio)
{
	return dm_rh_sector_to_region(rh, bio->bi_sector - rh->target_begin);
}
EXPORT_SYMBOL_GPL(dm_rh_bio_to_region);

void *dm_rh_region_context(struct dm_region *reg)
{
	return reg->rh->context;
}
EXPORT_SYMBOL_GPL(dm_rh_region_context);

region_t dm_rh_get_region_key(struct dm_region *reg)
{
	return reg->key;
}
EXPORT_SYMBOL_GPL(dm_rh_get_region_key);

sector_t dm_rh_get_region_size(struct dm_region_hash *rh)
{
	return rh->region_size;
}
EXPORT_SYMBOL_GPL(dm_rh_get_region_size);
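
/*
 * Worked example (made-up numbers): with region_size = 8 sectors,
 * region_shift = ffs(8) - 1 = 3, so a bio at target sector 17 lands
 * in region 17 >> 3 = 2, and region 2 maps back to sector 2 << 3 = 16.
 * The shifts are valid because region_size is a power of two.
 */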

/*
 * FIXME: shall we pass in a structure instead of all these args to
 * dm_region_hash_create()?
 */
#define RH_HASH_MULT 2654435387U
#define RH_HASH_SHIFT 12

#define MIN_REGIONS 64
struct dm_region_hash *dm_region_hash_create(
		void *context, void (*dispatch_bios)(void *context,
						     struct bio_list *bios),
		void (*wakeup_workers)(void *context),
		void (*wakeup_all_recovery_waiters)(void *context),
		sector_t target_begin, unsigned max_recovery,
		struct dm_dirty_log *log, uint32_t region_size,
		region_t nr_regions)
{
	struct dm_region_hash *rh;
	unsigned nr_buckets, max_buckets;
	size_t i;

	/*
	 * Calculate a suitable number of buckets for our hash
	 * table.
	 */
	max_buckets = nr_regions >> 6;
	for (nr_buckets = 128u; nr_buckets < max_buckets; nr_buckets <<= 1)
		;
	nr_buckets >>= 1;
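	/*
	 * Example with made-up numbers: nr_regions = 10000 gives
	 * max_buckets = 156; the loop stops at 256 and the final shift
	 * leaves nr_buckets = 128.  The result is always a power of two
	 * (so the 'mask' assignment below is valid) and never less
	 * than 64.
	 */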

	rh = kmalloc(sizeof(*rh), GFP_KERNEL);
	if (!rh) {
		DMERR("unable to allocate region hash memory");
		return ERR_PTR(-ENOMEM);
	}

	rh->context = context;
	rh->dispatch_bios = dispatch_bios;
	rh->wakeup_workers = wakeup_workers;
	rh->wakeup_all_recovery_waiters = wakeup_all_recovery_waiters;
	rh->target_begin = target_begin;
	rh->max_recovery = max_recovery;
	rh->log = log;
	rh->region_size = region_size;
	rh->region_shift = ffs(region_size) - 1;
	rwlock_init(&rh->hash_lock);
	rh->mask = nr_buckets - 1;
	rh->nr_buckets = nr_buckets;

	rh->shift = RH_HASH_SHIFT;
	rh->prime = RH_HASH_MULT;

	rh->buckets = vmalloc(nr_buckets * sizeof(*rh->buckets));
	if (!rh->buckets) {
		DMERR("unable to allocate region hash bucket memory");
		kfree(rh);
		return ERR_PTR(-ENOMEM);
	}

	for (i = 0; i < nr_buckets; i++)
		INIT_LIST_HEAD(rh->buckets + i);

	spin_lock_init(&rh->region_lock);
	sema_init(&rh->recovery_count, 0);
	atomic_set(&rh->recovery_in_flight, 0);
	INIT_LIST_HEAD(&rh->clean_regions);
	INIT_LIST_HEAD(&rh->quiesced_regions);
	INIT_LIST_HEAD(&rh->recovered_regions);
	INIT_LIST_HEAD(&rh->failed_recovered_regions);
	rh->flush_failure = 0;

	rh->region_pool = mempool_create_kmalloc_pool(MIN_REGIONS,
						      sizeof(struct dm_region));
	if (!rh->region_pool) {
		vfree(rh->buckets);
		kfree(rh);
		rh = ERR_PTR(-ENOMEM);
	}

	return rh;
}
EXPORT_SYMBOL_GPL(dm_region_hash_create);
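
/*
 * Hedged usage sketch (loosely modeled on a mirror-style target's
 * constructor; 'ms', 'dl', 'wakeup_mirrord' and MAX_RECOVERY are
 * hypothetical caller-side names, not part of this API):
 *
 *	ms->rh = dm_region_hash_create(ms, dispatch_bios, wakeup_mirrord,
 *				       wakeup_all_recovery_waiters,
 *				       ti->begin, MAX_RECOVERY, dl,
 *				       region_size, nr_regions);
 *	if (IS_ERR(ms->rh))
 *		return PTR_ERR(ms->rh);
 */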

void dm_region_hash_destroy(struct dm_region_hash *rh)
{
	unsigned h;
	struct dm_region *reg, *nreg;

	BUG_ON(!list_empty(&rh->quiesced_regions));
	for (h = 0; h < rh->nr_buckets; h++) {
		list_for_each_entry_safe(reg, nreg, rh->buckets + h,
					 hash_list) {
			BUG_ON(atomic_read(&reg->pending));
			mempool_free(reg, rh->region_pool);
		}
	}

	if (rh->log)
		dm_dirty_log_destroy(rh->log);

	if (rh->region_pool)
		mempool_destroy(rh->region_pool);

	vfree(rh->buckets);
	kfree(rh);
}
EXPORT_SYMBOL_GPL(dm_region_hash_destroy);

struct dm_dirty_log *dm_rh_dirty_log(struct dm_region_hash *rh)
{
	return rh->log;
}
EXPORT_SYMBOL_GPL(dm_rh_dirty_log);

static unsigned rh_hash(struct dm_region_hash *rh, region_t region)
{
	return (unsigned) ((region * rh->prime) >> rh->shift) & rh->mask;
}
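
/*
 * rh_hash() above is multiplicative (Knuth-style) hashing: the region
 * number is spread by a large odd constant, RH_HASH_SHIFT discards the
 * poorly-mixed low bits, and 'mask' keeps just enough bits to index
 * the power-of-two bucket array.  E.g. with 128 buckets (mask = 127),
 * region 5 hashes to ((5 * 2654435387U) >> 12) & 127.
 */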

static struct dm_region *__rh_lookup(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *reg;
	struct list_head *bucket = rh->buckets + rh_hash(rh, region);

	list_for_each_entry(reg, bucket, hash_list)
		if (reg->key == region)
			return reg;

	return NULL;
}

static void __rh_insert(struct dm_region_hash *rh, struct dm_region *reg)
{
	list_add(&reg->hash_list, rh->buckets + rh_hash(rh, reg->key));
}

static struct dm_region *__rh_alloc(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *reg, *nreg;

	nreg = mempool_alloc(rh->region_pool, GFP_ATOMIC);
	if (unlikely(!nreg))
		nreg = kmalloc(sizeof(*nreg), GFP_NOIO | __GFP_NOFAIL);

	nreg->state = rh->log->type->in_sync(rh->log, region, 1) ?
		      DM_RH_CLEAN : DM_RH_NOSYNC;
	nreg->rh = rh;
	nreg->key = region;
	INIT_LIST_HEAD(&nreg->list);
	atomic_set(&nreg->pending, 0);
	bio_list_init(&nreg->delayed_bios);

	write_lock_irq(&rh->hash_lock);
	reg = __rh_lookup(rh, region);
	if (reg)
		/* We lost the race. */
		mempool_free(nreg, rh->region_pool);
	else {
		__rh_insert(rh, nreg);
		if (nreg->state == DM_RH_CLEAN) {
			spin_lock(&rh->region_lock);
			list_add(&nreg->list, &rh->clean_regions);
			spin_unlock(&rh->region_lock);
		}

		reg = nreg;
	}
	write_unlock_irq(&rh->hash_lock);

	return reg;
}

/*
 * Caller must hold the read side of hash_lock; it is dropped and
 * re-acquired here if a new region has to be allocated.
 */
static struct dm_region *__rh_find(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *reg;

	reg = __rh_lookup(rh, region);
	if (!reg) {
		read_unlock(&rh->hash_lock);
		reg = __rh_alloc(rh, region);
		read_lock(&rh->hash_lock);
	}

	return reg;
}

int dm_rh_get_state(struct dm_region_hash *rh, region_t region, int may_block)
{
	int r;
	struct dm_region *reg;

	read_lock(&rh->hash_lock);
	reg = __rh_lookup(rh, region);
	read_unlock(&rh->hash_lock);

	if (reg)
		return reg->state;

	/*
	 * The region wasn't in the hash, so we fall back to the
	 * dirty log.
	 */
	r = rh->log->type->in_sync(rh->log, region, may_block);

	/*
	 * Any error from the dirty log (eg. -EWOULDBLOCK) gets
	 * taken as a DM_RH_NOSYNC
	 */
	return r == 1 ? DM_RH_CLEAN : DM_RH_NOSYNC;
}
EXPORT_SYMBOL_GPL(dm_rh_get_state);

static void complete_resync_work(struct dm_region *reg, int success)
{
	struct dm_region_hash *rh = reg->rh;

	rh->log->type->set_region_sync(rh->log, reg->key, success);

	/*
	 * Dispatch the bios before we call 'wake_up_all'.
	 * This is important because if we are suspending,
	 * we want to know that recovery is complete and
	 * the work queue is flushed.  If we wake_up_all
	 * before we dispatch_bios (queue bios and call wake()),
	 * then we risk suspending before the work queue
	 * has been properly flushed.
	 */
	rh->dispatch_bios(rh->context, &reg->delayed_bios);
	if (atomic_dec_and_test(&rh->recovery_in_flight))
		rh->wakeup_all_recovery_waiters(rh->context);
	up(&rh->recovery_count);
}

/* dm_rh_mark_nosync
 * @rh
 * @bio
 *
 * The bio was written on some mirror(s) but failed on other mirror(s).
 * We can successfully endio the bio but should avoid the region being
 * marked clean by setting the state DM_RH_NOSYNC.
 *
 * This function is _not_ safe in interrupt context!
 */
void dm_rh_mark_nosync(struct dm_region_hash *rh, struct bio *bio)
{
	unsigned long flags;
	struct dm_dirty_log *log = rh->log;
	struct dm_region *reg;
	region_t region = dm_rh_bio_to_region(rh, bio);
	int recovering = 0;

	if (bio->bi_rw & REQ_FLUSH) {
		rh->flush_failure = 1;
		return;
	}

	if (bio->bi_rw & REQ_DISCARD)
		return;

	/* We must inform the log that the sync count has changed. */
	log->type->set_region_sync(log, region, 0);

	read_lock(&rh->hash_lock);
	reg = __rh_find(rh, region);
	read_unlock(&rh->hash_lock);

	/* region hash entry should exist because write was in-flight */
	BUG_ON(!reg);
	BUG_ON(!list_empty(&reg->list));

	spin_lock_irqsave(&rh->region_lock, flags);
	/*
	 * Possible cases:
	 *   1) DM_RH_DIRTY
	 *   2) DM_RH_NOSYNC: was dirty, other preceding writes failed
	 *   3) DM_RH_RECOVERING: flushing pending writes
	 * Either case, the region should not be connected to any list.
	 */
	recovering = (reg->state == DM_RH_RECOVERING);
	reg->state = DM_RH_NOSYNC;
	BUG_ON(!list_empty(&reg->list));
	spin_unlock_irqrestore(&rh->region_lock, flags);

	if (recovering)
		complete_resync_work(reg, 0);
}
EXPORT_SYMBOL_GPL(dm_rh_mark_nosync);

void dm_rh_update_states(struct dm_region_hash *rh, int errors_handled)
{
	struct dm_region *reg, *next;

	LIST_HEAD(clean);
	LIST_HEAD(recovered);
	LIST_HEAD(failed_recovered);

	/*
	 * Quickly grab the lists.
	 */
	write_lock_irq(&rh->hash_lock);
	spin_lock(&rh->region_lock);
	if (!list_empty(&rh->clean_regions)) {
		list_splice_init(&rh->clean_regions, &clean);

		list_for_each_entry(reg, &clean, list)
			list_del(&reg->hash_list);
	}

	if (!list_empty(&rh->recovered_regions)) {
		list_splice_init(&rh->recovered_regions, &recovered);

		list_for_each_entry(reg, &recovered, list)
			list_del(&reg->hash_list);
	}

	if (!list_empty(&rh->failed_recovered_regions)) {
		list_splice_init(&rh->failed_recovered_regions,
				 &failed_recovered);

		list_for_each_entry(reg, &failed_recovered, list)
			list_del(&reg->hash_list);
	}

	spin_unlock(&rh->region_lock);
	write_unlock_irq(&rh->hash_lock);

	/*
	 * All the regions on the recovered and clean lists have
	 * now been pulled out of the system, so no need to do
	 * any more locking.
	 */
	list_for_each_entry_safe(reg, next, &recovered, list) {
		rh->log->type->clear_region(rh->log, reg->key);
		complete_resync_work(reg, 1);
		mempool_free(reg, rh->region_pool);
	}

	list_for_each_entry_safe(reg, next, &failed_recovered, list) {
		complete_resync_work(reg, errors_handled ? 0 : 1);
		mempool_free(reg, rh->region_pool);
	}

	list_for_each_entry_safe(reg, next, &clean, list) {
		rh->log->type->clear_region(rh->log, reg->key);
		mempool_free(reg, rh->region_pool);
	}

	rh->log->type->flush(rh->log);
}
EXPORT_SYMBOL_GPL(dm_rh_update_states);
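
/*
 * Hedged sketch of a client worker loop built on this API (ordering
 * only; 'recover' and 'writes' are hypothetical caller-side names):
 *
 *	dm_rh_update_states(rh, errors_handled);  (retire finished regions)
 *	dm_rh_recovery_prepare(rh);               (quiesce resync candidates)
 *	while ((reg = dm_rh_recovery_start(rh)))
 *		recover(reg);                     (ends in dm_rh_recovery_end)
 *	dm_rh_inc_pending(rh, &writes);
 *	(issue the writes; every endio must pair with dm_rh_dec())
 */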

static void rh_inc(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *reg;

	read_lock(&rh->hash_lock);
	reg = __rh_find(rh, region);

	spin_lock_irq(&rh->region_lock);
	atomic_inc(&reg->pending);

	if (reg->state == DM_RH_CLEAN) {
		reg->state = DM_RH_DIRTY;
		list_del_init(&reg->list);	/* take off the clean list */
		spin_unlock_irq(&rh->region_lock);

		rh->log->type->mark_region(rh->log, reg->key);
	} else
		spin_unlock_irq(&rh->region_lock);

	read_unlock(&rh->hash_lock);
}

void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios)
{
	struct bio *bio;

	for (bio = bios->head; bio; bio = bio->bi_next) {
		if (bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))
			continue;
		rh_inc(rh, dm_rh_bio_to_region(rh, bio));
	}
}
EXPORT_SYMBOL_GPL(dm_rh_inc_pending);
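
/*
 * Every bio counted by dm_rh_inc_pending() must be matched by exactly
 * one dm_rh_dec() on completion, or the region can never drain back
 * to DM_RH_CLEAN (or reach the quiesced list while recovering).
 * Flushes and discards are skipped above because they are not
 * tracked per-region here.
 */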

void dm_rh_dec(struct dm_region_hash *rh, region_t region)
{
	unsigned long flags;
	struct dm_region *reg;
	int should_wake = 0;

	read_lock(&rh->hash_lock);
	reg = __rh_lookup(rh, region);
	read_unlock(&rh->hash_lock);

	spin_lock_irqsave(&rh->region_lock, flags);
	if (atomic_dec_and_test(&reg->pending)) {
		/*
		 * There is no pending I/O for this region.
		 * We can move the region to corresponding list for next action.
		 * At this point, the region is not yet connected to any list.
		 *
		 * If the state is DM_RH_NOSYNC, the region should be kept off
		 * from clean list.
		 * The hash entry for DM_RH_NOSYNC will remain in memory
		 * until the region is recovered or the map is reloaded.
		 */

		/* do nothing for DM_RH_NOSYNC */
		if (unlikely(rh->flush_failure)) {
			/*
			 * If a write flush failed some time ago, we
			 * don't know whether or not this write made it
			 * to the disk, so we must resync the device.
			 */
			reg->state = DM_RH_NOSYNC;
		} else if (reg->state == DM_RH_RECOVERING) {
			list_add_tail(&reg->list, &rh->quiesced_regions);
		} else if (reg->state == DM_RH_DIRTY) {
			reg->state = DM_RH_CLEAN;
			list_add(&reg->list, &rh->clean_regions);
		}
		should_wake = 1;
	}
	spin_unlock_irqrestore(&rh->region_lock, flags);

	if (should_wake)
		rh->wakeup_workers(rh->context);
}
EXPORT_SYMBOL_GPL(dm_rh_dec);

/*
 * Starts quiescing a region in preparation for recovery.
 */
static int __rh_recovery_prepare(struct dm_region_hash *rh)
{
	int r;
	region_t region;
	struct dm_region *reg;

	/*
	 * Ask the dirty log what's next.
	 */
	r = rh->log->type->get_resync_work(rh->log, &region);
	if (r <= 0)
		return r;

	/*
	 * Get this region, and start it quiescing by setting the
	 * recovering flag.
	 */
	read_lock(&rh->hash_lock);
	reg = __rh_find(rh, region);
	read_unlock(&rh->hash_lock);

	spin_lock_irq(&rh->region_lock);
	reg->state = DM_RH_RECOVERING;

	/* Already quiesced ? */
	if (atomic_read(&reg->pending))
		list_del_init(&reg->list);
	else
		list_move(&reg->list, &rh->quiesced_regions);

	spin_unlock_irq(&rh->region_lock);

	return 1;
}

void dm_rh_recovery_prepare(struct dm_region_hash *rh)
{
	/* Extra reference to avoid race with dm_rh_stop_recovery */
	atomic_inc(&rh->recovery_in_flight);

	while (!down_trylock(&rh->recovery_count)) {
		atomic_inc(&rh->recovery_in_flight);
		if (__rh_recovery_prepare(rh) <= 0) {
			atomic_dec(&rh->recovery_in_flight);
			up(&rh->recovery_count);
			break;
		}
	}

	/* Drop the extra reference */
	if (atomic_dec_and_test(&rh->recovery_in_flight))
		rh->wakeup_all_recovery_waiters(rh->context);
}
EXPORT_SYMBOL_GPL(dm_rh_recovery_prepare);
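
/*
 * Note on throttling: recovery_count starts at 0 and is raised to
 * max_recovery by dm_rh_start_recovery(), so the down_trylock() loop
 * above can never have more than max_recovery regions quiescing at
 * once; complete_resync_work() ups the semaphore again as each region
 * finishes.
 */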

/*
 * Returns any quiesced regions.
 */
struct dm_region *dm_rh_recovery_start(struct dm_region_hash *rh)
{
	struct dm_region *reg = NULL;

	spin_lock_irq(&rh->region_lock);
	if (!list_empty(&rh->quiesced_regions)) {
		reg = list_entry(rh->quiesced_regions.next,
				 struct dm_region, list);
		list_del_init(&reg->list);	/* remove from the quiesced list */
	}
	spin_unlock_irq(&rh->region_lock);

	return reg;
}
EXPORT_SYMBOL_GPL(dm_rh_recovery_start);

void dm_rh_recovery_end(struct dm_region *reg, int success)
{
	struct dm_region_hash *rh = reg->rh;

	spin_lock_irq(&rh->region_lock);
	if (success)
		list_add(&reg->list, &reg->rh->recovered_regions);
	else
		list_add(&reg->list, &reg->rh->failed_recovered_regions);

	spin_unlock_irq(&rh->region_lock);

	rh->wakeup_workers(rh->context);
}
EXPORT_SYMBOL_GPL(dm_rh_recovery_end);

/* Return recovery in flight count. */
int dm_rh_recovery_in_flight(struct dm_region_hash *rh)
{
	return atomic_read(&rh->recovery_in_flight);
}
EXPORT_SYMBOL_GPL(dm_rh_recovery_in_flight);

int dm_rh_flush(struct dm_region_hash *rh)
{
	return rh->log->type->flush(rh->log);
}
EXPORT_SYMBOL_GPL(dm_rh_flush);

void dm_rh_delay(struct dm_region_hash *rh, struct bio *bio)
{
	struct dm_region *reg;

	read_lock(&rh->hash_lock);
	reg = __rh_find(rh, dm_rh_bio_to_region(rh, bio));
	bio_list_add(&reg->delayed_bios, bio);
	read_unlock(&rh->hash_lock);
}
EXPORT_SYMBOL_GPL(dm_rh_delay);
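
/*
 * Bios parked by dm_rh_delay() are handed back to the client's
 * dispatch_bios callback from complete_resync_work(), i.e. once the
 * region they target has finished recovery.
 */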

void dm_rh_stop_recovery(struct dm_region_hash *rh)
{
	int i;

	/* wait for any recovering regions */
	for (i = 0; i < rh->max_recovery; i++)
		down(&rh->recovery_count);
}
EXPORT_SYMBOL_GPL(dm_rh_stop_recovery);

void dm_rh_start_recovery(struct dm_region_hash *rh)
{
	int i;

	for (i = 0; i < rh->max_recovery; i++)
		up(&rh->recovery_count);

	rh->wakeup_workers(rh->context);
}
EXPORT_SYMBOL_GPL(dm_rh_start_recovery);

MODULE_DESCRIPTION(DM_NAME " region hash");
MODULE_AUTHOR("Joe Thornber/Heinz Mauelshagen <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");