1
2
3
4
5
6
7
8#include <linux/dm-dirty-log.h>
9#include <linux/dm-region-hash.h>
10
11#include <linux/ctype.h>
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/slab.h>
15#include <linux/vmalloc.h>
16
17#include "dm.h"
18
19#define DM_MSG_PREFIX "region hash"
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57struct dm_region_hash {
58 uint32_t region_size;
59 unsigned region_shift;
60
61
62 struct dm_dirty_log *log;
63
64
65 rwlock_t hash_lock;
66 mempool_t *region_pool;
67 unsigned mask;
68 unsigned nr_buckets;
69 unsigned prime;
70 unsigned shift;
71 struct list_head *buckets;
72
73 unsigned max_recovery;
74
75 spinlock_t region_lock;
76 atomic_t recovery_in_flight;
77 struct semaphore recovery_count;
78 struct list_head clean_regions;
79 struct list_head quiesced_regions;
80 struct list_head recovered_regions;
81 struct list_head failed_recovered_regions;
82
83
84
85
86 int flush_failure;
87
88 void *context;
89 sector_t target_begin;
90
91
92 void (*dispatch_bios)(void *context, struct bio_list *bios);
93
94
95 void (*wakeup_workers)(void *context);
96
97
98 void (*wakeup_all_recovery_waiters)(void *context);
99};
100
101struct dm_region {
102 struct dm_region_hash *rh;
103 region_t key;
104 int state;
105
106 struct list_head hash_list;
107 struct list_head list;
108
109 atomic_t pending;
110 struct bio_list delayed_bios;
111};
112
113
114
115
116static region_t dm_rh_sector_to_region(struct dm_region_hash *rh, sector_t sector)
117{
118 return sector >> rh->region_shift;
119}
120
121sector_t dm_rh_region_to_sector(struct dm_region_hash *rh, region_t region)
122{
123 return region << rh->region_shift;
124}
125EXPORT_SYMBOL_GPL(dm_rh_region_to_sector);
126
127region_t dm_rh_bio_to_region(struct dm_region_hash *rh, struct bio *bio)
128{
129 return dm_rh_sector_to_region(rh, bio->bi_sector - rh->target_begin);
130}
131EXPORT_SYMBOL_GPL(dm_rh_bio_to_region);
132
133void *dm_rh_region_context(struct dm_region *reg)
134{
135 return reg->rh->context;
136}
137EXPORT_SYMBOL_GPL(dm_rh_region_context);
138
139region_t dm_rh_get_region_key(struct dm_region *reg)
140{
141 return reg->key;
142}
143EXPORT_SYMBOL_GPL(dm_rh_get_region_key);
144
145sector_t dm_rh_get_region_size(struct dm_region_hash *rh)
146{
147 return rh->region_size;
148}
149EXPORT_SYMBOL_GPL(dm_rh_get_region_size);
150
151
152
153
154
155#define RH_HASH_MULT 2654435387U
156#define RH_HASH_SHIFT 12
157
158#define MIN_REGIONS 64
159struct dm_region_hash *dm_region_hash_create(
160 void *context, void (*dispatch_bios)(void *context,
161 struct bio_list *bios),
162 void (*wakeup_workers)(void *context),
163 void (*wakeup_all_recovery_waiters)(void *context),
164 sector_t target_begin, unsigned max_recovery,
165 struct dm_dirty_log *log, uint32_t region_size,
166 region_t nr_regions)
167{
168 struct dm_region_hash *rh;
169 unsigned nr_buckets, max_buckets;
170 size_t i;
171
172
173
174
175
176 max_buckets = nr_regions >> 6;
177 for (nr_buckets = 128u; nr_buckets < max_buckets; nr_buckets <<= 1)
178 ;
179 nr_buckets >>= 1;
180
181 rh = kzalloc(sizeof(*rh), GFP_KERNEL);
182 if (!rh) {
183 DMERR("unable to allocate region hash memory");
184 return ERR_PTR(-ENOMEM);
185 }
186
187 rh->context = context;
188 rh->dispatch_bios = dispatch_bios;
189 rh->wakeup_workers = wakeup_workers;
190 rh->wakeup_all_recovery_waiters = wakeup_all_recovery_waiters;
191 rh->target_begin = target_begin;
192 rh->max_recovery = max_recovery;
193 rh->log = log;
194 rh->region_size = region_size;
195 rh->region_shift = __ffs(region_size);
196 rwlock_init(&rh->hash_lock);
197 rh->mask = nr_buckets - 1;
198 rh->nr_buckets = nr_buckets;
199
200 rh->shift = RH_HASH_SHIFT;
201 rh->prime = RH_HASH_MULT;
202
203 rh->buckets = vmalloc(nr_buckets * sizeof(*rh->buckets));
204 if (!rh->buckets) {
205 DMERR("unable to allocate region hash bucket memory");
206 kfree(rh);
207 return ERR_PTR(-ENOMEM);
208 }
209
210 for (i = 0; i < nr_buckets; i++)
211 INIT_LIST_HEAD(rh->buckets + i);
212
213 spin_lock_init(&rh->region_lock);
214 sema_init(&rh->recovery_count, 0);
215 atomic_set(&rh->recovery_in_flight, 0);
216 INIT_LIST_HEAD(&rh->clean_regions);
217 INIT_LIST_HEAD(&rh->quiesced_regions);
218 INIT_LIST_HEAD(&rh->recovered_regions);
219 INIT_LIST_HEAD(&rh->failed_recovered_regions);
220 rh->flush_failure = 0;
221
222 rh->region_pool = mempool_create_kmalloc_pool(MIN_REGIONS,
223 sizeof(struct dm_region));
224 if (!rh->region_pool) {
225 vfree(rh->buckets);
226 kfree(rh);
227 rh = ERR_PTR(-ENOMEM);
228 }
229
230 return rh;
231}
232EXPORT_SYMBOL_GPL(dm_region_hash_create);
233
234void dm_region_hash_destroy(struct dm_region_hash *rh)
235{
236 unsigned h;
237 struct dm_region *reg, *nreg;
238
239 BUG_ON(!list_empty(&rh->quiesced_regions));
240 for (h = 0; h < rh->nr_buckets; h++) {
241 list_for_each_entry_safe(reg, nreg, rh->buckets + h,
242 hash_list) {
243 BUG_ON(atomic_read(®->pending));
244 mempool_free(reg, rh->region_pool);
245 }
246 }
247
248 if (rh->log)
249 dm_dirty_log_destroy(rh->log);
250
251 mempool_destroy(rh->region_pool);
252 vfree(rh->buckets);
253 kfree(rh);
254}
255EXPORT_SYMBOL_GPL(dm_region_hash_destroy);
256
257struct dm_dirty_log *dm_rh_dirty_log(struct dm_region_hash *rh)
258{
259 return rh->log;
260}
261EXPORT_SYMBOL_GPL(dm_rh_dirty_log);
262
263static unsigned rh_hash(struct dm_region_hash *rh, region_t region)
264{
265 return (unsigned) ((region * rh->prime) >> rh->shift) & rh->mask;
266}
267
268static struct dm_region *__rh_lookup(struct dm_region_hash *rh, region_t region)
269{
270 struct dm_region *reg;
271 struct list_head *bucket = rh->buckets + rh_hash(rh, region);
272
273 list_for_each_entry(reg, bucket, hash_list)
274 if (reg->key == region)
275 return reg;
276
277 return NULL;
278}
279
280static void __rh_insert(struct dm_region_hash *rh, struct dm_region *reg)
281{
282 list_add(®->hash_list, rh->buckets + rh_hash(rh, reg->key));
283}
284
285static struct dm_region *__rh_alloc(struct dm_region_hash *rh, region_t region)
286{
287 struct dm_region *reg, *nreg;
288
289 nreg = mempool_alloc(rh->region_pool, GFP_ATOMIC);
290 if (unlikely(!nreg))
291 nreg = kmalloc(sizeof(*nreg), GFP_NOIO | __GFP_NOFAIL);
292
293 nreg->state = rh->log->type->in_sync(rh->log, region, 1) ?
294 DM_RH_CLEAN : DM_RH_NOSYNC;
295 nreg->rh = rh;
296 nreg->key = region;
297 INIT_LIST_HEAD(&nreg->list);
298 atomic_set(&nreg->pending, 0);
299 bio_list_init(&nreg->delayed_bios);
300
301 write_lock_irq(&rh->hash_lock);
302 reg = __rh_lookup(rh, region);
303 if (reg)
304
305 mempool_free(nreg, rh->region_pool);
306 else {
307 __rh_insert(rh, nreg);
308 if (nreg->state == DM_RH_CLEAN) {
309 spin_lock(&rh->region_lock);
310 list_add(&nreg->list, &rh->clean_regions);
311 spin_unlock(&rh->region_lock);
312 }
313
314 reg = nreg;
315 }
316 write_unlock_irq(&rh->hash_lock);
317
318 return reg;
319}
320
321static struct dm_region *__rh_find(struct dm_region_hash *rh, region_t region)
322{
323 struct dm_region *reg;
324
325 reg = __rh_lookup(rh, region);
326 if (!reg) {
327 read_unlock(&rh->hash_lock);
328 reg = __rh_alloc(rh, region);
329 read_lock(&rh->hash_lock);
330 }
331
332 return reg;
333}
334
335int dm_rh_get_state(struct dm_region_hash *rh, region_t region, int may_block)
336{
337 int r;
338 struct dm_region *reg;
339
340 read_lock(&rh->hash_lock);
341 reg = __rh_lookup(rh, region);
342 read_unlock(&rh->hash_lock);
343
344 if (reg)
345 return reg->state;
346
347
348
349
350
351 r = rh->log->type->in_sync(rh->log, region, may_block);
352
353
354
355
356
357 return r == 1 ? DM_RH_CLEAN : DM_RH_NOSYNC;
358}
359EXPORT_SYMBOL_GPL(dm_rh_get_state);
360
361static void complete_resync_work(struct dm_region *reg, int success)
362{
363 struct dm_region_hash *rh = reg->rh;
364
365 rh->log->type->set_region_sync(rh->log, reg->key, success);
366
367
368
369
370
371
372
373
374
375
376 rh->dispatch_bios(rh->context, ®->delayed_bios);
377 if (atomic_dec_and_test(&rh->recovery_in_flight))
378 rh->wakeup_all_recovery_waiters(rh->context);
379 up(&rh->recovery_count);
380}
381
382
383
384
385
386
387
388
389
390
391
392void dm_rh_mark_nosync(struct dm_region_hash *rh, struct bio *bio)
393{
394 unsigned long flags;
395 struct dm_dirty_log *log = rh->log;
396 struct dm_region *reg;
397 region_t region = dm_rh_bio_to_region(rh, bio);
398 int recovering = 0;
399
400 if (bio->bi_rw & REQ_FLUSH) {
401 rh->flush_failure = 1;
402 return;
403 }
404
405 if (bio->bi_rw & REQ_DISCARD)
406 return;
407
408
409 log->type->set_region_sync(log, region, 0);
410
411 read_lock(&rh->hash_lock);
412 reg = __rh_find(rh, region);
413 read_unlock(&rh->hash_lock);
414
415
416 BUG_ON(!reg);
417 BUG_ON(!list_empty(®->list));
418
419 spin_lock_irqsave(&rh->region_lock, flags);
420
421
422
423
424
425
426
427 recovering = (reg->state == DM_RH_RECOVERING);
428 reg->state = DM_RH_NOSYNC;
429 BUG_ON(!list_empty(®->list));
430 spin_unlock_irqrestore(&rh->region_lock, flags);
431
432 if (recovering)
433 complete_resync_work(reg, 0);
434}
435EXPORT_SYMBOL_GPL(dm_rh_mark_nosync);
436
437void dm_rh_update_states(struct dm_region_hash *rh, int errors_handled)
438{
439 struct dm_region *reg, *next;
440
441 LIST_HEAD(clean);
442 LIST_HEAD(recovered);
443 LIST_HEAD(failed_recovered);
444
445
446
447
448 write_lock_irq(&rh->hash_lock);
449 spin_lock(&rh->region_lock);
450 if (!list_empty(&rh->clean_regions)) {
451 list_splice_init(&rh->clean_regions, &clean);
452
453 list_for_each_entry(reg, &clean, list)
454 list_del(®->hash_list);
455 }
456
457 if (!list_empty(&rh->recovered_regions)) {
458 list_splice_init(&rh->recovered_regions, &recovered);
459
460 list_for_each_entry(reg, &recovered, list)
461 list_del(®->hash_list);
462 }
463
464 if (!list_empty(&rh->failed_recovered_regions)) {
465 list_splice_init(&rh->failed_recovered_regions,
466 &failed_recovered);
467
468 list_for_each_entry(reg, &failed_recovered, list)
469 list_del(®->hash_list);
470 }
471
472 spin_unlock(&rh->region_lock);
473 write_unlock_irq(&rh->hash_lock);
474
475
476
477
478
479
480 list_for_each_entry_safe(reg, next, &recovered, list) {
481 rh->log->type->clear_region(rh->log, reg->key);
482 complete_resync_work(reg, 1);
483 mempool_free(reg, rh->region_pool);
484 }
485
486 list_for_each_entry_safe(reg, next, &failed_recovered, list) {
487 complete_resync_work(reg, errors_handled ? 0 : 1);
488 mempool_free(reg, rh->region_pool);
489 }
490
491 list_for_each_entry_safe(reg, next, &clean, list) {
492 rh->log->type->clear_region(rh->log, reg->key);
493 mempool_free(reg, rh->region_pool);
494 }
495
496 rh->log->type->flush(rh->log);
497}
498EXPORT_SYMBOL_GPL(dm_rh_update_states);
499
500static void rh_inc(struct dm_region_hash *rh, region_t region)
501{
502 struct dm_region *reg;
503
504 read_lock(&rh->hash_lock);
505 reg = __rh_find(rh, region);
506
507 spin_lock_irq(&rh->region_lock);
508 atomic_inc(®->pending);
509
510 if (reg->state == DM_RH_CLEAN) {
511 reg->state = DM_RH_DIRTY;
512 list_del_init(®->list);
513 spin_unlock_irq(&rh->region_lock);
514
515 rh->log->type->mark_region(rh->log, reg->key);
516 } else
517 spin_unlock_irq(&rh->region_lock);
518
519
520 read_unlock(&rh->hash_lock);
521}
522
523void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios)
524{
525 struct bio *bio;
526
527 for (bio = bios->head; bio; bio = bio->bi_next) {
528 if (bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))
529 continue;
530 rh_inc(rh, dm_rh_bio_to_region(rh, bio));
531 }
532}
533EXPORT_SYMBOL_GPL(dm_rh_inc_pending);
534
535void dm_rh_dec(struct dm_region_hash *rh, region_t region)
536{
537 unsigned long flags;
538 struct dm_region *reg;
539 int should_wake = 0;
540
541 read_lock(&rh->hash_lock);
542 reg = __rh_lookup(rh, region);
543 read_unlock(&rh->hash_lock);
544
545 spin_lock_irqsave(&rh->region_lock, flags);
546 if (atomic_dec_and_test(®->pending)) {
547
548
549
550
551
552
553
554
555
556
557
558
559 if (unlikely(rh->flush_failure)) {
560
561
562
563
564
565 reg->state = DM_RH_NOSYNC;
566 } else if (reg->state == DM_RH_RECOVERING) {
567 list_add_tail(®->list, &rh->quiesced_regions);
568 } else if (reg->state == DM_RH_DIRTY) {
569 reg->state = DM_RH_CLEAN;
570 list_add(®->list, &rh->clean_regions);
571 }
572 should_wake = 1;
573 }
574 spin_unlock_irqrestore(&rh->region_lock, flags);
575
576 if (should_wake)
577 rh->wakeup_workers(rh->context);
578}
579EXPORT_SYMBOL_GPL(dm_rh_dec);
580
581
582
583
584static int __rh_recovery_prepare(struct dm_region_hash *rh)
585{
586 int r;
587 region_t region;
588 struct dm_region *reg;
589
590
591
592
593 r = rh->log->type->get_resync_work(rh->log, ®ion);
594 if (r <= 0)
595 return r;
596
597
598
599
600
601 read_lock(&rh->hash_lock);
602 reg = __rh_find(rh, region);
603 read_unlock(&rh->hash_lock);
604
605 spin_lock_irq(&rh->region_lock);
606 reg->state = DM_RH_RECOVERING;
607
608
609 if (atomic_read(®->pending))
610 list_del_init(®->list);
611 else
612 list_move(®->list, &rh->quiesced_regions);
613
614 spin_unlock_irq(&rh->region_lock);
615
616 return 1;
617}
618
619void dm_rh_recovery_prepare(struct dm_region_hash *rh)
620{
621
622 atomic_inc(&rh->recovery_in_flight);
623
624 while (!down_trylock(&rh->recovery_count)) {
625 atomic_inc(&rh->recovery_in_flight);
626 if (__rh_recovery_prepare(rh) <= 0) {
627 atomic_dec(&rh->recovery_in_flight);
628 up(&rh->recovery_count);
629 break;
630 }
631 }
632
633
634 if (atomic_dec_and_test(&rh->recovery_in_flight))
635 rh->wakeup_all_recovery_waiters(rh->context);
636}
637EXPORT_SYMBOL_GPL(dm_rh_recovery_prepare);
638
639
640
641
642struct dm_region *dm_rh_recovery_start(struct dm_region_hash *rh)
643{
644 struct dm_region *reg = NULL;
645
646 spin_lock_irq(&rh->region_lock);
647 if (!list_empty(&rh->quiesced_regions)) {
648 reg = list_entry(rh->quiesced_regions.next,
649 struct dm_region, list);
650 list_del_init(®->list);
651 }
652 spin_unlock_irq(&rh->region_lock);
653
654 return reg;
655}
656EXPORT_SYMBOL_GPL(dm_rh_recovery_start);
657
658void dm_rh_recovery_end(struct dm_region *reg, int success)
659{
660 struct dm_region_hash *rh = reg->rh;
661
662 spin_lock_irq(&rh->region_lock);
663 if (success)
664 list_add(®->list, ®->rh->recovered_regions);
665 else
666 list_add(®->list, ®->rh->failed_recovered_regions);
667
668 spin_unlock_irq(&rh->region_lock);
669
670 rh->wakeup_workers(rh->context);
671}
672EXPORT_SYMBOL_GPL(dm_rh_recovery_end);
673
674
675int dm_rh_recovery_in_flight(struct dm_region_hash *rh)
676{
677 return atomic_read(&rh->recovery_in_flight);
678}
679EXPORT_SYMBOL_GPL(dm_rh_recovery_in_flight);
680
681int dm_rh_flush(struct dm_region_hash *rh)
682{
683 return rh->log->type->flush(rh->log);
684}
685EXPORT_SYMBOL_GPL(dm_rh_flush);
686
687void dm_rh_delay(struct dm_region_hash *rh, struct bio *bio)
688{
689 struct dm_region *reg;
690
691 read_lock(&rh->hash_lock);
692 reg = __rh_find(rh, dm_rh_bio_to_region(rh, bio));
693 bio_list_add(®->delayed_bios, bio);
694 read_unlock(&rh->hash_lock);
695}
696EXPORT_SYMBOL_GPL(dm_rh_delay);
697
698void dm_rh_stop_recovery(struct dm_region_hash *rh)
699{
700 int i;
701
702
703 for (i = 0; i < rh->max_recovery; i++)
704 down(&rh->recovery_count);
705}
706EXPORT_SYMBOL_GPL(dm_rh_stop_recovery);
707
708void dm_rh_start_recovery(struct dm_region_hash *rh)
709{
710 int i;
711
712 for (i = 0; i < rh->max_recovery; i++)
713 up(&rh->recovery_count);
714
715 rh->wakeup_workers(rh->context);
716}
717EXPORT_SYMBOL_GPL(dm_rh_start_recovery);
718
719MODULE_DESCRIPTION(DM_NAME " region hash");
720MODULE_AUTHOR("Joe Thornber/Heinz Mauelshagen <dm-devel@redhat.com>");
721MODULE_LICENSE("GPL");
722