/*
 * Device-mapper region hash: tracks the clean/dirty/recovering state of
 * fixed-size regions of a target (e.g. dm-mirror) on behalf of a dirty log.
 */
#include <linux/dm-dirty-log.h>
#include <linux/dm-region-hash.h>

#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#include "dm.h"

#define DM_MSG_PREFIX	"region hash"

/*
 * The target splits its address space into fixed-size regions.  Each
 * region is in one of three states: DM_RH_CLEAN, DM_RH_DIRTY or
 * DM_RH_NOSYNC, with DM_RH_RECOVERING used while a region is being
 * resynchronised.  Clean regions need not stay in the hash.
 *
 * Besides living in the hash table, a region may sit on one of the
 * following lists:
 *
 *   clean_regions: regions with no I/O pending and in sync; they are of
 *   no further interest and dm_rh_update_states() removes them from the
 *   hash.
 *
 *   quiesced_regions: regions that have been quiesced and are ready for
 *   recovery; dm_rh_recovery_start() hands them to the caller, which
 *   schedules the actual recovery I/O.
 *
 *   recovered_regions / failed_recovered_regions: regions whose recovery
 *   has finished (successfully or not); dm_rh_update_states() dispatches
 *   any delayed bios, releases a recovery slot and drops them from the
 *   hash.
 *
 * Two locks protect the structure:
 *
 *   hash_lock - a rwlock protecting only the hash table itself.
 *
 *   region_lock - a spinlock protecting the region lists above and the
 *   'state', 'list' and 'delayed_bios' fields of each region; it may be
 *   taken from interrupt context, so other users disable local irqs.
 */

struct dm_region_hash {
	uint32_t region_size;
	unsigned region_shift;

	/* holds persistent region state */
	struct dm_dirty_log *log;

	/* hash table */
	rwlock_t hash_lock;
	mempool_t *region_pool;
	unsigned mask;
	unsigned nr_buckets;
	unsigned prime;
	unsigned shift;
	struct list_head *buckets;

	/* maximum number of regions recovered in parallel */
	unsigned max_recovery;

	spinlock_t region_lock;
	atomic_t recovery_in_flight;
	struct semaphore recovery_count;
	struct list_head clean_regions;
	struct list_head quiesced_regions;
	struct list_head recovered_regions;
	struct list_head failed_recovered_regions;

	/*
	 * If there was a flush failure no regions can be marked clean.
	 */
	int flush_failure;

	void *context;
	sector_t target_begin;

	/* Callback function to schedule bios writes */
	void (*dispatch_bios)(void *context, struct bio_list *bios);

	/* Callback function to wakeup callers worker thread. */
	void (*wakeup_workers)(void *context);

	/* Callback function to wakeup callers recovery waiters. */
	void (*wakeup_all_recovery_waiters)(void *context);
};

struct dm_region {
	struct dm_region_hash *rh;	/* owning region hash */
	region_t key;
	int state;			/* DM_RH_CLEAN/DIRTY/NOSYNC/RECOVERING */

	struct list_head hash_list;	/* entry in a hash bucket */
	struct list_head list;		/* entry on one of rh's region lists */

	atomic_t pending;		/* writes in flight to this region */
	struct bio_list delayed_bios;	/* bios held back during recovery */
};

/*
 * Conversion fns
 */
static region_t dm_rh_sector_to_region(struct dm_region_hash *rh, sector_t sector)
{
	return sector >> rh->region_shift;
}

sector_t dm_rh_region_to_sector(struct dm_region_hash *rh, region_t region)
{
	return region << rh->region_shift;
}
EXPORT_SYMBOL_GPL(dm_rh_region_to_sector);

region_t dm_rh_bio_to_region(struct dm_region_hash *rh, struct bio *bio)
{
	return dm_rh_sector_to_region(rh, bio->bi_iter.bi_sector -
				      rh->target_begin);
}
EXPORT_SYMBOL_GPL(dm_rh_bio_to_region);

void *dm_rh_region_context(struct dm_region *reg)
{
	return reg->rh->context;
}
EXPORT_SYMBOL_GPL(dm_rh_region_context);

region_t dm_rh_get_region_key(struct dm_region *reg)
{
	return reg->key;
}
EXPORT_SYMBOL_GPL(dm_rh_get_region_key);

sector_t dm_rh_get_region_size(struct dm_region_hash *rh)
{
	return rh->region_size;
}
EXPORT_SYMBOL_GPL(dm_rh_get_region_size);

/*
 * Multiplicative hash parameters used by rh_hash() below.
 */
#define RH_HASH_MULT 2654435387U
#define RH_HASH_SHIFT 12

#define MIN_REGIONS 64
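/*
 * dm_region_hash_create
 * @context: opaque pointer handed back to the three callbacks below
 * @dispatch_bios: callback used to (re)issue bios delayed on a region
 * @wakeup_workers: callback to kick the caller's worker thread
 * @wakeup_all_recovery_waiters: callback to wake anyone waiting for all
 *	in-flight recovery to finish
 * @target_begin: first sector of the target (subtracted in
 *	dm_rh_bio_to_region())
 * @max_recovery: maximum number of regions recovered in parallel
 * @log: dirty log providing persistent region state
 * @region_size: region size in sectors (must be a power of two, since
 *	region_shift is derived with __ffs())
 * @nr_regions: total number of regions covered by the target
 *
 * Returns the new region hash or an ERR_PTR() on allocation failure.
 */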
struct dm_region_hash *dm_region_hash_create(
		void *context, void (*dispatch_bios)(void *context,
						     struct bio_list *bios),
		void (*wakeup_workers)(void *context),
		void (*wakeup_all_recovery_waiters)(void *context),
		sector_t target_begin, unsigned max_recovery,
		struct dm_dirty_log *log, uint32_t region_size,
		region_t nr_regions)
{
	struct dm_region_hash *rh;
	unsigned nr_buckets, max_buckets;
	size_t i;

	/*
	 * Calculate a suitable number of buckets for our hash
	 * table.
	 */
	max_buckets = nr_regions >> 6;
	for (nr_buckets = 128u; nr_buckets < max_buckets; nr_buckets <<= 1)
		;
	nr_buckets >>= 1;

	rh = kmalloc(sizeof(*rh), GFP_KERNEL);
	if (!rh) {
		DMERR("unable to allocate region hash memory");
		return ERR_PTR(-ENOMEM);
	}

	rh->context = context;
	rh->dispatch_bios = dispatch_bios;
	rh->wakeup_workers = wakeup_workers;
	rh->wakeup_all_recovery_waiters = wakeup_all_recovery_waiters;
	rh->target_begin = target_begin;
	rh->max_recovery = max_recovery;
	rh->log = log;
	rh->region_size = region_size;
	rh->region_shift = __ffs(region_size);
	rwlock_init(&rh->hash_lock);
	rh->mask = nr_buckets - 1;
	rh->nr_buckets = nr_buckets;

	rh->shift = RH_HASH_SHIFT;
	rh->prime = RH_HASH_MULT;

	rh->buckets = vmalloc(nr_buckets * sizeof(*rh->buckets));
	if (!rh->buckets) {
		DMERR("unable to allocate region hash bucket memory");
		kfree(rh);
		return ERR_PTR(-ENOMEM);
	}

	for (i = 0; i < nr_buckets; i++)
		INIT_LIST_HEAD(rh->buckets + i);

	spin_lock_init(&rh->region_lock);
	sema_init(&rh->recovery_count, 0);
	atomic_set(&rh->recovery_in_flight, 0);
	INIT_LIST_HEAD(&rh->clean_regions);
	INIT_LIST_HEAD(&rh->quiesced_regions);
	INIT_LIST_HEAD(&rh->recovered_regions);
	INIT_LIST_HEAD(&rh->failed_recovered_regions);
	rh->flush_failure = 0;

	rh->region_pool = mempool_create_kmalloc_pool(MIN_REGIONS,
						      sizeof(struct dm_region));
	if (!rh->region_pool) {
		vfree(rh->buckets);
		kfree(rh);
		rh = ERR_PTR(-ENOMEM);
	}

	return rh;
}
EXPORT_SYMBOL_GPL(dm_region_hash_create);
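/*
 * Typical caller-side flow (illustrative sketch only; 'ms', 'ti',
 * MAX_RECOVERY and the my_*() helpers are hypothetical caller names,
 * not part of this API):
 *
 *	rh = dm_region_hash_create(ms, my_dispatch_bios, my_wakeup_workers,
 *				   my_wakeup_recovery_waiters, ti->begin,
 *				   MAX_RECOVERY, log, region_size, nr_regions);
 *
 *	dm_rh_inc_pending(rh, &writes);     before submitting the writes
 *	...
 *	dm_rh_dec(rh, region);              as each write completes
 *
 *	dm_rh_recovery_prepare(rh);         from the caller's worker
 *	while ((reg = dm_rh_recovery_start(rh)))
 *		my_recover(reg);            ends with dm_rh_recovery_end()
 *
 *	dm_rh_update_states(rh, errors_handled);
 *	dm_region_hash_destroy(rh);
 */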

void dm_region_hash_destroy(struct dm_region_hash *rh)
{
	unsigned h;
	struct dm_region *reg, *nreg;

	BUG_ON(!list_empty(&rh->quiesced_regions));
	for (h = 0; h < rh->nr_buckets; h++) {
		list_for_each_entry_safe(reg, nreg, rh->buckets + h,
					 hash_list) {
			BUG_ON(atomic_read(&reg->pending));
			mempool_free(reg, rh->region_pool);
		}
	}

	if (rh->log)
		dm_dirty_log_destroy(rh->log);

	mempool_destroy(rh->region_pool);
	vfree(rh->buckets);
	kfree(rh);
}
EXPORT_SYMBOL_GPL(dm_region_hash_destroy);

struct dm_dirty_log *dm_rh_dirty_log(struct dm_region_hash *rh)
{
	return rh->log;
}
EXPORT_SYMBOL_GPL(dm_rh_dirty_log);

static unsigned rh_hash(struct dm_region_hash *rh, region_t region)
{
	return (unsigned) ((region * rh->prime) >> rh->shift) & rh->mask;
}

static struct dm_region *__rh_lookup(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *reg;
	struct list_head *bucket = rh->buckets + rh_hash(rh, region);

	list_for_each_entry(reg, bucket, hash_list)
		if (reg->key == region)
			return reg;

	return NULL;
}

static void __rh_insert(struct dm_region_hash *rh, struct dm_region *reg)
{
	list_add(&reg->hash_list, rh->buckets + rh_hash(rh, reg->key));
}

static struct dm_region *__rh_alloc(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *reg, *nreg;

	nreg = mempool_alloc(rh->region_pool, GFP_ATOMIC);
	if (unlikely(!nreg))
		nreg = kmalloc(sizeof(*nreg), GFP_NOIO | __GFP_NOFAIL);

	nreg->state = rh->log->type->in_sync(rh->log, region, 1) ?
		      DM_RH_CLEAN : DM_RH_NOSYNC;
	nreg->rh = rh;
	nreg->key = region;
	INIT_LIST_HEAD(&nreg->list);
	atomic_set(&nreg->pending, 0);
	bio_list_init(&nreg->delayed_bios);

	write_lock_irq(&rh->hash_lock);
	reg = __rh_lookup(rh, region);
	if (reg)
		/* We lost the race. */
		mempool_free(nreg, rh->region_pool);
	else {
		__rh_insert(rh, nreg);
		if (nreg->state == DM_RH_CLEAN) {
			spin_lock(&rh->region_lock);
			list_add(&nreg->list, &rh->clean_regions);
			spin_unlock(&rh->region_lock);
		}

		reg = nreg;
	}
	write_unlock_irq(&rh->hash_lock);

	return reg;
}

static struct dm_region *__rh_find(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *reg;

	reg = __rh_lookup(rh, region);
	if (!reg) {
		/*
		 * Drop the caller's read lock so __rh_alloc() can take
		 * the write lock; it re-checks for a racing insert.
		 */
		read_unlock(&rh->hash_lock);
		reg = __rh_alloc(rh, region);
		read_lock(&rh->hash_lock);
	}

	return reg;
}

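/*
 * dm_rh_get_state
 * @rh: region hash
 * @region: region to query
 * @may_block: passed through to the dirty log's in_sync() method when
 *	the region is not in the hash
 *
 * Returns the in-memory state of the region if it is present in the
 * hash; otherwise falls back to the dirty log and reports either
 * DM_RH_CLEAN or DM_RH_NOSYNC.
 */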
int dm_rh_get_state(struct dm_region_hash *rh, region_t region, int may_block)
{
	int r;
	struct dm_region *reg;

	read_lock(&rh->hash_lock);
	reg = __rh_lookup(rh, region);
	read_unlock(&rh->hash_lock);

	if (reg)
		return reg->state;

	/*
	 * The region wasn't in the hash, so we fall back to the
	 * dirty log.
	 */
	r = rh->log->type->in_sync(rh->log, region, may_block);

	/*
	 * Any error from the dirty log (eg. -EWOULDBLOCK) gets
	 * taken as a DM_RH_NOSYNC
	 */
	return r == 1 ? DM_RH_CLEAN : DM_RH_NOSYNC;
}
EXPORT_SYMBOL_GPL(dm_rh_get_state);

static void complete_resync_work(struct dm_region *reg, int success)
{
	struct dm_region_hash *rh = reg->rh;

	rh->log->type->set_region_sync(rh->log, reg->key, success);

	/*
	 * Dispatch the delayed bios before waking any recovery
	 * waiters, so that a suspending caller that waits for
	 * recovery to drain sees the bios queued first.
	 */
	rh->dispatch_bios(rh->context, &reg->delayed_bios);
	if (atomic_dec_and_test(&rh->recovery_in_flight))
		rh->wakeup_all_recovery_waiters(rh->context);
	up(&rh->recovery_count);
}

/*
 * dm_rh_mark_nosync
 * @rh: region hash
 * @bio: a write that failed on some but possibly not all devices
 *
 * The bio may still be ended successfully towards the caller, but the
 * region it touched must not be marked in-sync again, so force its
 * state to DM_RH_NOSYNC.
 *
 * This function is _not_ safe in interrupt context!
 */
void dm_rh_mark_nosync(struct dm_region_hash *rh, struct bio *bio)
{
	unsigned long flags;
	struct dm_dirty_log *log = rh->log;
	struct dm_region *reg;
	region_t region = dm_rh_bio_to_region(rh, bio);
	int recovering = 0;

	if (bio->bi_opf & REQ_PREFLUSH) {
		rh->flush_failure = 1;
		return;
	}

	if (bio_op(bio) == REQ_OP_DISCARD)
		return;

	/* We must inform the log that the sync count has changed. */
	log->type->set_region_sync(log, region, 0);

	read_lock(&rh->hash_lock);
	reg = __rh_find(rh, region);
	read_unlock(&rh->hash_lock);

	/* region hash entry should exist because write was in-flight */
	BUG_ON(!reg);
	BUG_ON(!list_empty(&reg->list));

	spin_lock_irqsave(&rh->region_lock, flags);
	/*
	 * Possible cases:
	 *   1) DM_RH_DIRTY
	 *   2) DM_RH_NOSYNC: was dirty, other preceding writes failed
	 *   3) DM_RH_RECOVERING: flushing pending writes
	 * In every case the region should not be connected to a list.
	 */
	recovering = (reg->state == DM_RH_RECOVERING);
	reg->state = DM_RH_NOSYNC;
	BUG_ON(!list_empty(&reg->list));
	spin_unlock_irqrestore(&rh->region_lock, flags);

	if (recovering)
		complete_resync_work(reg, 0);
}
EXPORT_SYMBOL_GPL(dm_rh_mark_nosync);

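/*
 * dm_rh_update_states
 * @rh: region hash
 * @errors_handled: if non-zero, regions whose recovery failed are left
 *	marked out-of-sync rather than being declared in-sync
 *
 * Reaps the clean, recovered and failed-recovered lists: clears or sets
 * the corresponding bits in the dirty log, dispatches any delayed bios,
 * frees the region structures and finally flushes the log.
 */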
void dm_rh_update_states(struct dm_region_hash *rh, int errors_handled)
{
	struct dm_region *reg, *next;

	LIST_HEAD(clean);
	LIST_HEAD(recovered);
	LIST_HEAD(failed_recovered);

	/*
	 * Quickly grab the lists and remove any regions from the hash.
	 */
	write_lock_irq(&rh->hash_lock);
	spin_lock(&rh->region_lock);
	if (!list_empty(&rh->clean_regions)) {
		list_splice_init(&rh->clean_regions, &clean);

		list_for_each_entry(reg, &clean, list)
			list_del(&reg->hash_list);
	}

	if (!list_empty(&rh->recovered_regions)) {
		list_splice_init(&rh->recovered_regions, &recovered);

		list_for_each_entry(reg, &recovered, list)
			list_del(&reg->hash_list);
	}

	if (!list_empty(&rh->failed_recovered_regions)) {
		list_splice_init(&rh->failed_recovered_regions,
				 &failed_recovered);

		list_for_each_entry(reg, &failed_recovered, list)
			list_del(&reg->hash_list);
	}

	spin_unlock(&rh->region_lock);
	write_unlock_irq(&rh->hash_lock);

	/*
	 * All the regions on the recovered and clean lists have
	 * now been pulled out of the system, so no need to do
	 * any more locking.
	 */
	list_for_each_entry_safe(reg, next, &recovered, list) {
		rh->log->type->clear_region(rh->log, reg->key);
		complete_resync_work(reg, 1);
		mempool_free(reg, rh->region_pool);
	}

	list_for_each_entry_safe(reg, next, &failed_recovered, list) {
		complete_resync_work(reg, errors_handled ? 0 : 1);
		mempool_free(reg, rh->region_pool);
	}

	list_for_each_entry_safe(reg, next, &clean, list) {
		rh->log->type->clear_region(rh->log, reg->key);
		mempool_free(reg, rh->region_pool);
	}

	rh->log->type->flush(rh->log);
}
EXPORT_SYMBOL_GPL(dm_rh_update_states);

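/*
 * Pending-write accounting: callers bump a region's pending count (and
 * mark it dirty in the log) via dm_rh_inc_pending() before submitting
 * writes, and drop it with dm_rh_dec() as each write completes.  When
 * the count reaches zero the region either becomes clean again, is
 * queued for recovery, or is left out-of-sync after a flush failure.
 */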
static void rh_inc(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *reg;

	read_lock(&rh->hash_lock);
	reg = __rh_find(rh, region);

	spin_lock_irq(&rh->region_lock);
	atomic_inc(&reg->pending);

	if (reg->state == DM_RH_CLEAN) {
		reg->state = DM_RH_DIRTY;
		list_del_init(&reg->list);	/* take off the clean list */
		spin_unlock_irq(&rh->region_lock);

		rh->log->type->mark_region(rh->log, reg->key);
	} else
		spin_unlock_irq(&rh->region_lock);

	read_unlock(&rh->hash_lock);
}

void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios)
{
	struct bio *bio;

	for (bio = bios->head; bio; bio = bio->bi_next) {
		if (bio->bi_opf & REQ_PREFLUSH || bio_op(bio) == REQ_OP_DISCARD)
			continue;
		rh_inc(rh, dm_rh_bio_to_region(rh, bio));
	}
}
EXPORT_SYMBOL_GPL(dm_rh_inc_pending);

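/*
 * dm_rh_dec
 * @rh: region hash
 * @region: region a write has just completed on
 *
 * Drops the region's pending count.  When it reaches zero the region is
 * moved to the appropriate list (clean or quiesced) unless a flush
 * failure forces it to stay out-of-sync, and the caller's worker is
 * woken.  Uses spin_lock_irqsave(), so it may be called from the bio
 * completion path.
 */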
void dm_rh_dec(struct dm_region_hash *rh, region_t region)
{
	unsigned long flags;
	struct dm_region *reg;
	int should_wake = 0;

	read_lock(&rh->hash_lock);
	reg = __rh_lookup(rh, region);
	read_unlock(&rh->hash_lock);

	spin_lock_irqsave(&rh->region_lock, flags);
	if (atomic_dec_and_test(&reg->pending)) {
		/*
		 * There is no pending I/O for this region.
		 * We can move the region to the list matching its next
		 * action; at this point it is not connected to any list.
		 *
		 * If the state is DM_RH_NOSYNC, the region must be kept
		 * off the clean list; its hash entry remains in memory
		 * until it is recovered or the map is reloaded.
		 */

		/* do nothing for DM_RH_NOSYNC */
		if (unlikely(rh->flush_failure)) {
			/*
			 * If a write flush failed some time ago, we
			 * don't know whether or not this write made it
			 * to the disk, so we must resync the device.
			 */
			reg->state = DM_RH_NOSYNC;
		} else if (reg->state == DM_RH_RECOVERING) {
			list_add_tail(&reg->list, &rh->quiesced_regions);
		} else if (reg->state == DM_RH_DIRTY) {
			reg->state = DM_RH_CLEAN;
			list_add(&reg->list, &rh->clean_regions);
		}
		should_wake = 1;
	}
	spin_unlock_irqrestore(&rh->region_lock, flags);

	if (should_wake)
		rh->wakeup_workers(rh->context);
}
EXPORT_SYMBOL_GPL(dm_rh_dec);

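/*
 * Recovery lifecycle, as driven by the caller:
 *
 *   dm_rh_recovery_prepare() pulls work from the dirty log and quiesces
 *   the chosen regions (state DM_RH_RECOVERING), throttled to
 *   max_recovery regions by the recovery_count semaphore.
 *
 *   dm_rh_recovery_start() hands back one quiesced region at a time for
 *   the caller to actually resynchronise.
 *
 *   dm_rh_recovery_end() records success or failure; the region is then
 *   reaped by the next dm_rh_update_states() call.
 */
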
/*
 * Starts quiescing a region in preparation for recovery.
 */
static int __rh_recovery_prepare(struct dm_region_hash *rh)
{
	int r;
	region_t region;
	struct dm_region *reg;

	/*
	 * Ask the dirty log what's next.
	 */
	r = rh->log->type->get_resync_work(rh->log, &region);
	if (r <= 0)
		return r;

	/*
	 * Get this region, and start it quiescing by setting the
	 * recovering flag.
	 */
	read_lock(&rh->hash_lock);
	reg = __rh_find(rh, region);
	read_unlock(&rh->hash_lock);

	spin_lock_irq(&rh->region_lock);
	reg->state = DM_RH_RECOVERING;

	/* Already quiesced ? */
	if (atomic_read(&reg->pending))
		list_del_init(&reg->list);
	else
		list_move(&reg->list, &rh->quiesced_regions);

	spin_unlock_irq(&rh->region_lock);

	return 1;
}

void dm_rh_recovery_prepare(struct dm_region_hash *rh)
{
	/* Extra reference to avoid race with dm_rh_stop_recovery */
	atomic_inc(&rh->recovery_in_flight);

	while (!down_trylock(&rh->recovery_count)) {
		atomic_inc(&rh->recovery_in_flight);
		if (__rh_recovery_prepare(rh) <= 0) {
			atomic_dec(&rh->recovery_in_flight);
			up(&rh->recovery_count);
			break;
		}
	}

	/* Drop the extra reference */
	if (atomic_dec_and_test(&rh->recovery_in_flight))
		rh->wakeup_all_recovery_waiters(rh->context);
}
EXPORT_SYMBOL_GPL(dm_rh_recovery_prepare);

/*
 * Return a quiesced region ready for recovery, or NULL if none.
 */
struct dm_region *dm_rh_recovery_start(struct dm_region_hash *rh)
{
	struct dm_region *reg = NULL;

	spin_lock_irq(&rh->region_lock);
	if (!list_empty(&rh->quiesced_regions)) {
		reg = list_entry(rh->quiesced_regions.next,
				 struct dm_region, list);
		list_del_init(&reg->list);
	}
	spin_unlock_irq(&rh->region_lock);

	return reg;
}
EXPORT_SYMBOL_GPL(dm_rh_recovery_start);

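/*
 * dm_rh_recovery_end
 * @reg: region handed out by dm_rh_recovery_start()
 * @success: non-zero if the region was resynchronised successfully
 *
 * Queues the region on the recovered (or failed_recovered) list and
 * wakes the caller's worker; dm_rh_update_states() finishes the job.
 */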
void dm_rh_recovery_end(struct dm_region *reg, int success)
{
	struct dm_region_hash *rh = reg->rh;

	spin_lock_irq(&rh->region_lock);
	if (success)
		list_add(&reg->list, &reg->rh->recovered_regions);
	else
		list_add(&reg->list, &reg->rh->failed_recovered_regions);

	spin_unlock_irq(&rh->region_lock);

	rh->wakeup_workers(rh->context);
}
EXPORT_SYMBOL_GPL(dm_rh_recovery_end);

/* Return recovery in flight count. */
int dm_rh_recovery_in_flight(struct dm_region_hash *rh)
{
	return atomic_read(&rh->recovery_in_flight);
}
EXPORT_SYMBOL_GPL(dm_rh_recovery_in_flight);

int dm_rh_flush(struct dm_region_hash *rh)
{
	return rh->log->type->flush(rh->log);
}
EXPORT_SYMBOL_GPL(dm_rh_flush);

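/*
 * dm_rh_delay
 * @rh: region hash
 * @bio: bio that must wait for its region
 *
 * Queues the bio on its region's delayed list; it is re-dispatched via
 * the dispatch_bios callback once recovery of that region completes
 * (see complete_resync_work()).
 */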
void dm_rh_delay(struct dm_region_hash *rh, struct bio *bio)
{
	struct dm_region *reg;

	read_lock(&rh->hash_lock);
	reg = __rh_find(rh, dm_rh_bio_to_region(rh, bio));
	bio_list_add(&reg->delayed_bios, bio);
	read_unlock(&rh->hash_lock);
}
EXPORT_SYMBOL_GPL(dm_rh_delay);

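/*
 * dm_rh_stop_recovery() drains all max_recovery semaphore slots so no
 * new recovery can be prepared and in-progress recoveries have released
 * their slot; dm_rh_start_recovery() returns the slots and kicks the
 * caller's worker again.
 */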
void dm_rh_stop_recovery(struct dm_region_hash *rh)
{
	int i;

	/* wait for any recovering regions */
	for (i = 0; i < rh->max_recovery; i++)
		down(&rh->recovery_count);
}
EXPORT_SYMBOL_GPL(dm_rh_stop_recovery);

void dm_rh_start_recovery(struct dm_region_hash *rh)
{
	int i;

	for (i = 0; i < rh->max_recovery; i++)
		up(&rh->recovery_count);

	rh->wakeup_workers(rh->context);
}
EXPORT_SYMBOL_GPL(dm_rh_start_recovery);

MODULE_DESCRIPTION(DM_NAME " region hash");
MODULE_AUTHOR("Joe Thornber/Heinz Mauelshagen <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");