/*
 * Copyright (C) 2003 Sistina Software Limited.
 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include <linux/dm-dirty-log.h>
#include <linux/dm-region-hash.h>

#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/vmalloc.h>

#include "dm.h"

#define	DM_MSG_PREFIX	"region hash"

/*-----------------------------------------------------------------
 * Region hash
 *
 * The mirror splits itself up into discrete regions.  Each
 * region can be in one of three states: clean, dirty,
 * nosync.  There is no need to put clean regions in the hash.
 *
 * In addition to being present in the hash table a region _may_
 * be present on one of three lists.
 *
 *   clean_regions: Regions on this list have no io pending to
 *   them, they are in sync, we are no longer interested in them,
 *   they are dull.  dm_rh_update_states() will remove them from
 *   the hash table.
 *
 *   quiesced_regions: These regions have been spun down, ready
 *   for recovery.  dm_rh_recovery_start() will remove regions from
 *   this list and hand them to the caller, which will schedule the
 *   recovery io.
 *
 *   recovered_regions: Regions that the caller has successfully
 *   recovered.  dm_rh_update_states() will now schedule any delayed
 *   io, up the recovery_count, and remove the region from the
 *   hash.
 *
 * There are 2 locks:
 *   A rw spin lock 'hash_lock' protects just the hash table,
 *   this is never held in write mode from interrupt context,
 *   which means that we only have to disable irqs when doing
 *   a write lock.
 *
 *   An ordinary spin lock 'region_lock' that protects the three
 *   lists in the region_hash, with the 'state', 'list' and
 *   'delayed_bios' fields of the regions.  This is used from irq
 *   context, so all other uses will have to suspend local irqs.
 *---------------------------------------------------------------*/
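
/*
 * Typical client wiring, as a sketch only (callback and macro names
 * below are illustrative, not part of this API):
 *
 *	rh = dm_region_hash_create(ms, dispatch_bios, wakeup_workers,
 *				   wakeup_recovery_waiters, ti->begin,
 *				   MAX_RECOVERY, log, region_size,
 *				   nr_regions);
 *
 * Before issuing writes:	dm_rh_inc_pending(rh, &writes);
 * As each write completes:	dm_rh_dec(rh, region);
 * From the worker thread:	dm_rh_update_states(rh, errors_handled);
 */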
struct dm_region_hash {
	uint32_t region_size;
	unsigned region_shift;

	/* holds persistent region state */
	struct dm_dirty_log *log;

	/* hash table */
	rwlock_t hash_lock;
	mempool_t *region_pool;
	unsigned mask;
	unsigned nr_buckets;
	unsigned prime;
	unsigned shift;
	struct list_head *buckets;

	unsigned max_recovery; /* Max # of regions to recover in parallel */

	spinlock_t region_lock;
	atomic_t recovery_in_flight;
	struct semaphore recovery_count;
	struct list_head clean_regions;
	struct list_head quiesced_regions;
	struct list_head recovered_regions;
	struct list_head failed_recovered_regions;

	void *context;
	sector_t target_begin;

	/* Callback function to schedule bios writes */
	void (*dispatch_bios)(void *context, struct bio_list *bios);

	/* Callback function to wakeup callers worker thread. */
	void (*wakeup_workers)(void *context);

	/* Callback function to wakeup callers recovery waiters. */
	void (*wakeup_all_recovery_waiters)(void *context);
};

struct dm_region {
	struct dm_region_hash *rh;	/* FIXME: can we get rid of this ? */
	region_t key;
	int state;

	struct list_head hash_list;
	struct list_head list;

	atomic_t pending;
	struct bio_list delayed_bios;
};

/*
 * Conversion fns
 */
static region_t dm_rh_sector_to_region(struct dm_region_hash *rh, sector_t sector)
{
	return sector >> rh->region_shift;
}

sector_t dm_rh_region_to_sector(struct dm_region_hash *rh, region_t region)
{
	return region << rh->region_shift;
}
EXPORT_SYMBOL_GPL(dm_rh_region_to_sector);

region_t dm_rh_bio_to_region(struct dm_region_hash *rh, struct bio *bio)
{
	return dm_rh_sector_to_region(rh, bio->bi_sector - rh->target_begin);
}
EXPORT_SYMBOL_GPL(dm_rh_bio_to_region);

void *dm_rh_region_context(struct dm_region *reg)
{
	return reg->rh->context;
}
EXPORT_SYMBOL_GPL(dm_rh_region_context);

region_t dm_rh_get_region_key(struct dm_region *reg)
{
	return reg->key;
}
EXPORT_SYMBOL_GPL(dm_rh_get_region_key);

sector_t dm_rh_get_region_size(struct dm_region_hash *rh)
{
	return rh->region_size;
}
EXPORT_SYMBOL_GPL(dm_rh_get_region_size);

/*
 * FIXME: shall we pass in a structure instead of all these args to
 * dm_region_hash_create()?
 */
#define RH_HASH_MULT 2654435387U
#define RH_HASH_SHIFT 12

#define MIN_REGIONS 64
struct dm_region_hash *dm_region_hash_create(
		void *context, void (*dispatch_bios)(void *context,
						     struct bio_list *bios),
		void (*wakeup_workers)(void *context),
		void (*wakeup_all_recovery_waiters)(void *context),
		sector_t target_begin, unsigned max_recovery,
		struct dm_dirty_log *log, uint32_t region_size,
		region_t nr_regions)
{
	struct dm_region_hash *rh;
	unsigned nr_buckets, max_buckets;
	size_t i;

	/*
	 * Calculate a suitable number of buckets for our hash
	 * table.
	 */
	max_buckets = nr_regions >> 6;
	for (nr_buckets = 128u; nr_buckets < max_buckets; nr_buckets <<= 1)
		;
	nr_buckets >>= 1;

	rh = kmalloc(sizeof(*rh), GFP_KERNEL);
	if (!rh) {
		DMERR("unable to allocate region hash memory");
		return ERR_PTR(-ENOMEM);
	}

	rh->context = context;
	rh->dispatch_bios = dispatch_bios;
	rh->wakeup_workers = wakeup_workers;
	rh->wakeup_all_recovery_waiters = wakeup_all_recovery_waiters;
	rh->target_begin = target_begin;
	rh->max_recovery = max_recovery;
	rh->log = log;
	rh->region_size = region_size;
	rh->region_shift = ffs(region_size) - 1;
	rwlock_init(&rh->hash_lock);
	rh->mask = nr_buckets - 1;
	rh->nr_buckets = nr_buckets;

	rh->shift = RH_HASH_SHIFT;
	rh->prime = RH_HASH_MULT;

	rh->buckets = vmalloc(nr_buckets * sizeof(*rh->buckets));
	if (!rh->buckets) {
		DMERR("unable to allocate region hash bucket memory");
		kfree(rh);
		return ERR_PTR(-ENOMEM);
	}

	for (i = 0; i < nr_buckets; i++)
		INIT_LIST_HEAD(rh->buckets + i);

	spin_lock_init(&rh->region_lock);
	sema_init(&rh->recovery_count, 0);
	atomic_set(&rh->recovery_in_flight, 0);
	INIT_LIST_HEAD(&rh->clean_regions);
	INIT_LIST_HEAD(&rh->quiesced_regions);
	INIT_LIST_HEAD(&rh->recovered_regions);
	INIT_LIST_HEAD(&rh->failed_recovered_regions);

	rh->region_pool = mempool_create_kmalloc_pool(MIN_REGIONS,
						      sizeof(struct dm_region));
	if (!rh->region_pool) {
		vfree(rh->buckets);
		kfree(rh);
		rh = ERR_PTR(-ENOMEM);
	}

	return rh;
}
EXPORT_SYMBOL_GPL(dm_region_hash_create);

void dm_region_hash_destroy(struct dm_region_hash *rh)
{
	unsigned h;
	struct dm_region *reg, *nreg;

	BUG_ON(!list_empty(&rh->quiesced_regions));
	for (h = 0; h < rh->nr_buckets; h++) {
		list_for_each_entry_safe(reg, nreg, rh->buckets + h,
					 hash_list) {
			BUG_ON(atomic_read(&reg->pending));
			mempool_free(reg, rh->region_pool);
		}
	}

	if (rh->log)
		dm_dirty_log_destroy(rh->log);

	if (rh->region_pool)
		mempool_destroy(rh->region_pool);

	vfree(rh->buckets);
	kfree(rh);
}
EXPORT_SYMBOL_GPL(dm_region_hash_destroy);

struct dm_dirty_log *dm_rh_dirty_log(struct dm_region_hash *rh)
{
	return rh->log;
}
EXPORT_SYMBOL_GPL(dm_rh_dirty_log);

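/*
 * Multiplicative hashing: scale the region number by a large odd
 * constant, keep the upper bits (>> shift) and mask down to the
 * power-of-two bucket count.
 */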
static unsigned rh_hash(struct dm_region_hash *rh, region_t region)
{
	return (unsigned) ((region * rh->prime) >> rh->shift) & rh->mask;
}

static struct dm_region *__rh_lookup(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *reg;
	struct list_head *bucket = rh->buckets + rh_hash(rh, region);

	list_for_each_entry(reg, bucket, hash_list)
		if (reg->key == region)
			return reg;

	return NULL;
}

static void __rh_insert(struct dm_region_hash *rh, struct dm_region *reg)
{
	list_add(&reg->hash_list, rh->buckets + rh_hash(rh, reg->key));
}

static struct dm_region *__rh_alloc(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *reg, *nreg;

	nreg = mempool_alloc(rh->region_pool, GFP_ATOMIC);
	if (unlikely(!nreg))
		nreg = kmalloc(sizeof(*nreg), GFP_NOIO | __GFP_NOFAIL);

	nreg->state = rh->log->type->in_sync(rh->log, region, 1) ?
		      DM_RH_CLEAN : DM_RH_NOSYNC;
	nreg->rh = rh;
	nreg->key = region;
	INIT_LIST_HEAD(&nreg->list);
	atomic_set(&nreg->pending, 0);
	bio_list_init(&nreg->delayed_bios);

	write_lock_irq(&rh->hash_lock);
	reg = __rh_lookup(rh, region);
	if (reg)
		/* We lost the race. */
		mempool_free(nreg, rh->region_pool);
	else {
		__rh_insert(rh, nreg);
		if (nreg->state == DM_RH_CLEAN) {
			spin_lock(&rh->region_lock);
			list_add(&nreg->list, &rh->clean_regions);
			spin_unlock(&rh->region_lock);
		}

		reg = nreg;
	}
	write_unlock_irq(&rh->hash_lock);

	return reg;
}

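/*
 * Look up a region, allocating it on demand.  Called with hash_lock
 * held for read; the read lock is dropped and re-acquired around the
 * allocation, so the caller must not rely on the table being unchanged.
 */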
static struct dm_region *__rh_find(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *reg;

	reg = __rh_lookup(rh, region);
	if (!reg) {
		read_unlock(&rh->hash_lock);
		reg = __rh_alloc(rh, region);
		read_lock(&rh->hash_lock);
	}

	return reg;
}

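/*
 * Return the state of a region.  If it is not in the hash, fall back
 * to asking the dirty log; may_block is passed through to the log's
 * in_sync() method.
 */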
int dm_rh_get_state(struct dm_region_hash *rh, region_t region, int may_block)
{
	int r;
	struct dm_region *reg;

	read_lock(&rh->hash_lock);
	reg = __rh_lookup(rh, region);
	read_unlock(&rh->hash_lock);

	if (reg)
		return reg->state;

	/*
	 * The region wasn't in the hash, so we fall back to the
	 * dirty log.
	 */
	r = rh->log->type->in_sync(rh->log, region, may_block);

	/*
	 * Any error from the dirty log (eg. -EINVAL) gets
	 * taken as a DM_RH_NOSYNC
	 */
	return r == 1 ? DM_RH_CLEAN : DM_RH_NOSYNC;
}
EXPORT_SYMBOL_GPL(dm_rh_get_state);

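/*
 * Record the outcome of a recovery in the dirty log, release the
 * bios that were delayed on the region and free up a recovery slot.
 */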
static void complete_resync_work(struct dm_region *reg, int success)
{
	struct dm_region_hash *rh = reg->rh;

	rh->log->type->set_region_sync(rh->log, reg->key, success);

	/*
	 * Dispatch the bios before we call 'wake_up_all'.
	 * This is important because if we are suspending,
	 * we want to know that recovery is complete and
	 * the work queue is flushed.  If we wake_up_all
	 * before we dispatch_bios (queue bios and call wake()),
	 * then we risk suspending before the work queue
	 * has been properly flushed.
	 */
	rh->dispatch_bios(rh->context, &reg->delayed_bios);
	if (atomic_dec_and_test(&rh->recovery_in_flight))
		rh->wakeup_all_recovery_waiters(rh->context);
	up(&rh->recovery_count);
}

/* dm_rh_mark_nosync
 * @rh
 * @bio
 * @done
 * @error
 *
 * The bio was written on some mirror(s) but failed on other mirror(s).
 * We can successfully endio the bio but should avoid the region being
 * marked clean by setting the state DM_RH_NOSYNC.
 *
 * This function is _not_ safe in interrupt context!
 */
void dm_rh_mark_nosync(struct dm_region_hash *rh,
		       struct bio *bio, unsigned done, int error)
{
	unsigned long flags;
	struct dm_dirty_log *log = rh->log;
	struct dm_region *reg;
	region_t region = dm_rh_bio_to_region(rh, bio);
	int recovering = 0;

	/* We must inform the log that the sync count has changed. */
	log->type->set_region_sync(log, region, 0);

	read_lock(&rh->hash_lock);
	reg = __rh_find(rh, region);
	read_unlock(&rh->hash_lock);

	/* region hash entry should exist because write was in-flight */
	BUG_ON(!reg);
	BUG_ON(!list_empty(&reg->list));

	spin_lock_irqsave(&rh->region_lock, flags);
	/*
	 * Possible cases:
	 *   1) DM_RH_DIRTY
	 *   2) DM_RH_NOSYNC: was dirty, other preceding writes failed
	 *   3) DM_RH_RECOVERING: flushing pending writes
	 * Either case, the region should not be connected to any list.
	 */
	recovering = (reg->state == DM_RH_RECOVERING);
	reg->state = DM_RH_NOSYNC;
	BUG_ON(!list_empty(&reg->list));
	spin_unlock_irqrestore(&rh->region_lock, flags);

	bio_endio(bio, error);
	if (recovering)
		complete_resync_work(reg, 0);
}
EXPORT_SYMBOL_GPL(dm_rh_mark_nosync);

void dm_rh_update_states(struct dm_region_hash *rh, int errors_handled)
{
	struct dm_region *reg, *next;

	LIST_HEAD(clean);
	LIST_HEAD(recovered);
	LIST_HEAD(failed_recovered);

	/*
	 * Quickly grab the lists and remove any regions from hash.
	 */
	write_lock_irq(&rh->hash_lock);
	spin_lock(&rh->region_lock);
	if (!list_empty(&rh->clean_regions)) {
		list_splice_init(&rh->clean_regions, &clean);

		list_for_each_entry(reg, &clean, list)
			list_del(&reg->hash_list);
	}

	if (!list_empty(&rh->recovered_regions)) {
		list_splice_init(&rh->recovered_regions, &recovered);

		list_for_each_entry(reg, &recovered, list)
			list_del(&reg->hash_list);
	}

	if (!list_empty(&rh->failed_recovered_regions)) {
		list_splice_init(&rh->failed_recovered_regions,
				 &failed_recovered);

		list_for_each_entry(reg, &failed_recovered, list)
			list_del(&reg->hash_list);
	}

	spin_unlock(&rh->region_lock);
	write_unlock_irq(&rh->hash_lock);

	/*
	 * All the regions on the recovered and clean lists have
	 * now been pulled out of the system, so no need to do
	 * any more locking.
	 */
	list_for_each_entry_safe(reg, next, &recovered, list) {
		rh->log->type->clear_region(rh->log, reg->key);
		complete_resync_work(reg, 1);
		mempool_free(reg, rh->region_pool);
	}

	list_for_each_entry_safe(reg, next, &failed_recovered, list) {
		complete_resync_work(reg, errors_handled ? 0 : 1);
		mempool_free(reg, rh->region_pool);
	}

	list_for_each_entry_safe(reg, next, &clean, list) {
		rh->log->type->clear_region(rh->log, reg->key);
		mempool_free(reg, rh->region_pool);
	}

	rh->log->type->flush(rh->log);
}
EXPORT_SYMBOL_GPL(dm_rh_update_states);

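/*
 * Take a pending-I/O reference on a region, marking it dirty in the
 * log if it was clean.
 */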
static void rh_inc(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *reg;

	read_lock(&rh->hash_lock);
	reg = __rh_find(rh, region);

	spin_lock_irq(&rh->region_lock);
	atomic_inc(&reg->pending);

	if (reg->state == DM_RH_CLEAN) {
		reg->state = DM_RH_DIRTY;
		list_del_init(&reg->list);	/* take off the clean list */
		spin_unlock_irq(&rh->region_lock);

		rh->log->type->mark_region(rh->log, reg->key);
	} else
		spin_unlock_irq(&rh->region_lock);

	read_unlock(&rh->hash_lock);
}

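/*
 * Take a pending reference on every region covered by a list of bios.
 */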
void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios)
{
	struct bio *bio;

	for (bio = bios->head; bio; bio = bio->bi_next)
		rh_inc(rh, dm_rh_bio_to_region(rh, bio));
}
EXPORT_SYMBOL_GPL(dm_rh_inc_pending);

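/*
 * Drop a pending-I/O reference.  When the last reference goes, the
 * region is moved to the list appropriate to its state and the
 * worker is woken to process it.
 */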
void dm_rh_dec(struct dm_region_hash *rh, region_t region)
{
	unsigned long flags;
	struct dm_region *reg;
	int should_wake = 0;

	read_lock(&rh->hash_lock);
	reg = __rh_lookup(rh, region);
	read_unlock(&rh->hash_lock);

	spin_lock_irqsave(&rh->region_lock, flags);
	if (atomic_dec_and_test(&reg->pending)) {
		/*
		 * There is no pending I/O for this region.
		 * We can move the region to corresponding list for next action.
		 * At this point, the region is not yet connected to any list.
		 *
		 * If the state is DM_RH_NOSYNC, the region should be kept off
		 * from clean list.
		 * The hash entry for DM_RH_NOSYNC will remain in memory
		 * until the region is recovered or the map is reloaded.
		 */

		/* do nothing for DM_RH_NOSYNC */
		if (reg->state == DM_RH_RECOVERING) {
			list_add_tail(&reg->list, &rh->quiesced_regions);
		} else if (reg->state == DM_RH_DIRTY) {
			reg->state = DM_RH_CLEAN;
			list_add(&reg->list, &rh->clean_regions);
		}
		should_wake = 1;
	}
	spin_unlock_irqrestore(&rh->region_lock, flags);

	if (should_wake)
		rh->wakeup_workers(rh->context);
}
EXPORT_SYMBOL_GPL(dm_rh_dec);

/*
 * Starts quiescing a region in preparation for recovery.
 */
static int __rh_recovery_prepare(struct dm_region_hash *rh)
{
	int r;
	region_t region;
	struct dm_region *reg;

	/*
	 * Ask the dirty log what's next.
	 */
	r = rh->log->type->get_resync_work(rh->log, &region);
	if (r <= 0)
		return r;

	/*
	 * Get this region, and start it quiescing by setting the
	 * recovering flag.
	 */
	read_lock(&rh->hash_lock);
	reg = __rh_find(rh, region);
	read_unlock(&rh->hash_lock);

	spin_lock_irq(&rh->region_lock);
	reg->state = DM_RH_RECOVERING;

	/* Already quiesced ? */
	if (atomic_read(&reg->pending))
		list_del_init(&reg->list);
	else
		list_move(&reg->list, &rh->quiesced_regions);

	spin_unlock_irq(&rh->region_lock);

	return 1;
}

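/*
 * Pull as much resync work from the dirty log as the recovery_count
 * semaphore (i.e. max_recovery) allows.
 */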
void dm_rh_recovery_prepare(struct dm_region_hash *rh)
{
	/* Extra reference to avoid race with dm_rh_stop_recovery */
	atomic_inc(&rh->recovery_in_flight);

	while (!down_trylock(&rh->recovery_count)) {
		atomic_inc(&rh->recovery_in_flight);
		if (__rh_recovery_prepare(rh) <= 0) {
			atomic_dec(&rh->recovery_in_flight);
			up(&rh->recovery_count);
			break;
		}
	}

	/* Drop the extra reference */
	if (atomic_dec_and_test(&rh->recovery_in_flight))
		rh->wakeup_all_recovery_waiters(rh->context);
}
EXPORT_SYMBOL_GPL(dm_rh_recovery_prepare);

/*
 * Returns any quiesced regions.
 */
struct dm_region *dm_rh_recovery_start(struct dm_region_hash *rh)
{
	struct dm_region *reg = NULL;

	spin_lock_irq(&rh->region_lock);
	if (!list_empty(&rh->quiesced_regions)) {
		reg = list_entry(rh->quiesced_regions.next,
				 struct dm_region, list);
		list_del_init(&reg->list);
	}
	spin_unlock_irq(&rh->region_lock);

	return reg;
}
EXPORT_SYMBOL_GPL(dm_rh_recovery_start);

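/*
 * Report the outcome of recovering a region.  The region is queued on
 * the recovered or failed_recovered list for dm_rh_update_states() to
 * finish off.
 */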
void dm_rh_recovery_end(struct dm_region *reg, int success)
{
	struct dm_region_hash *rh = reg->rh;

	spin_lock_irq(&rh->region_lock);
	if (success)
		list_add(&reg->list, &reg->rh->recovered_regions);
	else {
		reg->state = DM_RH_NOSYNC;
		list_add(&reg->list, &reg->rh->failed_recovered_regions);
	}
	spin_unlock_irq(&rh->region_lock);

	rh->wakeup_workers(rh->context);
}
EXPORT_SYMBOL_GPL(dm_rh_recovery_end);

/* Return recovery in flight count. */
int dm_rh_recovery_in_flight(struct dm_region_hash *rh)
{
	return atomic_read(&rh->recovery_in_flight);
}
EXPORT_SYMBOL_GPL(dm_rh_recovery_in_flight);

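/* Flush the dirty log to stable storage. */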
int dm_rh_flush(struct dm_region_hash *rh)
{
	return rh->log->type->flush(rh->log);
}
EXPORT_SYMBOL_GPL(dm_rh_flush);

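/*
 * Queue a bio on its region's delayed list; it will be dispatched
 * once the region has been recovered.
 */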
void dm_rh_delay(struct dm_region_hash *rh, struct bio *bio)
{
	struct dm_region *reg;

	read_lock(&rh->hash_lock);
	reg = __rh_find(rh, dm_rh_bio_to_region(rh, bio));
	bio_list_add(&reg->delayed_bios, bio);
	read_unlock(&rh->hash_lock);
}
EXPORT_SYMBOL_GPL(dm_rh_delay);

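/*
 * Drain all recovery slots so no new recovery can be prepared, and
 * wait for any recovery already in progress to finish.
 */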
void dm_rh_stop_recovery(struct dm_region_hash *rh)
{
	int i;

	/* wait for any recovering regions */
	for (i = 0; i < rh->max_recovery; i++)
		down(&rh->recovery_count);
}
EXPORT_SYMBOL_GPL(dm_rh_stop_recovery);

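/*
 * Re-open the recovery slots and kick the worker to resume recovery.
 */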
void dm_rh_start_recovery(struct dm_region_hash *rh)
{
	int i;

	for (i = 0; i < rh->max_recovery; i++)
		up(&rh->recovery_count);

	rh->wakeup_workers(rh->context);
}
EXPORT_SYMBOL_GPL(dm_rh_start_recovery);

MODULE_DESCRIPTION(DM_NAME " region hash");
MODULE_AUTHOR("Joe Thornber/Heinz Mauelshagen <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");