1
2
3
4
5
6
7
8
9#include <linux/dm-bufio.h>
10
11#include <linux/device-mapper.h>
12#include <linux/dm-io.h>
13#include <linux/slab.h>
14#include <linux/sched/mm.h>
15#include <linux/jiffies.h>
16#include <linux/vmalloc.h>
17#include <linux/shrinker.h>
18#include <linux/module.h>
19#include <linux/rbtree.h>
20#include <linux/stacktrace.h>
21
22#define DM_MSG_PREFIX "bufio"
23
/*
 * Memory management policy:
 *	Limit the size of the cache to DM_BUFIO_MEMORY_PERCENT of main memory
 *	or DM_BUFIO_VMALLOC_PERCENT of vmalloc memory (whichever is lower).
 *	Always allocate at least DM_BUFIO_MIN_BUFFERS buffers.
 *	Start background writeback when there are DM_BUFIO_WRITEBACK_PERCENT
 *	dirty buffers.
 */
32#define DM_BUFIO_MIN_BUFFERS 8
33
34#define DM_BUFIO_MEMORY_PERCENT 2
35#define DM_BUFIO_VMALLOC_PERCENT 25
36#define DM_BUFIO_WRITEBACK_PERCENT 75
37
38
39
40
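/*
 * Check buffer ages in this interval (seconds)
 */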
41#define DM_BUFIO_WORK_TIMER_SECS 30
42
43
44
45
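/*
 * Free buffers when they are older than this (seconds)
 */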
46#define DM_BUFIO_DEFAULT_AGE_SECS 300
47
48
49
50
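/*
 * The number of bytes of cached data to try to keep around
 */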
51#define DM_BUFIO_DEFAULT_RETAIN_BYTES (256 * 1024)
52
53
54
55
56
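/*
 * Align partial buffer writes to this boundary
 */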
57#define DM_BUFIO_WRITE_ALIGN 4096
58
59
60
61
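/*
 * dm_buffer->list_mode
 */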
62#define LIST_CLEAN 0
63#define LIST_DIRTY 1
64#define LIST_SIZE 2
65
/*
 * Linking of buffers:
 *	All buffers are linked to buffer_tree with their node field.
 *
 *	Clean buffers that are not being written (B_WRITING not set)
 *	are linked to lru[LIST_CLEAN] with their lru_list field.
 *
 *	Dirty buffers and clean buffers that are being written are linked
 *	to lru[LIST_DIRTY] with their lru_list field.  When a write
 *	finishes, the buffer is not relinked immediately; it is moved back
 *	to the clean list later, from process context.
 */
81struct dm_bufio_client {
82 struct mutex lock;
83
84 struct list_head lru[LIST_SIZE];
85 unsigned long n_buffers[LIST_SIZE];
86
87 struct block_device *bdev;
88 unsigned block_size;
89 s8 sectors_per_block_bits;
90 void (*alloc_callback)(struct dm_buffer *);
91 void (*write_callback)(struct dm_buffer *);
92
93 struct kmem_cache *slab_buffer;
94 struct kmem_cache *slab_cache;
95 struct dm_io_client *dm_io;
96
97 struct list_head reserved_buffers;
98 unsigned need_reserved_buffers;
99
100 unsigned minimum_buffers;
101
102 struct rb_root buffer_tree;
103 wait_queue_head_t free_buffer_wait;
104
105 sector_t start;
106
107 int async_write_error;
108
109 struct list_head client_list;
110 struct shrinker shrinker;
111};
112
113
114
115
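/*
 * Buffer state bits (dm_buffer->state)
 */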
116#define B_READING 0
117#define B_WRITING 1
118#define B_DIRTY 2
119
120
121
122
123
124
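/*
 * Describes how the buffer data was allocated; see alloc_buffer_data()
 */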
125enum data_mode {
126 DATA_MODE_SLAB = 0,
127 DATA_MODE_GET_FREE_PAGES = 1,
128 DATA_MODE_VMALLOC = 2,
129 DATA_MODE_LIMIT = 3
130};
131
132struct dm_buffer {
133 struct rb_node node;
134 struct list_head lru_list;
135 sector_t block;
136 void *data;
137 unsigned char data_mode;
138 unsigned char list_mode;
139 blk_status_t read_error;
140 blk_status_t write_error;
141 unsigned hold_count;
142 unsigned long state;
143 unsigned long last_accessed;
144 unsigned dirty_start;
145 unsigned dirty_end;
146 unsigned write_start;
147 unsigned write_end;
148 struct dm_bufio_client *c;
149 struct list_head write_list;
150 void (*end_io)(struct dm_buffer *, blk_status_t);
151#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
152#define MAX_STACK 10
153 unsigned int stack_len;
154 unsigned long stack_entries[MAX_STACK];
155#endif
156};
157
158
159
160#define dm_bufio_in_request() (!!current->bio_list)
161
162static void dm_bufio_lock(struct dm_bufio_client *c)
163{
164 mutex_lock_nested(&c->lock, dm_bufio_in_request());
165}
166
167static int dm_bufio_trylock(struct dm_bufio_client *c)
168{
169 return mutex_trylock(&c->lock);
170}
171
172static void dm_bufio_unlock(struct dm_bufio_client *c)
173{
174 mutex_unlock(&c->lock);
175}
176
177
178
179
180
181
182static unsigned long dm_bufio_default_cache_size;
183
184
185
186
187static unsigned long dm_bufio_cache_size;
188
189
190
191
192
193static unsigned long dm_bufio_cache_size_latch;
194
195static DEFINE_SPINLOCK(param_spinlock);
196
197
198
199
200static unsigned dm_bufio_max_age = DM_BUFIO_DEFAULT_AGE_SECS;
201static unsigned long dm_bufio_retain_bytes = DM_BUFIO_DEFAULT_RETAIN_BYTES;
202
203static unsigned long dm_bufio_peak_allocated;
204static unsigned long dm_bufio_allocated_kmem_cache;
205static unsigned long dm_bufio_allocated_get_free_pages;
206static unsigned long dm_bufio_allocated_vmalloc;
207static unsigned long dm_bufio_current_allocated;
208
209
210
211
212
213
214static unsigned long dm_bufio_cache_size_per_client;
215
216
217
218
219static int dm_bufio_client_count;
220
221
222
223
224static LIST_HEAD(dm_bufio_all_clients);
225
226
227
228
229
230static DEFINE_MUTEX(dm_bufio_clients_lock);
231
232#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
233static void buffer_record_stack(struct dm_buffer *b)
234{
235 b->stack_len = stack_trace_save(b->stack_entries, MAX_STACK, 2);
236}
237#endif
238
239
240
241
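/*
 * A red/black tree acts as an index for all the buffers.
 */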
242static struct dm_buffer *__find(struct dm_bufio_client *c, sector_t block)
243{
244 struct rb_node *n = c->buffer_tree.rb_node;
245 struct dm_buffer *b;
246
247 while (n) {
248 b = container_of(n, struct dm_buffer, node);
249
250 if (b->block == block)
251 return b;
252
		n = (b->block < block) ? n->rb_right : n->rb_left;
254 }
255
256 return NULL;
257}
258
259static void __insert(struct dm_bufio_client *c, struct dm_buffer *b)
260{
261 struct rb_node **new = &c->buffer_tree.rb_node, *parent = NULL;
262 struct dm_buffer *found;
263
264 while (*new) {
265 found = container_of(*new, struct dm_buffer, node);
266
267 if (found->block == b->block) {
268 BUG_ON(found != b);
269 return;
270 }
271
272 parent = *new;
		new = (found->block < b->block) ?
			&((*new)->rb_right) : &((*new)->rb_left);
275 }
276
277 rb_link_node(&b->node, parent, new);
278 rb_insert_color(&b->node, &c->buffer_tree);
279}
280
281static void __remove(struct dm_bufio_client *c, struct dm_buffer *b)
282{
283 rb_erase(&b->node, &c->buffer_tree);
284}
285
286
287
288static void adjust_total_allocated(unsigned char data_mode, long diff)
289{
290 static unsigned long * const class_ptr[DATA_MODE_LIMIT] = {
291 &dm_bufio_allocated_kmem_cache,
292 &dm_bufio_allocated_get_free_pages,
293 &dm_bufio_allocated_vmalloc,
294 };
295
	spin_lock(&param_spinlock);
297
298 *class_ptr[data_mode] += diff;
299
300 dm_bufio_current_allocated += diff;
301
302 if (dm_bufio_current_allocated > dm_bufio_peak_allocated)
303 dm_bufio_peak_allocated = dm_bufio_current_allocated;
304
	spin_unlock(&param_spinlock);
306}
307
308
309
310
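/*
 * Recalculate the per-client cache size limit.
 * Called with dm_bufio_clients_lock held.
 */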
311static void __cache_size_refresh(void)
312{
313 BUG_ON(!mutex_is_locked(&dm_bufio_clients_lock));
314 BUG_ON(dm_bufio_client_count < 0);
315
316 dm_bufio_cache_size_latch = READ_ONCE(dm_bufio_cache_size);
317
318
319
320
321 if (!dm_bufio_cache_size_latch) {
322 (void)cmpxchg(&dm_bufio_cache_size, 0,
323 dm_bufio_default_cache_size);
324 dm_bufio_cache_size_latch = dm_bufio_default_cache_size;
325 }
326
327 dm_bufio_cache_size_per_client = dm_bufio_cache_size_latch /
328 (dm_bufio_client_count ? : 1);
329}
330
/*
 * Allocating buffer data.
 *
 * Small buffers are allocated with kmem_cache, to use space optimally.
 *
 * For large buffers, we choose between get_free_pages and vmalloc.
 * Each has advantages and disadvantages.
 *
 * __get_free_pages can randomly fail if the memory is fragmented.
 * __vmalloc won't randomly fail, but vmalloc space is limited (it may be
 * as low as 128M) so using it for caching is not appropriate.
 *
 * If the allocation may fail we use __get_free_pages.  Memory
 * fragmentation won't have a fatal effect here, it just causes flushes
 * of some other buffers and more I/O will be performed.  Don't use
 * __get_free_pages if the block is too large for it (larger than
 * KMALLOC_MAX_SIZE).
 *
 * If the allocation shouldn't fail we use __vmalloc.  This is only for
 * the initial reserve allocation, so there's no risk of wasting all
 * vmalloc space.
 */
352static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
353 unsigned char *data_mode)
354{
355 if (unlikely(c->slab_cache != NULL)) {
356 *data_mode = DATA_MODE_SLAB;
357 return kmem_cache_alloc(c->slab_cache, gfp_mask);
358 }
359
360 if (c->block_size <= KMALLOC_MAX_SIZE &&
361 gfp_mask & __GFP_NORETRY) {
362 *data_mode = DATA_MODE_GET_FREE_PAGES;
363 return (void *)__get_free_pages(gfp_mask,
364 c->sectors_per_block_bits - (PAGE_SHIFT - SECTOR_SHIFT));
365 }
366
367 *data_mode = DATA_MODE_VMALLOC;
368
	/*
	 * __vmalloc allocates the data pages and auxiliary structures with
	 * gfp_flags that were specified, but pagetables are always allocated
	 * with GFP_KERNEL, no matter what was specified as gfp_mask.
	 *
	 * Consequently, we must set the per-process flag PF_MEMALLOC_NOIO so
	 * that all allocations done by this process (including pagetables)
	 * are done as if GFP_NOIO was specified.
	 */
378 if (gfp_mask & __GFP_NORETRY) {
379 unsigned noio_flag = memalloc_noio_save();
380 void *ptr = __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
381
382 memalloc_noio_restore(noio_flag);
383 return ptr;
384 }
385
386 return __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
387}
388
389
390
391
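/*
 * Free buffer's data.
 */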
392static void free_buffer_data(struct dm_bufio_client *c,
393 void *data, unsigned char data_mode)
394{
395 switch (data_mode) {
396 case DATA_MODE_SLAB:
397 kmem_cache_free(c->slab_cache, data);
398 break;
399
400 case DATA_MODE_GET_FREE_PAGES:
401 free_pages((unsigned long)data,
402 c->sectors_per_block_bits - (PAGE_SHIFT - SECTOR_SHIFT));
403 break;
404
405 case DATA_MODE_VMALLOC:
406 vfree(data);
407 break;
408
409 default:
410 DMCRIT("dm_bufio_free_buffer_data: bad data mode: %d",
411 data_mode);
412 BUG();
413 }
414}
415
416
417
418
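/*
 * Allocate buffer and its data.
 */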
419static struct dm_buffer *alloc_buffer(struct dm_bufio_client *c, gfp_t gfp_mask)
420{
421 struct dm_buffer *b = kmem_cache_alloc(c->slab_buffer, gfp_mask);
422
423 if (!b)
424 return NULL;
425
426 b->c = c;
427
428 b->data = alloc_buffer_data(c, gfp_mask, &b->data_mode);
429 if (!b->data) {
430 kmem_cache_free(c->slab_buffer, b);
431 return NULL;
432 }
433
434 adjust_total_allocated(b->data_mode, (long)c->block_size);
435
436#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
437 b->stack_len = 0;
438#endif
439 return b;
440}
441
442
443
444
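/*
 * Free buffer and its data.
 */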
445static void free_buffer(struct dm_buffer *b)
446{
447 struct dm_bufio_client *c = b->c;
448
449 adjust_total_allocated(b->data_mode, -(long)c->block_size);
450
451 free_buffer_data(c, b->data, b->data_mode);
452 kmem_cache_free(c->slab_buffer, b);
453}
454
455
456
457
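/*
 * Link buffer to the buffer tree and the clean or dirty LRU queue.
 */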
458static void __link_buffer(struct dm_buffer *b, sector_t block, int dirty)
459{
460 struct dm_bufio_client *c = b->c;
461
462 c->n_buffers[dirty]++;
463 b->block = block;
464 b->list_mode = dirty;
465 list_add(&b->lru_list, &c->lru[dirty]);
466 __insert(b->c, b);
467 b->last_accessed = jiffies;
468}
469
470
471
472
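/*
 * Unlink buffer from the buffer tree and the dirty or clean LRU queue.
 */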
473static void __unlink_buffer(struct dm_buffer *b)
474{
475 struct dm_bufio_client *c = b->c;
476
477 BUG_ON(!c->n_buffers[b->list_mode]);
478
479 c->n_buffers[b->list_mode]--;
480 __remove(b->c, b);
481 list_del(&b->lru_list);
482}
483
484
485
486
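/*
 * Place the buffer at the head of the dirty or clean LRU queue.
 */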
487static void __relink_lru(struct dm_buffer *b, int dirty)
488{
489 struct dm_bufio_client *c = b->c;
490
491 BUG_ON(!c->n_buffers[b->list_mode]);
492
493 c->n_buffers[b->list_mode]--;
494 c->n_buffers[dirty]++;
495 b->list_mode = dirty;
496 list_move(&b->lru_list, &c->lru[dirty]);
497 b->last_accessed = jiffies;
498}
499
/*
 * Submit I/O on the buffer.
 *
 * The bio interface is faster, but it can only be used on buffer data
 * that is direct-mapped.  Buffers allocated with vmalloc, and requests
 * for which a bio cannot be allocated or filled, are submitted through
 * the dm-io interface instead.
 */
522static void dmio_complete(unsigned long error, void *context)
523{
524 struct dm_buffer *b = context;
525
526 b->end_io(b, unlikely(error != 0) ? BLK_STS_IOERR : 0);
527}
528
529static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
530 unsigned n_sectors, unsigned offset)
531{
532 int r;
533 struct dm_io_request io_req = {
534 .bi_op = rw,
535 .bi_op_flags = 0,
536 .notify.fn = dmio_complete,
537 .notify.context = b,
538 .client = b->c->dm_io,
539 };
540 struct dm_io_region region = {
541 .bdev = b->c->bdev,
542 .sector = sector,
543 .count = n_sectors,
544 };
545
546 if (b->data_mode != DATA_MODE_VMALLOC) {
547 io_req.mem.type = DM_IO_KMEM;
548 io_req.mem.ptr.addr = (char *)b->data + offset;
549 } else {
550 io_req.mem.type = DM_IO_VMA;
551 io_req.mem.ptr.vma = (char *)b->data + offset;
552 }
553
	r = dm_io(&io_req, 1, &region, NULL);
555 if (unlikely(r))
556 b->end_io(b, errno_to_blk_status(r));
557}
558
559static void bio_complete(struct bio *bio)
560{
561 struct dm_buffer *b = bio->bi_private;
562 blk_status_t status = bio->bi_status;
563 bio_put(bio);
564 b->end_io(b, status);
565}
566
567static void use_bio(struct dm_buffer *b, int rw, sector_t sector,
568 unsigned n_sectors, unsigned offset)
569{
570 struct bio *bio;
571 char *ptr;
572 unsigned vec_size, len;
573
574 vec_size = b->c->block_size >> PAGE_SHIFT;
575 if (unlikely(b->c->sectors_per_block_bits < PAGE_SHIFT - SECTOR_SHIFT))
576 vec_size += 2;
577
578 bio = bio_kmalloc(GFP_NOWAIT | __GFP_NORETRY | __GFP_NOWARN, vec_size);
579 if (!bio) {
580dmio:
581 use_dmio(b, rw, sector, n_sectors, offset);
582 return;
583 }
584
585 bio->bi_iter.bi_sector = sector;
586 bio_set_dev(bio, b->c->bdev);
587 bio_set_op_attrs(bio, rw, 0);
588 bio->bi_end_io = bio_complete;
589 bio->bi_private = b;
590
591 ptr = (char *)b->data + offset;
592 len = n_sectors << SECTOR_SHIFT;
593
594 do {
595 unsigned this_step = min((unsigned)(PAGE_SIZE - offset_in_page(ptr)), len);
596 if (!bio_add_page(bio, virt_to_page(ptr), this_step,
597 offset_in_page(ptr))) {
598 bio_put(bio);
599 goto dmio;
600 }
601
602 len -= this_step;
603 ptr += this_step;
604 } while (len > 0);
605
606 submit_bio(bio);
607}
608
609static void submit_io(struct dm_buffer *b, int rw, void (*end_io)(struct dm_buffer *, blk_status_t))
610{
611 unsigned n_sectors;
612 sector_t sector;
613 unsigned offset, end;
614
615 b->end_io = end_io;
616
617 if (likely(b->c->sectors_per_block_bits >= 0))
618 sector = b->block << b->c->sectors_per_block_bits;
619 else
620 sector = b->block * (b->c->block_size >> SECTOR_SHIFT);
621 sector += b->c->start;
622
623 if (rw != REQ_OP_WRITE) {
624 n_sectors = b->c->block_size >> SECTOR_SHIFT;
625 offset = 0;
626 } else {
627 if (b->c->write_callback)
628 b->c->write_callback(b);
629 offset = b->write_start;
630 end = b->write_end;
631 offset &= -DM_BUFIO_WRITE_ALIGN;
632 end += DM_BUFIO_WRITE_ALIGN - 1;
633 end &= -DM_BUFIO_WRITE_ALIGN;
634 if (unlikely(end > b->c->block_size))
635 end = b->c->block_size;
636
637 sector += offset >> SECTOR_SHIFT;
638 n_sectors = (end - offset) >> SECTOR_SHIFT;
639 }
640
641 if (b->data_mode != DATA_MODE_VMALLOC)
642 use_bio(b, rw, sector, n_sectors, offset);
643 else
644 use_dmio(b, rw, sector, n_sectors, offset);
645}
646
647
648
649
650
651
652
653
654
655
656
657static void write_endio(struct dm_buffer *b, blk_status_t status)
658{
659 b->write_error = status;
660 if (unlikely(status)) {
661 struct dm_bufio_client *c = b->c;
662
663 (void)cmpxchg(&c->async_write_error, 0,
664 blk_status_to_errno(status));
665 }
666
667 BUG_ON(!test_bit(B_WRITING, &b->state));
668
669 smp_mb__before_atomic();
670 clear_bit(B_WRITING, &b->state);
671 smp_mb__after_atomic();
672
673 wake_up_bit(&b->state, B_WRITING);
674}
675
676
677
678
679
680
681
682
683
684
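/*
 * Initiate a write on a dirty buffer, but don't wait for it.
 *
 * If a previous write is still in progress, wait for it to finish first
 * (two simultaneous writes on the same buffer are not allowed).  The new
 * write is either submitted immediately or, if write_list is supplied,
 * queued there for __flush_write_list().
 */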
685static void __write_dirty_buffer(struct dm_buffer *b,
686 struct list_head *write_list)
687{
688 if (!test_bit(B_DIRTY, &b->state))
689 return;
690
691 clear_bit(B_DIRTY, &b->state);
692 wait_on_bit_lock_io(&b->state, B_WRITING, TASK_UNINTERRUPTIBLE);
693
694 b->write_start = b->dirty_start;
695 b->write_end = b->dirty_end;
696
697 if (!write_list)
698 submit_io(b, REQ_OP_WRITE, write_endio);
699 else
700 list_add_tail(&b->write_list, write_list);
701}
702
703static void __flush_write_list(struct list_head *write_list)
704{
705 struct blk_plug plug;
706 blk_start_plug(&plug);
707 while (!list_empty(write_list)) {
708 struct dm_buffer *b =
709 list_entry(write_list->next, struct dm_buffer, write_list);
710 list_del(&b->write_list);
711 submit_io(b, REQ_OP_WRITE, write_endio);
712 cond_resched();
713 }
714 blk_finish_plug(&plug);
715}
716
717
718
719
720
721
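/*
 * Wait until any activity on the buffer finishes.  Possibly write the
 * buffer if it is dirty.  When this function finishes, there is no I/O
 * running on the buffer and the buffer is not dirty.
 */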
722static void __make_buffer_clean(struct dm_buffer *b)
723{
724 BUG_ON(b->hold_count);
725
726 if (!b->state)
727 return;
728
729 wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
730 __write_dirty_buffer(b, NULL);
731 wait_on_bit_io(&b->state, B_WRITING, TASK_UNINTERRUPTIBLE);
732}
733
734
735
736
737
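/*
 * Find some buffer that is not held by anybody, clean it, unlink it and
 * return it.
 */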
738static struct dm_buffer *__get_unclaimed_buffer(struct dm_bufio_client *c)
739{
740 struct dm_buffer *b;
741
742 list_for_each_entry_reverse(b, &c->lru[LIST_CLEAN], lru_list) {
743 BUG_ON(test_bit(B_WRITING, &b->state));
744 BUG_ON(test_bit(B_DIRTY, &b->state));
745
746 if (!b->hold_count) {
747 __make_buffer_clean(b);
748 __unlink_buffer(b);
749 return b;
750 }
751 cond_resched();
752 }
753
754 list_for_each_entry_reverse(b, &c->lru[LIST_DIRTY], lru_list) {
755 BUG_ON(test_bit(B_READING, &b->state));
756
757 if (!b->hold_count) {
758 __make_buffer_clean(b);
759 __unlink_buffer(b);
760 return b;
761 }
762 cond_resched();
763 }
764
765 return NULL;
766}
767
768
769
770
771
772
773
774
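/*
 * Wait until some other thread frees a buffer or releases its hold count.
 *
 * This function is entered with c->lock held, drops it and regains it
 * before exiting.
 */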
775static void __wait_for_free_buffer(struct dm_bufio_client *c)
776{
777 DECLARE_WAITQUEUE(wait, current);
778
779 add_wait_queue(&c->free_buffer_wait, &wait);
780 set_current_state(TASK_UNINTERRUPTIBLE);
781 dm_bufio_unlock(c);
782
783 io_schedule();
784
785 remove_wait_queue(&c->free_buffer_wait, &wait);
786
787 dm_bufio_lock(c);
788}
789
790enum new_flag {
791 NF_FRESH = 0,
792 NF_READ = 1,
793 NF_GET = 2,
794 NF_PREFETCH = 3
795};
796
797
798
799
800
801
802
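/*
 * Allocate a new buffer.  If the allocation is not possible, wait until
 * some other thread frees a buffer.
 *
 * May drop the lock and regain it.
 */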
803static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client *c, enum new_flag nf)
804{
805 struct dm_buffer *b;
806 bool tried_noio_alloc = false;
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821 while (1) {
822 if (dm_bufio_cache_size_latch != 1) {
823 b = alloc_buffer(c, GFP_NOWAIT | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
824 if (b)
825 return b;
826 }
827
828 if (nf == NF_PREFETCH)
829 return NULL;
830
831 if (dm_bufio_cache_size_latch != 1 && !tried_noio_alloc) {
832 dm_bufio_unlock(c);
833 b = alloc_buffer(c, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
834 dm_bufio_lock(c);
835 if (b)
836 return b;
837 tried_noio_alloc = true;
838 }
839
840 if (!list_empty(&c->reserved_buffers)) {
841 b = list_entry(c->reserved_buffers.next,
842 struct dm_buffer, lru_list);
843 list_del(&b->lru_list);
844 c->need_reserved_buffers++;
845
846 return b;
847 }
848
849 b = __get_unclaimed_buffer(c);
850 if (b)
851 return b;
852
853 __wait_for_free_buffer(c);
854 }
855}
856
857static struct dm_buffer *__alloc_buffer_wait(struct dm_bufio_client *c, enum new_flag nf)
858{
859 struct dm_buffer *b = __alloc_buffer_wait_no_callback(c, nf);
860
861 if (!b)
862 return NULL;
863
864 if (c->alloc_callback)
865 c->alloc_callback(b);
866
867 return b;
868}
869
870
871
872
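/*
 * Free a buffer (or refill the reserved list) and wake anybody waiting
 * for a free buffer.
 */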
873static void __free_buffer_wake(struct dm_buffer *b)
874{
875 struct dm_bufio_client *c = b->c;
876
877 if (!c->need_reserved_buffers)
878 free_buffer(b);
879 else {
880 list_add(&b->lru_list, &c->reserved_buffers);
881 c->need_reserved_buffers--;
882 }
883
884 wake_up(&c->free_buffer_wait);
885}
886
887static void __write_dirty_buffers_async(struct dm_bufio_client *c, int no_wait,
888 struct list_head *write_list)
889{
890 struct dm_buffer *b, *tmp;
891
892 list_for_each_entry_safe_reverse(b, tmp, &c->lru[LIST_DIRTY], lru_list) {
893 BUG_ON(test_bit(B_READING, &b->state));
894
895 if (!test_bit(B_DIRTY, &b->state) &&
896 !test_bit(B_WRITING, &b->state)) {
897 __relink_lru(b, LIST_CLEAN);
898 continue;
899 }
900
901 if (no_wait && test_bit(B_WRITING, &b->state))
902 return;
903
904 __write_dirty_buffer(b, write_list);
905 cond_resched();
906 }
907}
908
909
910
911
912static void __get_memory_limit(struct dm_bufio_client *c,
913 unsigned long *threshold_buffers,
914 unsigned long *limit_buffers)
915{
916 unsigned long buffers;
917
918 if (unlikely(READ_ONCE(dm_bufio_cache_size) != dm_bufio_cache_size_latch)) {
919 if (mutex_trylock(&dm_bufio_clients_lock)) {
920 __cache_size_refresh();
921 mutex_unlock(&dm_bufio_clients_lock);
922 }
923 }
924
925 buffers = dm_bufio_cache_size_per_client;
926 if (likely(c->sectors_per_block_bits >= 0))
927 buffers >>= c->sectors_per_block_bits + SECTOR_SHIFT;
928 else
929 buffers /= c->block_size;
930
931 if (buffers < c->minimum_buffers)
932 buffers = c->minimum_buffers;
933
934 *limit_buffers = buffers;
935 *threshold_buffers = mult_frac(buffers,
936 DM_BUFIO_WRITEBACK_PERCENT, 100);
937}
938
939
940
941
942
943
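/*
 * Check the watermarks: free unclaimed buffers while the cache is over
 * its limit and start asynchronous writeback when there are too many
 * dirty buffers.
 */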
944static void __check_watermark(struct dm_bufio_client *c,
945 struct list_head *write_list)
946{
947 unsigned long threshold_buffers, limit_buffers;
948
949 __get_memory_limit(c, &threshold_buffers, &limit_buffers);
950
951 while (c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY] >
952 limit_buffers) {
953
954 struct dm_buffer *b = __get_unclaimed_buffer(c);
955
956 if (!b)
957 return;
958
959 __free_buffer_wake(b);
960 cond_resched();
961 }
962
963 if (c->n_buffers[LIST_DIRTY] > threshold_buffers)
964 __write_dirty_buffers_async(c, 1, write_list);
965}
966
967
968
969
970
971static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block,
972 enum new_flag nf, int *need_submit,
973 struct list_head *write_list)
974{
975 struct dm_buffer *b, *new_b = NULL;
976
977 *need_submit = 0;
978
979 b = __find(c, block);
980 if (b)
981 goto found_buffer;
982
983 if (nf == NF_GET)
984 return NULL;
985
986 new_b = __alloc_buffer_wait(c, nf);
987 if (!new_b)
988 return NULL;
989
990
991
992
993
994 b = __find(c, block);
995 if (b) {
996 __free_buffer_wake(new_b);
997 goto found_buffer;
998 }
999
1000 __check_watermark(c, write_list);
1001
1002 b = new_b;
1003 b->hold_count = 1;
1004 b->read_error = 0;
1005 b->write_error = 0;
1006 __link_buffer(b, block, LIST_CLEAN);
1007
1008 if (nf == NF_FRESH) {
1009 b->state = 0;
1010 return b;
1011 }
1012
1013 b->state = 1 << B_READING;
1014 *need_submit = 1;
1015
1016 return b;
1017
1018found_buffer:
1019 if (nf == NF_PREFETCH)
1020 return NULL;
1021
1022
1023
1024
1025
1026
1027
1028 if (nf == NF_GET && unlikely(test_bit(B_READING, &b->state)))
1029 return NULL;
1030
1031 b->hold_count++;
1032 __relink_lru(b, test_bit(B_DIRTY, &b->state) ||
1033 test_bit(B_WRITING, &b->state));
1034 return b;
1035}
1036
1037
1038
1039
1040
1041static void read_endio(struct dm_buffer *b, blk_status_t status)
1042{
1043 b->read_error = status;
1044
1045 BUG_ON(!test_bit(B_READING, &b->state));
1046
1047 smp_mb__before_atomic();
1048 clear_bit(B_READING, &b->state);
1049 smp_mb__after_atomic();
1050
1051 wake_up_bit(&b->state, B_READING);
1052}
1053
1054
1055
1056
1057
1058
1059
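/*
 * Common code for dm_bufio_get, dm_bufio_read and dm_bufio_new; the
 * behaviour differs only in the new_flag value.  Submits the read when
 * required and waits until the buffer is no longer being read.
 */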
1060static void *new_read(struct dm_bufio_client *c, sector_t block,
1061 enum new_flag nf, struct dm_buffer **bp)
1062{
1063 int need_submit;
1064 struct dm_buffer *b;
1065
1066 LIST_HEAD(write_list);
1067
1068 dm_bufio_lock(c);
1069 b = __bufio_new(c, block, nf, &need_submit, &write_list);
1070#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
1071 if (b && b->hold_count == 1)
1072 buffer_record_stack(b);
1073#endif
1074 dm_bufio_unlock(c);
1075
1076 __flush_write_list(&write_list);
1077
1078 if (!b)
1079 return NULL;
1080
1081 if (need_submit)
1082 submit_io(b, REQ_OP_READ, read_endio);
1083
1084 wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
1085
1086 if (b->read_error) {
1087 int error = blk_status_to_errno(b->read_error);
1088
1089 dm_bufio_release(b);
1090
1091 return ERR_PTR(error);
1092 }
1093
1094 *bp = b;
1095
1096 return b->data;
1097}
1098
1099void *dm_bufio_get(struct dm_bufio_client *c, sector_t block,
1100 struct dm_buffer **bp)
1101{
1102 return new_read(c, block, NF_GET, bp);
1103}
1104EXPORT_SYMBOL_GPL(dm_bufio_get);
1105
1106void *dm_bufio_read(struct dm_bufio_client *c, sector_t block,
1107 struct dm_buffer **bp)
1108{
1109 BUG_ON(dm_bufio_in_request());
1110
1111 return new_read(c, block, NF_READ, bp);
1112}
1113EXPORT_SYMBOL_GPL(dm_bufio_read);
1114
1115void *dm_bufio_new(struct dm_bufio_client *c, sector_t block,
1116 struct dm_buffer **bp)
1117{
1118 BUG_ON(dm_bufio_in_request());
1119
1120 return new_read(c, block, NF_FRESH, bp);
1121}
1122EXPORT_SYMBOL_GPL(dm_bufio_new);
1123
1124void dm_bufio_prefetch(struct dm_bufio_client *c,
1125 sector_t block, unsigned n_blocks)
1126{
1127 struct blk_plug plug;
1128
1129 LIST_HEAD(write_list);
1130
1131 BUG_ON(dm_bufio_in_request());
1132
1133 blk_start_plug(&plug);
1134 dm_bufio_lock(c);
1135
1136 for (; n_blocks--; block++) {
1137 int need_submit;
1138 struct dm_buffer *b;
1139 b = __bufio_new(c, block, NF_PREFETCH, &need_submit,
1140 &write_list);
1141 if (unlikely(!list_empty(&write_list))) {
1142 dm_bufio_unlock(c);
1143 blk_finish_plug(&plug);
1144 __flush_write_list(&write_list);
1145 blk_start_plug(&plug);
1146 dm_bufio_lock(c);
1147 }
1148 if (unlikely(b != NULL)) {
1149 dm_bufio_unlock(c);
1150
1151 if (need_submit)
1152 submit_io(b, REQ_OP_READ, read_endio);
1153 dm_bufio_release(b);
1154
1155 cond_resched();
1156
1157 if (!n_blocks)
1158 goto flush_plug;
1159 dm_bufio_lock(c);
1160 }
1161 }
1162
1163 dm_bufio_unlock(c);
1164
1165flush_plug:
1166 blk_finish_plug(&plug);
1167}
1168EXPORT_SYMBOL_GPL(dm_bufio_prefetch);
1169
1170void dm_bufio_release(struct dm_buffer *b)
1171{
1172 struct dm_bufio_client *c = b->c;
1173
1174 dm_bufio_lock(c);
1175
1176 BUG_ON(!b->hold_count);
1177
1178 b->hold_count--;
1179 if (!b->hold_count) {
1180 wake_up(&c->free_buffer_wait);
1181
1182
1183
1184
1185
1186
1187 if ((b->read_error || b->write_error) &&
1188 !test_bit(B_READING, &b->state) &&
1189 !test_bit(B_WRITING, &b->state) &&
1190 !test_bit(B_DIRTY, &b->state)) {
1191 __unlink_buffer(b);
1192 __free_buffer_wake(b);
1193 }
1194 }
1195
1196 dm_bufio_unlock(c);
1197}
1198EXPORT_SYMBOL_GPL(dm_bufio_release);
1199
1200void dm_bufio_mark_partial_buffer_dirty(struct dm_buffer *b,
1201 unsigned start, unsigned end)
1202{
1203 struct dm_bufio_client *c = b->c;
1204
1205 BUG_ON(start >= end);
1206 BUG_ON(end > b->c->block_size);
1207
1208 dm_bufio_lock(c);
1209
1210 BUG_ON(test_bit(B_READING, &b->state));
1211
1212 if (!test_and_set_bit(B_DIRTY, &b->state)) {
1213 b->dirty_start = start;
1214 b->dirty_end = end;
1215 __relink_lru(b, LIST_DIRTY);
1216 } else {
1217 if (start < b->dirty_start)
1218 b->dirty_start = start;
1219 if (end > b->dirty_end)
1220 b->dirty_end = end;
1221 }
1222
1223 dm_bufio_unlock(c);
1224}
1225EXPORT_SYMBOL_GPL(dm_bufio_mark_partial_buffer_dirty);
1226
1227void dm_bufio_mark_buffer_dirty(struct dm_buffer *b)
1228{
1229 dm_bufio_mark_partial_buffer_dirty(b, 0, b->c->block_size);
1230}
1231EXPORT_SYMBOL_GPL(dm_bufio_mark_buffer_dirty);
1232
1233void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c)
1234{
1235 LIST_HEAD(write_list);
1236
1237 BUG_ON(dm_bufio_in_request());
1238
1239 dm_bufio_lock(c);
1240 __write_dirty_buffers_async(c, 0, &write_list);
1241 dm_bufio_unlock(c);
1242 __flush_write_list(&write_list);
1243}
1244EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers_async);
1245
1246
1247
1248
1249
1250
1251
1252
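/*
 * For performance, it is essential that the buffers are written
 * asynchronously and simultaneously (so that the block layer can merge
 * the writes) and only then waited upon.
 *
 * Finally, the hardware disk cache is flushed.
 */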
1253int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c)
1254{
1255 int a, f;
1256 unsigned long buffers_processed = 0;
1257 struct dm_buffer *b, *tmp;
1258
1259 LIST_HEAD(write_list);
1260
1261 dm_bufio_lock(c);
1262 __write_dirty_buffers_async(c, 0, &write_list);
1263 dm_bufio_unlock(c);
1264 __flush_write_list(&write_list);
1265 dm_bufio_lock(c);
1266
1267again:
1268 list_for_each_entry_safe_reverse(b, tmp, &c->lru[LIST_DIRTY], lru_list) {
1269 int dropped_lock = 0;
1270
1271 if (buffers_processed < c->n_buffers[LIST_DIRTY])
1272 buffers_processed++;
1273
1274 BUG_ON(test_bit(B_READING, &b->state));
1275
1276 if (test_bit(B_WRITING, &b->state)) {
1277 if (buffers_processed < c->n_buffers[LIST_DIRTY]) {
1278 dropped_lock = 1;
1279 b->hold_count++;
1280 dm_bufio_unlock(c);
1281 wait_on_bit_io(&b->state, B_WRITING,
1282 TASK_UNINTERRUPTIBLE);
1283 dm_bufio_lock(c);
1284 b->hold_count--;
1285 } else
1286 wait_on_bit_io(&b->state, B_WRITING,
1287 TASK_UNINTERRUPTIBLE);
1288 }
1289
1290 if (!test_bit(B_DIRTY, &b->state) &&
1291 !test_bit(B_WRITING, &b->state))
1292 __relink_lru(b, LIST_CLEAN);
1293
1294 cond_resched();
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310 if (dropped_lock)
1311 goto again;
1312 }
1313 wake_up(&c->free_buffer_wait);
1314 dm_bufio_unlock(c);
1315
1316 a = xchg(&c->async_write_error, 0);
1317 f = dm_bufio_issue_flush(c);
1318 if (a)
1319 return a;
1320
1321 return f;
1322}
1323EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers);
1324
1325
1326
1327
1328int dm_bufio_issue_flush(struct dm_bufio_client *c)
1329{
1330 struct dm_io_request io_req = {
1331 .bi_op = REQ_OP_WRITE,
1332 .bi_op_flags = REQ_PREFLUSH | REQ_SYNC,
1333 .mem.type = DM_IO_KMEM,
1334 .mem.ptr.addr = NULL,
1335 .client = c->dm_io,
1336 };
1337 struct dm_io_region io_reg = {
1338 .bdev = c->bdev,
1339 .sector = 0,
1340 .count = 0,
1341 };
1342
1343 BUG_ON(dm_bufio_in_request());
1344
1345 return dm_io(&io_req, 1, &io_reg, NULL);
1346}
1347EXPORT_SYMBOL_GPL(dm_bufio_issue_flush);
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
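/*
 * Move the buffer to new_block: any buffer already cached at the new
 * location is dropped first and the buffer is written out if dirty.
 * The buffer is relinked to the new block only if we are its sole
 * holder; otherwise it is written to the new location without being
 * relinked, because the other holder still expects it at the old block.
 */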
1361void dm_bufio_release_move(struct dm_buffer *b, sector_t new_block)
1362{
1363 struct dm_bufio_client *c = b->c;
1364 struct dm_buffer *new;
1365
1366 BUG_ON(dm_bufio_in_request());
1367
1368 dm_bufio_lock(c);
1369
1370retry:
1371 new = __find(c, new_block);
1372 if (new) {
1373 if (new->hold_count) {
1374 __wait_for_free_buffer(c);
1375 goto retry;
1376 }
1377
1378
1379
1380
1381
1382 __make_buffer_clean(new);
1383 __unlink_buffer(new);
1384 __free_buffer_wake(new);
1385 }
1386
1387 BUG_ON(!b->hold_count);
1388 BUG_ON(test_bit(B_READING, &b->state));
1389
1390 __write_dirty_buffer(b, NULL);
1391 if (b->hold_count == 1) {
1392 wait_on_bit_io(&b->state, B_WRITING,
1393 TASK_UNINTERRUPTIBLE);
1394 set_bit(B_DIRTY, &b->state);
1395 b->dirty_start = 0;
1396 b->dirty_end = c->block_size;
1397 __unlink_buffer(b);
1398 __link_buffer(b, new_block, LIST_DIRTY);
1399 } else {
1400 sector_t old_block;
1401 wait_on_bit_lock_io(&b->state, B_WRITING,
1402 TASK_UNINTERRUPTIBLE);
1403
1404
1405
1406
1407
1408
1409
1410 old_block = b->block;
1411 __unlink_buffer(b);
1412 __link_buffer(b, new_block, b->list_mode);
1413 submit_io(b, REQ_OP_WRITE, write_endio);
1414 wait_on_bit_io(&b->state, B_WRITING,
1415 TASK_UNINTERRUPTIBLE);
1416 __unlink_buffer(b);
1417 __link_buffer(b, old_block, b->list_mode);
1418 }
1419
1420 dm_bufio_unlock(c);
1421 dm_bufio_release(b);
1422}
1423EXPORT_SYMBOL_GPL(dm_bufio_release_move);
1424
1425
1426
1427
1428
1429
1430
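/*
 * Forget the given buffer.  This is only a hint - buffers that are held,
 * dirty, or have I/O in progress are left alone.
 */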
1431void dm_bufio_forget(struct dm_bufio_client *c, sector_t block)
1432{
1433 struct dm_buffer *b;
1434
1435 dm_bufio_lock(c);
1436
1437 b = __find(c, block);
1438 if (b && likely(!b->hold_count) && likely(!b->state)) {
1439 __unlink_buffer(b);
1440 __free_buffer_wake(b);
1441 }
1442
1443 dm_bufio_unlock(c);
1444}
1445EXPORT_SYMBOL_GPL(dm_bufio_forget);
1446
1447void dm_bufio_set_minimum_buffers(struct dm_bufio_client *c, unsigned n)
1448{
1449 c->minimum_buffers = n;
1450}
1451EXPORT_SYMBOL_GPL(dm_bufio_set_minimum_buffers);
1452
1453unsigned dm_bufio_get_block_size(struct dm_bufio_client *c)
1454{
1455 return c->block_size;
1456}
1457EXPORT_SYMBOL_GPL(dm_bufio_get_block_size);
1458
1459sector_t dm_bufio_get_device_size(struct dm_bufio_client *c)
1460{
1461 sector_t s = i_size_read(c->bdev->bd_inode) >> SECTOR_SHIFT;
1462 if (likely(c->sectors_per_block_bits >= 0))
1463 s >>= c->sectors_per_block_bits;
1464 else
1465 sector_div(s, c->block_size >> SECTOR_SHIFT);
1466 return s;
1467}
1468EXPORT_SYMBOL_GPL(dm_bufio_get_device_size);
1469
1470sector_t dm_bufio_get_block_number(struct dm_buffer *b)
1471{
1472 return b->block;
1473}
1474EXPORT_SYMBOL_GPL(dm_bufio_get_block_number);
1475
1476void *dm_bufio_get_block_data(struct dm_buffer *b)
1477{
1478 return b->data;
1479}
1480EXPORT_SYMBOL_GPL(dm_bufio_get_block_data);
1481
1482void *dm_bufio_get_aux_data(struct dm_buffer *b)
1483{
1484 return b + 1;
1485}
1486EXPORT_SYMBOL_GPL(dm_bufio_get_aux_data);
1487
1488struct dm_bufio_client *dm_bufio_get_client(struct dm_buffer *b)
1489{
1490 return b->c;
1491}
1492EXPORT_SYMBOL_GPL(dm_bufio_get_client);
1493
1494static void drop_buffers(struct dm_bufio_client *c)
1495{
1496 struct dm_buffer *b;
1497 int i;
1498 bool warned = false;
1499
1500 BUG_ON(dm_bufio_in_request());
1501
1502
1503
1504
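	/*
	 * Write dirty buffers asynchronously first, so that they are not
	 * flushed one by one while being made clean below.
	 */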
1505 dm_bufio_write_dirty_buffers_async(c);
1506
1507 dm_bufio_lock(c);
1508
1509 while ((b = __get_unclaimed_buffer(c)))
1510 __free_buffer_wake(b);
1511
1512 for (i = 0; i < LIST_SIZE; i++)
1513 list_for_each_entry(b, &c->lru[i], lru_list) {
1514 WARN_ON(!warned);
1515 warned = true;
1516 DMERR("leaked buffer %llx, hold count %u, list %d",
1517 (unsigned long long)b->block, b->hold_count, i);
1518#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
1519 stack_trace_print(b->stack_entries, b->stack_len, 1);
1520
1521 b->hold_count = 0;
1522#endif
1523 }
1524
1525#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
1526 while ((b = __get_unclaimed_buffer(c)))
1527 __free_buffer_wake(b);
1528#endif
1529
1530 for (i = 0; i < LIST_SIZE; i++)
1531 BUG_ON(!list_empty(&c->lru[i]));
1532
1533 dm_bufio_unlock(c);
1534}
1535
1536
1537
1538
1539
1540
1541
1542
1543
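/*
 * Try to evict one buffer.  Held buffers and, without __GFP_FS, buffers
 * with I/O in progress or dirty data cannot be evicted, because we must
 * not wait for or issue I/O in that case.
 */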
1544static bool __try_evict_buffer(struct dm_buffer *b, gfp_t gfp)
1545{
1546 if (!(gfp & __GFP_FS)) {
1547 if (test_bit(B_READING, &b->state) ||
1548 test_bit(B_WRITING, &b->state) ||
1549 test_bit(B_DIRTY, &b->state))
1550 return false;
1551 }
1552
1553 if (b->hold_count)
1554 return false;
1555
1556 __make_buffer_clean(b);
1557 __unlink_buffer(b);
1558 __free_buffer_wake(b);
1559
1560 return true;
1561}
1562
1563static unsigned long get_retain_buffers(struct dm_bufio_client *c)
1564{
1565 unsigned long retain_bytes = READ_ONCE(dm_bufio_retain_bytes);
1566 if (likely(c->sectors_per_block_bits >= 0))
1567 retain_bytes >>= c->sectors_per_block_bits + SECTOR_SHIFT;
1568 else
1569 retain_bytes /= c->block_size;
1570 return retain_bytes;
1571}
1572
1573static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
1574 gfp_t gfp_mask)
1575{
1576 int l;
1577 struct dm_buffer *b, *tmp;
1578 unsigned long freed = 0;
1579 unsigned long count = c->n_buffers[LIST_CLEAN] +
1580 c->n_buffers[LIST_DIRTY];
1581 unsigned long retain_target = get_retain_buffers(c);
1582
1583 for (l = 0; l < LIST_SIZE; l++) {
1584 list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) {
1585 if (__try_evict_buffer(b, gfp_mask))
1586 freed++;
1587 if (!--nr_to_scan || ((count - freed) <= retain_target))
1588 return freed;
1589 cond_resched();
1590 }
1591 }
1592 return freed;
1593}
1594
1595static unsigned long
1596dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
1597{
1598 struct dm_bufio_client *c;
1599 unsigned long freed;
1600
1601 c = container_of(shrink, struct dm_bufio_client, shrinker);
1602 if (sc->gfp_mask & __GFP_FS)
1603 dm_bufio_lock(c);
1604 else if (!dm_bufio_trylock(c))
1605 return SHRINK_STOP;
1606
1607 freed = __scan(c, sc->nr_to_scan, sc->gfp_mask);
1608 dm_bufio_unlock(c);
1609 return freed;
1610}
1611
1612static unsigned long
1613dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
1614{
1615 struct dm_bufio_client *c = container_of(shrink, struct dm_bufio_client, shrinker);
1616 unsigned long count = READ_ONCE(c->n_buffers[LIST_CLEAN]) +
1617 READ_ONCE(c->n_buffers[LIST_DIRTY]);
1618 unsigned long retain_target = get_retain_buffers(c);
1619
1620 return (count < retain_target) ? 0 : (count - retain_target);
1621}
1622
1623
1624
1625
1626struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsigned block_size,
1627 unsigned reserved_buffers, unsigned aux_size,
1628 void (*alloc_callback)(struct dm_buffer *),
1629 void (*write_callback)(struct dm_buffer *))
1630{
1631 int r;
1632 struct dm_bufio_client *c;
1633 unsigned i;
1634 char slab_name[27];
1635
1636 if (!block_size || block_size & ((1 << SECTOR_SHIFT) - 1)) {
1637 DMERR("%s: block size not specified or is not multiple of 512b", __func__);
1638 r = -EINVAL;
1639 goto bad_client;
1640 }
1641
1642 c = kzalloc(sizeof(*c), GFP_KERNEL);
1643 if (!c) {
1644 r = -ENOMEM;
1645 goto bad_client;
1646 }
1647 c->buffer_tree = RB_ROOT;
1648
1649 c->bdev = bdev;
1650 c->block_size = block_size;
1651 if (is_power_of_2(block_size))
1652 c->sectors_per_block_bits = __ffs(block_size) - SECTOR_SHIFT;
1653 else
1654 c->sectors_per_block_bits = -1;
1655
1656 c->alloc_callback = alloc_callback;
1657 c->write_callback = write_callback;
1658
1659 for (i = 0; i < LIST_SIZE; i++) {
1660 INIT_LIST_HEAD(&c->lru[i]);
1661 c->n_buffers[i] = 0;
1662 }
1663
1664 mutex_init(&c->lock);
1665 INIT_LIST_HEAD(&c->reserved_buffers);
1666 c->need_reserved_buffers = reserved_buffers;
1667
1668 dm_bufio_set_minimum_buffers(c, DM_BUFIO_MIN_BUFFERS);
1669
1670 init_waitqueue_head(&c->free_buffer_wait);
1671 c->async_write_error = 0;
1672
1673 c->dm_io = dm_io_client_create();
1674 if (IS_ERR(c->dm_io)) {
1675 r = PTR_ERR(c->dm_io);
1676 goto bad_dm_io;
1677 }
1678
1679 if (block_size <= KMALLOC_MAX_SIZE &&
1680 (block_size < PAGE_SIZE || !is_power_of_2(block_size))) {
1681 unsigned align = min(1U << __ffs(block_size), (unsigned)PAGE_SIZE);
1682 snprintf(slab_name, sizeof slab_name, "dm_bufio_cache-%u", block_size);
1683 c->slab_cache = kmem_cache_create(slab_name, block_size, align,
1684 SLAB_RECLAIM_ACCOUNT, NULL);
1685 if (!c->slab_cache) {
1686 r = -ENOMEM;
1687 goto bad;
1688 }
1689 }
1690 if (aux_size)
1691 snprintf(slab_name, sizeof slab_name, "dm_bufio_buffer-%u", aux_size);
1692 else
1693 snprintf(slab_name, sizeof slab_name, "dm_bufio_buffer");
1694 c->slab_buffer = kmem_cache_create(slab_name, sizeof(struct dm_buffer) + aux_size,
1695 0, SLAB_RECLAIM_ACCOUNT, NULL);
1696 if (!c->slab_buffer) {
1697 r = -ENOMEM;
1698 goto bad;
1699 }
1700
1701 while (c->need_reserved_buffers) {
1702 struct dm_buffer *b = alloc_buffer(c, GFP_KERNEL);
1703
1704 if (!b) {
1705 r = -ENOMEM;
1706 goto bad;
1707 }
1708 __free_buffer_wake(b);
1709 }
1710
1711 c->shrinker.count_objects = dm_bufio_shrink_count;
1712 c->shrinker.scan_objects = dm_bufio_shrink_scan;
1713 c->shrinker.seeks = 1;
1714 c->shrinker.batch = 0;
1715 r = register_shrinker(&c->shrinker);
1716 if (r)
1717 goto bad;
1718
1719 mutex_lock(&dm_bufio_clients_lock);
1720 dm_bufio_client_count++;
1721 list_add(&c->client_list, &dm_bufio_all_clients);
1722 __cache_size_refresh();
1723 mutex_unlock(&dm_bufio_clients_lock);
1724
1725 return c;
1726
1727bad:
1728 while (!list_empty(&c->reserved_buffers)) {
1729 struct dm_buffer *b = list_entry(c->reserved_buffers.next,
1730 struct dm_buffer, lru_list);
1731 list_del(&b->lru_list);
1732 free_buffer(b);
1733 }
1734 kmem_cache_destroy(c->slab_cache);
1735 kmem_cache_destroy(c->slab_buffer);
1736 dm_io_client_destroy(c->dm_io);
1737bad_dm_io:
1738 mutex_destroy(&c->lock);
1739 kfree(c);
1740bad_client:
1741 return ERR_PTR(r);
1742}
1743EXPORT_SYMBOL_GPL(dm_bufio_client_create);
1744
1745
1746
1747
1748
1749void dm_bufio_client_destroy(struct dm_bufio_client *c)
1750{
1751 unsigned i;
1752
1753 drop_buffers(c);
1754
1755 unregister_shrinker(&c->shrinker);
1756
1757 mutex_lock(&dm_bufio_clients_lock);
1758
1759 list_del(&c->client_list);
1760 dm_bufio_client_count--;
1761 __cache_size_refresh();
1762
1763 mutex_unlock(&dm_bufio_clients_lock);
1764
1765 BUG_ON(!RB_EMPTY_ROOT(&c->buffer_tree));
1766 BUG_ON(c->need_reserved_buffers);
1767
1768 while (!list_empty(&c->reserved_buffers)) {
1769 struct dm_buffer *b = list_entry(c->reserved_buffers.next,
1770 struct dm_buffer, lru_list);
1771 list_del(&b->lru_list);
1772 free_buffer(b);
1773 }
1774
1775 for (i = 0; i < LIST_SIZE; i++)
1776 if (c->n_buffers[i])
1777 DMERR("leaked buffer count %d: %ld", i, c->n_buffers[i]);
1778
1779 for (i = 0; i < LIST_SIZE; i++)
1780 BUG_ON(c->n_buffers[i]);
1781
1782 kmem_cache_destroy(c->slab_cache);
1783 kmem_cache_destroy(c->slab_buffer);
1784 dm_io_client_destroy(c->dm_io);
1785 mutex_destroy(&c->lock);
1786 kfree(c);
1787}
1788EXPORT_SYMBOL_GPL(dm_bufio_client_destroy);
1789
1790void dm_bufio_set_sector_offset(struct dm_bufio_client *c, sector_t start)
1791{
1792 c->start = start;
1793}
1794EXPORT_SYMBOL_GPL(dm_bufio_set_sector_offset);
1795
1796static unsigned get_max_age_hz(void)
1797{
1798 unsigned max_age = READ_ONCE(dm_bufio_max_age);
1799
1800 if (max_age > UINT_MAX / HZ)
1801 max_age = UINT_MAX / HZ;
1802
1803 return max_age * HZ;
1804}
1805
1806static bool older_than(struct dm_buffer *b, unsigned long age_hz)
1807{
1808 return time_after_eq(jiffies, b->last_accessed + age_hz);
1809}
1810
1811static void __evict_old_buffers(struct dm_bufio_client *c, unsigned long age_hz)
1812{
1813 struct dm_buffer *b, *tmp;
1814 unsigned long retain_target = get_retain_buffers(c);
1815 unsigned long count;
1816 LIST_HEAD(write_list);
1817
1818 dm_bufio_lock(c);
1819
1820 __check_watermark(c, &write_list);
1821 if (unlikely(!list_empty(&write_list))) {
1822 dm_bufio_unlock(c);
1823 __flush_write_list(&write_list);
1824 dm_bufio_lock(c);
1825 }
1826
1827 count = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY];
1828 list_for_each_entry_safe_reverse(b, tmp, &c->lru[LIST_CLEAN], lru_list) {
1829 if (count <= retain_target)
1830 break;
1831
1832 if (!older_than(b, age_hz))
1833 break;
1834
1835 if (__try_evict_buffer(b, 0))
1836 count--;
1837
1838 cond_resched();
1839 }
1840
1841 dm_bufio_unlock(c);
1842}
1843
1844static void cleanup_old_buffers(void)
1845{
1846 unsigned long max_age_hz = get_max_age_hz();
1847 struct dm_bufio_client *c;
1848
1849 mutex_lock(&dm_bufio_clients_lock);
1850
1851 __cache_size_refresh();
1852
1853 list_for_each_entry(c, &dm_bufio_all_clients, client_list)
1854 __evict_old_buffers(c, max_age_hz);
1855
1856 mutex_unlock(&dm_bufio_clients_lock);
1857}
1858
1859static struct workqueue_struct *dm_bufio_wq;
1860static struct delayed_work dm_bufio_work;
1861
1862static void work_fn(struct work_struct *w)
1863{
1864 cleanup_old_buffers();
1865
1866 queue_delayed_work(dm_bufio_wq, &dm_bufio_work,
1867 DM_BUFIO_WORK_TIMER_SECS * HZ);
1868}
1869
1870
1871
1872
1873
1874
1875
1876
1877
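/*
 * Module setup: compute the default cache size from available memory and
 * start the periodic cleanup work.
 */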
1878static int __init dm_bufio_init(void)
1879{
1880 __u64 mem;
1881
1882 dm_bufio_allocated_kmem_cache = 0;
1883 dm_bufio_allocated_get_free_pages = 0;
1884 dm_bufio_allocated_vmalloc = 0;
1885 dm_bufio_current_allocated = 0;
1886
1887 mem = (__u64)mult_frac(totalram_pages() - totalhigh_pages(),
1888 DM_BUFIO_MEMORY_PERCENT, 100) << PAGE_SHIFT;
1889
1890 if (mem > ULONG_MAX)
1891 mem = ULONG_MAX;
1892
1893#ifdef CONFIG_MMU
1894 if (mem > mult_frac(VMALLOC_TOTAL, DM_BUFIO_VMALLOC_PERCENT, 100))
1895 mem = mult_frac(VMALLOC_TOTAL, DM_BUFIO_VMALLOC_PERCENT, 100);
1896#endif
1897
1898 dm_bufio_default_cache_size = mem;
1899
1900 mutex_lock(&dm_bufio_clients_lock);
1901 __cache_size_refresh();
1902 mutex_unlock(&dm_bufio_clients_lock);
1903
1904 dm_bufio_wq = alloc_workqueue("dm_bufio_cache", WQ_MEM_RECLAIM, 0);
1905 if (!dm_bufio_wq)
1906 return -ENOMEM;
1907
1908 INIT_DELAYED_WORK(&dm_bufio_work, work_fn);
1909 queue_delayed_work(dm_bufio_wq, &dm_bufio_work,
1910 DM_BUFIO_WORK_TIMER_SECS * HZ);
1911
1912 return 0;
1913}
1914
1915
1916
1917
1918static void __exit dm_bufio_exit(void)
1919{
1920 int bug = 0;
1921
1922 cancel_delayed_work_sync(&dm_bufio_work);
1923 destroy_workqueue(dm_bufio_wq);
1924
1925 if (dm_bufio_client_count) {
1926 DMCRIT("%s: dm_bufio_client_count leaked: %d",
1927 __func__, dm_bufio_client_count);
1928 bug = 1;
1929 }
1930
1931 if (dm_bufio_current_allocated) {
1932 DMCRIT("%s: dm_bufio_current_allocated leaked: %lu",
1933 __func__, dm_bufio_current_allocated);
1934 bug = 1;
1935 }
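
	/* Check the kmem_cache counter too, mirroring the checks above. */
	if (dm_bufio_allocated_kmem_cache) {
		DMCRIT("%s: dm_bufio_allocated_kmem_cache leaked: %lu",
		       __func__, dm_bufio_allocated_kmem_cache);
		bug = 1;
	}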
1936
1937 if (dm_bufio_allocated_get_free_pages) {
1938 DMCRIT("%s: dm_bufio_allocated_get_free_pages leaked: %lu",
1939 __func__, dm_bufio_allocated_get_free_pages);
1940 bug = 1;
1941 }
1942
1943 if (dm_bufio_allocated_vmalloc) {
1944 DMCRIT("%s: dm_bufio_vmalloc leaked: %lu",
1945 __func__, dm_bufio_allocated_vmalloc);
1946 bug = 1;
1947 }
1948
1949 BUG_ON(bug);
1950}
1951
1952module_init(dm_bufio_init)
1953module_exit(dm_bufio_exit)
1954
1955module_param_named(max_cache_size_bytes, dm_bufio_cache_size, ulong, S_IRUGO | S_IWUSR);
1956MODULE_PARM_DESC(max_cache_size_bytes, "Size of metadata cache");
1957
1958module_param_named(max_age_seconds, dm_bufio_max_age, uint, S_IRUGO | S_IWUSR);
1959MODULE_PARM_DESC(max_age_seconds, "Max age of a buffer in seconds");
1960
1961module_param_named(retain_bytes, dm_bufio_retain_bytes, ulong, S_IRUGO | S_IWUSR);
1962MODULE_PARM_DESC(retain_bytes, "Try to keep at least this many bytes cached in memory");
1963
1964module_param_named(peak_allocated_bytes, dm_bufio_peak_allocated, ulong, S_IRUGO | S_IWUSR);
1965MODULE_PARM_DESC(peak_allocated_bytes, "Tracks the maximum allocated memory");
1966
1967module_param_named(allocated_kmem_cache_bytes, dm_bufio_allocated_kmem_cache, ulong, S_IRUGO);
1968MODULE_PARM_DESC(allocated_kmem_cache_bytes, "Memory allocated with kmem_cache_alloc");
1969
1970module_param_named(allocated_get_free_pages_bytes, dm_bufio_allocated_get_free_pages, ulong, S_IRUGO);
1971MODULE_PARM_DESC(allocated_get_free_pages_bytes, "Memory allocated with get_free_pages");
1972
1973module_param_named(allocated_vmalloc_bytes, dm_bufio_allocated_vmalloc, ulong, S_IRUGO);
1974MODULE_PARM_DESC(allocated_vmalloc_bytes, "Memory allocated with vmalloc");
1975
1976module_param_named(current_allocated_bytes, dm_bufio_current_allocated, ulong, S_IRUGO);
1977MODULE_PARM_DESC(current_allocated_bytes, "Memory currently used by the cache");
1978
1979MODULE_AUTHOR("Mikulas Patocka <dm-devel@redhat.com>");
1980MODULE_DESCRIPTION(DM_NAME " buffered I/O library");
1981MODULE_LICENSE("GPL");
1982