#ifndef _BCACHE_H
#define _BCACHE_H
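
/*
 * bcache is a block layer cache: it lets one or more fast devices (typically
 * SSDs, grouped into a "cache set") cache arbitrary slower backing devices.
 *
 * The index is a btree keyed by (inode, offset); keys point into "buckets" on
 * the cache device. Buckets are allocated and reclaimed wholesale: each bucket
 * has an 8 bit generation number, and incrementing the generation invalidates
 * every pointer into that bucket, which is how cached data is dropped without
 * touching the btree. Priorities order buckets for reclaim, and garbage
 * collection tracks how much live data each bucket still holds.
 */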
#define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__

#include <linux/bcache.h>
#include <linux/bio.h>
#include <linux/kobject.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/rbtree.h>
#include <linux/rwsem.h>
#include <linux/types.h>
#include <linux/workqueue.h>

#include "bset.h"
#include "util.h"
#include "closure.h"

struct bucket {
	atomic_t	pin;
	uint16_t	prio;
	uint8_t		gen;
	uint8_t		last_gc; /* Most out of date gen in the btree */
	uint16_t	gc_mark; /* Bitfield used by GC. See below for field */
};

/*
 * gc_mark is a bitfield, carved up by the BITMASK() accessors below:
 * bits 0-1 hold the GC_MARK_* state, bits 2-14 count sectors of live data,
 * and bit 15 is the moving-gc flag.
 */
BITMASK(GC_MARK, struct bucket, gc_mark, 0, 2);
#define GC_MARK_RECLAIMABLE	1
#define GC_MARK_DIRTY		2
#define GC_MARK_METADATA	3
#define GC_SECTORS_USED_SIZE	13
#define MAX_GC_SECTORS_USED	(~(~0ULL << GC_SECTORS_USED_SIZE))
BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, GC_SECTORS_USED_SIZE);
BITMASK(GC_MOVE, struct bucket, gc_mark, 15, 1);
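
/*
 * A minimal usage sketch of the generated accessors (BITMASK() comes in via
 * the includes above and expands to NAME()/SET_NAME() get/set helpers);
 * `b` here is a hypothetical struct bucket *:
 *
 *	SET_GC_MARK(b, GC_MARK_METADATA);
 *	SET_GC_SECTORS_USED(b, min_t(unsigned, sectors, MAX_GC_SECTORS_USED));
 *	if (GC_MARK(b) == GC_MARK_METADATA)
 *		;	// bucket holds btree nodes or prios, never reclaim
 */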

#include "journal.h"
#include "stats.h"
struct search;
struct btree;
struct keybuf;

struct keybuf_key {
	struct rb_node	node;
	BKEY_PADDED(key);
	void		*private;
};

struct keybuf {
	struct bkey	last_scanned;
	spinlock_t	lock;

	/*
	 * Beginning and end of range in rb tree - so that we can skip taking
	 * lock and checking the rb tree when we need to check for overlapping
	 * keys.
	 */
	struct bkey	start;
	struct bkey	end;

	struct rb_root	keys;

#define KEYBUF_NR	500
	DECLARE_ARRAY_ALLOCATOR(struct keybuf_key, freelist, KEYBUF_NR);
};
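
/*
 * Sketch of how a keybuf is consumed (hedged; the refill/next helpers live in
 * btree.h): a producer scans the btree for keys matching a predicate and
 * stashes them in ->keys, then a worker pops entries one at a time, does the
 * IO, and deletes each entry so its freelist slot can be reused. Both
 * writeback and moving gc drive their IO this way.
 */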

struct bcache_device {
	struct closure		cl;

	struct kobject		kobj;

	struct cache_set	*c;
	unsigned		id;
#define BCACHEDEVNAME_SIZE	12
	char			name[BCACHEDEVNAME_SIZE];

	struct gendisk		*disk;

	unsigned long		flags;
#define BCACHE_DEV_CLOSING	0
#define BCACHE_DEV_DETACHING	1
#define BCACHE_DEV_UNLINK_DONE	2

	/* Dirty data is tracked per fixed-size stripe of the device */
	unsigned		nr_stripes;
	unsigned		stripe_size;
	atomic_t		*stripe_sectors_dirty;
	unsigned long		*full_dirty_stripes;

	unsigned long		sectors_dirty_last;
	long			sectors_dirty_derivative;

	struct bio_set		*bio_split;

	unsigned		data_csum:1;

	int (*cache_miss)(struct btree *, struct search *,
			  struct bio *, unsigned);
	int (*ioctl)(struct bcache_device *, fmode_t, unsigned, unsigned long);
};

struct io {
	/* Used to track sequential IO so it can be skipped */
	struct hlist_node	hash;
	struct list_head	lru;

	unsigned long		jiffies;
	unsigned		sequential;
	sector_t		last;
};

struct cached_dev {
	struct list_head	list;
	struct bcache_device	disk;
	struct block_device	*bdev;

	struct cache_sb		sb;
	struct bio		sb_bio;
	struct bio_vec		sb_bv[1];
	struct closure		sb_write;
	struct semaphore	sb_write_mutex;

	/* Refcount on the cache set. Always nonzero when we're caching. */
	atomic_t		count;
	struct work_struct	detach;

	/* Set once bch_cached_dev_run() has brought the device up */
	atomic_t		running;

	/*
	 * Writes take a shared lock from start to finish; scanning for dirty
	 * data to refill the rb tree requires an exclusive lock.
	 */
	struct rw_semaphore	writeback_lock;

	/*
	 * Nonzero, and writeback has a refcount (d->count), iff there is dirty
	 * data in the cache. Protected by writeback_lock; must be set (and
	 * only set) when writeback_lock is held.
	 */
	atomic_t		has_dirty;

	struct bch_ratelimit	writeback_rate;
	struct delayed_work	writeback_rate_update;

	/*
	 * Internal to the writeback code, so read_dirty() can keep track of
	 * where it's at.
	 */
	sector_t		last_read;

	/* Limit number of writeback bios in flight */
	struct semaphore	in_flight;
	struct task_struct	*writeback_thread;

	struct keybuf		writeback_keys;

	/* For tracking sequential IO */
#define RECENT_IO_BITS	7
#define RECENT_IO	(1 << RECENT_IO_BITS)
	struct io		io[RECENT_IO];
	struct hlist_head	io_hash[RECENT_IO + 1];
	struct list_head	io_lru;
	spinlock_t		io_lock;

	struct cache_accounting	accounting;

	/* The rest of this all shows up in sysfs */
	unsigned		sequential_cutoff;
	unsigned		readahead;

	unsigned		verify:1;
	unsigned		bypass_torture_test:1;

	unsigned		partial_stripes_expensive:1;
	unsigned		writeback_metadata:1;
	unsigned		writeback_running:1;
	unsigned char		writeback_percent;
	unsigned		writeback_delay;

	uint64_t		writeback_rate_target;
	int64_t			writeback_rate_proportional;
	int64_t			writeback_rate_derivative;
	int64_t			writeback_rate_change;

	unsigned		writeback_rate_update_seconds;
	unsigned		writeback_rate_d_term;
	unsigned		writeback_rate_p_term_inverse;
};
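
/*
 * The writeback_rate_* fields above drive a PD (proportional-derivative)
 * controller: every writeback_rate_update_seconds the error between the
 * current dirty-sector count and writeback_rate_target is turned into a rate
 * adjustment, roughly (a sketch, not the exact kernel arithmetic):
 *
 *	error        = dirty - target;
 *	proportional = error / writeback_rate_p_term_inverse;
 *	derivative   = smoothed d(dirty)/dt over writeback_rate_d_term;
 *	rate        += proportional + derivative;
 */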

enum alloc_reserve {
	RESERVE_BTREE,
	RESERVE_PRIO,
	RESERVE_MOVINGGC,
	RESERVE_NONE,
	RESERVE_NR,
};
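
/*
 * Each reserve above gets its own freelist of buckets (the free[] FIFO array
 * in struct cache below); keeping dedicated reserves means e.g. a btree node
 * split can always get a bucket without deadlocking against ordinary data
 * allocations.
 */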

struct cache {
	struct cache_set	*set;
	struct cache_sb		sb;
	struct bio		sb_bio;
	struct bio_vec		sb_bv[1];

	struct kobject		kobj;
	struct block_device	*bdev;

	struct task_struct	*alloc_thread;

	struct closure		prio;
	struct prio_set		*disk_buckets;

	/*
	 * When allocating new buckets, prio_write() gets first dibs - since we
	 * may not be able to allocate at all without writing priorities and
	 * gens. prio_buckets[] contains the last buckets we wrote priorities
	 * to (so gc can mark them as metadata), prio_last_buckets[] contains
	 * the last buckets prio_write() wrote to.
	 */
	uint64_t		*prio_buckets;
	uint64_t		*prio_last_buckets;

	/*
	 * free: Buckets that are ready to be used
	 *
	 * free_inc: Incoming buckets - these are buckets that currently have
	 * cached data in them, and we can't reuse them until after we write
	 * their new gen to disk. After prio_write() finishes writing the new
	 * gens/prios, they'll be moved to the free list (and possibly
	 * discarded in the process)
	 */
	DECLARE_FIFO(long, free)[RESERVE_NR];
	DECLARE_FIFO(long, free_inc);

	size_t			fifo_last_bucket;

	/* Allocation stuff: */
	struct bucket		*buckets;

	DECLARE_HEAP(struct bucket *, heap);

	/*
	 * If nonzero, we know we aren't going to find any buckets to
	 * invalidate until a gc finishes - otherwise we could pointlessly burn
	 * a ton of cpu
	 */
	unsigned		invalidate_needs_gc:1;

	bool			discard; /* Get rid of? */

	struct journal_device	journal;

	/* The rest of this all shows up in sysfs */
#define IO_ERROR_SHIFT		20
	atomic_t		io_errors;
	atomic_t		io_count;

	atomic_long_t		meta_sectors_written;
	atomic_long_t		btree_sectors_written;
	atomic_long_t		sectors_written;
};

struct gc_stat {
	size_t			nodes;
	size_t			key_bytes;

	size_t			nkeys;
	uint64_t		data;	/* sectors */
	unsigned		in_use; /* percent */
};

/*
 * Flag bits, for how the cache set is shutting down, and what phase it's in:
 *
 * CACHE_SET_UNREGISTERING means we're not just shutting down, we're detaching
 * all the backing devices first (their cached data gets invalidated, and they
 * won't automatically reattach).
 *
 * CACHE_SET_STOPPING always gets set first when we're closing down a cache
 * set; we'll continue to run normally for awhile with CACHE_SET_STOPPING set
 * (i.e. still flushing dirty data).
 *
 * CACHE_SET_RUNNING means all cache devices have been registered and journal
 * replay is complete.
 */
#define CACHE_SET_UNREGISTERING	0
#define CACHE_SET_STOPPING	1
#define CACHE_SET_RUNNING	2

struct cache_set {
	struct closure		cl;

	struct list_head	list;
	struct kobject		kobj;
	struct kobject		internal;
	struct dentry		*debug;
	struct cache_accounting accounting;

	unsigned long		flags;

	struct cache_sb		sb;

	struct cache		*cache[MAX_CACHES_PER_SET];
	struct cache		*cache_by_alloc[MAX_CACHES_PER_SET];
	int			caches_loaded;

	struct bcache_device	**devices;
	struct list_head	cached_devs;
	uint64_t		cached_dev_sectors;
	struct closure		caching;

	struct closure		sb_write;
	struct semaphore	sb_write_mutex;

	mempool_t		*search;
	mempool_t		*bio_meta;
	struct bio_set		*bio_split;

	/* For the btree cache */
	struct shrinker		shrink;

	/* For the btree cache and anything allocation related */
	struct mutex		bucket_lock;

	/* log2(bucket size in sectors), for bucket <-> sector conversions */
	unsigned short		bucket_bits;

	/* log2(block size in sectors) */
	unsigned short		block_bits;

	/*
	 * Default number of pages for a new btree node - may be less than a
	 * full bucket
	 */
	unsigned		btree_pages;

	/*
	 * Lists of struct btrees; btree_cache is for structs that have memory
	 * allocated for the actual btree node, btree_cache_freed is for
	 * structs that do not. We never free a struct btree except on
	 * shutdown - we just put it on the btree_cache_freed list and reuse
	 * it later, which simplifies the lifetime rules considerably.
	 */
	struct list_head	btree_cache;
	struct list_head	btree_cache_freeable;
	struct list_head	btree_cache_freed;

	/* Number of elements in btree_cache + btree_cache_freeable lists */
	unsigned		btree_cache_used;

	/*
	 * If we need to allocate memory for a new btree node and that
	 * allocation fails, we can cannibalize another node in the btree cache
	 * to satisfy the allocation - lock to guarantee only one thread does
	 * this at a time:
	 */
	wait_queue_head_t	btree_cache_wait;
	struct task_struct	*btree_cache_alloc_lock;

	/*
	 * When we free a btree node, we increment the gen of the bucket the
	 * node is in - but we can't rewrite the prios and gens until we
	 * finished whatever it is we were doing, otherwise after a crash the
	 * btree node would be freed but for say a split, we might not have the
	 * pointers to the new nodes inserted into the btree yet.
	 *
	 * This is a refcount that blocks prio_write() until the new keys are
	 * written.
	 */
	atomic_t		prio_blocked;
	wait_queue_head_t	bucket_wait;

	/*
	 * For any bio we don't skip we subtract the number of sectors from
	 * rescale; when it hits 0 we rescale all the bucket priorities.
	 */
	atomic_t		rescale;

	/*
	 * When we invalidate buckets, we use both the priority and the amount
	 * of good data to determine which buckets to reuse first - to weight
	 * those together consistently we keep track of the smallest nonzero
	 * priority of any bucket.
	 */
	uint16_t		min_prio;

	/*
	 * max(gen - last_gc) for all buckets. When it gets too big we have to
	 * gc to keep gens from wrapping around.
	 */
	uint8_t			need_gc;
	struct gc_stat		gc_stats;
	size_t			nbuckets;

	struct task_struct	*gc_thread;
	/* Where in the btree gc currently is */
	struct bkey		gc_done;

	/*
	 * The allocation code needs gc_mark in struct bucket to be correct,
	 * but it's not while a gc is in progress. Protected by bucket_lock.
	 */
	int			gc_mark_valid;

	/* Counts how many sectors bio_insert has added to the cache */
	atomic_t		sectors_to_gc;

	wait_queue_head_t	moving_gc_wait;
	struct keybuf		moving_gc_keys;
	/* Number of moving GC bios in flight */
	struct semaphore	moving_in_flight;

	struct workqueue_struct	*moving_gc_wq;

	struct btree		*root;

#ifdef CONFIG_BCACHE_DEBUG
	struct btree		*verify_data;
	struct bset		*verify_ondisk;
	struct mutex		verify_lock;
#endif

	unsigned		nr_uuids;
	struct uuid_entry	*uuids;
	BKEY_PADDED(uuid_bucket);
	struct closure		uuid_write;
	struct semaphore	uuid_write_mutex;

	/*
	 * A btree node on disk could have too many bsets for an iterator to
	 * fit on the stack - have to dynamically allocate them
	 */
	mempool_t		*fill_iter;

	struct bset_sort_state	sort;

	/* List of buckets we're currently writing data to */
	struct list_head	data_buckets;
	spinlock_t		data_bucket_lock;

	struct journal		journal;

#define CONGESTED_MAX		1024
	unsigned		congested_last_us;
	atomic_t		congested;

	/* The rest of this all shows up in sysfs */
	unsigned		congested_read_threshold_us;
	unsigned		congested_write_threshold_us;

	struct time_stats	btree_gc_time;
	struct time_stats	btree_split_time;
	struct time_stats	btree_read_time;

	atomic_long_t		cache_read_races;
	atomic_long_t		writeback_keys_done;
	atomic_long_t		writeback_keys_failed;

	enum {
		ON_ERROR_UNREGISTER,
		ON_ERROR_PANIC,
	}			on_error;
	unsigned		error_limit;
	unsigned		error_decay;

	unsigned short		journal_delay_ms;
	bool			expensive_debug_checks;
	unsigned		verify:1;
	unsigned		key_merging_disabled:1;
	unsigned		gc_always_rewrite:1;
	unsigned		shrinker_disabled:1;
	unsigned		copy_gc_enabled:1;

#define BUCKET_HASH_BITS	12
	struct hlist_head	bucket_hash[1 << BUCKET_HASH_BITS];
};
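
/*
 * The flags field above is manipulated with the standard bit ops; a hedged
 * sketch of how shutdown checks look elsewhere in the code:
 *
 *	if (test_bit(CACHE_SET_STOPPING, &c->flags))
 *		return;		// cache set is going away, don't start work
 */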

struct bbio {
	unsigned		submit_time_us;
	union {
		struct bkey	key;
		uint64_t	_pad[3];
		/*
		 * We only need pad = 3 here because we only ever carry around
		 * a single pointer - i.e. the pointer we're doing io to/from
		 */
	};
	struct bio		bio;
};

#define BTREE_PRIO		USHRT_MAX
#define INITIAL_PRIO		32768U

#define btree_bytes(c)		((c)->btree_pages * PAGE_SIZE)
#define btree_blocks(b)							\
	((unsigned) (KEY_SIZE(&b->key) >> (b)->c->block_bits))

#define btree_default_blocks(c)						\
	((unsigned) ((PAGE_SECTORS * (c)->btree_pages) >> (c)->block_bits))

#define bucket_pages(c)		((c)->sb.bucket_size / PAGE_SECTORS)
#define bucket_bytes(c)		((c)->sb.bucket_size << 9)
#define block_bytes(c)		((c)->sb.block_size << 9)

#define prios_per_bucket(c)				\
	((bucket_bytes(c) - sizeof(struct prio_set)) /	\
	 sizeof(struct bucket_disk))
#define prio_buckets(c)					\
	DIV_ROUND_UP((size_t) (c)->sb.nbuckets, prios_per_bucket(c))
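
/*
 * Worked example (hedged; assumes 512-byte sectors and a typical format):
 * with sb.bucket_size = 1024 sectors, bucket_bytes() = 1024 << 9 = 512 KiB
 * and bucket_pages() = 1024 / 8 = 128 4k pages; with sb.block_size = 8
 * sectors, block_bytes() = 4 KiB. prio_buckets() then gives how many such
 * buckets are needed to hold one struct bucket_disk per bucket in the cache.
 */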

static inline size_t sector_to_bucket(struct cache_set *c, sector_t s)
{
	return s >> c->bucket_bits;
}

static inline sector_t bucket_to_sector(struct cache_set *c, size_t b)
{
	return ((sector_t) b) << c->bucket_bits;
}

static inline sector_t bucket_remainder(struct cache_set *c, sector_t s)
{
	return s & (c->sb.bucket_size - 1);
}
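
/*
 * These rely on bucket_size being a power of two, with
 * bucket_bits == ilog2(sb.bucket_size). E.g. with 1024-sector buckets
 * (bucket_bits = 10):
 *
 *	sector_to_bucket(c, 5000)  == 5000 >> 10   == 4
 *	bucket_to_sector(c, 4)     == 4 << 10      == 4096
 *	bucket_remainder(c, 5000)  == 5000 & 1023  == 904
 */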

static inline struct cache *PTR_CACHE(struct cache_set *c,
				      const struct bkey *k,
				      unsigned ptr)
{
	return c->cache[PTR_DEV(k, ptr)];
}

static inline size_t PTR_BUCKET_NR(struct cache_set *c,
				   const struct bkey *k,
				   unsigned ptr)
{
	return sector_to_bucket(c, PTR_OFFSET(k, ptr));
}

static inline struct bucket *PTR_BUCKET(struct cache_set *c,
					const struct bkey *k,
					unsigned ptr)
{
	return PTR_CACHE(c, k, ptr)->buckets + PTR_BUCKET_NR(c, k, ptr);
}

static inline uint8_t gen_after(uint8_t a, uint8_t b)
{
	uint8_t r = a - b;
	return r > 128U ? 0 : r;
}
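
/*
 * Bucket generations are 8-bit counters that wrap, so "after" is defined
 * modulo 256 with a window of 128. For example:
 *
 *	gen_after(2, 250) == 8	(2 is 8 gens past 250, via wraparound)
 *	gen_after(250, 2) == 0	(250 is not after 2)
 */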

/*
 * A pointer is stale if the bucket it points into has been reused (its gen
 * incremented) since the key was created.
 */
static inline uint8_t ptr_stale(struct cache_set *c, const struct bkey *k,
				unsigned i)
{
	return gen_after(PTR_BUCKET(c, k, i)->gen, PTR_GEN(k, i));
}

static inline bool ptr_available(struct cache_set *c, const struct bkey *k,
				 unsigned i)
{
	return (PTR_DEV(k, i) < MAX_CACHES_PER_SET) && PTR_CACHE(c, k, i);
}

/*
 * This is used for various on disk data structures - cache_sb, prio_set,
 * bset, jset: the checksum is _always_ the first 8 bytes of these structs
 */
#define csum_set(i)							\
	bch_crc64(((void *) (i)) + sizeof(uint64_t),			\
		  ((void *) bset_bkey_last(i)) -			\
		  (((void *) (i)) + sizeof(uint64_t)))
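
/*
 * Typical use (sketch): checksum everything after the leading csum field
 * when writing a bset out, and verify the same way on read:
 *
 *	i->csum = csum_set(i);
 */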

/* Error handling macros */

#define btree_bug(b, ...)						\
do {									\
	if (bch_cache_set_error((b)->c, __VA_ARGS__))			\
		dump_stack();						\
} while (0)

#define cache_bug(c, ...)						\
do {									\
	if (bch_cache_set_error(c, __VA_ARGS__))			\
		dump_stack();						\
} while (0)

#define btree_bug_on(cond, b, ...)					\
do {									\
	if (cond)							\
		btree_bug(b, __VA_ARGS__);				\
} while (0)

#define cache_bug_on(cond, c, ...)					\
do {									\
	if (cond)							\
		cache_bug(c, __VA_ARGS__);				\
} while (0)

#define cache_set_err_on(cond, c, ...)					\
do {									\
	if (cond)							\
		bch_cache_set_error(c, __VA_ARGS__);			\
} while (0)

/* Looping macros */

#define for_each_cache(ca, cs, iter)					\
	for (iter = 0; ca = cs->cache[iter], iter < (cs)->sb.nr_in_set; iter++)

#define for_each_bucket(b, ca)						\
	for (b = (ca)->buckets + (ca)->sb.first_bucket;			\
	     b < (ca)->buckets + (ca)->sb.nbuckets; b++)
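
/*
 * Usage sketch (hypothetical): count buckets holding metadata on one cache:
 *
 *	struct bucket *b;
 *	size_t n = 0;
 *
 *	for_each_bucket(b, ca)
 *		if (GC_MARK(b) == GC_MARK_METADATA)
 *			n++;
 */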

static inline void cached_dev_put(struct cached_dev *dc)
{
	if (atomic_dec_and_test(&dc->count))
		schedule_work(&dc->detach);
}

static inline bool cached_dev_get(struct cached_dev *dc)
{
	if (!atomic_inc_not_zero(&dc->count))
		return false;

	/* Paired with the mb in cached_dev_attach */
	smp_mb__after_atomic();
	return true;
}

/*
 * bucket_gc_gen() returns the difference between the bucket's current gen and
 * the oldest gen of any pointer into that bucket in the btree (last_gc).
 */
static inline uint8_t bucket_gc_gen(struct bucket *b)
{
	return b->gen - b->last_gc;
}

/* Once bucket_gc_gen() grows past this, gc must run before gens wrap */
#define BUCKET_GC_GEN_MAX	96U

#define kobj_attribute_write(n, fn)					\
	static struct kobj_attribute ksysfs_##n = __ATTR(n, S_IWUSR, NULL, fn)

#define kobj_attribute_rw(n, show, store)				\
	static struct kobj_attribute ksysfs_##n =			\
		__ATTR(n, S_IWUSR|S_IRUSR, show, store)

static inline void wake_up_allocators(struct cache_set *c)
{
	struct cache *ca;
	unsigned i;

	for_each_cache(ca, c, i)
		wake_up_process(ca->alloc_thread);
}

/* Forward declarations */

void bch_count_io_errors(struct cache *, int, const char *);
void bch_bbio_count_io_errors(struct cache_set *, struct bio *,
			      int, const char *);
void bch_bbio_endio(struct cache_set *, struct bio *, int, const char *);
void bch_bbio_free(struct bio *, struct cache_set *);
struct bio *bch_bbio_alloc(struct cache_set *);

void __bch_submit_bbio(struct bio *, struct cache_set *);
void bch_submit_bbio(struct bio *, struct cache_set *, struct bkey *, unsigned);

uint8_t bch_inc_gen(struct cache *, struct bucket *);
void bch_rescale_priorities(struct cache_set *, int);

bool bch_can_invalidate_bucket(struct cache *, struct bucket *);
void __bch_invalidate_one_bucket(struct cache *, struct bucket *);

void __bch_bucket_free(struct cache *, struct bucket *);
void bch_bucket_free(struct cache_set *, struct bkey *);

long bch_bucket_alloc(struct cache *, unsigned, bool);
int __bch_bucket_alloc_set(struct cache_set *, unsigned,
			   struct bkey *, int, bool);
int bch_bucket_alloc_set(struct cache_set *, unsigned,
			 struct bkey *, int, bool);
bool bch_alloc_sectors(struct cache_set *, struct bkey *, unsigned,
		       unsigned, unsigned, bool);

__printf(2, 3)
bool bch_cache_set_error(struct cache_set *, const char *, ...);

void bch_prio_write(struct cache *);
void bch_write_bdev_super(struct cached_dev *, struct closure *);

extern struct workqueue_struct *bcache_wq;
extern const char * const bch_cache_modes[];
extern struct mutex bch_register_lock;
extern struct list_head bch_cache_sets;

extern struct kobj_type bch_cached_dev_ktype;
extern struct kobj_type bch_flash_dev_ktype;
extern struct kobj_type bch_cache_set_ktype;
extern struct kobj_type bch_cache_set_internal_ktype;
extern struct kobj_type bch_cache_ktype;

void bch_cached_dev_release(struct kobject *);
void bch_flash_dev_release(struct kobject *);
void bch_cache_set_release(struct kobject *);
void bch_cache_release(struct kobject *);

int bch_uuid_write(struct cache_set *);
void bcache_write_super(struct cache_set *);

int bch_flash_dev_create(struct cache_set *c, uint64_t size);

int bch_cached_dev_attach(struct cached_dev *, struct cache_set *);
void bch_cached_dev_detach(struct cached_dev *);
void bch_cached_dev_run(struct cached_dev *);
void bcache_device_stop(struct bcache_device *);

void bch_cache_set_unregister(struct cache_set *);
void bch_cache_set_stop(struct cache_set *);

struct cache_set *bch_cache_set_alloc(struct cache_sb *);
void bch_btree_cache_free(struct cache_set *);
int bch_btree_cache_alloc(struct cache_set *);
void bch_moving_init_cache_set(struct cache_set *);
int bch_open_buckets_alloc(struct cache_set *);
void bch_open_buckets_free(struct cache_set *);

int bch_cache_allocator_start(struct cache *ca);

void bch_debug_exit(void);
int bch_debug_init(struct kobject *);
void bch_request_exit(void);
int bch_request_init(void);

#endif /* _BCACHE_H */