#ifndef _BCACHE_H
#define _BCACHE_H

#define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__

#include <linux/bcache.h>
#include <linux/bio.h>
#include <linux/kobject.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/rbtree.h>
#include <linux/rwsem.h>
#include <linux/refcount.h>
#include <linux/types.h>
#include <linux/workqueue.h>

#include "bset.h"
#include "util.h"
#include "closure.h"

struct bucket {
	atomic_t	pin;		/* Nonzero blocks invalidation/reuse */
	uint16_t	prio;		/* LRU priority for cache replacement */
	uint8_t		gen;		/* Incremented each time the bucket is invalidated */
	uint8_t		last_gc;	/* Most out of date gen in the btree */
	uint16_t	gc_mark;	/* Bitfield used by GC. See below for definition */
};

/*
 * gc_mark is a packed bitfield; the BITMASK() macros below generate the
 * accessors for its subfields.
 */

BITMASK(GC_MARK, struct bucket, gc_mark, 0, 2);
#define GC_MARK_RECLAIMABLE	1
#define GC_MARK_DIRTY		2
#define GC_MARK_METADATA	3
#define GC_SECTORS_USED_SIZE	13
#define MAX_GC_SECTORS_USED	(~(~0ULL << GC_SECTORS_USED_SIZE))
BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, GC_SECTORS_USED_SIZE);
BITMASK(GC_MOVE, struct bucket, gc_mark, 15, 1);
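
/*
 * Summary of the gc_mark layout defined by the BITMASK()s above:
 *
 *	bits 0..1	GC_MARK		(reclaimable/dirty/metadata)
 *	bits 2..14	GC_SECTORS_USED	(0..MAX_GC_SECTORS_USED == 8191)
 *	bit  15		GC_MOVE
 */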

#include "journal.h"
#include "stats.h"
struct search;
struct btree;
struct keybuf;

struct keybuf_key {
	struct rb_node	node;
	BKEY_PADDED(key);
	void		*private;
};

struct keybuf {
	struct bkey	last_scanned;
	spinlock_t	lock;

	/*
	 * Beginning and end of range in rb tree - so that we can skip taking
	 * lock and checking the rb tree when we need to check for overlapping
	 * keys.
	 */
	struct bkey	start;
	struct bkey	end;

	struct rb_root	keys;

#define KEYBUF_NR	500
	DECLARE_ARRAY_ALLOCATOR(struct keybuf_key, freelist, KEYBUF_NR);
};

struct bcache_device {
	struct closure		cl;

	struct kobject		kobj;

	struct cache_set	*c;
	unsigned		id;
#define BCACHEDEVNAME_SIZE	12
	char			name[BCACHEDEVNAME_SIZE];

	struct gendisk		*disk;

	unsigned long		flags;
#define BCACHE_DEV_CLOSING	0
#define BCACHE_DEV_DETACHING	1
#define BCACHE_DEV_UNLINK_DONE	2

	unsigned		nr_stripes;
	unsigned		stripe_size;
	atomic_t		*stripe_sectors_dirty;
	unsigned long		*full_dirty_stripes;

	struct bio_set		*bio_split;

	unsigned		data_csum:1;

	int (*cache_miss)(struct btree *, struct search *,
			  struct bio *, unsigned);
	int (*ioctl) (struct bcache_device *, fmode_t, unsigned, unsigned long);
};

struct io {
	/* Used to track sequential IO so it can be skipped */
	struct hlist_node	hash;
	struct list_head	lru;

	unsigned long		jiffies;
	unsigned		sequential;
	sector_t		last;
};

struct cached_dev {
	struct list_head	list;
	struct bcache_device	disk;
	struct block_device	*bdev;

	struct cache_sb		sb;
	struct bio		sb_bio;
	struct bio_vec		sb_bv[1];
	struct closure		sb_write;
	struct semaphore	sb_write_mutex;

	/* Refcount on the cache set. Always nonzero when we're caching. */
	refcount_t		count;
	struct work_struct	detach;

	/*
	 * Device might not be running if it's dirty and the cache set hasn't
	 * showed up yet.
	 */
	atomic_t		running;

	/*
	 * Writes take a shared lock from start to finish; scanning for dirty
	 * data to refill the rb tree requires an exclusive lock.
	 */
	struct rw_semaphore	writeback_lock;

	/*
	 * Nonzero, and writeback has a refcount (d->count), iff there is
	 * dirty data in the cache. Protected by writeback_lock; must either
	 * hold writeback_lock or be holding a refcount on the writeback
	 * refcount.
	 */
	atomic_t		has_dirty;

	/*
	 * Incremented by the writeback code, zeroed by anything else that
	 * touches the backing device; used to detect when the backing volume
	 * is idle so writeback can be accelerated.
	 */
	atomic_t		backing_idle;

	struct bch_ratelimit	writeback_rate;
	struct delayed_work	writeback_rate_update;

	/* Limit number of writeback bios in flight */
	struct semaphore	in_flight;
	struct task_struct	*writeback_thread;
	struct workqueue_struct	*writeback_write_wq;

	struct keybuf		writeback_keys;

	/*
	 * Dispatch the write half of writeback operations strictly in
	 * submission (LBA) order, so reads completing out of order can't
	 * reorder the writes.
	 */
	struct closure_waitlist writeback_ordering_wait;
	atomic_t		writeback_sequence_next;

	/* For tracking sequential IO */
#define RECENT_IO_BITS	7
#define RECENT_IO	(1 << RECENT_IO_BITS)
	struct io		io[RECENT_IO];
	struct hlist_head	io_hash[RECENT_IO + 1];
	struct list_head	io_lru;
	spinlock_t		io_lock;

	struct cache_accounting	accounting;

	/* The rest of this all shows up in sysfs */
	unsigned		sequential_cutoff;
	unsigned		readahead;

	unsigned		verify:1;
	unsigned		bypass_torture_test:1;

	unsigned		partial_stripes_expensive:1;
	unsigned		writeback_metadata:1;
	unsigned		writeback_running:1;
	unsigned char		writeback_percent;
	unsigned		writeback_delay;

	/* Writeback rate PI controller state */
	uint64_t		writeback_rate_target;
	int64_t			writeback_rate_proportional;
	int64_t			writeback_rate_integral;
	int64_t			writeback_rate_integral_scaled;
	int32_t			writeback_rate_change;

	unsigned		writeback_rate_update_seconds;
	unsigned		writeback_rate_i_term_inverse;
	unsigned		writeback_rate_p_term_inverse;
	unsigned		writeback_rate_minimum;
};

enum alloc_reserve {
	RESERVE_BTREE,
	RESERVE_PRIO,
	RESERVE_MOVINGGC,
	RESERVE_NONE,
	RESERVE_NR,
};

struct cache {
	struct cache_set	*set;
	struct cache_sb		sb;
	struct bio		sb_bio;
	struct bio_vec		sb_bv[1];

	struct kobject		kobj;
	struct block_device	*bdev;

	struct task_struct	*alloc_thread;

	struct closure		prio;
	struct prio_set		*disk_buckets;

	/*
	 * When allocating new buckets, prio_write() gets first dibs - since
	 * we may not be able to allocate at all without writing priorities
	 * and gens. prio_buckets[] and prio_last_buckets[] track the buckets
	 * holding the current and previous prio writes, so gc can mark them
	 * as metadata and they aren't reused too early.
	 */
	uint64_t		*prio_buckets;
	uint64_t		*prio_last_buckets;

	/*
	 * free: buckets that are ready to be used, one FIFO per reserve type.
	 *
	 * free_inc: incoming buckets - these still contain cached data, and
	 * can't be reused until their new gen is written to disk. After
	 * prio_write() finishes writing the new gens/prios, they're moved to
	 * the free lists (and possibly discarded in the process).
	 */
	DECLARE_FIFO(long, free)[RESERVE_NR];
	DECLARE_FIFO(long, free_inc);

	size_t			fifo_last_bucket;

	/* Allocation stuff: */
	struct bucket		*buckets;

	DECLARE_HEAP(struct bucket *, heap);

	/*
	 * If nonzero, we know we aren't going to find any buckets to
	 * invalidate until a gc finishes - otherwise we could pointlessly
	 * burn a ton of cpu.
	 */
	unsigned		invalidate_needs_gc;

	bool			discard; /* Issue discards on freed buckets? */

	struct journal_device	journal;

	/* The rest of this all shows up in sysfs */
#define IO_ERROR_SHIFT		20
	atomic_t		io_errors;
	atomic_t		io_count;

	atomic_long_t		meta_sectors_written;
	atomic_long_t		btree_sectors_written;
	atomic_long_t		sectors_written;
};

struct gc_stat {
	size_t			nodes;
	size_t			key_bytes;

	size_t			nkeys;
	uint64_t		data;	/* sectors */
	unsigned		in_use;	/* percent */
};

/*
 * Flag bits for how the cache set is shutting down, and what phase it's at:
 *
 * CACHE_SET_UNREGISTERING means we're not just shutting down, we're detaching
 * all the backing devices first (their cached data gets invalidated, and they
 * won't automatically reattach).
 *
 * CACHE_SET_STOPPING always gets set first when we're closing down a cache
 * set; we'll continue to run normally for awhile with CACHE_SET_STOPPING set
 * (i.e. flushing dirty data).
 *
 * CACHE_SET_RUNNING means all cache devices have been registered and journal
 * replay is complete.
 */
#define CACHE_SET_UNREGISTERING		0
#define CACHE_SET_STOPPING		1
#define CACHE_SET_RUNNING		2

struct cache_set {
	struct closure		cl;

	struct list_head	list;
	struct kobject		kobj;
	struct kobject		internal;
	struct dentry		*debug;
	struct cache_accounting accounting;

	unsigned long		flags;

	struct cache_sb		sb;

	struct cache		*cache[MAX_CACHES_PER_SET];
	struct cache		*cache_by_alloc[MAX_CACHES_PER_SET];
	int			caches_loaded;

	struct bcache_device	**devices;
	unsigned		devices_max_used;
	struct list_head	cached_devs;
	uint64_t		cached_dev_sectors;
	struct closure		caching;

	struct closure		sb_write;
	struct semaphore	sb_write_mutex;

	mempool_t		*search;
	mempool_t		*bio_meta;
	struct bio_set		*bio_split;

	/* For the btree cache */
	struct shrinker		shrink;

	/* For the btree cache and anything allocation related */
	struct mutex		bucket_lock;

	/* log2(bucket size), with bucket size in sectors */
	unsigned short		bucket_bits;

	/* log2(block size), with block size in sectors */
	unsigned short		block_bits;

	/*
	 * Default number of pages for a new btree node - may be less than a
	 * full bucket
	 */
	unsigned		btree_pages;

	/*
	 * Lists of struct btrees; btree_cache is the lru list of structs
	 * that have memory allocated for their btree node,
	 * btree_cache_freed is for structs that do not.
	 *
	 * We never free a struct btree except on shutdown - we just put it
	 * on the btree_cache_freed list and reuse it later. This simplifies
	 * the code and doesn't cost much memory: usage is dominated by the
	 * buffers holding the actual btree node data, which can be freed,
	 * and the number of struct btrees allocated is effectively bounded.
	 *
	 * btree_cache_freeable is effectively a small cache - we use it
	 * because high order page allocations can be rather expensive, and
	 * it's quite common to delete and allocate btree nodes in quick
	 * succession.
	 */
	struct list_head	btree_cache;
	struct list_head	btree_cache_freeable;
	struct list_head	btree_cache_freed;

	/* Number of elements in btree_cache + btree_cache_freeable lists */
	unsigned		btree_cache_used;

	/*
	 * If we need to allocate memory for a new btree node and that
	 * allocation fails, we can cannibalize another node in the btree
	 * cache to satisfy the allocation - lock to guarantee only one
	 * thread does this at a time:
	 */
	wait_queue_head_t	btree_cache_wait;
	struct task_struct	*btree_cache_alloc_lock;

	/*
	 * When we free a btree node, we increment the gen of the bucket the
	 * node is in - but we can't rewrite the prios and gens until we
	 * finished whatever it is we were doing, otherwise after a crash the
	 * btree node would be freed but for say a split, we might not have
	 * the pointers to the new nodes inserted into the btree yet.
	 *
	 * This is a refcount that blocks prio_write() until the new keys are
	 * written.
	 */
	atomic_t		prio_blocked;
	wait_queue_head_t	bucket_wait;

	/*
	 * For any bio we don't skip we subtract the number of sectors from
	 * rescale; when it hits 0 we rescale all the bucket priorities.
	 */
	atomic_t		rescale;

	/*
	 * When we invalidate buckets, we use both the priority and the
	 * amount of good data to determine which buckets to reuse first - to
	 * weight those together consistently we keep track of the smallest
	 * nonzero priority of any bucket.
	 */
	uint16_t		min_prio;

	/*
	 * max(gen - last_gc) for all buckets. When it gets too big we have
	 * to gc to keep gens from wrapping around.
	 */
	uint8_t			need_gc;
	struct gc_stat		gc_stats;
	size_t			nbuckets;
	size_t			avail_nbuckets;

	struct task_struct	*gc_thread;
	/* Where in the btree gc currently is */
	struct bkey		gc_done;

	/*
	 * The allocation code needs gc_mark in struct bucket to be correct,
	 * but it's not while a gc is in progress. Protected by bucket_lock.
	 */
	int			gc_mark_valid;

	/* Counts how many sectors bio_insert has added to the cache */
	atomic_t		sectors_to_gc;
	wait_queue_head_t	gc_wait;

	struct keybuf		moving_gc_keys;
	/* Number of moving GC bios in flight */
	struct semaphore	moving_in_flight;

	struct workqueue_struct	*moving_gc_wq;

	struct btree		*root;

#ifdef CONFIG_BCACHE_DEBUG
	struct btree		*verify_data;
	struct bset		*verify_ondisk;
	struct mutex		verify_lock;
#endif

	unsigned		nr_uuids;
	struct uuid_entry	*uuids;
	BKEY_PADDED(uuid_bucket);
	struct closure		uuid_write;
	struct semaphore	uuid_write_mutex;

	/*
	 * A btree node on disk could have too many bsets for an iterator to
	 * fit on the stack - have to dynamically allocate them
	 */
	mempool_t		*fill_iter;

	struct bset_sort_state	sort;

	/* List of buckets we're currently writing data to */
	struct list_head	data_buckets;
	spinlock_t		data_bucket_lock;

	struct journal		journal;

#define CONGESTED_MAX		1024
	unsigned		congested_last_us;
	atomic_t		congested;

	/* The rest of this all shows up in sysfs */
	unsigned		congested_read_threshold_us;
	unsigned		congested_write_threshold_us;

	struct time_stats	btree_gc_time;
	struct time_stats	btree_split_time;
	struct time_stats	btree_read_time;

	atomic_long_t		cache_read_races;
	atomic_long_t		writeback_keys_done;
	atomic_long_t		writeback_keys_failed;

	atomic_long_t		reclaim;
	atomic_long_t		flush_write;
	atomic_long_t		retry_flush_write;

	enum			{
		ON_ERROR_UNREGISTER,
		ON_ERROR_PANIC,
	}			on_error;
#define DEFAULT_IO_ERROR_LIMIT	8
	unsigned		error_limit;
	unsigned		error_decay;

	unsigned short		journal_delay_ms;
	bool			expensive_debug_checks;
	unsigned		verify:1;
	unsigned		key_merging_disabled:1;
	unsigned		gc_always_rewrite:1;
	unsigned		shrinker_disabled:1;
	unsigned		copy_gc_enabled:1;

#define BUCKET_HASH_BITS	12
	struct hlist_head	bucket_hash[1 << BUCKET_HASH_BITS];

	DECLARE_HEAP(struct btree *, flush_btree);
};

struct bbio {
	unsigned		submit_time_us;
	union {
		struct bkey	key;
		uint64_t	_pad[3];
		/*
		 * We only need pad = 3 here because we only ever carry around
		 * a single pointer - i.e. the pointer we're doing io to/from
		 */
	};
	struct bio		bio;
};

#define BTREE_PRIO		USHRT_MAX
#define INITIAL_PRIO		32768U

#define btree_bytes(c)		((c)->btree_pages * PAGE_SIZE)
#define btree_blocks(b)							\
	((unsigned) (KEY_SIZE(&b->key) >> (b)->c->block_bits))

#define btree_default_blocks(c)						\
	((unsigned) ((PAGE_SECTORS * (c)->btree_pages) >> (c)->block_bits))

#define bucket_pages(c)		((c)->sb.bucket_size / PAGE_SECTORS)
#define bucket_bytes(c)		((c)->sb.bucket_size << 9)
#define block_bytes(c)		((c)->sb.block_size << 9)

#define prios_per_bucket(c)				\
	((bucket_bytes(c) - sizeof(struct prio_set)) /	\
	 sizeof(struct bucket_disk))
#define prio_buckets(c)					\
	DIV_ROUND_UP((size_t) (c)->sb.nbuckets, prios_per_bucket(c))

static inline size_t sector_to_bucket(struct cache_set *c, sector_t s)
{
	return s >> c->bucket_bits;
}

static inline sector_t bucket_to_sector(struct cache_set *c, size_t b)
{
	return ((sector_t) b) << c->bucket_bits;
}

static inline sector_t bucket_remainder(struct cache_set *c, sector_t s)
{
	return s & (c->sb.bucket_size - 1);
}
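
/*
 * Worked example (assuming a 1024-sector bucket, i.e. bucket_bits == 10):
 * sector 3000 lives in bucket 2 (3000 >> 10), at offset 952 within it
 * (3000 & 1023). bucket_remainder() relies on sb.bucket_size being a
 * power of two.
 */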

static inline struct cache *PTR_CACHE(struct cache_set *c,
				      const struct bkey *k,
				      unsigned ptr)
{
	return c->cache[PTR_DEV(k, ptr)];
}

static inline size_t PTR_BUCKET_NR(struct cache_set *c,
				   const struct bkey *k,
				   unsigned ptr)
{
	return sector_to_bucket(c, PTR_OFFSET(k, ptr));
}

static inline struct bucket *PTR_BUCKET(struct cache_set *c,
					const struct bkey *k,
					unsigned ptr)
{
	return PTR_CACHE(c, k, ptr)->buckets + PTR_BUCKET_NR(c, k, ptr);
}

static inline uint8_t gen_after(uint8_t a, uint8_t b)
{
	uint8_t r = a - b;
	return r > 128U ? 0 : r;
}
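
/*
 * gen_after() is wrapping 8-bit arithmetic: gen_after(1, 250) computes
 * 1 - 250 = 7 (mod 256), correctly treating gen 1 as 7 generations newer
 * than gen 250. A difference greater than 128 is interpreted as "a is not
 * after b" and clamped to 0.
 */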

static inline uint8_t ptr_stale(struct cache_set *c, const struct bkey *k,
				unsigned i)
{
	return gen_after(PTR_BUCKET(c, k, i)->gen, PTR_GEN(k, i));
}
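
/*
 * A nonzero result means the pointer is stale: the bucket's gen has been
 * incremented (i.e. the bucket was invalidated and reused) since the gen
 * embedded in the key was recorded, so the cached data this pointer
 * referred to is gone.
 */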

static inline bool ptr_available(struct cache_set *c, const struct bkey *k,
				 unsigned i)
{
	return (PTR_DEV(k, i) < MAX_CACHES_PER_SET) && PTR_CACHE(c, k, i);
}

/* Btree key macros */

#define csum_set(i)							\
	bch_crc64(((void *) (i)) + sizeof(uint64_t),			\
		  ((void *) bset_bkey_last(i)) -			\
		  (((void *) (i)) + sizeof(uint64_t)))
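
/*
 * The checksum deliberately skips the first u64 of the bset - that is
 * where the checksum itself is stored on disk - and covers everything
 * from there through the last key.
 */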

/* Error handling macros */

#define btree_bug(b, ...)						\
do {									\
	if (bch_cache_set_error((b)->c, __VA_ARGS__))			\
		dump_stack();						\
} while (0)

#define cache_bug(c, ...)						\
do {									\
	if (bch_cache_set_error(c, __VA_ARGS__))			\
		dump_stack();						\
} while (0)

#define btree_bug_on(cond, b, ...)					\
do {									\
	if (cond)							\
		btree_bug(b, __VA_ARGS__);				\
} while (0)

#define cache_bug_on(cond, c, ...)					\
do {									\
	if (cond)							\
		cache_bug(c, __VA_ARGS__);				\
} while (0)

#define cache_set_err_on(cond, c, ...)					\
do {									\
	if (cond)							\
		bch_cache_set_error(c, __VA_ARGS__);			\
} while (0)

/* Looping macros */

#define for_each_cache(ca, cs, iter)					\
	for (iter = 0; ca = (cs)->cache[iter], iter < (cs)->sb.nr_in_set; iter++)

#define for_each_bucket(b, ca)						\
	for (b = (ca)->buckets + (ca)->sb.first_bucket;			\
	     b < (ca)->buckets + (ca)->sb.nbuckets; b++)

static inline void cached_dev_put(struct cached_dev *dc)
{
	if (refcount_dec_and_test(&dc->count))
		schedule_work(&dc->detach);
}

static inline bool cached_dev_get(struct cached_dev *dc)
{
	if (!refcount_inc_not_zero(&dc->count))
		return false;

	/* Paired with the mb in cached_dev_attach */
	smp_mb__after_atomic();
	return true;
}

/*
 * bucket_gc_gen() returns the difference between the bucket's current gen
 * and the oldest gen of any pointer into that bucket in the btree.
 */
static inline uint8_t bucket_gc_gen(struct bucket *b)
{
	return b->gen - b->last_gc;
}

#define BUCKET_GC_GEN_MAX	96U
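
/*
 * Once bucket_gc_gen() reaches BUCKET_GC_GEN_MAX for a bucket, the
 * allocator must stop invalidating it and wait for garbage collection to
 * update last_gc; otherwise gen could run far enough ahead of last_gc for
 * the wrapping arithmetic in gen_after() to break.
 */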

#define kobj_attribute_write(n, fn)					\
	static struct kobj_attribute ksysfs_##n = __ATTR(n, S_IWUSR, NULL, fn)

#define kobj_attribute_rw(n, show, store)				\
	static struct kobj_attribute ksysfs_##n =			\
		__ATTR(n, S_IWUSR|S_IRUSR, show, store)

static inline void wake_up_allocators(struct cache_set *c)
{
	struct cache *ca;
	unsigned i;

	for_each_cache(ca, c, i)
		wake_up_process(ca->alloc_thread);
}

/* Forward declarations */

void bch_count_io_errors(struct cache *, blk_status_t, int, const char *);
void bch_bbio_count_io_errors(struct cache_set *, struct bio *,
			      blk_status_t, const char *);
void bch_bbio_endio(struct cache_set *, struct bio *, blk_status_t,
		    const char *);
void bch_bbio_free(struct bio *, struct cache_set *);
struct bio *bch_bbio_alloc(struct cache_set *);

void __bch_submit_bbio(struct bio *, struct cache_set *);
void bch_submit_bbio(struct bio *, struct cache_set *, struct bkey *, unsigned);

uint8_t bch_inc_gen(struct cache *, struct bucket *);
void bch_rescale_priorities(struct cache_set *, int);

bool bch_can_invalidate_bucket(struct cache *, struct bucket *);
void __bch_invalidate_one_bucket(struct cache *, struct bucket *);

void __bch_bucket_free(struct cache *, struct bucket *);
void bch_bucket_free(struct cache_set *, struct bkey *);

long bch_bucket_alloc(struct cache *, unsigned, bool);
int __bch_bucket_alloc_set(struct cache_set *, unsigned,
			   struct bkey *, int, bool);
int bch_bucket_alloc_set(struct cache_set *, unsigned,
			 struct bkey *, int, bool);
bool bch_alloc_sectors(struct cache_set *, struct bkey *, unsigned,
		       unsigned, unsigned, bool);

__printf(2, 3)
bool bch_cache_set_error(struct cache_set *, const char *, ...);

void bch_prio_write(struct cache *);
void bch_write_bdev_super(struct cached_dev *, struct closure *);

extern struct workqueue_struct *bcache_wq;
extern const char * const bch_cache_modes[];
extern struct mutex bch_register_lock;
extern struct list_head bch_cache_sets;

extern struct kobj_type bch_cached_dev_ktype;
extern struct kobj_type bch_flash_dev_ktype;
extern struct kobj_type bch_cache_set_ktype;
extern struct kobj_type bch_cache_set_internal_ktype;
extern struct kobj_type bch_cache_ktype;

void bch_cached_dev_release(struct kobject *);
void bch_flash_dev_release(struct kobject *);
void bch_cache_set_release(struct kobject *);
void bch_cache_release(struct kobject *);

int bch_uuid_write(struct cache_set *);
void bcache_write_super(struct cache_set *);

int bch_flash_dev_create(struct cache_set *c, uint64_t size);

int bch_cached_dev_attach(struct cached_dev *, struct cache_set *, uint8_t *);
void bch_cached_dev_detach(struct cached_dev *);
void bch_cached_dev_run(struct cached_dev *);
void bcache_device_stop(struct bcache_device *);

void bch_cache_set_unregister(struct cache_set *);
void bch_cache_set_stop(struct cache_set *);

struct cache_set *bch_cache_set_alloc(struct cache_sb *);
void bch_btree_cache_free(struct cache_set *);
int bch_btree_cache_alloc(struct cache_set *);
void bch_moving_init_cache_set(struct cache_set *);
int bch_open_buckets_alloc(struct cache_set *);
void bch_open_buckets_free(struct cache_set *);

int bch_cache_allocator_start(struct cache *ca);

void bch_debug_exit(void);
int bch_debug_init(struct kobject *);
void bch_request_exit(void);
int bch_request_init(void);

#endif /* _BCACHE_H */