/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHE_H
#define _BCACHE_H
#define pr_fmt(fmt) "bcache: %s() " fmt, __func__

#include <linux/bcache.h>
#include <linux/bio.h>
#include <linux/kobject.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/rbtree.h>
#include <linux/rwsem.h>
#include <linux/refcount.h>
#include <linux/types.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>

#include "bset.h"
#include "util.h"
#include "closure.h"
struct bucket {
	atomic_t	pin;
	uint16_t	prio;
	uint8_t		gen;
	uint8_t		last_gc;	/* Most out of date gen in the btree */
	uint16_t	gc_mark;	/* Bitfield used by GC. See below for field */
};

/*
 * I'd use bitfields for these, but I don't trust the compiler not to screw me
 * as badly as it's been known to.
 */

BITMASK(GC_MARK,	 struct bucket, gc_mark, 0, 2);
#define GC_MARK_RECLAIMABLE	1
#define GC_MARK_DIRTY		2
#define GC_MARK_METADATA	3
#define GC_SECTORS_USED_SIZE	13
#define MAX_GC_SECTORS_USED	(~(~0ULL << GC_SECTORS_USED_SIZE))
BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, GC_SECTORS_USED_SIZE);
BITMASK(GC_MOVE, struct bucket, gc_mark, 15, 1);
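
/*
 * Illustrative sketch (editor's addition, not from the original header):
 * gc_mark packs a 2-bit mark, a 13-bit sector count and a 1-bit "being
 * moved" flag into one 16-bit word; the BITMASK() accessors above generate
 * GET/SET helpers for each field. A hypothetical use:
 */
#if 0
static void example_mark_bucket(struct bucket *b, unsigned int sectors)
{
	SET_GC_MARK(b, GC_MARK_DIRTY);			/* bits 0-1  */
	SET_GC_SECTORS_USED(b, min_t(unsigned int, sectors,
				     MAX_GC_SECTORS_USED)); /* bits 2-14 */
	SET_GC_MOVE(b, 0);				/* bit 15    */

	BUG_ON(GC_MARK(b) != GC_MARK_DIRTY);
}
#endif
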
#include "journal.h"
#include "stats.h"
struct search;
struct btree;
struct keybuf;

struct keybuf_key {
	struct rb_node	node;
	BKEY_PADDED(key);
	void		*private;
};

struct keybuf {
	struct bkey	last_scanned;
	spinlock_t	lock;

	/*
	 * Beginning and end of the range we're scanning - so that we can skip
	 * taking the lock and checking the rb tree when we need to check for
	 * overlapping keys.
	 */
	struct bkey	start;
	struct bkey	end;

	struct rb_root	keys;

#define KEYBUF_NR	500
	DECLARE_ARRAY_ALLOCATOR(struct keybuf_key, freelist, KEYBUF_NR);
};

struct bcache_device {
	struct closure		cl;

	struct kobject		kobj;

	struct cache_set	*c;
	unsigned int		id;
#define BCACHEDEVNAME_SIZE	12
	char			name[BCACHEDEVNAME_SIZE];

	struct gendisk		*disk;

	unsigned long		flags;
#define BCACHE_DEV_CLOSING		0
#define BCACHE_DEV_DETACHING		1
#define BCACHE_DEV_UNLINK_DONE		2
#define BCACHE_DEV_WB_RUNNING		3
#define BCACHE_DEV_RATE_DW_RUNNING	4
	int			nr_stripes;
	unsigned int		stripe_size;
	atomic_t		*stripe_sectors_dirty;
	unsigned long		*full_dirty_stripes;

	struct bio_set		bio_split;

	unsigned int		data_csum:1;

	int (*cache_miss)(struct btree *b, struct search *s,
			  struct bio *bio, unsigned int sectors);
	int (*ioctl)(struct bcache_device *d, fmode_t mode,
		     unsigned int cmd, unsigned long arg);
};

struct io {
	/* Used to track sequential IO so it can be skipped */
	struct hlist_node	hash;
	struct list_head	lru;

	unsigned long		jiffies;
	unsigned int		sequential;
	sector_t		last;
};
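
/*
 * Illustrative sketch (editor's addition): each struct io remembers where a
 * recent stream of IO left off ("last") and how much sequential IO it has
 * seen, so the request path can bypass the cache for big sequential streams.
 * A simplified lookup, under the assumption that "sequential" counts bytes:
 */
#if 0
static bool example_is_sequential(struct cached_dev *dc, struct bio *bio)
{
	sector_t sector = bio->bi_iter.bi_sector;
	struct io *i;
	bool bypass = false;

	spin_lock(&dc->io_lock);

	/* Was there a recent IO that ended exactly where this one starts? */
	hlist_for_each_entry(i, &dc->io_hash[hash_64(sector, RECENT_IO_BITS)],
			     hash) {
		if (i->last == sector) {
			i->sequential += bio->bi_iter.bi_size;
			i->last = sector + bio_sectors(bio);
			i->jiffies = jiffies;
			bypass = i->sequential >= dc->sequential_cutoff;
			break;
		}
	}

	spin_unlock(&dc->io_lock);
	return bypass;
}
#endif
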
enum stop_on_failure {
	BCH_CACHED_DEV_STOP_AUTO = 0,
	BCH_CACHED_DEV_STOP_ALWAYS,
	BCH_CACHED_DEV_STOP_MODE_MAX,
};

struct cached_dev {
	struct list_head	list;
	struct bcache_device	disk;
	struct block_device	*bdev;

	struct cache_sb		sb;
	struct cache_sb_disk	*sb_disk;
	struct bio		sb_bio;
	struct bio_vec		sb_bv[1];
	struct closure		sb_write;
	struct semaphore	sb_write_mutex;

	/* Refcount on the cache set. Always nonzero when we're caching. */
	refcount_t		count;
	struct work_struct	detach;

	/*
	 * Device might not be running if it's dirty and the cache set hasn't
	 * showed up yet.
	 */
	atomic_t		running;

	/*
	 * Writes take a shared lock from start to finish; scanning for dirty
	 * data to refill the rb tree requires an exclusive lock.
	 */
	struct rw_semaphore	writeback_lock;

	/*
	 * Nonzero, and writeback has a refcount (d->count), iff there is dirty
	 * data in the cache. Protected by writeback_lock; must have a refcount
	 * on the cache set to set it.
	 */
	atomic_t		has_dirty;

#define BCH_CACHE_READA_ALL		0
#define BCH_CACHE_READA_META_ONLY	1
	unsigned int		cache_readahead_policy;
	struct bch_ratelimit	writeback_rate;
	struct delayed_work	writeback_rate_update;

	/* Limit number of writeback bios in flight */
	struct semaphore	in_flight;
	struct task_struct	*writeback_thread;
	struct workqueue_struct	*writeback_write_wq;

	struct keybuf		writeback_keys;

	struct task_struct	*status_update_thread;

	/*
	 * Order the write-half of writeback operations strictly in flush
	 * order.
	 */
	struct closure_waitlist	writeback_ordering_wait;
	atomic_t		writeback_sequence_next;

	/* For tracking sequential IO */
#define RECENT_IO_BITS	7
#define RECENT_IO	(1 << RECENT_IO_BITS)
	struct io		io[RECENT_IO];
	struct hlist_head	io_hash[RECENT_IO + 1];
	struct list_head	io_lru;
	spinlock_t		io_lock;

	struct cache_accounting	accounting;

	/* The rest of this all shows up in sysfs */
	unsigned int		sequential_cutoff;
	unsigned int		readahead;

	unsigned int		io_disable:1;
	unsigned int		verify:1;
	unsigned int		bypass_torture_test:1;

	unsigned int		partial_stripes_expensive:1;
	unsigned int		writeback_metadata:1;
	unsigned int		writeback_running:1;
	unsigned char		writeback_percent;
	unsigned int		writeback_delay;

	uint64_t		writeback_rate_target;
	int64_t			writeback_rate_proportional;
	int64_t			writeback_rate_integral;
	int64_t			writeback_rate_integral_scaled;
	int32_t			writeback_rate_change;

	unsigned int		writeback_rate_update_seconds;
	unsigned int		writeback_rate_i_term_inverse;
	unsigned int		writeback_rate_p_term_inverse;
	unsigned int		writeback_rate_minimum;

	enum stop_on_failure	stop_when_cache_set_failed;
#define DEFAULT_CACHED_DEV_ERROR_LIMIT	64
	atomic_t		io_errors;
	unsigned int		error_limit;
	unsigned int		offline_seconds;

	char			backing_dev_name[BDEVNAME_SIZE];
};

enum alloc_reserve {
	RESERVE_BTREE,
	RESERVE_PRIO,
	RESERVE_MOVINGGC,
	RESERVE_NONE,
	RESERVE_NR,
};

struct cache {
	struct cache_set	*set;
	struct cache_sb		sb;
	struct cache_sb_disk	*sb_disk;
	struct bio		sb_bio;
	struct bio_vec		sb_bv[1];

	struct kobject		kobj;
	struct block_device	*bdev;

	struct task_struct	*alloc_thread;

	struct closure		prio;
	struct prio_set		*disk_buckets;

	/*
	 * When allocating new buckets, prio_write() gets first dibs - since we
	 * may not be able to allocate at all without writing priorities and
	 * gens. prio_last_buckets[] contains the last buckets we wrote
	 * priorities to (so gc can mark them as metadata), prio_buckets[]
	 * contains the buckets allocated for the next prio write.
	 */
	uint64_t		*prio_buckets;
	uint64_t		*prio_last_buckets;

	/*
	 * free: Buckets that are ready to be used
	 *
	 * free_inc: Incoming buckets - these are buckets that currently have
	 * cached data in them, and we can't reuse them until after we write
	 * their new gen to disk. After prio_write() finishes writing the new
	 * gens/prios, they'll be moved to the free list (and possibly
	 * discarded in the process).
	 */
	DECLARE_FIFO(long, free)[RESERVE_NR];
	DECLARE_FIFO(long, free_inc);

	size_t			fifo_last_bucket;

	/* Allocation stuff: */
	struct bucket		*buckets;

	DECLARE_HEAP(struct bucket *, heap);

	/*
	 * If nonzero, we know we aren't going to find any buckets to
	 * invalidate until a gc finishes - otherwise we could pointlessly burn
	 * a ton of cpu.
	 */
	unsigned int		invalidate_needs_gc;

	bool			discard; /* Get rid of? */

	struct journal_device	journal;

	/* The rest of this all shows up in sysfs */
#define IO_ERROR_SHIFT		20
	atomic_t		io_errors;
	atomic_t		io_count;

	atomic_long_t		meta_sectors_written;
	atomic_long_t		btree_sectors_written;
	atomic_long_t		sectors_written;

	char			cache_dev_name[BDEVNAME_SIZE];
};

struct gc_stat {
	size_t			nodes;
	size_t			nodes_pre;
	size_t			key_bytes;

	size_t			nkeys;
	uint64_t		data;	/* sectors */
	unsigned int		in_use;	/* percent */
};

/*
 * Flag bits, for how the cache set is shutting down, and what phase it's at:
 *
 * CACHE_SET_UNREGISTERING means we're not just shutting down, we're detaching
 * all the backing devices first (their cached data gets invalidated, and they
 * won't automatically reattach).
 *
 * CACHE_SET_STOPPING always gets set first when we're closing down a cache
 * set; we'll continue to run normally for awhile with CACHE_SET_STOPPING set
 * (i.e. flushing dirty data).
 *
 * CACHE_SET_RUNNING means all cache devices have been registered and journal
 * replay is complete.
 *
 * CACHE_SET_IO_DISABLE is set when bcache is stopping the whole cache set;
 * all IO to the cache set is simply failed with -EIO.
 */
#define CACHE_SET_UNREGISTERING		0
#define CACHE_SET_STOPPING		1
#define CACHE_SET_RUNNING		2
#define CACHE_SET_IO_DISABLE		3

struct cache_set {
	struct closure		cl;

	struct list_head	list;
	struct kobject		kobj;
	struct kobject		internal;
	struct dentry		*debug;
	struct cache_accounting accounting;

	unsigned long		flags;
	atomic_t		idle_counter;
	atomic_t		at_max_writeback_rate;

	struct cache		*cache;

	struct bcache_device	**devices;
	unsigned int		devices_max_used;
	atomic_t		attached_dev_nr;
	struct list_head	cached_devs;
	uint64_t		cached_dev_sectors;
	atomic_long_t		flash_dev_dirty_sectors;
	struct closure		caching;

	struct closure		sb_write;
	struct semaphore	sb_write_mutex;

	mempool_t		search;
	mempool_t		bio_meta;
	struct bio_set		bio_split;

	/* For the btree cache */
	struct shrinker		shrink;

	/* For the btree cache and anything allocation related */
	struct mutex		bucket_lock;

	/* log2(bucket_size), in sectors */
	unsigned short		bucket_bits;

	/* log2(block_size), in sectors */
	unsigned short		block_bits;

	/*
	 * Default number of pages for a new btree node - may be less than a
	 * full bucket
	 */
	unsigned int		btree_pages;

	/*
	 * Lists of struct btrees; lru is the list for structs that have memory
	 * allocated for an actual btree node, freed is for structs that do
	 * not. We never free a struct btree except on shutdown - we just put
	 * it on the btree_cache_freed list and reuse it later.
	 */
	struct list_head	btree_cache;
	struct list_head	btree_cache_freeable;
	struct list_head	btree_cache_freed;

	/* Number of btree nodes in the cache */
	unsigned int		btree_cache_used;

	/*
	 * If we need to allocate memory for a new btree node and that
	 * allocation fails, we can cannibalize another node in the btree cache
	 * to satisfy the allocation - lock to guarantee only one thread does
	 * this at a time:
	 */
	wait_queue_head_t	btree_cache_wait;
	struct task_struct	*btree_cache_alloc_lock;
	spinlock_t		btree_cannibalize_lock;

	/*
	 * When we free a btree node, we increment the gen of the bucket the
	 * node is in - but we can't rewrite the prios and gens until we
	 * finished whatever it is we were doing, otherwise after a crash the
	 * btree node would be freed but for say a split, we might not have the
	 * pointers to the new nodes inserted into the btree yet.
	 *
	 * This is a refcount that blocks prio_write() until the new keys are
	 * written.
	 */
	atomic_t		prio_blocked;
	wait_queue_head_t	bucket_wait;

	/*
	 * For any bio we don't skip we subtract the number of sectors from
	 * rescale; when it hits 0 we rescale all the bucket priorities.
	 */
	atomic_t		rescale;

	/* Used by GC to check whether any front-side I/O is in flight */
	atomic_t		search_inflight;

	/*
	 * When we invalidate buckets, we use both the priority and the amount
	 * of good data to determine which buckets to reuse first - to weight
	 * those together consistently we keep track of the smallest nonzero
	 * priority of any bucket.
	 */
	uint16_t		min_prio;

	/*
	 * max(gen - last_gc) for all buckets. When it gets too big we have to
	 * gc to keep gens from wrapping around.
	 */
	uint8_t			need_gc;
	struct gc_stat		gc_stats;
	size_t			nbuckets;
	size_t			avail_nbuckets;

	struct task_struct	*gc_thread;
	/* Where in the btree gc currently is */
	struct bkey		gc_done;

	/*
	 * For automatic garbage collection after writeback completes; used as
	 * bit fields:
	 * - BCH_ENABLE_AUTO_GC: enable gc after writeback
	 * - BCH_DO_AUTO_GC:     do gc after writeback
	 */
#define BCH_ENABLE_AUTO_GC	1
#define BCH_DO_AUTO_GC		2
	uint8_t			gc_after_writeback;

	/*
	 * The allocation code needs gc_mark in struct bucket to be correct,
	 * but it's not while a gc is in progress. Protected by bucket_lock.
	 */
	int			gc_mark_valid;

	/* Counts how many sectors bio_insert has added to the cache */
	atomic_t		sectors_to_gc;
	wait_queue_head_t	gc_wait;

	struct keybuf		moving_gc_keys;
	/* Number of moving GC bios in flight */
	struct semaphore	moving_in_flight;

	struct workqueue_struct	*moving_gc_wq;

	struct btree		*root;

#ifdef CONFIG_BCACHE_DEBUG
	struct btree		*verify_data;
	struct bset		*verify_ondisk;
	struct mutex		verify_lock;
#endif

	uint8_t			set_uuid[16];
	unsigned int		nr_uuids;
	struct uuid_entry	*uuids;
	BKEY_PADDED(uuid_bucket);
	struct closure		uuid_write;
	struct semaphore	uuid_write_mutex;

	/*
	 * A btree node on disk could have too many bsets for an iterator to
	 * fit on the stack - have to dynamically allocate them.
	 */
	mempool_t		fill_iter;

	struct bset_sort_state	sort;

	/* List of buckets we're currently writing data to */
	struct list_head	data_buckets;
	spinlock_t		data_bucket_lock;

	struct journal		journal;

#define CONGESTED_MAX		1024
	unsigned int		congested_last_us;
	atomic_t		congested;

	/* The rest of this all shows up in sysfs */
	unsigned int		congested_read_threshold_us;
	unsigned int		congested_write_threshold_us;

	struct time_stats	btree_gc_time;
	struct time_stats	btree_split_time;
	struct time_stats	btree_read_time;

	atomic_long_t		cache_read_races;
	atomic_long_t		writeback_keys_done;
	atomic_long_t		writeback_keys_failed;

	atomic_long_t		reclaim;
	atomic_long_t		reclaimed_journal_buckets;
	atomic_long_t		flush_write;

	enum			{
		ON_ERROR_UNREGISTER,
		ON_ERROR_PANIC,
	}			on_error;
#define DEFAULT_IO_ERROR_LIMIT	8
	unsigned int		error_limit;
	unsigned int		error_decay;

	unsigned short		journal_delay_ms;
	bool			expensive_debug_checks;
	unsigned int		verify:1;
	unsigned int		key_merging_disabled:1;
	unsigned int		gc_always_rewrite:1;
	unsigned int		shrinker_disabled:1;
	unsigned int		copy_gc_enabled:1;
	unsigned int		idle_max_writeback_rate_enabled:1;

#define BUCKET_HASH_BITS	12
	struct hlist_head	bucket_hash[1 << BUCKET_HASH_BITS];
};

struct bbio {
	unsigned int		submit_time_us;
	union {
		struct bkey	key;
		uint64_t	_pad[3];
		/*
		 * We only need pad = 3 here because we only ever carry around
		 * a single pointer - i.e. the pointer we're doing io to/from.
		 */
	};
	struct bio		bio;
};

#define BTREE_PRIO		USHRT_MAX
#define INITIAL_PRIO		32768U

#define btree_bytes(c)		((c)->btree_pages * PAGE_SIZE)
#define btree_blocks(b)							\
	((unsigned int) (KEY_SIZE(&b->key) >> (b)->c->block_bits))

#define btree_default_blocks(c)						\
	((unsigned int) ((PAGE_SECTORS * (c)->btree_pages) >> (c)->block_bits))

#define bucket_bytes(ca)	((ca)->sb.bucket_size << 9)
#define block_bytes(ca)		((ca)->sb.block_size << 9)
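
/*
 * Illustrative note (editor's addition): superblock sizes are stored in
 * 512-byte sectors, so the << 9 above converts sectors to bytes. With a
 * (hypothetical) 1024-sector bucket and an 8-sector block:
 */
#if 0
static void example_sizes(struct cache *ca)
{
	/* 1024 << 9 = 524288 bytes (512 KiB) per bucket */
	pr_info("bucket: %zu bytes\n", (size_t) bucket_bytes(ca));
	/* 8 << 9 = 4096 bytes (one 4 KiB page) per block */
	pr_info("block: %zu bytes\n", (size_t) block_bytes(ca));
}
#endif
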
static inline unsigned int meta_bucket_pages(struct cache_sb *sb)
{
	unsigned int n, max_pages;

	max_pages = min_t(unsigned int,
			  __rounddown_pow_of_two(USHRT_MAX) / PAGE_SECTORS,
			  MAX_ORDER_NR_PAGES);

	n = sb->bucket_size / PAGE_SECTORS;
	if (n > max_pages)
		n = max_pages;

	return n;
}

static inline unsigned int meta_bucket_bytes(struct cache_sb *sb)
{
	return meta_bucket_pages(sb) << PAGE_SHIFT;
}

#define prios_per_bucket(ca)						\
	((meta_bucket_bytes(&(ca)->sb) - sizeof(struct prio_set)) /	\
	 sizeof(struct bucket_disk))

#define prio_buckets(ca)						\
	DIV_ROUND_UP((size_t) (ca)->sb.nbuckets, prios_per_bucket(ca))

static inline size_t sector_to_bucket(struct cache_set *c, sector_t s)
{
	return s >> c->bucket_bits;
}

static inline sector_t bucket_to_sector(struct cache_set *c, size_t b)
{
	return ((sector_t) b) << c->bucket_bits;
}

static inline sector_t bucket_remainder(struct cache_set *c, sector_t s)
{
	return s & (c->cache->sb.bucket_size - 1);
}
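
/*
 * Illustrative example (editor's addition): with bucket_bits = 10 (a
 * 1024-sector bucket), sector 5000 maps to bucket 4 at offset 904, and the
 * conversions above round-trip:
 */
#if 0
static void example_translate(struct cache_set *c, sector_t s)
{
	size_t   b = sector_to_bucket(c, s);	/* s >> bucket_bits      */
	sector_t o = bucket_remainder(c, s);	/* s & (bucket_size - 1) */

	BUG_ON(bucket_to_sector(c, b) + o != s);
}
#endif
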
static inline struct cache *PTR_CACHE(struct cache_set *c,
				      const struct bkey *k,
				      unsigned int ptr)
{
	return c->cache;
}

static inline size_t PTR_BUCKET_NR(struct cache_set *c,
				   const struct bkey *k,
				   unsigned int ptr)
{
	return sector_to_bucket(c, PTR_OFFSET(k, ptr));
}

static inline struct bucket *PTR_BUCKET(struct cache_set *c,
					const struct bkey *k,
					unsigned int ptr)
{
	return PTR_CACHE(c, k, ptr)->buckets + PTR_BUCKET_NR(c, k, ptr);
}

static inline uint8_t gen_after(uint8_t a, uint8_t b)
{
	uint8_t r = a - b;

	return r > 128U ? 0 : r;
}

static inline uint8_t ptr_stale(struct cache_set *c, const struct bkey *k,
				unsigned int i)
{
	return gen_after(PTR_BUCKET(c, k, i)->gen, PTR_GEN(k, i));
}
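
/*
 * Illustrative example (editor's addition): gens are 8-bit counters that
 * wrap, so gen_after() measures modular distance instead of comparing
 * directly; ptr_stale() then reports how far a bucket's gen has advanced
 * past the gen recorded in a key (0 means the pointer is still valid):
 */
#if 0
static void example_gen_wraparound(void)
{
	BUG_ON(gen_after(3, 250) != 9);	/* 3 - 250 wraps to 9: 3 is newer */
	BUG_ON(gen_after(250, 3) != 0);	/* distance > 128: not "after"    */
}
#endif
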
static inline bool ptr_available(struct cache_set *c, const struct bkey *k,
				 unsigned int i)
{
	return (PTR_DEV(k, i) < MAX_CACHES_PER_SET) && PTR_CACHE(c, k, i);
}

/*
 * This is used for various on disk data structures - cache_sb, prio_set,
 * bset, jset: the checksum is _always_ the first 8 bytes of these structs.
 */
#define csum_set(i)							\
	bch_crc64(((void *) (i)) + sizeof(uint64_t),			\
		  ((void *) bset_bkey_last(i)) -			\
		  (((void *) (i)) + sizeof(uint64_t)))
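
/*
 * Illustrative sketch (editor's addition): since the checksum is the first
 * u64 of the struct, csum_set() sums everything after it. Verifying a bset
 * read from disk would look like:
 */
#if 0
static bool example_bset_csum_ok(struct bset *i)
{
	return i->csum == csum_set(i);
}
#endif
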
/* Error handling macros */

#define btree_bug(b, ...)						\
do {									\
	if (bch_cache_set_error((b)->c, __VA_ARGS__))			\
		dump_stack();						\
} while (0)

#define cache_bug(c, ...)						\
do {									\
	if (bch_cache_set_error(c, __VA_ARGS__))			\
		dump_stack();						\
} while (0)

#define btree_bug_on(cond, b, ...)					\
do {									\
	if (cond)							\
		btree_bug(b, __VA_ARGS__);				\
} while (0)

#define cache_bug_on(cond, c, ...)					\
do {									\
	if (cond)							\
		cache_bug(c, __VA_ARGS__);				\
} while (0)

#define cache_set_err_on(cond, c, ...)					\
do {									\
	if (cond)							\
		bch_cache_set_error(c, __VA_ARGS__);			\
} while (0)

/* Looping macros */

#define for_each_bucket(b, ca)						\
	for (b = (ca)->buckets + (ca)->sb.first_bucket;			\
	     b < (ca)->buckets + (ca)->sb.nbuckets; b++)
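
/*
 * Illustrative example (editor's addition): for_each_bucket() walks every
 * usable bucket in a cache, e.g. to count unpinned buckets:
 */
#if 0
static size_t example_count_unpinned(struct cache *ca)
{
	struct bucket *b;
	size_t n = 0;

	for_each_bucket(b, ca)
		if (!atomic_read(&b->pin))
			n++;

	return n;
}
#endif
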
static inline void cached_dev_put(struct cached_dev *dc)
{
	if (refcount_dec_and_test(&dc->count))
		schedule_work(&dc->detach);
}

static inline bool cached_dev_get(struct cached_dev *dc)
{
	if (!refcount_inc_not_zero(&dc->count))
		return false;

	/* Paired with the mb in cached_dev_attach */
	smp_mb__after_atomic();
	return true;
}
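
/*
 * Illustrative example (editor's addition): the usual get/put pattern.
 * cached_dev_get() fails once the refcount has already hit zero (the device
 * is detaching); the final cached_dev_put() schedules the detach work:
 */
#if 0
static void example_use_cached_dev(struct cached_dev *dc)
{
	if (!cached_dev_get(dc))
		return;			/* already detaching */

	/* ... safely use dc here ... */

	cached_dev_put(dc);		/* may schedule dc->detach */
}
#endif
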
/*
 * bucket_gc_gen() returns the difference between the bucket's current gen and
 * the oldest gen of any pointer into that bucket in the btree (last_gc).
 */
static inline uint8_t bucket_gc_gen(struct bucket *b)
{
	return b->gen - b->last_gc;
}

#define BUCKET_GC_GEN_MAX	96U

#define kobj_attribute_write(n, fn)					\
	static struct kobj_attribute ksysfs_##n = __ATTR(n, 0200, NULL, fn)

#define kobj_attribute_rw(n, show, store)				\
	static struct kobj_attribute ksysfs_##n =			\
		__ATTR(n, 0600, show, store)
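
/*
 * Illustrative example (editor's addition): these wrap __ATTR() to declare
 * sysfs attributes. A hypothetical write-only attribute:
 */
#if 0
static ssize_t example_trigger_store(struct kobject *k,
				     struct kobj_attribute *attr,
				     const char *buf, size_t size)
{
	/* act on the write here */
	return size;
}
kobj_attribute_write(example_trigger, example_trigger_store);
#endif
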
static inline void wake_up_allocators(struct cache_set *c)
{
	struct cache *ca = c->cache;

	wake_up_process(ca->alloc_thread);
}

static inline void closure_bio_submit(struct cache_set *c,
				      struct bio *bio,
				      struct closure *cl)
{
	closure_get(cl);
	if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags))) {
		bio->bi_status = BLK_STS_IOERR;
		bio_endio(bio);
		return;
	}
	submit_bio_noacct(bio);
}
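
/*
 * Illustrative example (editor's addition): closure_bio_submit() takes a
 * ref on the closure for the duration of the bio, so the completion handler
 * is responsible for dropping it:
 */
#if 0
static void example_endio(struct bio *bio)
{
	struct closure *cl = bio->bi_private;

	/* drop the ref taken in closure_bio_submit() */
	closure_put(cl);
}
#endif
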
/*
 * Prevent a kthread from exiting on its own: a kthread that may be stopped
 * because CACHE_SET_IO_DISABLE got set must call wait_for_kthread_stop()
 * before returning, so that kthread_stop() never operates on an
 * already-exited thread.
 */
static inline void wait_for_kthread_stop(void)
{
	while (!kthread_should_stop()) {
		set_current_state(TASK_INTERRUPTIBLE);
		schedule();
	}
}
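
/*
 * Illustrative example (editor's addition): a kthread that can decide to
 * stop early parks itself here until someone actually calls kthread_stop().
 * example_done() is a hypothetical exit predicate:
 */
#if 0
static int example_thread(void *arg)
{
	while (!kthread_should_stop()) {
		if (example_done(arg))	/* hypothetical exit condition */
			break;
		/* do one unit of work */
	}

	wait_for_kthread_stop();
	return 0;
}
#endif
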
/* Forward declarations */

void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio);
void bch_count_io_errors(struct cache *ca, blk_status_t error,
			 int is_read, const char *m);
void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
			      blk_status_t error, const char *m);
void bch_bbio_endio(struct cache_set *c, struct bio *bio,
		    blk_status_t error, const char *m);
void bch_bbio_free(struct bio *bio, struct cache_set *c);
struct bio *bch_bbio_alloc(struct cache_set *c);

void __bch_submit_bbio(struct bio *bio, struct cache_set *c);
void bch_submit_bbio(struct bio *bio, struct cache_set *c,
		     struct bkey *k, unsigned int ptr);

uint8_t bch_inc_gen(struct cache *ca, struct bucket *b);
void bch_rescale_priorities(struct cache_set *c, int sectors);

bool bch_can_invalidate_bucket(struct cache *ca, struct bucket *b);
void __bch_invalidate_one_bucket(struct cache *ca, struct bucket *b);

void __bch_bucket_free(struct cache *ca, struct bucket *b);
void bch_bucket_free(struct cache_set *c, struct bkey *k);

long bch_bucket_alloc(struct cache *ca, unsigned int reserve, bool wait);
int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
			   struct bkey *k, bool wait);
int bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
			 struct bkey *k, bool wait);
bool bch_alloc_sectors(struct cache_set *c, struct bkey *k,
		       unsigned int sectors, unsigned int write_point,
		       unsigned int write_prio, bool wait);
bool bch_cached_dev_error(struct cached_dev *dc);

__printf(2, 3)
bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...);

int bch_prio_write(struct cache *ca, bool wait);
void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent);

extern struct workqueue_struct *bcache_wq;
extern struct workqueue_struct *bch_journal_wq;
extern struct mutex bch_register_lock;
extern struct list_head bch_cache_sets;

extern struct kobj_type bch_cached_dev_ktype;
extern struct kobj_type bch_flash_dev_ktype;
extern struct kobj_type bch_cache_set_ktype;
extern struct kobj_type bch_cache_set_internal_ktype;
extern struct kobj_type bch_cache_ktype;

void bch_cached_dev_release(struct kobject *kobj);
void bch_flash_dev_release(struct kobject *kobj);
void bch_cache_set_release(struct kobject *kobj);
void bch_cache_release(struct kobject *kobj);

int bch_uuid_write(struct cache_set *c);
void bcache_write_super(struct cache_set *c);

int bch_flash_dev_create(struct cache_set *c, uint64_t size);

int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
			  uint8_t *set_uuid);
void bch_cached_dev_detach(struct cached_dev *dc);
int bch_cached_dev_run(struct cached_dev *dc);
void bcache_device_stop(struct bcache_device *d);

void bch_cache_set_unregister(struct cache_set *c);
void bch_cache_set_stop(struct cache_set *c);

struct cache_set *bch_cache_set_alloc(struct cache_sb *sb);
void bch_btree_cache_free(struct cache_set *c);
int bch_btree_cache_alloc(struct cache_set *c);
void bch_moving_init_cache_set(struct cache_set *c);
int bch_open_buckets_alloc(struct cache_set *c);
void bch_open_buckets_free(struct cache_set *c);

int bch_cache_allocator_start(struct cache *ca);

void bch_debug_exit(void);
void bch_debug_init(void);
void bch_request_exit(void);
int bch_request_init(void);

#endif /* _BCACHE_H */