#ifndef _BCACHE_H
#define _BCACHE_H

#define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__

#include <linux/bcache.h>
#include <linux/bio.h>
#include <linux/kobject.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/rbtree.h>
#include <linux/rwsem.h>
#include <linux/refcount.h>
#include <linux/types.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>

#include "bset.h"
#include "util.h"
#include "closure.h"

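/*
 * Per-bucket metadata. A bucket is the unit of allocation on a cache
 * device: pin keeps a bucket from being reused while I/O to it is in
 * flight, prio is its eviction priority, gen is bumped each time the
 * bucket is invalidated, and last_gc/gc_mark are garbage collection
 * state (see the GC_* bitfields below).
 */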
struct bucket {
	atomic_t	pin;
	uint16_t	prio;
	uint8_t		gen;
	uint8_t		last_gc;
	uint16_t	gc_mark;
};

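/*
 * gc_mark is a 16-bit field packed by the BITMASK() macros below:
 * bits 0-1 hold the mark itself (0 when unmarked, else
 * reclaimable/dirty/metadata), bits 2-14 count the sectors in use,
 * and bit 15 flags buckets being moved by moving garbage collection.
 */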
BITMASK(GC_MARK, struct bucket, gc_mark, 0, 2);
#define GC_MARK_RECLAIMABLE	1
#define GC_MARK_DIRTY		2
#define GC_MARK_METADATA	3
#define GC_SECTORS_USED_SIZE	13
#define MAX_GC_SECTORS_USED	(~(~0ULL << GC_SECTORS_USED_SIZE))
BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, GC_SECTORS_USED_SIZE);
BITMASK(GC_MOVE, struct bucket, gc_mark, 15, 1);

#include "journal.h"
#include "stats.h"
struct search;
struct btree;
struct keybuf;

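/*
 * A keybuf caches the results of scanning the btree for keys matching
 * some predicate (writeback uses one to find dirty keys, moving gc
 * another): last_scanned remembers where the scan left off, start/end
 * bound the range being refilled, and matching keys sit in an rbtree
 * backed by a fixed freelist of KEYBUF_NR entries.
 */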
struct keybuf_key {
	struct rb_node		node;
	BKEY_PADDED(key);
	void			*private;
};

struct keybuf {
	struct bkey		last_scanned;
	spinlock_t		lock;

	struct bkey		start;
	struct bkey		end;

	struct rb_root		keys;

#define KEYBUF_NR		500
	DECLARE_ARRAY_ALLOCATOR(struct keybuf_key, freelist, KEYBUF_NR);
};

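/*
 * bcache_device is the generic block device bcache exports; it is
 * embedded both in struct cached_dev (a backing device with a cache
 * attached) and in flash-only volumes. The stripe fields track, at
 * stripe_size granularity, how many sectors of each stripe are dirty
 * and which stripes are entirely dirty.
 */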
struct bcache_device {
	struct closure		cl;

	struct kobject		kobj;

	struct cache_set	*c;
	unsigned int		id;
#define BCACHEDEVNAME_SIZE	12
	char			name[BCACHEDEVNAME_SIZE];

	struct gendisk		*disk;

	unsigned long		flags;
#define BCACHE_DEV_CLOSING		0
#define BCACHE_DEV_DETACHING		1
#define BCACHE_DEV_UNLINK_DONE		2
#define BCACHE_DEV_WB_RUNNING		3
#define BCACHE_DEV_RATE_DW_RUNNING	4
	unsigned int		nr_stripes;
	unsigned int		stripe_size;
	atomic_t		*stripe_sectors_dirty;
	unsigned long		*full_dirty_stripes;

	struct bio_set		bio_split;

	unsigned int		data_csum:1;

	int (*cache_miss)(struct btree *b, struct search *s,
			  struct bio *bio, unsigned int sectors);
	int (*ioctl)(struct bcache_device *d, fmode_t mode,
		     unsigned int cmd, unsigned long arg);
};

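/*
 * Used to detect sequential I/O: recent requests are hashed by the
 * sector they ended at, so a new request that starts where an old one
 * left off inherits and extends its sequential count. Streams that
 * grow past sequential_cutoff can then bypass the cache.
 */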
struct io {
	struct hlist_node	hash;
	struct list_head	lru;

	unsigned long		jiffies;
	unsigned int		sequential;
	sector_t		last;
};

enum stop_on_failure {
	BCH_CACHED_DEV_STOP_AUTO = 0,
	BCH_CACHED_DEV_STOP_ALWAYS,
	BCH_CACHED_DEV_STOP_MODE_MAX,
};

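/*
 * A backing device. Most of this is writeback state: the writeback
 * thread, the keybuf it scans dirty keys into, and the
 * proportional/integral controller terms that steer writeback_rate
 * toward keeping writeback_percent of the device dirty.
 */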
struct cached_dev {
	struct list_head	list;
	struct bcache_device	disk;
	struct block_device	*bdev;

	struct cache_sb		sb;
	struct bio		sb_bio;
	struct bio_vec		sb_bv[1];
	struct closure		sb_write;
	struct semaphore	sb_write_mutex;

	refcount_t		count;
	struct work_struct	detach;

	atomic_t		running;

	struct rw_semaphore	writeback_lock;

	atomic_t		has_dirty;

	struct bch_ratelimit	writeback_rate;
	struct delayed_work	writeback_rate_update;

	struct semaphore	in_flight;
	struct task_struct	*writeback_thread;
	struct workqueue_struct	*writeback_write_wq;

	struct keybuf		writeback_keys;

	struct task_struct	*status_update_thread;

	struct closure_waitlist	writeback_ordering_wait;
	atomic_t		writeback_sequence_next;

#define RECENT_IO_BITS	7
#define RECENT_IO	(1 << RECENT_IO_BITS)
	struct io		io[RECENT_IO];
	struct hlist_head	io_hash[RECENT_IO + 1];
	struct list_head	io_lru;
	spinlock_t		io_lock;

	struct cache_accounting	accounting;

	unsigned int		sequential_cutoff;
	unsigned int		readahead;

	unsigned int		io_disable:1;
	unsigned int		verify:1;
	unsigned int		bypass_torture_test:1;

	unsigned int		partial_stripes_expensive:1;
	unsigned int		writeback_metadata:1;
	unsigned int		writeback_running:1;
	unsigned char		writeback_percent;
	unsigned int		writeback_delay;

	uint64_t		writeback_rate_target;
	int64_t			writeback_rate_proportional;
	int64_t			writeback_rate_integral;
	int64_t			writeback_rate_integral_scaled;
	int32_t			writeback_rate_change;

	unsigned int		writeback_rate_update_seconds;
	unsigned int		writeback_rate_i_term_inverse;
	unsigned int		writeback_rate_p_term_inverse;
	unsigned int		writeback_rate_minimum;

	enum stop_on_failure	stop_when_cache_set_failed;
#define DEFAULT_CACHED_DEV_ERROR_LIMIT	64
	atomic_t		io_errors;
	unsigned int		error_limit;
	unsigned int		offline_seconds;

	char			backing_dev_name[BDEVNAME_SIZE];
};

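/*
 * Bucket freelists are kept per reserve so that allocations needed to
 * make forward progress (btree nodes, prio/gen writes, moving gc)
 * cannot be starved by ordinary data writes (RESERVE_NONE).
 */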
enum alloc_reserve {
	RESERVE_BTREE,
	RESERVE_PRIO,
	RESERVE_MOVINGGC,
	RESERVE_NONE,
	RESERVE_NR,
};

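/*
 * A cache device. Buckets are handed out by the allocator thread:
 * free_inc holds invalidated buckets that can't be reused until their
 * new gens have been written out, after which they move to the free[]
 * fifos, one per reserve. prio_buckets[]/prio_last_buckets[] track the
 * buckets the per-bucket prios/gens themselves are written to.
 */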
struct cache {
	struct cache_set	*set;
	struct cache_sb		sb;
	struct bio		sb_bio;
	struct bio_vec		sb_bv[1];

	struct kobject		kobj;
	struct block_device	*bdev;

	struct task_struct	*alloc_thread;

	struct closure		prio;
	struct prio_set		*disk_buckets;

	uint64_t		*prio_buckets;
	uint64_t		*prio_last_buckets;

	DECLARE_FIFO(long, free)[RESERVE_NR];
	DECLARE_FIFO(long, free_inc);

	size_t			fifo_last_bucket;

	struct bucket		*buckets;

	DECLARE_HEAP(struct bucket *, heap);

	unsigned int		invalidate_needs_gc;

	bool			discard;

	struct journal_device	journal;

#define IO_ERROR_SHIFT		20
	atomic_t		io_errors;
	atomic_t		io_count;

	atomic_long_t		meta_sectors_written;
	atomic_long_t		btree_sectors_written;
	atomic_long_t		sectors_written;

	char			cache_dev_name[BDEVNAME_SIZE];
};

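/*
 * Statistics from the most recent garbage collection pass; data is in
 * sectors and in_use is a percentage.
 */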
struct gc_stat {
	size_t			nodes;
	size_t			nodes_pre;
	size_t			key_bytes;

	size_t			nkeys;
	uint64_t		data;
	unsigned int		in_use;
};

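/*
 * Cache set lifecycle flags, stored in cache_set->flags: UNREGISTERING
 * and STOPPING mark the two ways a cache set shuts down, RUNNING is
 * set once registration completes, and IO_DISABLE makes
 * closure_bio_submit() fail all further I/O after too many errors.
 */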
#define CACHE_SET_UNREGISTERING	0
#define CACHE_SET_STOPPING	1
#define CACHE_SET_RUNNING	2
#define CACHE_SET_IO_DISABLE	3

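/*
 * struct cache_set ties everything together: the member cache devices,
 * the attached backing devices, the btree, the journal and the
 * allocator state all hang off of it.
 */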
struct cache_set {
	struct closure		cl;

	struct list_head	list;
	struct kobject		kobj;
	struct kobject		internal;
	struct dentry		*debug;
	struct cache_accounting	accounting;

	unsigned long		flags;
	atomic_t		idle_counter;
	atomic_t		at_max_writeback_rate;

	struct cache_sb		sb;

	struct cache		*cache[MAX_CACHES_PER_SET];
	struct cache		*cache_by_alloc[MAX_CACHES_PER_SET];
	int			caches_loaded;

	struct bcache_device	**devices;
	unsigned int		devices_max_used;
	atomic_t		attached_dev_nr;
	struct list_head	cached_devs;
	uint64_t		cached_dev_sectors;
	atomic_long_t		flash_dev_dirty_sectors;
	struct closure		caching;

	struct closure		sb_write;
	struct semaphore	sb_write_mutex;

	mempool_t		search;
	mempool_t		bio_meta;
	struct bio_set		bio_split;

	struct shrinker		shrink;

	struct mutex		bucket_lock;

	unsigned short		bucket_bits;

	unsigned short		block_bits;

	unsigned int		btree_pages;

	struct list_head	btree_cache;
	struct list_head	btree_cache_freeable;
	struct list_head	btree_cache_freed;

	unsigned int		btree_cache_used;

	wait_queue_head_t	btree_cache_wait;
	struct task_struct	*btree_cache_alloc_lock;

	atomic_t		prio_blocked;
	wait_queue_head_t	bucket_wait;

	atomic_t		rescale;

	atomic_t		search_inflight;

	uint16_t		min_prio;

	uint8_t			need_gc;
	struct gc_stat		gc_stats;
	size_t			nbuckets;
	size_t			avail_nbuckets;

	struct task_struct	*gc_thread;

	struct bkey		gc_done;

#define BCH_ENABLE_AUTO_GC	1
#define BCH_DO_AUTO_GC		2
	uint8_t			gc_after_writeback;

	int			gc_mark_valid;

	atomic_t		sectors_to_gc;
	wait_queue_head_t	gc_wait;

	struct keybuf		moving_gc_keys;

	struct semaphore	moving_in_flight;

	struct workqueue_struct	*moving_gc_wq;

	struct btree		*root;

#ifdef CONFIG_BCACHE_DEBUG
	struct btree		*verify_data;
	struct bset		*verify_ondisk;
	struct mutex		verify_lock;
#endif

	unsigned int		nr_uuids;
	struct uuid_entry	*uuids;
	BKEY_PADDED(uuid_bucket);
	struct closure		uuid_write;
	struct semaphore	uuid_write_mutex;

	mempool_t		fill_iter;

	struct bset_sort_state	sort;

	struct list_head	data_buckets;
	spinlock_t		data_bucket_lock;

	struct journal		journal;

#define CONGESTED_MAX		1024
	unsigned int		congested_last_us;
	atomic_t		congested;

	unsigned int		congested_read_threshold_us;
	unsigned int		congested_write_threshold_us;

	struct time_stats	btree_gc_time;
	struct time_stats	btree_split_time;
	struct time_stats	btree_read_time;

	atomic_long_t		cache_read_races;
	atomic_long_t		writeback_keys_done;
	atomic_long_t		writeback_keys_failed;

	atomic_long_t		reclaim;
	atomic_long_t		reclaimed_journal_buckets;
	atomic_long_t		flush_write;

	enum {
		ON_ERROR_UNREGISTER,
		ON_ERROR_PANIC,
	}			on_error;
#define DEFAULT_IO_ERROR_LIMIT	8
	unsigned int		error_limit;
	unsigned int		error_decay;

	unsigned short		journal_delay_ms;
	bool			expensive_debug_checks;
	unsigned int		verify:1;
	unsigned int		key_merging_disabled:1;
	unsigned int		gc_always_rewrite:1;
	unsigned int		shrinker_disabled:1;
	unsigned int		copy_gc_enabled:1;

#define BUCKET_HASH_BITS	12
	struct hlist_head	bucket_hash[1 << BUCKET_HASH_BITS];
};

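/*
 * A bbio wraps a bio with the bkey the I/O is for; _pad sizes the
 * union for a key with a single pointer (two header u64s plus one
 * pointer).
 */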
struct bbio {
	unsigned int		submit_time_us;
	union {
		struct bkey	key;
		uint64_t	_pad[3];
	};
	struct bio		bio;
};

#define BTREE_PRIO		USHRT_MAX
#define INITIAL_PRIO		32768U

#define btree_bytes(c)		((c)->btree_pages * PAGE_SIZE)
#define btree_blocks(b)							\
	((unsigned int) (KEY_SIZE(&b->key) >> (b)->c->block_bits))

#define btree_default_blocks(c)						\
	((unsigned int) ((PAGE_SECTORS * (c)->btree_pages) >> (c)->block_bits))

#define bucket_pages(c)		((c)->sb.bucket_size / PAGE_SECTORS)
#define bucket_bytes(c)		((c)->sb.bucket_size << 9)
#define block_bytes(c)		((c)->sb.block_size << 9)

#define prios_per_bucket(c)				\
	((bucket_bytes(c) - sizeof(struct prio_set)) /	\
	 sizeof(struct bucket_disk))
#define prio_buckets(c)					\
	DIV_ROUND_UP((size_t) (c)->sb.nbuckets, prios_per_bucket(c))

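/*
 * Buckets are a power-of-two number of sectors, so sector/bucket
 * conversions are shifts and masks. E.g. with 1024-sector buckets
 * (bucket_bits == 10), sector 5000 lives in bucket 4 at offset 904.
 */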
static inline size_t sector_to_bucket(struct cache_set *c, sector_t s)
{
	return s >> c->bucket_bits;
}

static inline sector_t bucket_to_sector(struct cache_set *c, size_t b)
{
	return ((sector_t) b) << c->bucket_bits;
}

static inline sector_t bucket_remainder(struct cache_set *c, sector_t s)
{
	return s & (c->sb.bucket_size - 1);
}

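/*
 * Helpers for following one of a bkey's pointers to the cache device,
 * bucket number and in-memory bucket it points into.
 */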
static inline struct cache *PTR_CACHE(struct cache_set *c,
				      const struct bkey *k,
				      unsigned int ptr)
{
	return c->cache[PTR_DEV(k, ptr)];
}

static inline size_t PTR_BUCKET_NR(struct cache_set *c,
				   const struct bkey *k,
				   unsigned int ptr)
{
	return sector_to_bucket(c, PTR_OFFSET(k, ptr));
}

static inline struct bucket *PTR_BUCKET(struct cache_set *c,
					const struct bkey *k,
					unsigned int ptr)
{
	return PTR_CACHE(c, k, ptr)->buckets + PTR_BUCKET_NR(c, k, ptr);
}

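/*
 * Generation comparison with 8-bit wraparound: returns how far gen a
 * is ahead of gen b, or 0 if it isn't ahead. E.g. gen_after(1, 255)
 * == 2 (255 -> 0 -> 1), while gen_after(250, 252) == 0. A pointer is
 * stale once its bucket's gen has moved past the gen in the key.
 */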
static inline uint8_t gen_after(uint8_t a, uint8_t b)
{
	uint8_t r = a - b;

	return r > 128U ? 0 : r;
}

static inline uint8_t ptr_stale(struct cache_set *c, const struct bkey *k,
				unsigned int i)
{
	return gen_after(PTR_BUCKET(c, k, i)->gen, PTR_GEN(k, i));
}

static inline bool ptr_available(struct cache_set *c, const struct bkey *k,
				 unsigned int i)
{
	return (PTR_DEV(k, i) < MAX_CACHES_PER_SET) && PTR_CACHE(c, k, i);
}

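/*
 * Checksum a bset, skipping the bset's own leading 64-bit csum field.
 */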
#define csum_set(i)							\
	bch_crc64(((void *) (i)) + sizeof(uint64_t),			\
		  ((void *) bset_bkey_last(i)) -			\
		  (((void *) (i)) + sizeof(uint64_t)))

#define btree_bug(b, ...)						\
do {									\
	if (bch_cache_set_error((b)->c, __VA_ARGS__))			\
		dump_stack();						\
} while (0)

#define cache_bug(c, ...)						\
do {									\
	if (bch_cache_set_error(c, __VA_ARGS__))			\
		dump_stack();						\
} while (0)

#define btree_bug_on(cond, b, ...)					\
do {									\
	if (cond)							\
		btree_bug(b, __VA_ARGS__);				\
} while (0)

#define cache_bug_on(cond, c, ...)					\
do {									\
	if (cond)							\
		cache_bug(c, __VA_ARGS__);				\
} while (0)

#define cache_set_err_on(cond, c, ...)					\
do {									\
	if (cond)							\
		bch_cache_set_error(c, __VA_ARGS__);			\
} while (0)

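/* Iterate over the caches in a set / the buckets in a cache. */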
#define for_each_cache(ca, cs, iter)					\
	for (iter = 0; ca = cs->cache[iter], iter < (cs)->sb.nr_in_set; iter++)

#define for_each_bucket(b, ca)						\
	for (b = (ca)->buckets + (ca)->sb.first_bucket;			\
	     b < (ca)->buckets + (ca)->sb.nbuckets; b++)

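/*
 * cached_dev->count is held while the device has dirty data or I/O in
 * flight; when it drops to zero the deferred detach work runs.
 */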
static inline void cached_dev_put(struct cached_dev *dc)
{
	if (refcount_dec_and_test(&dc->count))
		schedule_work(&dc->detach);
}

static inline bool cached_dev_get(struct cached_dev *dc)
{
	if (!refcount_inc_not_zero(&dc->count))
		return false;

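	/*
	 * Order the refcount increment against later loads; pairs with
	 * a barrier on the attach path.
	 */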
	smp_mb__after_atomic();
	return true;
}

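/*
 * bucket_gc_gen() returns how many generations this bucket has gone
 * through since GC last updated last_gc; BUCKET_GC_GEN_MAX caps that
 * so GC runs well before the 8-bit gen can wrap and make stale
 * pointers look valid again.
 */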
static inline uint8_t bucket_gc_gen(struct bucket *b)
{
	return b->gen - b->last_gc;
}

#define BUCKET_GC_GEN_MAX	96U

#define kobj_attribute_write(n, fn)					\
	static struct kobj_attribute ksysfs_##n = __ATTR(n, 0200, NULL, fn)

#define kobj_attribute_rw(n, show, store)				\
	static struct kobj_attribute ksysfs_##n =			\
		__ATTR(n, 0600, show, store)

static inline void wake_up_allocators(struct cache_set *c)
{
	struct cache *ca;
	unsigned int i;

	for_each_cache(ca, c, i)
		wake_up_process(ca->alloc_thread);
}

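/*
 * Submit a bio on behalf of a closure: a ref on the closure is taken
 * first, and if the cache set has I/O disabled the bio fails
 * immediately with BLK_STS_IOERR instead of being submitted.
 */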
static inline void closure_bio_submit(struct cache_set *c,
				      struct bio *bio,
				      struct closure *cl)
{
	closure_get(cl);
	if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags))) {
		bio->bi_status = BLK_STS_IOERR;
		bio_endio(bio);
		return;
	}
	generic_make_request(bio);
}

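/*
 * Park the calling kthread until kthread_stop() is called on it; used
 * by threads that have finished their work but must not exit before
 * whoever created them has stopped them.
 */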
static inline void wait_for_kthread_stop(void)
{
	while (!kthread_should_stop()) {
		set_current_state(TASK_INTERRUPTIBLE);
		schedule();
	}
}

void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio);
void bch_count_io_errors(struct cache *ca, blk_status_t error,
			 int is_read, const char *m);
void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
			      blk_status_t error, const char *m);
void bch_bbio_endio(struct cache_set *c, struct bio *bio,
		    blk_status_t error, const char *m);
void bch_bbio_free(struct bio *bio, struct cache_set *c);
struct bio *bch_bbio_alloc(struct cache_set *c);

void __bch_submit_bbio(struct bio *bio, struct cache_set *c);
void bch_submit_bbio(struct bio *bio, struct cache_set *c,
		     struct bkey *k, unsigned int ptr);

uint8_t bch_inc_gen(struct cache *ca, struct bucket *b);
void bch_rescale_priorities(struct cache_set *c, int sectors);

bool bch_can_invalidate_bucket(struct cache *ca, struct bucket *b);
void __bch_invalidate_one_bucket(struct cache *ca, struct bucket *b);

void __bch_bucket_free(struct cache *ca, struct bucket *b);
void bch_bucket_free(struct cache_set *c, struct bkey *k);

long bch_bucket_alloc(struct cache *ca, unsigned int reserve, bool wait);
int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
			   struct bkey *k, int n, bool wait);
int bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
			 struct bkey *k, int n, bool wait);
bool bch_alloc_sectors(struct cache_set *c, struct bkey *k,
		       unsigned int sectors, unsigned int write_point,
		       unsigned int write_prio, bool wait);
bool bch_cached_dev_error(struct cached_dev *dc);

__printf(2, 3)
bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...);

void bch_prio_write(struct cache *ca);
void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent);

extern struct workqueue_struct *bcache_wq;
extern struct workqueue_struct *bch_journal_wq;
extern struct mutex bch_register_lock;
extern struct list_head bch_cache_sets;

extern struct kobj_type bch_cached_dev_ktype;
extern struct kobj_type bch_flash_dev_ktype;
extern struct kobj_type bch_cache_set_ktype;
extern struct kobj_type bch_cache_set_internal_ktype;
extern struct kobj_type bch_cache_ktype;

void bch_cached_dev_release(struct kobject *kobj);
void bch_flash_dev_release(struct kobject *kobj);
void bch_cache_set_release(struct kobject *kobj);
void bch_cache_release(struct kobject *kobj);

int bch_uuid_write(struct cache_set *c);
void bcache_write_super(struct cache_set *c);

int bch_flash_dev_create(struct cache_set *c, uint64_t size);

int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
			  uint8_t *set_uuid);
void bch_cached_dev_detach(struct cached_dev *dc);
int bch_cached_dev_run(struct cached_dev *dc);
void bcache_device_stop(struct bcache_device *d);

void bch_cache_set_unregister(struct cache_set *c);
void bch_cache_set_stop(struct cache_set *c);

struct cache_set *bch_cache_set_alloc(struct cache_sb *sb);
void bch_btree_cache_free(struct cache_set *c);
int bch_btree_cache_alloc(struct cache_set *c);
void bch_moving_init_cache_set(struct cache_set *c);
int bch_open_buckets_alloc(struct cache_set *c);
void bch_open_buckets_free(struct cache_set *c);

int bch_cache_allocator_start(struct cache *ca);

void bch_debug_exit(void);
void bch_debug_init(void);
void bch_request_exit(void);
int bch_request_init(void);

#endif /* _BCACHE_H */