#ifndef _BCACHE_H
#define _BCACHE_H

#define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__

#include <linux/bcache.h>
#include <linux/bio.h>
#include <linux/kobject.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/rbtree.h>
#include <linux/rwsem.h>
#include <linux/refcount.h>
#include <linux/types.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>

#include "bset.h"
#include "util.h"
#include "closure.h"

struct bucket {
	atomic_t	pin;
	uint16_t	prio;
	uint8_t		gen;
	uint8_t		last_gc;
	uint16_t	gc_mark;
};

BITMASK(GC_MARK, struct bucket, gc_mark, 0, 2);
#define GC_MARK_RECLAIMABLE	1
#define GC_MARK_DIRTY		2
#define GC_MARK_METADATA	3
#define GC_SECTORS_USED_SIZE	13
#define MAX_GC_SECTORS_USED	(~(~0ULL << GC_SECTORS_USED_SIZE))
BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, GC_SECTORS_USED_SIZE);
BITMASK(GC_MOVE, struct bucket, gc_mark, 15, 1);

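/*
 * Layout of the 16 bit gc_mark word, read straight off the BITMASK()
 * arguments above:
 *
 *	bits 0-1:   GC_MARK_* state (reclaimable/dirty/metadata)
 *	bits 2-14:  GC_SECTORS_USED, capped at MAX_GC_SECTORS_USED (8191)
 *	bit  15:    GC_MOVE flag
 *
 * Illustrative decoding (a sketch, assuming BITMASK() generates the usual
 * GC_*()/SET_GC_*() accessor pairs taking a struct bucket pointer):
 *
 *	struct bucket b = { .gc_mark = 0x8007 };
 *	GC_MARK(&b);		// 3 == GC_MARK_METADATA (bits 0-1)
 *	GC_SECTORS_USED(&b);	// 1 (bits 2-14)
 *	GC_MOVE(&b);		// 1 (bit 15)
 */
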
#include "journal.h"
#include "stats.h"

struct search;
struct btree;
struct keybuf;

struct keybuf_key {
	struct rb_node		node;
	BKEY_PADDED(key);
	void			*private;
};

struct keybuf {
	struct bkey		last_scanned;
	spinlock_t		lock;

	struct bkey		start;
	struct bkey		end;

	struct rb_root		keys;

#define KEYBUF_NR		500
	DECLARE_ARRAY_ALLOCATOR(struct keybuf_key, freelist, KEYBUF_NR);
};

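/*
 * A keybuf caches a sorted window of btree keys between 'start' and 'end'
 * in an rb tree, so scanners (writeback and moving GC each own one, see
 * writeback_keys and moving_gc_keys below) can refill in batches instead
 * of walking the btree key by key; last_scanned records where the next
 * refill should resume.
 */
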
struct bcache_device {
	struct closure		cl;

	struct kobject		kobj;

	struct cache_set	*c;
	unsigned		id;
#define BCACHEDEVNAME_SIZE	12
	char			name[BCACHEDEVNAME_SIZE];

	struct gendisk		*disk;

	unsigned long		flags;
#define BCACHE_DEV_CLOSING		0
#define BCACHE_DEV_DETACHING		1
#define BCACHE_DEV_UNLINK_DONE		2
#define BCACHE_DEV_WB_RUNNING		3
#define BCACHE_DEV_RATE_DW_RUNNING	4
	unsigned		nr_stripes;
	unsigned		stripe_size;
	atomic_t		*stripe_sectors_dirty;
	unsigned long		*full_dirty_stripes;

	struct bio_set		bio_split;

	unsigned		data_csum:1;

	int (*cache_miss)(struct btree *, struct search *,
			  struct bio *, unsigned);
	int (*ioctl)(struct bcache_device *, fmode_t, unsigned, unsigned long);
};

struct io {
	struct hlist_node	hash;
	struct list_head	lru;

	unsigned long		jiffies;
	unsigned		sequential;
	sector_t		last;
};

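/*
 * One of these per recently issued IO, hashed by sector: 'last' is where
 * the previous request ended, 'sequential' accumulates how many sectors of
 * back-to-back IO have been seen, and 'jiffies' lets stale entries age out
 * of the LRU. cached_dev keeps RECENT_IO of these (see below) to detect
 * sequential streams, compared against sequential_cutoff.
 */
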
enum stop_on_failure {
	BCH_CACHED_DEV_STOP_AUTO = 0,
	BCH_CACHED_DEV_STOP_ALWAYS,
	BCH_CACHED_DEV_STOP_MODE_MAX,
};

struct cached_dev {
	struct list_head	list;
	struct bcache_device	disk;
	struct block_device	*bdev;

	struct cache_sb		sb;
	struct bio		sb_bio;
	struct bio_vec		sb_bv[1];
	struct closure		sb_write;
	struct semaphore	sb_write_mutex;

	refcount_t		count;
	struct work_struct	detach;

	atomic_t		running;

	struct rw_semaphore	writeback_lock;

	atomic_t		has_dirty;

	atomic_t		backing_idle;

	struct bch_ratelimit	writeback_rate;
	struct delayed_work	writeback_rate_update;

	struct semaphore	in_flight;
	struct task_struct	*writeback_thread;
	struct workqueue_struct	*writeback_write_wq;

	struct keybuf		writeback_keys;

	struct task_struct	*status_update_thread;

	struct closure_waitlist writeback_ordering_wait;
	atomic_t		writeback_sequence_next;

#define RECENT_IO_BITS	7
#define RECENT_IO	(1 << RECENT_IO_BITS)
	struct io		io[RECENT_IO];
	struct hlist_head	io_hash[RECENT_IO + 1];
	struct list_head	io_lru;
	spinlock_t		io_lock;

	struct cache_accounting accounting;

	unsigned		sequential_cutoff;
	unsigned		readahead;

	unsigned		io_disable:1;
	unsigned		verify:1;
	unsigned		bypass_torture_test:1;

	unsigned		partial_stripes_expensive:1;
	unsigned		writeback_metadata:1;
	unsigned		writeback_running:1;
	unsigned char		writeback_percent;
	unsigned		writeback_delay;

	uint64_t		writeback_rate_target;
	int64_t			writeback_rate_proportional;
	int64_t			writeback_rate_integral;
	int64_t			writeback_rate_integral_scaled;
	int32_t			writeback_rate_change;

	unsigned		writeback_rate_update_seconds;
	unsigned		writeback_rate_i_term_inverse;
	unsigned		writeback_rate_p_term_inverse;
	unsigned		writeback_rate_minimum;

	enum stop_on_failure	stop_when_cache_set_failed;
#define DEFAULT_CACHED_DEV_ERROR_LIMIT	64
	atomic_t		io_errors;
	unsigned		error_limit;
	unsigned		offline_seconds;

	char			backing_dev_name[BDEVNAME_SIZE];
};

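/*
 * The writeback_rate_* fields above suggest a PI controller for the
 * writeback rate: 'target' is the desired dirty sector count, and the
 * proportional and integral terms are scaled down by the *_term_inverse
 * divisors. A sketch of the periodic update under those assumptions (not
 * the verbatim algorithm; run every writeback_rate_update_seconds):
 *
 *	error = dirty - target;
 *	proportional = error / p_term_inverse;
 *	integral += (error * dt) / i_term_inverse;
 *	rate = max(proportional + integral, minimum);
 */
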
enum alloc_reserve {
	RESERVE_BTREE,
	RESERVE_PRIO,
	RESERVE_MOVINGGC,
	RESERVE_NONE,
	RESERVE_NR,
};

struct cache {
	struct cache_set	*set;
	struct cache_sb		sb;
	struct bio		sb_bio;
	struct bio_vec		sb_bv[1];

	struct kobject		kobj;
	struct block_device	*bdev;

	struct task_struct	*alloc_thread;

	struct closure		prio;
	struct prio_set		*disk_buckets;

	uint64_t		*prio_buckets;
	uint64_t		*prio_last_buckets;

	DECLARE_FIFO(long, free)[RESERVE_NR];
	DECLARE_FIFO(long, free_inc);

	size_t			fifo_last_bucket;

	struct bucket		*buckets;

	DECLARE_HEAP(struct bucket *, heap);

	unsigned		invalidate_needs_gc;

	bool			discard;

	struct journal_device	journal;

#define IO_ERROR_SHIFT		20
	atomic_t		io_errors;
	atomic_t		io_count;

	atomic_long_t		meta_sectors_written;
	atomic_long_t		btree_sectors_written;
	atomic_long_t		sectors_written;

	char			cache_dev_name[BDEVNAME_SIZE];
};

struct gc_stat {
	size_t			nodes;
	size_t			key_bytes;

	size_t			nkeys;
	uint64_t		data;
	unsigned		in_use;
};

#define CACHE_SET_UNREGISTERING	0
#define CACHE_SET_STOPPING	1
#define CACHE_SET_RUNNING	2
#define CACHE_SET_IO_DISABLE	3

struct cache_set {
	struct closure		cl;

	struct list_head	list;
	struct kobject		kobj;
	struct kobject		internal;
	struct dentry		*debug;
	struct cache_accounting accounting;

	unsigned long		flags;

	struct cache_sb		sb;

	struct cache		*cache[MAX_CACHES_PER_SET];
	struct cache		*cache_by_alloc[MAX_CACHES_PER_SET];
	int			caches_loaded;

	struct bcache_device	**devices;
	unsigned		devices_max_used;
	struct list_head	cached_devs;
	uint64_t		cached_dev_sectors;
	struct closure		caching;

	struct closure		sb_write;
	struct semaphore	sb_write_mutex;

	mempool_t		search;
	mempool_t		bio_meta;
	struct bio_set		bio_split;

	struct shrinker		shrink;

	struct mutex		bucket_lock;

	unsigned short		bucket_bits;

	unsigned short		block_bits;

	unsigned		btree_pages;

	struct list_head	btree_cache;
	struct list_head	btree_cache_freeable;
	struct list_head	btree_cache_freed;

	unsigned		btree_cache_used;

	wait_queue_head_t	btree_cache_wait;
	struct task_struct	*btree_cache_alloc_lock;

	atomic_t		prio_blocked;
	wait_queue_head_t	bucket_wait;

	atomic_t		rescale;

	uint16_t		min_prio;

	uint8_t			need_gc;
	struct gc_stat		gc_stats;
	size_t			nbuckets;
	size_t			avail_nbuckets;

	struct task_struct	*gc_thread;

	struct bkey		gc_done;

	int			gc_mark_valid;

	atomic_t		sectors_to_gc;
	wait_queue_head_t	gc_wait;

	struct keybuf		moving_gc_keys;

	struct semaphore	moving_in_flight;

	struct workqueue_struct	*moving_gc_wq;

	struct btree		*root;

#ifdef CONFIG_BCACHE_DEBUG
	struct btree		*verify_data;
	struct bset		*verify_ondisk;
	struct mutex		verify_lock;
#endif

	unsigned		nr_uuids;
	struct uuid_entry	*uuids;
	BKEY_PADDED(uuid_bucket);
	struct closure		uuid_write;
	struct semaphore	uuid_write_mutex;

	mempool_t		fill_iter;

	struct bset_sort_state	sort;

	struct list_head	data_buckets;
	spinlock_t		data_bucket_lock;

	struct journal		journal;

#define CONGESTED_MAX		1024
	unsigned		congested_last_us;
	atomic_t		congested;

	unsigned		congested_read_threshold_us;
	unsigned		congested_write_threshold_us;

	struct time_stats	btree_gc_time;
	struct time_stats	btree_split_time;
	struct time_stats	btree_read_time;

	atomic_long_t		cache_read_races;
	atomic_long_t		writeback_keys_done;
	atomic_long_t		writeback_keys_failed;

	atomic_long_t		reclaim;
	atomic_long_t		flush_write;
	atomic_long_t		retry_flush_write;

	enum {
		ON_ERROR_UNREGISTER,
		ON_ERROR_PANIC,
	} on_error;
#define DEFAULT_IO_ERROR_LIMIT	8
	unsigned		error_limit;
	unsigned		error_decay;

	unsigned short		journal_delay_ms;
	bool			expensive_debug_checks;
	unsigned		verify:1;
	unsigned		key_merging_disabled:1;
	unsigned		gc_always_rewrite:1;
	unsigned		shrinker_disabled:1;
	unsigned		copy_gc_enabled:1;

#define BUCKET_HASH_BITS	12
	struct hlist_head	bucket_hash[1 << BUCKET_HASH_BITS];

	DECLARE_HEAP(struct btree *, flush_btree);
};

struct bbio {
	unsigned		submit_time_us;
	union {
		struct bkey	key;
		uint64_t	_pad[3];
	};
	struct bio		bio;
};

#define BTREE_PRIO		USHRT_MAX
#define INITIAL_PRIO		32768U

#define btree_bytes(c)		((c)->btree_pages * PAGE_SIZE)
#define btree_blocks(b)							\
	((unsigned) (KEY_SIZE(&b->key) >> (b)->c->block_bits))

#define btree_default_blocks(c)						\
	((unsigned) ((PAGE_SECTORS * (c)->btree_pages) >> (c)->block_bits))

#define bucket_pages(c)		((c)->sb.bucket_size / PAGE_SECTORS)
#define bucket_bytes(c)		((c)->sb.bucket_size << 9)
#define block_bytes(c)		((c)->sb.block_size << 9)

#define prios_per_bucket(c)						\
	((bucket_bytes(c) - sizeof(struct prio_set)) /			\
	 sizeof(struct bucket_disk))
#define prio_buckets(c)							\
	DIV_ROUND_UP((size_t) (c)->sb.nbuckets, prios_per_bucket(c))

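/*
 * The << 9 above converts 512 byte sectors to bytes. Worked example,
 * assuming a 1024 sector (512KiB) bucket and 4KiB pages (PAGE_SECTORS == 8):
 * bucket_pages() is 1024 / 8 = 128 and bucket_bytes() is 1024 << 9 = 524288.
 * prios_per_bucket() is how many struct bucket_disk entries fit in one
 * bucket after the prio_set header, and prio_buckets() rounds up to the
 * number of such buckets needed to cover every bucket in the cache.
 */
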
static inline size_t sector_to_bucket(struct cache_set *c, sector_t s)
{
	return s >> c->bucket_bits;
}

static inline sector_t bucket_to_sector(struct cache_set *c, size_t b)
{
	return ((sector_t) b) << c->bucket_bits;
}

static inline sector_t bucket_remainder(struct cache_set *c, sector_t s)
{
	return s & (c->sb.bucket_size - 1);
}

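/*
 * bucket_bits is log2 of the bucket size in sectors, so these helpers are
 * pure shifts and masks. Worked example with a 1024 sector bucket
 * (bucket_bits == 10): sector 5000 lives in bucket 5000 >> 10 = 4, which
 * starts at sector 4 << 10 = 4096, at offset 5000 & 1023 = 904 within it.
 * Note bucket_remainder() only works because the bucket size is a power
 * of two.
 */
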
static inline struct cache *PTR_CACHE(struct cache_set *c,
				      const struct bkey *k,
				      unsigned ptr)
{
	return c->cache[PTR_DEV(k, ptr)];
}

static inline size_t PTR_BUCKET_NR(struct cache_set *c,
				   const struct bkey *k,
				   unsigned ptr)
{
	return sector_to_bucket(c, PTR_OFFSET(k, ptr));
}

static inline struct bucket *PTR_BUCKET(struct cache_set *c,
					const struct bkey *k,
					unsigned ptr)
{
	return PTR_CACHE(c, k, ptr)->buckets + PTR_BUCKET_NR(c, k, ptr);
}

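/*
 * A bkey pointer carries (device, offset, gen): PTR_CACHE() resolves the
 * device index to its struct cache, PTR_BUCKET_NR() turns the sector
 * offset into a bucket index, and PTR_BUCKET() composes the two to reach
 * the in-memory struct bucket the pointer refers to.
 */
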
static inline uint8_t gen_after(uint8_t a, uint8_t b)
{
	uint8_t r = a - b;

	return r > 128U ? 0 : r;
}

static inline uint8_t ptr_stale(struct cache_set *c, const struct bkey *k,
				unsigned i)
{
	return gen_after(PTR_BUCKET(c, k, i)->gen, PTR_GEN(k, i));
}

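/*
 * Generations are 8 bits and wrap, so gen_after() compares them mod 256,
 * treating a difference greater than 128 as "b is actually ahead" and
 * returning 0. For example, gen_after(5, 250) == 11 (a wrapped past b),
 * while gen_after(250, 5) == 0. ptr_stale() therefore returns how many
 * times the bucket has been reused since the pointer was created; any
 * nonzero result means the pointer is stale.
 */
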
static inline bool ptr_available(struct cache_set *c, const struct bkey *k,
				 unsigned i)
{
	return (PTR_DEV(k, i) < MAX_CACHES_PER_SET) && PTR_CACHE(c, k, i);
}

#define csum_set(i)							\
	bch_crc64(((void *) (i)) + sizeof(uint64_t),			\
		  ((void *) bset_bkey_last(i)) -			\
		  (((void *) (i)) + sizeof(uint64_t)))

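/*
 * csum_set() checksums a bset from just past its first u64 (which holds
 * the checksum itself, hence the sizeof(uint64_t) offsets) through the
 * last key, so the stored checksum never covers its own bytes.
 */
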
#define btree_bug(b, ...)						\
do {									\
	if (bch_cache_set_error((b)->c, __VA_ARGS__))			\
		dump_stack();						\
} while (0)

#define cache_bug(c, ...)						\
do {									\
	if (bch_cache_set_error(c, __VA_ARGS__))			\
		dump_stack();						\
} while (0)

#define btree_bug_on(cond, b, ...)					\
do {									\
	if (cond)							\
		btree_bug(b, __VA_ARGS__);				\
} while (0)

#define cache_bug_on(cond, c, ...)					\
do {									\
	if (cond)							\
		cache_bug(c, __VA_ARGS__);				\
} while (0)

#define cache_set_err_on(cond, c, ...)					\
do {									\
	if (cond)							\
		bch_cache_set_error(c, __VA_ARGS__);			\
} while (0)

#define for_each_cache(ca, cs, iter)					\
	for (iter = 0; ca = cs->cache[iter], iter < (cs)->sb.nr_in_set; iter++)

#define for_each_bucket(b, ca)						\
	for (b = (ca)->buckets + (ca)->sb.first_bucket;			\
	     b < (ca)->buckets + (ca)->sb.nbuckets; b++)

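/*
 * Illustrative iteration (a sketch, not from the original source):
 *
 *	struct cache *ca;
 *	struct bucket *b;
 *	unsigned i;
 *
 *	for_each_cache(ca, c, i)
 *		for_each_bucket(b, ca)
 *			b->gc_mark = 0;
 *
 * Note for_each_cache() evaluates 'ca = cs->cache[iter]' via the comma
 * operator before testing the bound, so ca is assigned one final time on
 * the terminating iteration.
 */
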
static inline void cached_dev_put(struct cached_dev *dc)
{
	if (refcount_dec_and_test(&dc->count))
		schedule_work(&dc->detach);
}

static inline bool cached_dev_get(struct cached_dev *dc)
{
	if (!refcount_inc_not_zero(&dc->count))
		return false;

	/* Order subsequent accesses against the refcount bump */
	smp_mb__after_atomic();
	return true;
}

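/*
 * Lifetime rule implied above: cached_dev_get() uses
 * refcount_inc_not_zero(), so it fails once the count has already hit
 * zero and teardown has begun; the final cached_dev_put() schedules the
 * detach work rather than tearing down in-line, presumably so the detach
 * path can run (and sleep) in process context.
 */
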
/*
 * bucket_gc_gen() returns how far the bucket's current generation has
 * advanced past last_gc, the oldest pointer generation GC found
 * referencing it. BUCKET_GC_GEN_MAX bounds this comfortably below the
 * 128 at which the mod 256 arithmetic in gen_after() would wrap.
 */
static inline uint8_t bucket_gc_gen(struct bucket *b)
{
	return b->gen - b->last_gc;
}

#define BUCKET_GC_GEN_MAX	96U

#define kobj_attribute_write(n, fn)					\
	static struct kobj_attribute ksysfs_##n = __ATTR(n, S_IWUSR, NULL, fn)

#define kobj_attribute_rw(n, show, store)				\
	static struct kobj_attribute ksysfs_##n =			\
		__ATTR(n, S_IWUSR|S_IRUSR, show, store)

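/*
 * Hypothetical usage of the sysfs helpers above (the attribute name and
 * handler are made up for illustration):
 *
 *	static ssize_t foo_store(struct kobject *k, struct kobj_attribute *a,
 *				 const char *buf, size_t size);
 *	kobj_attribute_write(foo, foo_store);	// defines ksysfs_foo
 */
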
static inline void wake_up_allocators(struct cache_set *c)
{
	struct cache *ca;
	unsigned i;

	for_each_cache(ca, c, i)
		wake_up_process(ca->alloc_thread);
}

static inline void closure_bio_submit(struct cache_set *c,
				      struct bio *bio,
				      struct closure *cl)
{
	closure_get(cl);
	if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags))) {
		bio->bi_status = BLK_STS_IOERR;
		bio_endio(bio);
		return;
	}
	generic_make_request(bio);
}

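/*
 * closure_bio_submit() takes a ref on the closure before submitting, so
 * the closure cannot finish until the bio completes; when the cache set
 * has IO disabled, the bio is failed immediately with BLK_STS_IOERR
 * instead of being sent down the stack. The matching closure_put() is
 * expected in the bio's completion path.
 */
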
/*
 * Idle the calling kthread until kthread_stop() is called on it, keeping
 * its task_struct alive for whoever will stop it.
 */
static inline void wait_for_kthread_stop(void)
{
	while (!kthread_should_stop()) {
		set_current_state(TASK_INTERRUPTIBLE);
		schedule();
	}
}

void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio);
void bch_count_io_errors(struct cache *, blk_status_t, int, const char *);
void bch_bbio_count_io_errors(struct cache_set *, struct bio *,
			      blk_status_t, const char *);
void bch_bbio_endio(struct cache_set *, struct bio *, blk_status_t,
		    const char *);
void bch_bbio_free(struct bio *, struct cache_set *);
struct bio *bch_bbio_alloc(struct cache_set *);

void __bch_submit_bbio(struct bio *, struct cache_set *);
void bch_submit_bbio(struct bio *, struct cache_set *, struct bkey *, unsigned);

uint8_t bch_inc_gen(struct cache *, struct bucket *);
void bch_rescale_priorities(struct cache_set *, int);

bool bch_can_invalidate_bucket(struct cache *, struct bucket *);
void __bch_invalidate_one_bucket(struct cache *, struct bucket *);

void __bch_bucket_free(struct cache *, struct bucket *);
void bch_bucket_free(struct cache_set *, struct bkey *);

long bch_bucket_alloc(struct cache *, unsigned, bool);
int __bch_bucket_alloc_set(struct cache_set *, unsigned,
			   struct bkey *, int, bool);
int bch_bucket_alloc_set(struct cache_set *, unsigned,
			 struct bkey *, int, bool);
bool bch_alloc_sectors(struct cache_set *, struct bkey *, unsigned,
		       unsigned, unsigned, bool);
bool bch_cached_dev_error(struct cached_dev *dc);

__printf(2, 3)
bool bch_cache_set_error(struct cache_set *, const char *, ...);

void bch_prio_write(struct cache *);
void bch_write_bdev_super(struct cached_dev *, struct closure *);

extern struct workqueue_struct *bcache_wq;
extern struct mutex bch_register_lock;
extern struct list_head bch_cache_sets;

extern struct kobj_type bch_cached_dev_ktype;
extern struct kobj_type bch_flash_dev_ktype;
extern struct kobj_type bch_cache_set_ktype;
extern struct kobj_type bch_cache_set_internal_ktype;
extern struct kobj_type bch_cache_ktype;

void bch_cached_dev_release(struct kobject *);
void bch_flash_dev_release(struct kobject *);
void bch_cache_set_release(struct kobject *);
void bch_cache_release(struct kobject *);

int bch_uuid_write(struct cache_set *);
void bcache_write_super(struct cache_set *);

int bch_flash_dev_create(struct cache_set *c, uint64_t size);

int bch_cached_dev_attach(struct cached_dev *, struct cache_set *, uint8_t *);
void bch_cached_dev_detach(struct cached_dev *);
void bch_cached_dev_run(struct cached_dev *);
void bcache_device_stop(struct bcache_device *);

void bch_cache_set_unregister(struct cache_set *);
void bch_cache_set_stop(struct cache_set *);

struct cache_set *bch_cache_set_alloc(struct cache_sb *);
void bch_btree_cache_free(struct cache_set *);
int bch_btree_cache_alloc(struct cache_set *);
void bch_moving_init_cache_set(struct cache_set *);
int bch_open_buckets_alloc(struct cache_set *);
void bch_open_buckets_free(struct cache_set *);

int bch_cache_allocator_start(struct cache *ca);

void bch_debug_exit(void);
int bch_debug_init(struct kobject *);
void bch_request_exit(void);
int bch_request_init(void);

#endif /* _BCACHE_H */