#ifndef _BCACHE_H
#define _BCACHE_H

#define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__

#include <linux/bcache.h>
#include <linux/bio.h>
#include <linux/kobject.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/rbtree.h>
#include <linux/rwsem.h>
#include <linux/types.h>
#include <linux/workqueue.h>

#include "bset.h"
#include "util.h"
#include "closure.h"
struct bucket {
	atomic_t	pin;
	uint16_t	prio;
	uint8_t		gen;
	uint8_t		last_gc;	/* Most out of date gen in the btree */
	uint16_t	gc_mark;	/* Bitfield used by GC. See below for definition */
};

BITMASK(GC_MARK, struct bucket, gc_mark, 0, 2);
#define GC_MARK_RECLAIMABLE	1
#define GC_MARK_DIRTY		2
#define GC_MARK_METADATA	3
#define GC_SECTORS_USED_SIZE	13
#define MAX_GC_SECTORS_USED	(~(~0ULL << GC_SECTORS_USED_SIZE))
BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, GC_SECTORS_USED_SIZE);
BITMASK(GC_MOVE, struct bucket, gc_mark, 15, 1);
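
/*
 * gc_mark packs three GC-maintained fields into one 16 bit word: bits 0-1
 * hold the mark (reclaimable/dirty/metadata), bits 2-14 count sectors in
 * use, and bit 15 flags buckets being evacuated by moving gc.
 *
 * A minimal sketch of what BITMASK() (from bset.h) is assumed to expand to,
 * shown only for illustration:
 *
 *	static inline uint64_t GC_MARK(const struct bucket *k)
 *	{ return (k->gc_mark >> 0) & ~(~0ULL << 2); }
 *
 *	static inline void SET_GC_MARK(struct bucket *k, uint64_t v)
 *	{
 *		k->gc_mark &= ~(~(~0ULL << 2) << 0);
 *		k->gc_mark |= (v & ~(~0ULL << 2)) << 0;
 *	}
 */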

#include "journal.h"
#include "stats.h"
struct search;
struct btree;
struct keybuf;

struct keybuf_key {
	struct rb_node	node;
	BKEY_PADDED(key);
	void		*private;
};

struct keybuf {
	struct bkey	last_scanned;
	spinlock_t	lock;

	/*
	 * Bounds of the range of the keyspace this keybuf covers; keys
	 * outside [start, end] are never added.
	 */
	struct bkey	start;
	struct bkey	end;

	struct rb_root	keys;

#define KEYBUF_NR	500
	DECLARE_ARRAY_ALLOCATOR(struct keybuf_key, freelist, KEYBUF_NR);
};
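
/*
 * A keybuf batches up keys for background work: bch_refill_keybuf() scans
 * the btree forward from last_scanned for keys matching a predicate and
 * stashes them in the rb tree; consumers then pull keys off and process
 * them asynchronously. Writeback and moving gc both use one (see
 * writeback_keys and moving_gc_keys below).
 */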

struct bio_split_pool {
	struct bio_set		*bio_split;
	mempool_t		*bio_split_hook;
};

struct bio_split_hook {
	struct closure		cl;
	struct bio_split_pool	*p;
	struct bio		*bio;
	bio_end_io_t		*bi_end_io;
	void			*bi_private;
};

struct bcache_device {
	struct closure		cl;

	struct kobject		kobj;

	struct cache_set	*c;
	unsigned		id;
#define BCACHEDEVNAME_SIZE	12
	char			name[BCACHEDEVNAME_SIZE];

	struct gendisk		*disk;

	unsigned long		flags;
#define BCACHE_DEV_CLOSING	0
#define BCACHE_DEV_DETACHING	1
#define BCACHE_DEV_UNLINK_DONE	2

	unsigned		nr_stripes;
	unsigned		stripe_size;
	atomic_t		*stripe_sectors_dirty;
	unsigned long		*full_dirty_stripes;

	unsigned long		sectors_dirty_last;
	long			sectors_dirty_derivative;

	struct bio_set		*bio_split;

	unsigned		data_csum:1;

	int (*cache_miss)(struct btree *, struct search *,
			  struct bio *, unsigned);
	int (*ioctl) (struct bcache_device *, fmode_t, unsigned, unsigned long);

	struct bio_split_pool	bio_split_hook;
};

/* Tracks recent IO so sequential IO can be detected and bypass the cache */
struct io {
	struct hlist_node	hash;
	struct list_head	lru;

	unsigned long		jiffies;
	unsigned		sequential;
	sector_t		last;
};
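
/*
 * Sketch of how these are used (see check_should_bypass() in request.c):
 * an entry is looked up in io_hash by the sector a bio starts at; if a bio
 * begins where a previous one ended (i->last), its size is added to
 * i->sequential, and IO that grows past sequential_cutoff bypasses the
 * cache. io_lru recycles the oldest entries.
 */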

struct cached_dev {
	/* On cache_set->cached_devs */
	struct list_head	list;
	struct bcache_device	disk;
	struct block_device	*bdev;

	struct cache_sb		sb;
	struct bio		sb_bio;
	struct bio_vec		sb_bv[1];
	struct closure		sb_write;
	struct semaphore	sb_write_mutex;

	/* Refcount on the cache set. Always nonzero when we're caching. */
	atomic_t		count;
	struct work_struct	detach;

	/* Set once bch_cached_dev_run() has started the device */
	atomic_t		running;

	/*
	 * Writes take a shared lock from start to finish; scanning for
	 * dirty data to refill writeback_keys takes an exclusive lock.
	 */
	struct rw_semaphore	writeback_lock;

	/* Nonzero iff there is dirty data in the cache for this device */
	atomic_t		has_dirty;

	struct bch_ratelimit	writeback_rate;
	struct delayed_work	writeback_rate_update;

	/*
	 * End of the last dirty extent writeback read, used to detect
	 * contiguous dirty data.
	 */
	sector_t		last_read;

	/* Limits the number of writeback bios in flight */
	struct semaphore	in_flight;
	struct task_struct	*writeback_thread;

	struct keybuf		writeback_keys;

#define RECENT_IO_BITS	7
#define RECENT_IO	(1 << RECENT_IO_BITS)
	struct io		io[RECENT_IO];
	struct hlist_head	io_hash[RECENT_IO + 1];
	struct list_head	io_lru;
	spinlock_t		io_lock;

	struct cache_accounting	accounting;

	/* The rest of this all shows up in sysfs */
	unsigned		sequential_cutoff;
	unsigned		readahead;

	unsigned		verify:1;
	unsigned		bypass_torture_test:1;

	unsigned		partial_stripes_expensive:1;
	unsigned		writeback_metadata:1;
	unsigned		writeback_running:1;
	unsigned char		writeback_percent;
	unsigned		writeback_delay;

	uint64_t		writeback_rate_target;
	int64_t			writeback_rate_proportional;
	int64_t			writeback_rate_derivative;
	int64_t			writeback_rate_change;

	unsigned		writeback_rate_update_seconds;
	unsigned		writeback_rate_d_term;
	unsigned		writeback_rate_p_term_inverse;
};
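
/*
 * The writeback_rate_* fields above implement a simple PD controller (see
 * __update_writeback_rate() in writeback.c). A sketch of the shape of the
 * computation, not the exact arithmetic:
 *
 *	error        = sectors_dirty - writeback_rate_target;
 *	proportional = error / writeback_rate_p_term_inverse;
 *	derivative   = smoothed(d(error)/dt) / writeback_rate_d_term;
 *	new rate     = old rate + proportional + derivative;
 *
 * It is recomputed every writeback_rate_update_seconds, with the
 * intermediate terms saved in writeback_rate_proportional, _derivative
 * and _change so they can be inspected via sysfs.
 */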

enum alloc_reserve {
	RESERVE_BTREE,
	RESERVE_PRIO,
	RESERVE_MOVINGGC,
	RESERVE_NONE,
	RESERVE_NR,
};

struct cache {
	struct cache_set	*set;
	struct cache_sb		sb;
	struct bio		sb_bio;
	struct bio_vec		sb_bv[1];

	struct kobject		kobj;
	struct block_device	*bdev;

	struct task_struct	*alloc_thread;

	struct closure		prio;
	struct prio_set		*disk_buckets;

	/*
	 * prio_buckets holds the buckets the most recent prios/gens were
	 * written to (so gc can mark them as metadata); prio_last_buckets
	 * holds the buckets from the previous pass, which must stay valid
	 * until the new write completes.
	 */
	uint64_t		*prio_buckets;
	uint64_t		*prio_last_buckets;

	/*
	 * free: buckets that are ready to be used, one fifo per reserve.
	 *
	 * free_inc: buckets that have been invalidated but can't be reused
	 * until their new gens have been written to disk by prio_write().
	 */
	DECLARE_FIFO(long, free)[RESERVE_NR];
	DECLARE_FIFO(long, free_inc);

	size_t			fifo_last_bucket;

	/* Allocation stuff: */
	struct bucket		*buckets;

	DECLARE_HEAP(struct bucket *, heap);

	/*
	 * If nonzero, we know we aren't going to find any buckets to
	 * invalidate until a gc finishes - otherwise we could pointlessly
	 * burn a ton of cpu.
	 */
	unsigned		invalidate_needs_gc:1;

	bool			discard;

	struct journal_device	journal;

	/* The rest of this all shows up in sysfs */
#define IO_ERROR_SHIFT		20
	atomic_t		io_errors;
	atomic_t		io_count;

	atomic_long_t		meta_sectors_written;
	atomic_long_t		btree_sectors_written;
	atomic_long_t		sectors_written;

	struct bio_split_pool	bio_split_hook;
};
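
/*
 * io_errors above is kept as a fixed point number with IO_ERROR_SHIFT
 * fractional bits: each error adds 1 << IO_ERROR_SHIFT, and the count is
 * decayed as io_count grows, so only a sustained error rate trips the
 * cache set's error_limit. This is a sketch of the mechanism; see
 * bch_count_io_errors() for the exact arithmetic.
 */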

struct gc_stat {
	size_t			nodes;
	size_t			key_bytes;

	size_t			nkeys;
	uint64_t		data;	/* sectors */
	unsigned		in_use; /* percent */
};

/*
 * Flag bits for cache_set->flags, tracking where the cache set is in
 * startup/shutdown.
 */
#define CACHE_SET_UNREGISTERING		0
#define	CACHE_SET_STOPPING		1
#define	CACHE_SET_RUNNING		2

struct cache_set {
	struct closure		cl;

	struct list_head	list;
	struct kobject		kobj;
	struct kobject		internal;
	struct dentry		*debug;
	struct cache_accounting accounting;

	unsigned long		flags;

	struct cache_sb		sb;

	struct cache		*cache[MAX_CACHES_PER_SET];
	struct cache		*cache_by_alloc[MAX_CACHES_PER_SET];
	int			caches_loaded;

	struct bcache_device	**devices;
	struct list_head	cached_devs;
	uint64_t		cached_dev_sectors;
	struct closure		caching;

	struct closure		sb_write;
	struct semaphore	sb_write_mutex;

	mempool_t		*search;
	mempool_t		*bio_meta;
	struct bio_set		*bio_split;

	/* For the btree cache */
	struct shrinker		shrink;

	/* For the btree cache and anything allocation related */
	struct mutex		bucket_lock;

	/* log2(bucket_size), in sectors */
	unsigned short		bucket_bits;

	/* log2(block_size), in sectors */
	unsigned short		block_bits;

	/*
	 * Default number of pages for a new btree node - may be less than a
	 * full bucket
	 */
	unsigned		btree_pages;

	/*
	 * Lists of struct btrees; btree_cache holds nodes that have memory
	 * allocated for the actual btree node, btree_cache_freed holds ones
	 * that don't. struct btrees are never freed before shutdown - they
	 * go on btree_cache_freed and get reused.
	 */
	struct list_head	btree_cache;
	struct list_head	btree_cache_freeable;
	struct list_head	btree_cache_freed;

	/* Number of elements in btree_cache + btree_cache_freeable lists */
	unsigned		btree_cache_used;

	/*
	 * If we need to allocate memory for a new btree node and that
	 * allocation fails, we can cannibalize another node in the btree
	 * cache to satisfy the allocation - these guarantee only one thread
	 * does that at a time:
	 */
	wait_queue_head_t	btree_cache_wait;
	struct task_struct	*btree_cache_alloc_lock;

	/*
	 * When we free a btree node we increment the gen of the bucket it
	 * was in - but the new prios and gens must not be written out until
	 * the keys pointing to the replacement nodes have been inserted;
	 * this refcount blocks prio_write() until then.
	 */
	atomic_t		prio_blocked;
	wait_queue_head_t	bucket_wait;

	/*
	 * For any bio we don't skip we subtract the number of sectors from
	 * rescale; when it hits 0 we rescale all the bucket priorities.
	 */
	atomic_t		rescale;

	/*
	 * The smallest nonzero priority of any bucket; used when deciding
	 * which buckets to invalidate next.
	 */
	uint16_t		min_prio;

	/*
	 * max(gen - last_gc) over all buckets; when it gets too big we have
	 * to gc before the gens wrap around.
	 */
	uint8_t			need_gc;
	struct gc_stat		gc_stats;
	size_t			nbuckets;

	struct task_struct	*gc_thread;

	/* Where in the btree gc currently is */
	struct bkey		gc_done;

	/*
	 * The allocation code needs gc_mark in struct bucket to be correct,
	 * but it's not while a gc is in progress. Protected by bucket_lock.
	 */
	int			gc_mark_valid;

	/* Counts how many sectors bio_insert has added to the cache */
	atomic_t		sectors_to_gc;

	wait_queue_head_t	moving_gc_wait;
	struct keybuf		moving_gc_keys;

	struct semaphore	moving_in_flight;

	struct workqueue_struct	*moving_gc_wq;

	struct btree		*root;

#ifdef CONFIG_BCACHE_DEBUG
	struct btree		*verify_data;
	struct bset		*verify_ondisk;
	struct mutex		verify_lock;
#endif

	unsigned		nr_uuids;
	struct uuid_entry	*uuids;
	BKEY_PADDED(uuid_bucket);
	struct closure		uuid_write;
	struct semaphore	uuid_write_mutex;

	/*
	 * A btree node on disk could have too many bsets for an iterator to
	 * fit on the stack - have to dynamically allocate them.
	 */
	mempool_t		*fill_iter;

	struct bset_sort_state	sort;

	/* List of buckets we're currently writing data to */
	struct list_head	data_buckets;
	spinlock_t		data_bucket_lock;

	struct journal		journal;

#define CONGESTED_MAX		1024
	unsigned		congested_last_us;
	atomic_t		congested;

	/* The rest of this all shows up in sysfs */
	unsigned		congested_read_threshold_us;
	unsigned		congested_write_threshold_us;

	struct time_stats	btree_gc_time;
	struct time_stats	btree_split_time;
	struct time_stats	btree_read_time;

	atomic_long_t		cache_read_races;
	atomic_long_t		writeback_keys_done;
	atomic_long_t		writeback_keys_failed;

	enum {
		ON_ERROR_UNREGISTER,
		ON_ERROR_PANIC,
	}			on_error;
	unsigned		error_limit;
	unsigned		error_decay;

	unsigned short		journal_delay_ms;
	bool			expensive_debug_checks;
	unsigned		verify:1;
	unsigned		key_merging_disabled:1;
	unsigned		gc_always_rewrite:1;
	unsigned		shrinker_disabled:1;
	unsigned		copy_gc_enabled:1;

#define BUCKET_HASH_BITS	12
	struct hlist_head	bucket_hash[1 << BUCKET_HASH_BITS];
};

struct bbio {
	unsigned		submit_time_us;
	union {
		struct bkey	key;
		uint64_t	_pad[3];
		/*
		 * We only need pad = 3 here because we only ever carry around
		 * a single pointer - i.e. the pointer we're doing io to/from.
		 */
	};
	struct bio		bio;
};

#define BTREE_PRIO		USHRT_MAX
#define INITIAL_PRIO		32768U

#define btree_bytes(c)		((c)->btree_pages * PAGE_SIZE)
#define btree_blocks(b)							\
	((unsigned) (KEY_SIZE(&b->key) >> (b)->c->block_bits))

#define btree_default_blocks(c)						\
	((unsigned) ((PAGE_SECTORS * (c)->btree_pages) >> (c)->block_bits))

#define bucket_pages(c)		((c)->sb.bucket_size / PAGE_SECTORS)
#define bucket_bytes(c)		((c)->sb.bucket_size << 9)
#define block_bytes(c)		((c)->sb.block_size << 9)

#define prios_per_bucket(c)				\
	((bucket_bytes(c) - sizeof(struct prio_set)) /	\
	 sizeof(struct bucket_disk))
#define prio_buckets(c)					\
	DIV_ROUND_UP((size_t) (c)->sb.nbuckets, prios_per_bucket(c))
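
/*
 * Worked example (illustrative, assuming 1024 sector buckets and 4k pages):
 * bucket_bytes() = 1024 << 9 = 512 KiB, bucket_pages() = 1024 / 8 = 128.
 * Each on-disk prio entry (struct bucket_disk) is only a few bytes, so one
 * 512 KiB prio bucket holds on the order of a hundred thousand entries, and
 * prio_buckets() is nbuckets divided by that, rounded up.
 */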

static inline size_t sector_to_bucket(struct cache_set *c, sector_t s)
{
	return s >> c->bucket_bits;
}

static inline sector_t bucket_to_sector(struct cache_set *c, size_t b)
{
	return ((sector_t) b) << c->bucket_bits;
}

static inline sector_t bucket_remainder(struct cache_set *c, sector_t s)
{
	return s & (c->sb.bucket_size - 1);
}
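
/*
 * Example (illustrative): with 1024 sector buckets, bucket_bits = 10, so
 * sector_to_bucket(c, 5000) = 5000 >> 10 = 4, bucket_to_sector(c, 4) = 4096,
 * and bucket_remainder(c, 5000) = 5000 & 1023 = 904. Note the mask in
 * bucket_remainder() relies on bucket_size being a power of two.
 */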

static inline struct cache *PTR_CACHE(struct cache_set *c,
				      const struct bkey *k,
				      unsigned ptr)
{
	return c->cache[PTR_DEV(k, ptr)];
}

static inline size_t PTR_BUCKET_NR(struct cache_set *c,
				   const struct bkey *k,
				   unsigned ptr)
{
	return sector_to_bucket(c, PTR_OFFSET(k, ptr));
}

static inline struct bucket *PTR_BUCKET(struct cache_set *c,
					const struct bkey *k,
					unsigned ptr)
{
	return PTR_CACHE(c, k, ptr)->buckets + PTR_BUCKET_NR(c, k, ptr);
}

static inline uint8_t gen_after(uint8_t a, uint8_t b)
{
	uint8_t r = a - b;
	return r > 128U ? 0 : r;
}
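
/*
 * Example (illustrative): gens live in 8 bits and wrap, so comparisons are
 * mod 256 with a window of 128. gen_after(2, 250) computes 2 - 250 = 8
 * (mod 256), which is <= 128, so it returns 8: a is 8 gens ahead of b.
 * gen_after(250, 2) computes 248 > 128, treated as "not after", returning 0.
 */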

static inline uint8_t ptr_stale(struct cache_set *c, const struct bkey *k,
				unsigned i)
{
	return gen_after(PTR_BUCKET(c, k, i)->gen, PTR_GEN(k, i));
}

static inline bool ptr_available(struct cache_set *c, const struct bkey *k,
				 unsigned i)
{
	return (PTR_DEV(k, i) < MAX_CACHES_PER_SET) && PTR_CACHE(c, k, i);
}

#define csum_set(i)							\
	bch_crc64(((void *) (i)) + sizeof(uint64_t),			\
		  ((void *) bset_bkey_last(i)) -			\
		  (((void *) (i)) + sizeof(uint64_t)))
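
/*
 * Note: the on disk structures this is used for store a 64 bit checksum as
 * their first field, so csum_set() checksums from just past that field up
 * to the end of the last bkey - the checksum never covers itself.
 */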

/* Error handling macros */

#define btree_bug(b, ...)						\
do {									\
	if (bch_cache_set_error((b)->c, __VA_ARGS__))			\
		dump_stack();						\
} while (0)

#define cache_bug(c, ...)						\
do {									\
	if (bch_cache_set_error(c, __VA_ARGS__))			\
		dump_stack();						\
} while (0)

#define btree_bug_on(cond, b, ...)					\
do {									\
	if (cond)							\
		btree_bug(b, __VA_ARGS__);				\
} while (0)

#define cache_bug_on(cond, c, ...)					\
do {									\
	if (cond)							\
		cache_bug(c, __VA_ARGS__);				\
} while (0)

#define cache_set_err_on(cond, c, ...)					\
do {									\
	if (cond)							\
		bch_cache_set_error(c, __VA_ARGS__);			\
} while (0)

/* Looping macros */

#define for_each_cache(ca, cs, iter)					\
	for (iter = 0; ca = cs->cache[iter], iter < (cs)->sb.nr_in_set; iter++)
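
/*
 * Note on for_each_cache(): the comma expression assigns ca before testing
 * the bound, so the loop body always sees ca pointing at the slot for the
 * current iter.
 */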

#define for_each_bucket(b, ca)						\
	for (b = (ca)->buckets + (ca)->sb.first_bucket;			\
	     b < (ca)->buckets + (ca)->sb.nbuckets; b++)

static inline void cached_dev_put(struct cached_dev *dc)
{
	if (atomic_dec_and_test(&dc->count))
		schedule_work(&dc->detach);
}

static inline bool cached_dev_get(struct cached_dev *dc)
{
	if (!atomic_inc_not_zero(&dc->count))
		return false;

	/* Paired with the mb in cached_dev_attach */
	smp_mb__after_atomic();
	return true;
}
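
/*
 * Lifecycle sketch (illustrative): dc->count is the "attached to a cache
 * set" refcount. cached_dev_get() fails once the count has dropped to
 * zero, and the final cached_dev_put() schedules dc->detach to complete
 * the detach. Typical use in IO paths:
 *
 *	if (cached_dev_get(dc)) {
 *		... issue cached IO ...
 *		cached_dev_put(dc);
 *	} else {
 *		... submit straight to the backing device ...
 *	}
 */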

/*
 * bucket_gc_gen() returns the difference between the bucket's current gen
 * and the oldest gen of any pointer into that bucket in the btree
 * (last_gc).
 */
static inline uint8_t bucket_gc_gen(struct bucket *b)
{
	return b->gen - b->last_gc;
}

#define BUCKET_GC_GEN_MAX	96U
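
/*
 * Why cap at 96: gens are 8 bit and compared with gen_after(), which only
 * works over a window of 128, so garbage collection must run (and update
 * last_gc) before any bucket's gen gets ~128 ahead of last_gc.
 * BUCKET_GC_GEN_MAX leaves headroom below that limit; see also
 * invalidate_needs_gc in struct cache.
 */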

#define kobj_attribute_write(n, fn)					\
	static struct kobj_attribute ksysfs_##n = __ATTR(n, S_IWUSR, NULL, fn)

#define kobj_attribute_rw(n, show, store)				\
	static struct kobj_attribute ksysfs_##n =			\
		__ATTR(n, S_IWUSR|S_IRUSR, show, store)

static inline void wake_up_allocators(struct cache_set *c)
{
	struct cache *ca;
	unsigned i;

	for_each_cache(ca, c, i)
		wake_up_process(ca->alloc_thread);
}

/* Forward declarations */

void bch_count_io_errors(struct cache *, int, const char *);
void bch_bbio_count_io_errors(struct cache_set *, struct bio *,
			      int, const char *);
void bch_bbio_endio(struct cache_set *, struct bio *, int, const char *);
void bch_bbio_free(struct bio *, struct cache_set *);
struct bio *bch_bbio_alloc(struct cache_set *);

void bch_generic_make_request(struct bio *, struct bio_split_pool *);
void __bch_submit_bbio(struct bio *, struct cache_set *);
void bch_submit_bbio(struct bio *, struct cache_set *, struct bkey *, unsigned);

uint8_t bch_inc_gen(struct cache *, struct bucket *);
void bch_rescale_priorities(struct cache_set *, int);

bool bch_can_invalidate_bucket(struct cache *, struct bucket *);
void __bch_invalidate_one_bucket(struct cache *, struct bucket *);

void __bch_bucket_free(struct cache *, struct bucket *);
void bch_bucket_free(struct cache_set *, struct bkey *);

long bch_bucket_alloc(struct cache *, unsigned, bool);
int __bch_bucket_alloc_set(struct cache_set *, unsigned,
			   struct bkey *, int, bool);
int bch_bucket_alloc_set(struct cache_set *, unsigned,
			 struct bkey *, int, bool);
bool bch_alloc_sectors(struct cache_set *, struct bkey *, unsigned,
		       unsigned, unsigned, bool);

__printf(2, 3)
bool bch_cache_set_error(struct cache_set *, const char *, ...);

void bch_prio_write(struct cache *);
void bch_write_bdev_super(struct cached_dev *, struct closure *);

extern struct workqueue_struct *bcache_wq;
extern const char * const bch_cache_modes[];
extern struct mutex bch_register_lock;
extern struct list_head bch_cache_sets;

extern struct kobj_type bch_cached_dev_ktype;
extern struct kobj_type bch_flash_dev_ktype;
extern struct kobj_type bch_cache_set_ktype;
extern struct kobj_type bch_cache_set_internal_ktype;
extern struct kobj_type bch_cache_ktype;

void bch_cached_dev_release(struct kobject *);
void bch_flash_dev_release(struct kobject *);
void bch_cache_set_release(struct kobject *);
void bch_cache_release(struct kobject *);

int bch_uuid_write(struct cache_set *);
void bcache_write_super(struct cache_set *);

int bch_flash_dev_create(struct cache_set *c, uint64_t size);

int bch_cached_dev_attach(struct cached_dev *, struct cache_set *);
void bch_cached_dev_detach(struct cached_dev *);
void bch_cached_dev_run(struct cached_dev *);
void bcache_device_stop(struct bcache_device *);

void bch_cache_set_unregister(struct cache_set *);
void bch_cache_set_stop(struct cache_set *);

struct cache_set *bch_cache_set_alloc(struct cache_sb *);
void bch_btree_cache_free(struct cache_set *);
int bch_btree_cache_alloc(struct cache_set *);
void bch_moving_init_cache_set(struct cache_set *);
int bch_open_buckets_alloc(struct cache_set *);
void bch_open_buckets_free(struct cache_set *);

int bch_cache_allocator_start(struct cache *ca);

void bch_debug_exit(void);
int bch_debug_init(struct kobject *);
void bch_request_exit(void);
int bch_request_init(void);

#endif /* _BCACHE_H */