// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2012 Fusion-io  All rights reserved.
 * Copyright (C) 2012 Intel Corp. All rights reserved.
 */

#include <linux/sched.h>
#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/raid/pq.h>
#include <linux/hash.h>
#include <linux/list_sort.h>
#include <linux/raid/xor.h>
#include <linux/mm.h>
#include "misc.h"
#include "ctree.h"
#include "disk-io.h"
#include "volumes.h"
#include "raid56.h"
#include "async-thread.h"

/* set when additional merges to this rbio are not allowed */
#define RBIO_RMW_LOCKED_BIT	1

/*
 * set when this rbio is sitting in the hash, but it is just a cache
 * of past RMW
 */
#define RBIO_CACHE_BIT		2

/*
 * set when it is safe to trust the stripe_pages for caching
 */
#define RBIO_CACHE_READY_BIT	3

#define RBIO_CACHE_SIZE 1024

#define BTRFS_STRIPE_HASH_TABLE_BITS	11

/* Used by the raid56 code to lock stripes for read/modify/write */
struct btrfs_stripe_hash {
	struct list_head hash_list;
	spinlock_t lock;
};

/* Used by the raid56 code to lock stripes for read/modify/write */
struct btrfs_stripe_hash_table {
	struct list_head stripe_cache;
	spinlock_t cache_lock;
	int cache_size;
	struct btrfs_stripe_hash table[];
};

enum btrfs_rbio_ops {
	BTRFS_RBIO_WRITE,
	BTRFS_RBIO_READ_REBUILD,
	BTRFS_RBIO_PARITY_SCRUB,
	BTRFS_RBIO_REBUILD_MISSING,
};

struct btrfs_raid_bio {
	struct btrfs_io_context *bioc;

	/*
	 * while we're doing rmw on a stripe we put it into a hash
	 * table so we can lock the stripe and merge more rbios
	 * into it.
	 */
	struct list_head hash_list;

	/*
	 * LRU list for the stripe cache
	 */
	struct list_head stripe_cache;

	/*
	 * for scheduling work in the helper threads
	 */
	struct btrfs_work work;

	/*
	 * bio list and bio_list_lock are used to add more bios into
	 * the stripe in hopes of avoiding the full rmw
	 */
	struct bio_list bio_list;
	spinlock_t bio_list_lock;

	/*
	 * also protected by the bio_list_lock, the plug list is used
	 * by the plugging code to collect partial bios while plugged.
	 * The stripe locking code also uses it to hand off the stripe
	 * lock to the next pending IO.
	 */
	struct list_head plug_list;

	/*
	 * flags that tell us if it is safe to merge with this bio
	 */
	unsigned long flags;

	/* size of each individual stripe on disk */
	int stripe_len;

	/* number of data stripes (no p/q) */
	int nr_data;

	/* number of stripes in the full stripe, including P/Q */
	int real_stripes;

	/* number of pages per stripe */
	int stripe_npages;

	/*
	 * set if we're doing a parity rebuild for a read from higher
	 * up, which is handled differently from a parity rebuild as
	 * part of rmw
	 */
	enum btrfs_rbio_ops operation;

	/* first bad stripe */
	int faila;

	/* second bad stripe (for raid6 use) */
	int failb;

	/* stripe number that we're scrubbing */
	int scrubp;

	/*
	 * number of pages needed to represent the full stripe
	 */
	int nr_pages;

	/*
	 * size of all the bios in the bio_list.  This helps us decide
	 * if the rbio maps to a full stripe or not.
	 */
	int bio_list_bytes;

	int generic_bio_cnt;

	refcount_t refs;

	atomic_t stripes_pending;

	atomic_t error;

	/*
	 * The arrays below live in one block of memory allocated past
	 * the end of the rbio; alloc_rbio() sets up their locations.
	 */

	/*
	 * pointers to pages that we allocated for reading/writing
	 * stripes directly from the disk (including P/Q)
	 */
	struct page **stripe_pages;

	/*
	 * pointers to the pages in the bio_list.  Our own pages are
	 * not included here.
	 */
	struct page **bio_pages;

	/*
	 * bitmap to record which horizontal stripe has data
	 */
	unsigned long *dbitmap;

	/* allocated with real_stripes-many pointers for finish_*() calls */
	void **finish_pointers;

	/* allocated with stripe_npages-many bits for finish_*() calls */
	unsigned long *finish_pbitmap;
};

static int __raid56_parity_recover(struct btrfs_raid_bio *rbio);
static noinline void finish_rmw(struct btrfs_raid_bio *rbio);
static void rmw_work(struct btrfs_work *work);
static void read_rebuild_work(struct btrfs_work *work);
static int fail_bio_stripe(struct btrfs_raid_bio *rbio, struct bio *bio);
static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed);
static void __free_raid_bio(struct btrfs_raid_bio *rbio);
static void index_rbio_pages(struct btrfs_raid_bio *rbio);
static int alloc_rbio_pages(struct btrfs_raid_bio *rbio);

static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
					 int need_check);
static void scrub_parity_work(struct btrfs_work *work);

static void start_async_work(struct btrfs_raid_bio *rbio, btrfs_func_t work_func)
{
	btrfs_init_work(&rbio->work, work_func, NULL, NULL);
	btrfs_queue_work(rbio->bioc->fs_info->rmw_workers, &rbio->work);
}

/*
 * the stripe hash table is used for locking, and to collect
 * bios in hopes of making a full stripe
 */
int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
{
	struct btrfs_stripe_hash_table *table;
	struct btrfs_stripe_hash_table *x;
	struct btrfs_stripe_hash *cur;
	struct btrfs_stripe_hash *h;
	int num_entries = 1 << BTRFS_STRIPE_HASH_TABLE_BITS;
	int i;

	if (info->stripe_hash_table)
		return 0;

	/*
	 * The table is large, starting with order 4 and can go as high as
	 * order 7 in case lock debugging is turned on.
	 *
	 * Try harder to allocate and fallback to vmalloc to lower the chance
	 * of a failing mount.
	 */
	table = kvzalloc(struct_size(table, table, num_entries), GFP_KERNEL);
	if (!table)
		return -ENOMEM;

	spin_lock_init(&table->cache_lock);
	INIT_LIST_HEAD(&table->stripe_cache);

	h = table->table;

	for (i = 0; i < num_entries; i++) {
		cur = h + i;
		INIT_LIST_HEAD(&cur->hash_list);
		spin_lock_init(&cur->lock);
	}

	x = cmpxchg(&info->stripe_hash_table, NULL, table);
	kvfree(x);
	return 0;
}

/*
 * caching an rbio means to copy anything from the bio_pages array
 * into the stripe_pages array.  We use the page uptodate bit in the
 * stripe cache array to indicate if it has valid data.
 *
 * once the caching is done, we set the cache ready bit.
 */
static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
{
	int i;
	int ret;

	ret = alloc_rbio_pages(rbio);
	if (ret)
		return;

	for (i = 0; i < rbio->nr_pages; i++) {
		if (!rbio->bio_pages[i])
			continue;

		copy_highpage(rbio->stripe_pages[i], rbio->bio_pages[i]);
		SetPageUptodate(rbio->stripe_pages[i]);
	}
	set_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
}

/*
 * we hash on the first logical address of the stripe
 */
static int rbio_bucket(struct btrfs_raid_bio *rbio)
{
	u64 num = rbio->bioc->raid_map[0];

	/*
	 * we shift down quite a bit.  We're using byte
	 * addressing, and most of the lower bits are zeros.
	 * This tends to upset hash_64, and it consistently
	 * returned just one or two different values.
	 *
	 * shifting off the lower bits fixes things.
	 */
	return hash_64(num >> 16, BTRFS_STRIPE_HASH_TABLE_BITS);
}
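
/*
 * Example (illustrative, not from the original source): full stripes at
 * logical 0x10000000 and 0x10080000 both have all-zero low bits, but
 * after the '>> 16' they become 0x1000 and 0x1008, so hash_64() still
 * sees distinct inputs while the always-zero bits are discarded.
 */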

/*
 * stealing an rbio means taking all the uptodate pages from the stripe
 * array in the source rbio and putting them into the destination rbio
 */
static void steal_rbio(struct btrfs_raid_bio *src, struct btrfs_raid_bio *dest)
{
	int i;
	struct page *s;
	struct page *d;

	if (!test_bit(RBIO_CACHE_READY_BIT, &src->flags))
		return;

	for (i = 0; i < dest->nr_pages; i++) {
		s = src->stripe_pages[i];
		if (!s || !PageUptodate(s))
			continue;

		d = dest->stripe_pages[i];
		if (d)
			__free_page(d);

		dest->stripe_pages[i] = s;
		src->stripe_pages[i] = NULL;
	}
}

/*
 * merging means we take the bio_list from the victim and
 * splice it into the destination.  The victim should
 * be discarded afterwards.
 *
 * must be called with dest->bio_list_lock held
 */
static void merge_rbio(struct btrfs_raid_bio *dest,
		       struct btrfs_raid_bio *victim)
{
	bio_list_merge(&dest->bio_list, &victim->bio_list);
	dest->bio_list_bytes += victim->bio_list_bytes;
	dest->generic_bio_cnt += victim->generic_bio_cnt;
	bio_list_init(&victim->bio_list);
}

/*
 * used to prune items that are in the cache.  The caller
 * must hold the hash table lock.
 */
static void __remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
{
	int bucket = rbio_bucket(rbio);
	struct btrfs_stripe_hash_table *table;
	struct btrfs_stripe_hash *h;
	int freeit = 0;

	/*
	 * check the bit again under the hash table lock.
	 */
	if (!test_bit(RBIO_CACHE_BIT, &rbio->flags))
		return;

	table = rbio->bioc->fs_info->stripe_hash_table;
	h = table->table + bucket;

	/*
	 * hold the lock for the bucket because we may be
	 * removing it from the hash table
	 */
	spin_lock(&h->lock);

	/*
	 * hold the lock for the bio list because we need
	 * to make sure the bio list is empty
	 */
	spin_lock(&rbio->bio_list_lock);

	if (test_and_clear_bit(RBIO_CACHE_BIT, &rbio->flags)) {
		list_del_init(&rbio->stripe_cache);
		table->cache_size -= 1;
		freeit = 1;

		/*
		 * if the bio list isn't empty, this rbio is
		 * still involved in an IO.  We take it out
		 * of the cache list, and drop the ref that
		 * was held for the list.
		 *
		 * If the bio_list was empty, we also remove
		 * the rbio from the hash_list, and drop
		 * the corresponding ref.
		 */
		if (bio_list_empty(&rbio->bio_list)) {
			if (!list_empty(&rbio->hash_list)) {
				list_del_init(&rbio->hash_list);
				refcount_dec(&rbio->refs);
				BUG_ON(!list_empty(&rbio->plug_list));
			}
		}
	}

	spin_unlock(&rbio->bio_list_lock);
	spin_unlock(&h->lock);

	if (freeit)
		__free_raid_bio(rbio);
}

/*
 * prune a given rbio from the cache
 */
static void remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
{
	struct btrfs_stripe_hash_table *table;
	unsigned long flags;

	if (!test_bit(RBIO_CACHE_BIT, &rbio->flags))
		return;

	table = rbio->bioc->fs_info->stripe_hash_table;

	spin_lock_irqsave(&table->cache_lock, flags);
	__remove_rbio_from_cache(rbio);
	spin_unlock_irqrestore(&table->cache_lock, flags);
}

/*
 * remove everything in the cache
 */
static void btrfs_clear_rbio_cache(struct btrfs_fs_info *info)
{
	struct btrfs_stripe_hash_table *table;
	unsigned long flags;
	struct btrfs_raid_bio *rbio;

	table = info->stripe_hash_table;

	spin_lock_irqsave(&table->cache_lock, flags);
	while (!list_empty(&table->stripe_cache)) {
		rbio = list_entry(table->stripe_cache.next,
				  struct btrfs_raid_bio,
				  stripe_cache);
		__remove_rbio_from_cache(rbio);
	}
	spin_unlock_irqrestore(&table->cache_lock, flags);
}

/*
 * remove all cached entries and free the hash table
 * used by unmount
 */
void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info)
{
	if (!info->stripe_hash_table)
		return;
	btrfs_clear_rbio_cache(info);
	kvfree(info->stripe_hash_table);
	info->stripe_hash_table = NULL;
}

/*
 * insert an rbio into the stripe cache.  It
 * must have already been prepared by calling
 * cache_rbio_pages
 *
 * If this rbio was already cached, it gets
 * moved to the front of the lru.
 *
 * If the size of the rbio cache is too big, we
 * prune an item.
 */
static void cache_rbio(struct btrfs_raid_bio *rbio)
{
	struct btrfs_stripe_hash_table *table;
	unsigned long flags;

	if (!test_bit(RBIO_CACHE_READY_BIT, &rbio->flags))
		return;

	table = rbio->bioc->fs_info->stripe_hash_table;

	spin_lock_irqsave(&table->cache_lock, flags);
	spin_lock(&rbio->bio_list_lock);

	/* bump our ref if we were not in the list before */
	if (!test_and_set_bit(RBIO_CACHE_BIT, &rbio->flags))
		refcount_inc(&rbio->refs);

	if (!list_empty(&rbio->stripe_cache)) {
		list_move(&rbio->stripe_cache, &table->stripe_cache);
	} else {
		list_add(&rbio->stripe_cache, &table->stripe_cache);
		table->cache_size += 1;
	}

	spin_unlock(&rbio->bio_list_lock);

	if (table->cache_size > RBIO_CACHE_SIZE) {
		struct btrfs_raid_bio *found;

		found = list_entry(table->stripe_cache.prev,
				   struct btrfs_raid_bio,
				   stripe_cache);

		if (found != rbio)
			__remove_rbio_from_cache(found);
	}

	spin_unlock_irqrestore(&table->cache_lock, flags);
}

/*
 * helper function to run the xor_blocks api.  It is only
 * able to do MAX_XOR_BLOCKS at a time, so we need to
 * loop through.
 */
static void run_xor(void **pages, int src_cnt, ssize_t len)
{
	int src_off = 0;
	int xor_src_cnt = 0;
	void *dest = pages[src_cnt];

	while (src_cnt > 0) {
		xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS);
		xor_blocks(xor_src_cnt, len, dest, pages + src_off);

		src_cnt -= xor_src_cnt;
		src_off += xor_src_cnt;
	}
}
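
/*
 * Illustrative sketch (not part of the original source): run_xor()
 * treats pages[0..src_cnt-1] as sources and pages[src_cnt] as the
 * destination, so RAID5 parity over nr_data stripes is generated
 * roughly like this (see the RAID5 branch of finish_rmw() below):
 *
 *	copy_page(pointers[nr_data], pointers[0]);	// P = D0
 *	run_xor(pointers + 1, nr_data - 1, PAGE_SIZE);	// P ^= D1..Dn-1
 */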

/*
 * returns 1 if the bio list inside this rbio covers an entire stripe
 * (no rmw required).  The bio list lock is taken to get a stable view
 * of bio_list_bytes.
 */
static int rbio_is_full(struct btrfs_raid_bio *rbio)
{
	unsigned long flags;
	unsigned long size = rbio->bio_list_bytes;
	int ret = 1;

	spin_lock_irqsave(&rbio->bio_list_lock, flags);
	if (size != rbio->nr_data * rbio->stripe_len)
		ret = 0;
	BUG_ON(size > rbio->nr_data * rbio->stripe_len);
	spin_unlock_irqrestore(&rbio->bio_list_lock, flags);

	return ret;
}

/*
 * returns 1 if it is safe to merge two rbios together.
 * The merging is safe if the two rbios correspond to
 * the same stripe and if they are both going in the same
 * direction (read vs write), and if neither one is
 * locked for final IO
 *
 * The caller is responsible for locking such that
 * rmw_locked is safe to test
 */
static int rbio_can_merge(struct btrfs_raid_bio *last,
			  struct btrfs_raid_bio *cur)
{
	if (test_bit(RBIO_RMW_LOCKED_BIT, &last->flags) ||
	    test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags))
		return 0;

	/*
	 * we can't merge with cached rbios, since the
	 * idea is that when we merge the destination
	 * rbio is going to run our IO for us.  We can
	 * steal from cached rbios though, other functions
	 * handle that.
	 */
	if (test_bit(RBIO_CACHE_BIT, &last->flags) ||
	    test_bit(RBIO_CACHE_BIT, &cur->flags))
		return 0;

	if (last->bioc->raid_map[0] != cur->bioc->raid_map[0])
		return 0;

	/* we can't merge with different operations */
	if (last->operation != cur->operation)
		return 0;

	/*
	 * parity scrub reads the full stripe, checks and repairs the
	 * parity, then writes the results back.  No new bios may be
	 * added while that is in flight, so scrub rbios never merge;
	 * anyone else who wants to change this stripe needs to do
	 * their own rmw.
	 */
	if (last->operation == BTRFS_RBIO_PARITY_SCRUB)
		return 0;

	if (last->operation == BTRFS_RBIO_REBUILD_MISSING)
		return 0;

	if (last->operation == BTRFS_RBIO_READ_REBUILD) {
		int fa = last->faila;
		int fb = last->failb;
		int cur_fa = cur->faila;
		int cur_fb = cur->failb;

		if (last->faila >= last->failb) {
			fa = last->failb;
			fb = last->faila;
		}

		if (cur->faila >= cur->failb) {
			cur_fa = cur->failb;
			cur_fb = cur->faila;
		}

		/* read rebuilds can only merge if they agree on the failures */
		if (fa != cur_fa || fb != cur_fb)
			return 0;
	}
	return 1;
}

static int rbio_stripe_page_index(struct btrfs_raid_bio *rbio, int stripe,
				  int index)
{
	return stripe * rbio->stripe_npages + index;
}

/*
 * these are just the pages from the rbio array, not from anything
 * the FS sent down to us
 */
static struct page *rbio_stripe_page(struct btrfs_raid_bio *rbio, int stripe,
				     int index)
{
	return rbio->stripe_pages[rbio_stripe_page_index(rbio, stripe, index)];
}

/*
 * helper to index into the pstripe
 */
static struct page *rbio_pstripe_page(struct btrfs_raid_bio *rbio, int index)
{
	return rbio_stripe_page(rbio, rbio->nr_data, index);
}

/*
 * helper to index into the qstripe, returns null
 * if there is no qstripe
 */
static struct page *rbio_qstripe_page(struct btrfs_raid_bio *rbio, int index)
{
	if (rbio->nr_data + 1 == rbio->real_stripes)
		return NULL;
	return rbio_stripe_page(rbio, rbio->nr_data + 1, index);
}
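
/*
 * Layout sketch (illustrative, assuming 64K stripe_len and 4K pages, so
 * stripe_npages == 16) for a 3-device RAID5 with nr_data == 2:
 *
 *	stripe_pages[ 0..15]	data stripe 0
 *	stripe_pages[16..31]	data stripe 1
 *	stripe_pages[32..47]	P stripe (stripe index == nr_data)
 *
 * i.e. the array is stripe-major, which is why
 * rbio_stripe_page_index() is simply stripe * stripe_npages + index.
 */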

/*
 * The first stripe in the full stripe is locked via the hash table:
 * either we add @rbio to the bucket and own the stripe (return 0), or
 * an rbio for the same stripe already exists and we merge into it or
 * park on its plug_list (return 1, in which case @rbio no longer
 * belongs to the caller).
 *
 * We also try to steal the pages of a cached rbio for the same stripe
 * before taking the lock.
 */
static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
{
	struct btrfs_stripe_hash *h;
	struct btrfs_raid_bio *cur;
	struct btrfs_raid_bio *pending;
	unsigned long flags;
	struct btrfs_raid_bio *freeit = NULL;
	struct btrfs_raid_bio *cache_drop = NULL;
	int ret = 0;

	h = rbio->bioc->fs_info->stripe_hash_table->table + rbio_bucket(rbio);

	spin_lock_irqsave(&h->lock, flags);
	list_for_each_entry(cur, &h->hash_list, hash_list) {
		if (cur->bioc->raid_map[0] != rbio->bioc->raid_map[0])
			continue;

		spin_lock(&cur->bio_list_lock);

		/* can we steal this cached rbio's pages? */
		if (bio_list_empty(&cur->bio_list) &&
		    list_empty(&cur->plug_list) &&
		    test_bit(RBIO_CACHE_BIT, &cur->flags) &&
		    !test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags)) {
			list_del_init(&cur->hash_list);
			refcount_dec(&cur->refs);

			steal_rbio(cur, rbio);
			cache_drop = cur;
			spin_unlock(&cur->bio_list_lock);

			goto lockit;
		}

		/* can we merge into the lock owner? */
		if (rbio_can_merge(cur, rbio)) {
			merge_rbio(cur, rbio);
			spin_unlock(&cur->bio_list_lock);
			freeit = rbio;
			ret = 1;
			goto out;
		}

		/*
		 * we couldn't merge with the running
		 * rbio, see if we can merge with the
		 * pending ones.  We don't have to
		 * check for rmw_locked because there
		 * is no way they are inside finish_rmw
		 * right now
		 */
		list_for_each_entry(pending, &cur->plug_list, plug_list) {
			if (rbio_can_merge(pending, rbio)) {
				merge_rbio(pending, rbio);
				spin_unlock(&cur->bio_list_lock);
				freeit = rbio;
				ret = 1;
				goto out;
			}
		}

		/*
		 * no merging, put us on the tail of the plug list,
		 * our rbio will be started when the currently
		 * running rbio unlocks
		 */
		list_add_tail(&rbio->plug_list, &cur->plug_list);
		spin_unlock(&cur->bio_list_lock);
		ret = 1;
		goto out;
	}
lockit:
	refcount_inc(&rbio->refs);
	list_add(&rbio->hash_list, &h->hash_list);
out:
	spin_unlock_irqrestore(&h->lock, flags);
	if (cache_drop)
		remove_rbio_from_cache(cache_drop);
	if (freeit)
		__free_raid_bio(freeit);
	return ret;
}
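
/*
 * Usage sketch (illustrative): every path that wants to own a full
 * stripe takes the lock first and only does the work on a 0 return;
 * the owner eventually drops it via unlock_stripe(), which hands the
 * stripe to the next rbio parked on the plug_list:
 *
 *	if (lock_stripe_add(rbio) == 0)
 *		finish_rmw(rbio);	// or start_async_work(...)
 *	// else: rbio was merged or queued and is no longer ours
 */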

/*
 * called as rmw or parity rebuild is completed.  If the plug list has more
 * rbios waiting for this stripe, the next one on the list will be started
 */
static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
{
	int bucket;
	struct btrfs_stripe_hash *h;
	unsigned long flags;
	int keep_cache = 0;

	bucket = rbio_bucket(rbio);
	h = rbio->bioc->fs_info->stripe_hash_table->table + bucket;

	if (list_empty(&rbio->plug_list))
		cache_rbio(rbio);

	spin_lock_irqsave(&h->lock, flags);
	spin_lock(&rbio->bio_list_lock);

	if (!list_empty(&rbio->hash_list)) {
		/*
		 * if we're still cached and there is no other IO
		 * to perform, just leave this rbio here for others
		 * to steal from later
		 */
		if (list_empty(&rbio->plug_list) &&
		    test_bit(RBIO_CACHE_BIT, &rbio->flags)) {
			keep_cache = 1;
			clear_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
			BUG_ON(!bio_list_empty(&rbio->bio_list));
			goto done;
		}

		list_del_init(&rbio->hash_list);
		refcount_dec(&rbio->refs);

		/*
		 * we use the plug list to hold all the rbios
		 * waiting for the chance to lock this stripe.
		 * hand the lock over to one of them.
		 */
		if (!list_empty(&rbio->plug_list)) {
			struct btrfs_raid_bio *next;
			struct list_head *head = rbio->plug_list.next;

			next = list_entry(head, struct btrfs_raid_bio,
					  plug_list);

			list_del_init(&rbio->plug_list);

			list_add(&next->hash_list, &h->hash_list);
			refcount_inc(&next->refs);
			spin_unlock(&rbio->bio_list_lock);
			spin_unlock_irqrestore(&h->lock, flags);

			if (next->operation == BTRFS_RBIO_READ_REBUILD)
				start_async_work(next, read_rebuild_work);
			else if (next->operation == BTRFS_RBIO_REBUILD_MISSING) {
				steal_rbio(rbio, next);
				start_async_work(next, read_rebuild_work);
			} else if (next->operation == BTRFS_RBIO_WRITE) {
				steal_rbio(rbio, next);
				start_async_work(next, rmw_work);
			} else if (next->operation == BTRFS_RBIO_PARITY_SCRUB) {
				steal_rbio(rbio, next);
				start_async_work(next, scrub_parity_work);
			}

			goto done_nolock;
		}
	}
done:
	spin_unlock(&rbio->bio_list_lock);
	spin_unlock_irqrestore(&h->lock, flags);

done_nolock:
	if (!keep_cache)
		remove_rbio_from_cache(rbio);
}

static void __free_raid_bio(struct btrfs_raid_bio *rbio)
{
	int i;

	if (!refcount_dec_and_test(&rbio->refs))
		return;

	WARN_ON(!list_empty(&rbio->stripe_cache));
	WARN_ON(!list_empty(&rbio->hash_list));
	WARN_ON(!bio_list_empty(&rbio->bio_list));

	for (i = 0; i < rbio->nr_pages; i++) {
		if (rbio->stripe_pages[i]) {
			__free_page(rbio->stripe_pages[i]);
			rbio->stripe_pages[i] = NULL;
		}
	}

	btrfs_put_bioc(rbio->bioc);
	kfree(rbio);
}

static void rbio_endio_bio_list(struct bio *cur, blk_status_t err)
{
	struct bio *next;

	while (cur) {
		next = cur->bi_next;
		cur->bi_next = NULL;
		cur->bi_status = err;
		bio_endio(cur);
		cur = next;
	}
}

/*
 * this frees the rbio and runs through all the bios in the
 * bio_list and calls end_io on them
 */
static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)
{
	struct bio *cur = bio_list_get(&rbio->bio_list);
	struct bio *extra;

	if (rbio->generic_bio_cnt)
		btrfs_bio_counter_sub(rbio->bioc->fs_info, rbio->generic_bio_cnt);

	/*
	 * At this moment, rbio->bio_list is empty, however since rbio does not
	 * always have RBIO_RMW_LOCKED_BIT set and rbio is still linked on the
	 * hash list, rbio may be merged with others so that rbio->bio_list
	 * becomes non-empty.
	 * Once unlock_stripe() is done, rbio->bio_list will not be updated any
	 * more and we can call bio_endio() on all queued bios.
	 */
	unlock_stripe(rbio);
	extra = bio_list_get(&rbio->bio_list);
	__free_raid_bio(rbio);

	rbio_endio_bio_list(cur, err);
	if (extra)
		rbio_endio_bio_list(extra, err);
}

/*
 * end io function used by finish_rmw.  When we finally
 * get here, we've written a full stripe
 */
static void raid_write_end_io(struct bio *bio)
{
	struct btrfs_raid_bio *rbio = bio->bi_private;
	blk_status_t err = bio->bi_status;
	int max_errors;

	if (err)
		fail_bio_stripe(rbio, bio);

	bio_put(bio);

	if (!atomic_dec_and_test(&rbio->stripes_pending))
		return;

	err = BLK_STS_OK;

	/*
	 * every write is done; parity scrub tolerates no errors,
	 * normal writes tolerate up to max_errors
	 */
	max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ?
		     0 : rbio->bioc->max_errors;
	if (atomic_read(&rbio->error) > max_errors)
		err = BLK_STS_IOERR;

	rbio_orig_end_io(rbio, err);
}

/*
 * the read/modify/write code wants to use the original bio for
 * any pages it included, and then use the rbio for everything
 * else.  This function decides if a given index (stripe number)
 * and page number in that stripe fall inside the original bio
 * or the rbio.
 *
 * if you set bio_list_only, you'll get a NULL back for any ranges
 * that are outside the bio_list
 *
 * This doesn't take any refs on anything, you get a bare page pointer
 * and the caller must bump refs as required.
 *
 * You must call index_rbio_pages once before you can trust
 * the answers from this function.
 */
static struct page *page_in_rbio(struct btrfs_raid_bio *rbio,
				 int index, int pagenr, int bio_list_only)
{
	int chunk_page;
	struct page *p = NULL;

	chunk_page = index * (rbio->stripe_len >> PAGE_SHIFT) + pagenr;

	spin_lock_irq(&rbio->bio_list_lock);
	p = rbio->bio_pages[chunk_page];
	spin_unlock_irq(&rbio->bio_list_lock);

	if (p || bio_list_only)
		return p;

	return rbio->stripe_pages[chunk_page];
}

/*
 * number of pages we need for the entire stripe across all the
 * drives
 */
static unsigned long rbio_nr_pages(unsigned long stripe_len, int nr_stripes)
{
	return DIV_ROUND_UP(stripe_len, PAGE_SIZE) * nr_stripes;
}
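
/*
 * e.g. (illustrative) stripe_len == 64K with 4K pages and nr_stripes == 3
 * gives DIV_ROUND_UP(64K, 4K) * 3 == 48 pages for the full stripe.
 */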

/*
 * allocation and initial setup for the btrfs_raid_bio.  Note that
 * this does not allocate any pages for rbio->stripe_pages.
 */
static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
					 struct btrfs_io_context *bioc,
					 u64 stripe_len)
{
	struct btrfs_raid_bio *rbio;
	int nr_data = 0;
	int real_stripes = bioc->num_stripes - bioc->num_tgtdevs;
	int num_pages = rbio_nr_pages(stripe_len, real_stripes);
	int stripe_npages = DIV_ROUND_UP(stripe_len, PAGE_SIZE);
	void *p;

	rbio = kzalloc(sizeof(*rbio) +
		       sizeof(*rbio->stripe_pages) * num_pages +
		       sizeof(*rbio->bio_pages) * num_pages +
		       sizeof(*rbio->finish_pointers) * real_stripes +
		       sizeof(*rbio->dbitmap) * BITS_TO_LONGS(stripe_npages) +
		       sizeof(*rbio->finish_pbitmap) *
				BITS_TO_LONGS(stripe_npages),
		       GFP_NOFS);
	if (!rbio)
		return ERR_PTR(-ENOMEM);

	bio_list_init(&rbio->bio_list);
	INIT_LIST_HEAD(&rbio->plug_list);
	spin_lock_init(&rbio->bio_list_lock);
	INIT_LIST_HEAD(&rbio->stripe_cache);
	INIT_LIST_HEAD(&rbio->hash_list);
	rbio->bioc = bioc;
	rbio->stripe_len = stripe_len;
	rbio->nr_pages = num_pages;
	rbio->real_stripes = real_stripes;
	rbio->stripe_npages = stripe_npages;
	rbio->faila = -1;
	rbio->failb = -1;
	refcount_set(&rbio->refs, 1);
	atomic_set(&rbio->error, 0);
	atomic_set(&rbio->stripes_pending, 0);

	/*
	 * the stripe_pages, bio_pages, etc arrays point to the extra
	 * memory we allocated past the end of the rbio
	 */
	p = rbio + 1;
#define CONSUME_ALLOC(ptr, count)	do {				\
		ptr = p;						\
		p = (unsigned char *)p + sizeof(*(ptr)) * (count);	\
	} while (0)
	CONSUME_ALLOC(rbio->stripe_pages, num_pages);
	CONSUME_ALLOC(rbio->bio_pages, num_pages);
	CONSUME_ALLOC(rbio->finish_pointers, real_stripes);
	CONSUME_ALLOC(rbio->dbitmap, BITS_TO_LONGS(stripe_npages));
	CONSUME_ALLOC(rbio->finish_pbitmap, BITS_TO_LONGS(stripe_npages));
#undef CONSUME_ALLOC

	if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID5)
		nr_data = real_stripes - 1;
	else if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID6)
		nr_data = real_stripes - 2;
	else
		BUG();

	rbio->nr_data = nr_data;
	return rbio;
}
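
/*
 * Memory layout sketch of the single allocation above (sizes are
 * illustrative only):
 *
 *	[struct btrfs_raid_bio][stripe_pages][bio_pages]
 *	[finish_pointers][dbitmap][finish_pbitmap]
 *
 * CONSUME_ALLOC just walks 'p' forward through the trailing region so
 * each array pointer lands on its own slice of the allocation.
 */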

/* allocate pages for all the stripes in the bio, including parity */
static int alloc_rbio_pages(struct btrfs_raid_bio *rbio)
{
	int i;
	struct page *page;

	for (i = 0; i < rbio->nr_pages; i++) {
		if (rbio->stripe_pages[i])
			continue;
		page = alloc_page(GFP_NOFS);
		if (!page)
			return -ENOMEM;
		rbio->stripe_pages[i] = page;
	}
	return 0;
}

/* only allocate pages for p/q stripes */
static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio)
{
	int i;
	struct page *page;

	i = rbio_stripe_page_index(rbio, rbio->nr_data, 0);

	for (; i < rbio->nr_pages; i++) {
		if (rbio->stripe_pages[i])
			continue;
		page = alloc_page(GFP_NOFS);
		if (!page)
			return -ENOMEM;
		rbio->stripe_pages[i] = page;
	}
	return 0;
}

/*
 * add a single page from a specific stripe into our list of bios for IO
 * this will try to merge into existing bios if possible, and returns
 * zero if all went well.
 */
static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
			    struct bio_list *bio_list,
			    struct page *page,
			    int stripe_nr,
			    unsigned long page_index,
			    unsigned long bio_max_len)
{
	struct bio *last = bio_list->tail;
	int ret;
	struct bio *bio;
	struct btrfs_io_stripe *stripe;
	u64 disk_start;

	stripe = &rbio->bioc->stripes[stripe_nr];
	disk_start = stripe->physical + (page_index << PAGE_SHIFT);

	/* if the device is missing, just fail this stripe */
	if (!stripe->dev->bdev)
		return fail_rbio_index(rbio, stripe_nr);

	/* see if we can add this page onto our existing bio */
	if (last) {
		u64 last_end = last->bi_iter.bi_sector << 9;
		last_end += last->bi_iter.bi_size;

		/*
		 * we can't merge these if they are from different
		 * devices or if they are not contiguous
		 */
		if (last_end == disk_start && !last->bi_status &&
		    last->bi_bdev == stripe->dev->bdev) {
			ret = bio_add_page(last, page, PAGE_SIZE, 0);
			if (ret == PAGE_SIZE)
				return 0;
		}
	}

	/* put a new bio on the list */
	bio = btrfs_bio_alloc(bio_max_len >> PAGE_SHIFT ?: 1);
	btrfs_bio(bio)->device = stripe->dev;
	bio->bi_iter.bi_size = 0;
	bio_set_dev(bio, stripe->dev->bdev);
	bio->bi_iter.bi_sector = disk_start >> 9;

	bio_add_page(bio, page, PAGE_SIZE, 0);
	bio_list_add(bio_list, bio);
	return 0;
}

/*
 * while we're doing the read/modify/write cycle, we could
 * have errors in reading pages off the disk.  This checks
 * for errors and if we're not able to read the page it'll
 * trigger parity reconstruction.  The rmw will be finished
 * after we've reconstructed the failed stripes
 */
static void validate_rbio_for_rmw(struct btrfs_raid_bio *rbio)
{
	if (rbio->faila >= 0 || rbio->failb >= 0) {
		BUG_ON(rbio->faila == rbio->real_stripes - 1);
		__raid56_parity_recover(rbio);
	} else {
		finish_rmw(rbio);
	}
}

/*
 * helper function to walk our bio list and populate the bio_pages array with
 * the result.  This seems expensive, but it is faster than constantly
 * searching through the bio list as we setup the IO in finish_rmw or steal
 * pages.
 *
 * This must be called before you trust the answers from page_in_rbio
 */
static void index_rbio_pages(struct btrfs_raid_bio *rbio)
{
	struct bio *bio;
	u64 start;
	unsigned long stripe_offset;
	unsigned long page_index;

	spin_lock_irq(&rbio->bio_list_lock);
	bio_list_for_each(bio, &rbio->bio_list) {
		struct bio_vec bvec;
		struct bvec_iter iter;
		int i = 0;

		start = bio->bi_iter.bi_sector << 9;
		stripe_offset = start - rbio->bioc->raid_map[0];
		page_index = stripe_offset >> PAGE_SHIFT;

		if (bio_flagged(bio, BIO_CLONED))
			bio->bi_iter = btrfs_bio(bio)->iter;

		bio_for_each_segment(bvec, bio, iter) {
			rbio->bio_pages[page_index + i] = bvec.bv_page;
			i++;
		}
	}
	spin_unlock_irq(&rbio->bio_list_lock);
}

/*
 * this is called from one of two situations.  We either
 * have a full stripe from the higher layers, or we've read all
 * the missing bits off disk.
 *
 * This will calculate the parity and then send down any
 * changed blocks.
 */
static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
{
	struct btrfs_io_context *bioc = rbio->bioc;
	void **pointers = rbio->finish_pointers;
	int nr_data = rbio->nr_data;
	int stripe;
	int pagenr;
	bool has_qstripe;
	struct bio_list bio_list;
	struct bio *bio;
	int ret;

	bio_list_init(&bio_list);

	if (rbio->real_stripes - rbio->nr_data == 1)
		has_qstripe = false;
	else if (rbio->real_stripes - rbio->nr_data == 2)
		has_qstripe = true;
	else
		BUG();

	/*
	 * at this point we either have a full stripe,
	 * or we've read the full stripe from the drive.
	 * recalculate the parity and write the new results.
	 *
	 * We're not allowed to add any new bios to the
	 * bio list here, anyone else that wants to
	 * change this stripe needs to do their own rmw.
	 */
	spin_lock_irq(&rbio->bio_list_lock);
	set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
	spin_unlock_irq(&rbio->bio_list_lock);

	atomic_set(&rbio->error, 0);

	/*
	 * now that we've set rmw_locked, run through the
	 * bio list one last time and map the page pointers
	 *
	 * We don't cache full rbios because we're assuming
	 * the higher layers are unlikely to use this area of
	 * the disk again soon.  If they do use it again,
	 * hopefully they will send another full bio.
	 */
	index_rbio_pages(rbio);
	if (!rbio_is_full(rbio))
		cache_rbio_pages(rbio);
	else
		clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);

	for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
		struct page *p;

		/* first collect one page from each data stripe */
		for (stripe = 0; stripe < nr_data; stripe++) {
			p = page_in_rbio(rbio, stripe, pagenr, 0);
			pointers[stripe] = kmap_local_page(p);
		}

		/* then add the parity stripe */
		p = rbio_pstripe_page(rbio, pagenr);
		SetPageUptodate(p);
		pointers[stripe++] = kmap_local_page(p);

		if (has_qstripe) {
			/*
			 * raid6, add the qstripe and call the
			 * library function to fill in our p/q
			 */
			p = rbio_qstripe_page(rbio, pagenr);
			SetPageUptodate(p);
			pointers[stripe++] = kmap_local_page(p);

			raid6_call.gen_syndrome(rbio->real_stripes, PAGE_SIZE,
						pointers);
		} else {
			/* raid5 */
			copy_page(pointers[nr_data], pointers[0]);
			run_xor(pointers + 1, nr_data - 1, PAGE_SIZE);
		}
		for (stripe = stripe - 1; stripe >= 0; stripe--)
			kunmap_local(pointers[stripe]);
	}

	/*
	 * time to start writing.  Make bios for everything from the
	 * higher layers (the bio_list in our rbio) and our p/q.  Ignore
	 * everything else.
	 */
	for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
		for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
			struct page *page;
			if (stripe < rbio->nr_data) {
				page = page_in_rbio(rbio, stripe, pagenr, 1);
				if (!page)
					continue;
			} else {
				page = rbio_stripe_page(rbio, stripe, pagenr);
			}

			ret = rbio_add_io_page(rbio, &bio_list,
				       page, stripe, pagenr, rbio->stripe_len);
			if (ret)
				goto cleanup;
		}
	}

	if (likely(!bioc->num_tgtdevs))
		goto write_data;

	for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
		if (!bioc->tgtdev_map[stripe])
			continue;

		for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
			struct page *page;
			if (stripe < rbio->nr_data) {
				page = page_in_rbio(rbio, stripe, pagenr, 1);
				if (!page)
					continue;
			} else {
				page = rbio_stripe_page(rbio, stripe, pagenr);
			}

			ret = rbio_add_io_page(rbio, &bio_list, page,
					       rbio->bioc->tgtdev_map[stripe],
					       pagenr, rbio->stripe_len);
			if (ret)
				goto cleanup;
		}
	}

write_data:
	atomic_set(&rbio->stripes_pending, bio_list_size(&bio_list));
	BUG_ON(atomic_read(&rbio->stripes_pending) == 0);

	while ((bio = bio_list_pop(&bio_list))) {
		bio->bi_private = rbio;
		bio->bi_end_io = raid_write_end_io;
		bio->bi_opf = REQ_OP_WRITE;

		submit_bio(bio);
	}
	return;

cleanup:
	rbio_orig_end_io(rbio, BLK_STS_IOERR);

	while ((bio = bio_list_pop(&bio_list)))
		bio_put(bio);
}

/*
 * helper to find the stripe number for a given bio.  Used to figure out which
 * stripe has failed.  This expects the bio to correspond to a physical disk,
 * so it looks up based on physical sector numbers.
 */
static int find_bio_stripe(struct btrfs_raid_bio *rbio,
			   struct bio *bio)
{
	u64 physical = bio->bi_iter.bi_sector;
	int i;
	struct btrfs_io_stripe *stripe;

	physical <<= 9;

	for (i = 0; i < rbio->bioc->num_stripes; i++) {
		stripe = &rbio->bioc->stripes[i];
		if (in_range(physical, stripe->physical, rbio->stripe_len) &&
		    stripe->dev->bdev && bio->bi_bdev == stripe->dev->bdev) {
			return i;
		}
	}
	return -1;
}

/*
 * helper to find the stripe number for a given
 * bio (before mapping).  Used to figure out which stripe has
 * failed.  This looks up based on logical block numbers.
 */
static int find_logical_bio_stripe(struct btrfs_raid_bio *rbio,
				   struct bio *bio)
{
	u64 logical = bio->bi_iter.bi_sector << 9;
	int i;

	for (i = 0; i < rbio->nr_data; i++) {
		u64 stripe_start = rbio->bioc->raid_map[i];

		if (in_range(logical, stripe_start, rbio->stripe_len))
			return i;
	}
	return -1;
}

/*
 * returns -EIO if we had too many failures
 */
static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed)
{
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&rbio->bio_list_lock, flags);

	/* we already know this stripe is bad, move on */
	if (rbio->faila == failed || rbio->failb == failed)
		goto out;

	if (rbio->faila == -1) {
		/* first failure on this rbio */
		rbio->faila = failed;
		atomic_inc(&rbio->error);
	} else if (rbio->failb == -1) {
		/* second failure on this rbio */
		rbio->failb = failed;
		atomic_inc(&rbio->error);
	} else {
		ret = -EIO;
	}
out:
	spin_unlock_irqrestore(&rbio->bio_list_lock, flags);

	return ret;
}

/*
 * helper to fail a stripe based on a physical disk
 * bio.
 */
static int fail_bio_stripe(struct btrfs_raid_bio *rbio,
			   struct bio *bio)
{
	int failed = find_bio_stripe(rbio, bio);

	if (failed < 0)
		return -EIO;

	return fail_rbio_index(rbio, failed);
}

/*
 * this sets each page in the bio uptodate.  It should only be used on private
 * rbio pages, nothing that comes in from the higher layers
 */
static void set_bio_pages_uptodate(struct bio *bio)
{
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	ASSERT(!bio_flagged(bio, BIO_CLONED));

	bio_for_each_segment_all(bvec, bio, iter_all)
		SetPageUptodate(bvec->bv_page);
}

/*
 * end io for the read phase of the rmw cycle.  All the bios here are physical
 * stripe bios we've read from the disk so we can recalculate the parity of the
 * stripe.
 *
 * This will usually kick off finish_rmw once all the bios are read in, but it
 * may trigger parity reconstruction if we had any errors along the way
 */
static void raid_rmw_end_io(struct bio *bio)
{
	struct btrfs_raid_bio *rbio = bio->bi_private;

	if (bio->bi_status)
		fail_bio_stripe(rbio, bio);
	else
		set_bio_pages_uptodate(bio);

	bio_put(bio);

	if (!atomic_dec_and_test(&rbio->stripes_pending))
		return;

	if (atomic_read(&rbio->error) > rbio->bioc->max_errors)
		goto cleanup;

	/*
	 * this will normally call finish_rmw to start our write
	 * but if there are any failed stripes we'll reconstruct
	 * from parity first
	 */
	validate_rbio_for_rmw(rbio);
	return;

cleanup:

	rbio_orig_end_io(rbio, BLK_STS_IOERR);
}

/*
 * the stripe must be locked by the caller.  It will
 * unlock after all the writes are done
 */
static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
{
	int bios_to_read = 0;
	struct bio_list bio_list;
	int ret;
	int pagenr;
	int stripe;
	struct bio *bio;

	bio_list_init(&bio_list);

	ret = alloc_rbio_pages(rbio);
	if (ret)
		goto cleanup;

	index_rbio_pages(rbio);

	atomic_set(&rbio->error, 0);

	/*
	 * build a list of bios to read all the missing parts of this
	 * stripe
	 */
	for (stripe = 0; stripe < rbio->nr_data; stripe++) {
		for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
			struct page *page;
			/*
			 * we want to write everything, but the bio may
			 * not cover the whole stripe.  Pages the bio
			 * list already holds don't need to be read
			 * from disk.
			 */
			page = page_in_rbio(rbio, stripe, pagenr, 1);
			if (page)
				continue;

			page = rbio_stripe_page(rbio, stripe, pagenr);
			/*
			 * the bio cache may have handed us an uptodate
			 * page.  If so, be happy and use it
			 */
			if (PageUptodate(page))
				continue;

			ret = rbio_add_io_page(rbio, &bio_list, page,
				       stripe, pagenr, rbio->stripe_len);
			if (ret)
				goto cleanup;
		}
	}

	bios_to_read = bio_list_size(&bio_list);
	if (!bios_to_read) {
		/*
		 * this can happen if others have merged with
		 * us, it means there is nothing left to read.
		 * But if there are missing devices it may not be
		 * safe to do the full stripe write yet.
		 */
		goto finish;
	}

	/*
	 * the bioc may be freed once we submit the last bio.  Make sure
	 * not to touch it after that
	 */
	atomic_set(&rbio->stripes_pending, bios_to_read);
	while ((bio = bio_list_pop(&bio_list))) {
		bio->bi_private = rbio;
		bio->bi_end_io = raid_rmw_end_io;
		bio->bi_opf = REQ_OP_READ;

		btrfs_bio_wq_end_io(rbio->bioc->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);

		submit_bio(bio);
	}

	return 0;

cleanup:
	rbio_orig_end_io(rbio, BLK_STS_IOERR);

	while ((bio = bio_list_pop(&bio_list)))
		bio_put(bio);

	return -EIO;

finish:
	validate_rbio_for_rmw(rbio);
	return 0;
}

/*
 * if the upper layers pass in a full stripe, we thank them by only allocating
 * enough pages to hold the parity, and sending it all down quickly.
 */
static int full_stripe_write(struct btrfs_raid_bio *rbio)
{
	int ret;

	ret = alloc_rbio_parity_pages(rbio);
	if (ret) {
		__free_raid_bio(rbio);
		return ret;
	}

	ret = lock_stripe_add(rbio);
	if (ret == 0)
		finish_rmw(rbio);
	return 0;
}

/*
 * partial stripe writes get handed over to async helpers.
 * We're really hoping to merge a few more writes into this
 * rbio before calculating new parity
 */
static int partial_stripe_write(struct btrfs_raid_bio *rbio)
{
	int ret;

	ret = lock_stripe_add(rbio);
	if (ret == 0)
		start_async_work(rbio, rmw_work);
	return 0;
}

/*
 * sometimes while we were reading from the drive to
 * recalculate parity, enough new bios come in to create
 * a full stripe.  So we do a check here to see if we can
 * go directly to finish_rmw
 */
static int __raid56_parity_write(struct btrfs_raid_bio *rbio)
{
	/* head off into rmw land if we don't have a full stripe */
	if (!rbio_is_full(rbio))
		return partial_stripe_write(rbio);
	return full_stripe_write(rbio);
}

/*
 * We use plugging call backs to collect full stripes.
 * Any time we get a partial stripe write while plugged
 * we collect it into a list.  When the unplug comes down,
 * we sort the list by logical block and start processing
 * in order.
 */
struct btrfs_plug_cb {
	struct blk_plug_cb cb;
	struct btrfs_fs_info *info;
	struct list_head rbio_list;
	struct btrfs_work work;
};

/*
 * rbios on the plug list are sorted for easier merging.
 */
static int plug_cmp(void *priv, const struct list_head *a,
		    const struct list_head *b)
{
	const struct btrfs_raid_bio *ra = container_of(a, struct btrfs_raid_bio,
						       plug_list);
	const struct btrfs_raid_bio *rb = container_of(b, struct btrfs_raid_bio,
						       plug_list);
	u64 a_sector = ra->bio_list.head->bi_iter.bi_sector;
	u64 b_sector = rb->bio_list.head->bi_iter.bi_sector;

	if (a_sector < b_sector)
		return -1;
	if (a_sector > b_sector)
		return 1;
	return 0;
}

static void run_plug(struct btrfs_plug_cb *plug)
{
	struct btrfs_raid_bio *cur;
	struct btrfs_raid_bio *last = NULL;

	/*
	 * sort our plug list then try to merge
	 * everything we can in hopes of creating full
	 * stripes.
	 */
	list_sort(NULL, &plug->rbio_list, plug_cmp);
	while (!list_empty(&plug->rbio_list)) {
		cur = list_entry(plug->rbio_list.next,
				 struct btrfs_raid_bio, plug_list);
		list_del_init(&cur->plug_list);

		if (rbio_is_full(cur)) {
			int ret;

			/* we have a full stripe, send it down */
			ret = full_stripe_write(cur);
			BUG_ON(ret);
			continue;
		}
		if (last) {
			if (rbio_can_merge(last, cur)) {
				merge_rbio(last, cur);
				__free_raid_bio(cur);
				continue;
			}
			__raid56_parity_write(last);
		}
		last = cur;
	}
	if (last)
		__raid56_parity_write(last);
	kfree(plug);
}

/*
 * if the unplug comes from schedule, we have to push the
 * work off to a helper thread
 */
static void unplug_work(struct btrfs_work *work)
{
	struct btrfs_plug_cb *plug;
	plug = container_of(work, struct btrfs_plug_cb, work);
	run_plug(plug);
}

static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
{
	struct btrfs_plug_cb *plug;
	plug = container_of(cb, struct btrfs_plug_cb, cb);

	if (from_schedule) {
		btrfs_init_work(&plug->work, unplug_work, NULL, NULL);
		btrfs_queue_work(plug->info->rmw_workers,
				 &plug->work);
		return;
	}
	run_plug(plug);
}

/*
 * our main entry point for writes from the rest of the FS.
 */
int raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc,
			u64 stripe_len)
{
	struct btrfs_fs_info *fs_info = bioc->fs_info;
	struct btrfs_raid_bio *rbio;
	struct btrfs_plug_cb *plug = NULL;
	struct blk_plug_cb *cb;
	int ret;

	rbio = alloc_rbio(fs_info, bioc, stripe_len);
	if (IS_ERR(rbio)) {
		btrfs_put_bioc(bioc);
		return PTR_ERR(rbio);
	}
	bio_list_add(&rbio->bio_list, bio);
	rbio->bio_list_bytes = bio->bi_iter.bi_size;
	rbio->operation = BTRFS_RBIO_WRITE;

	btrfs_bio_counter_inc_noblocked(fs_info);
	rbio->generic_bio_cnt = 1;

	/*
	 * don't plug on full rbios, just get them out the door
	 * as quickly as we can
	 */
	if (rbio_is_full(rbio)) {
		ret = full_stripe_write(rbio);
		if (ret)
			btrfs_bio_counter_dec(fs_info);
		return ret;
	}

	cb = blk_check_plugged(btrfs_raid_unplug, fs_info, sizeof(*plug));
	if (cb) {
		plug = container_of(cb, struct btrfs_plug_cb, cb);
		if (!plug->info) {
			plug->info = fs_info;
			INIT_LIST_HEAD(&plug->rbio_list);
		}
		list_add_tail(&rbio->plug_list, &plug->rbio_list);
		ret = 0;
	} else {
		ret = __raid56_parity_write(rbio);
		if (ret)
			btrfs_bio_counter_dec(fs_info);
	}
	return ret;
}

/*
 * all parity reconstruction happens here.  We've read in everything
 * we can find from the drives and this does the heavy lifting of
 * sorting the good from the bad.
 */
static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
{
	int pagenr, stripe;
	void **pointers;
	void **unmap_array;
	int faila = -1, failb = -1;
	struct page *page;
	blk_status_t err;
	int i;

	pointers = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS);
	if (!pointers) {
		err = BLK_STS_RESOURCE;
		goto cleanup_io;
	}

	/*
	 * Store copy of pointers that does not get reordered during
	 * reconstruction so that kunmap_local works.
	 */
	unmap_array = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS);
	if (!unmap_array) {
		err = BLK_STS_RESOURCE;
		goto cleanup_pointers;
	}

	faila = rbio->faila;
	failb = rbio->failb;

	if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
	    rbio->operation == BTRFS_RBIO_REBUILD_MISSING) {
		spin_lock_irq(&rbio->bio_list_lock);
		set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
		spin_unlock_irq(&rbio->bio_list_lock);
	}

	index_rbio_pages(rbio);

	for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
		/*
		 * Now we just use bitmap to mark the horizontal stripes in
		 * which we have data when doing parity scrub.
		 */
		if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB &&
		    !test_bit(pagenr, rbio->dbitmap))
			continue;

		/*
		 * Setup our array of pointers with pages from each stripe
		 *
		 * NOTE: store a duplicate array of pointers to preserve the
		 * pointer order
		 */
		for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
			/*
			 * if we're rebuilding a read, we have to use
			 * pages from the bio list
			 */
			if ((rbio->operation == BTRFS_RBIO_READ_REBUILD ||
			     rbio->operation == BTRFS_RBIO_REBUILD_MISSING) &&
			    (stripe == faila || stripe == failb)) {
				page = page_in_rbio(rbio, stripe, pagenr, 0);
			} else {
				page = rbio_stripe_page(rbio, stripe, pagenr);
			}
			pointers[stripe] = kmap_local_page(page);
			unmap_array[stripe] = pointers[stripe];
		}

		/* all raid6 handling here */
		if (rbio->bioc->map_type & BTRFS_BLOCK_GROUP_RAID6) {
			/*
			 * single failure, rebuild from parity raid5
			 * style
			 */
			if (failb < 0) {
				if (faila == rbio->nr_data) {
					/*
					 * Just the P stripe has failed, without
					 * a bad data or Q stripe.
					 * TODO, we should redo the xor here.
					 */
					err = BLK_STS_IOERR;
					goto cleanup;
				}
				/*
				 * a single failure in raid6 is rebuilt
				 * in the pstripe code below
				 */
				goto pstripe;
			}

			/* make sure our ps and qs are in order */
			if (faila > failb)
				swap(faila, failb);

			/*
			 * if the q stripe is failed, do a pstripe
			 * reconstruction from the xors.
			 * If both the q stripe and the P stripe are failed,
			 * we're here due to a crc mismatch and we can't give
			 * them the data they want.
			 */
			if (rbio->bioc->raid_map[failb] == RAID6_Q_STRIPE) {
				if (rbio->bioc->raid_map[faila] ==
				    RAID5_P_STRIPE) {
					err = BLK_STS_IOERR;
					goto cleanup;
				}
				/*
				 * otherwise we have one bad data stripe and
				 * a good P stripe.  raid5!
				 */
				goto pstripe;
			}

			if (rbio->bioc->raid_map[failb] == RAID5_P_STRIPE) {
				raid6_datap_recov(rbio->real_stripes,
						  PAGE_SIZE, faila, pointers);
			} else {
				raid6_2data_recov(rbio->real_stripes,
						  PAGE_SIZE, faila, failb,
						  pointers);
			}
		} else {
			void *p;

			/* rebuild from P stripe here (raid5 or raid6) */
			BUG_ON(failb != -1);
pstripe:
			/* Copy parity block into failed block to start with */
			copy_page(pointers[faila], pointers[rbio->nr_data]);

			/* rearrange the pointer array */
			p = pointers[faila];
			for (stripe = faila; stripe < rbio->nr_data - 1; stripe++)
				pointers[stripe] = pointers[stripe + 1];
			pointers[rbio->nr_data - 1] = p;

			/* xor in the rest */
			run_xor(pointers, rbio->nr_data - 1, PAGE_SIZE);
		}

		/*
		 * if we're doing this rebuild as part of an rmw, go through
		 * and set all our private rbio pages in the
		 * failed stripes as uptodate.  This way finish_rmw will
		 * know they can be trusted.  If this was a read
		 * reconstruction, other endio functions will fiddle the
		 * uptodate bits.
		 */
		if (rbio->operation == BTRFS_RBIO_WRITE) {
			for (i = 0; i < rbio->stripe_npages; i++) {
				if (faila != -1) {
					page = rbio_stripe_page(rbio, faila, i);
					SetPageUptodate(page);
				}
				if (failb != -1) {
					page = rbio_stripe_page(rbio, failb, i);
					SetPageUptodate(page);
				}
			}
		}
		for (stripe = rbio->real_stripes - 1; stripe >= 0; stripe--)
			kunmap_local(unmap_array[stripe]);
	}

	err = BLK_STS_OK;
cleanup:
	kfree(unmap_array);
cleanup_pointers:
	kfree(pointers);

cleanup_io:
	/*
	 * Similar to READ_REBUILD, REBUILD_MISSING at this point also has a
	 * valid rbio which is consistent with ondisk content, thus such a
	 * valid rbio can be cached to avoid further disk reads.
	 */
	if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
	    rbio->operation == BTRFS_RBIO_REBUILD_MISSING) {
		/*
		 * - In case of two failures, where rbio->failb != -1:
		 *
		 *   Do not cache this rbio since the above read reconstruction
		 *   (raid6_datap_recov() or raid6_2data_recov()) may have
		 *   changed some content of stripes which are not identical
		 *   to on-disk content any more, otherwise, a later write or
		 *   recover may steal stripe_pages from this rbio and end up
		 *   with corruptions or rebuild failures.
		 *
		 * - In case of single failure, where rbio->failb == -1:
		 *
		 *   Cache this rbio iff the above read reconstruction is
		 *   executed without problems.
		 */
		if (err == BLK_STS_OK && rbio->failb < 0)
			cache_rbio_pages(rbio);
		else
			clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);

		rbio_orig_end_io(rbio, err);
	} else if (err == BLK_STS_OK) {
		rbio->faila = -1;
		rbio->failb = -1;

		if (rbio->operation == BTRFS_RBIO_WRITE)
			finish_rmw(rbio);
		else if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB)
			finish_parity_scrub(rbio, 0);
		else
			BUG();
	} else {
		rbio_orig_end_io(rbio, err);
	}
}

/*
 * This is called only for stripes we've read from disk to
 * reconstruct the parity.
 */
static void raid_recover_end_io(struct bio *bio)
{
	struct btrfs_raid_bio *rbio = bio->bi_private;

	/*
	 * we only read stripe pages off the disk, set them
	 * up to date if there were no errors
	 */
	if (bio->bi_status)
		fail_bio_stripe(rbio, bio);
	else
		set_bio_pages_uptodate(bio);
	bio_put(bio);

	if (!atomic_dec_and_test(&rbio->stripes_pending))
		return;

	if (atomic_read(&rbio->error) > rbio->bioc->max_errors)
		rbio_orig_end_io(rbio, BLK_STS_IOERR);
	else
		__raid_recover_end_io(rbio);
}

/*
 * reads everything we need off the disk to reconstruct
 * the parity. endio handlers trigger final reconstruction
 * when the IO is done.
 *
 * This is used both for reads from the higher layers and for
 * parity construction required to finish a rmw cycle.
 */
static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
{
	int bios_to_read = 0;
	struct bio_list bio_list;
	int ret;
	int pagenr;
	int stripe;
	struct bio *bio;

	bio_list_init(&bio_list);

	ret = alloc_rbio_pages(rbio);
	if (ret)
		goto cleanup;

	atomic_set(&rbio->error, 0);

	/*
	 * read everything that hasn't failed.  Thanks to the
	 * stripe cache, it is possible that some or all of these
	 * pages are going to be uptodate.
	 */
	for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
		if (rbio->faila == stripe || rbio->failb == stripe) {
			atomic_inc(&rbio->error);
			continue;
		}

		for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
			struct page *p;

			/*
			 * the rmw code may have already read this
			 * page in
			 */
			p = rbio_stripe_page(rbio, stripe, pagenr);
			if (PageUptodate(p))
				continue;

			ret = rbio_add_io_page(rbio, &bio_list,
				       rbio_stripe_page(rbio, stripe, pagenr),
				       stripe, pagenr, rbio->stripe_len);
			if (ret < 0)
				goto cleanup;
		}
	}

	bios_to_read = bio_list_size(&bio_list);
	if (!bios_to_read) {
		/*
		 * we might have no bios to read just because the pages
		 * were up to date, or we might have no bios to read because
		 * the devices were gone.
		 */
		if (atomic_read(&rbio->error) <= rbio->bioc->max_errors) {
			__raid_recover_end_io(rbio);
			return 0;
		} else {
			goto cleanup;
		}
	}

	/*
	 * the bioc may be freed once we submit the last bio.  Make sure
	 * not to touch it after that
	 */
	atomic_set(&rbio->stripes_pending, bios_to_read);
	while ((bio = bio_list_pop(&bio_list))) {
		bio->bi_private = rbio;
		bio->bi_end_io = raid_recover_end_io;
		bio->bi_opf = REQ_OP_READ;

		btrfs_bio_wq_end_io(rbio->bioc->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);

		submit_bio(bio);
	}

	return 0;

cleanup:
	if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
	    rbio->operation == BTRFS_RBIO_REBUILD_MISSING)
		rbio_orig_end_io(rbio, BLK_STS_IOERR);

	while ((bio = bio_list_pop(&bio_list)))
		bio_put(bio);

	return -EIO;
}

/*
 * the main entry point for reads from the higher layers.  This
 * is really only called when the normal read path had a failure,
 * so we assume the bio they send down corresponds to a failed part
 * of the drive.
 */
int raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
			  u64 stripe_len, int mirror_num, int generic_io)
{
	struct btrfs_fs_info *fs_info = bioc->fs_info;
	struct btrfs_raid_bio *rbio;
	int ret;

	if (generic_io) {
		ASSERT(bioc->mirror_num == mirror_num);
		btrfs_bio(bio)->mirror_num = mirror_num;
	}

	rbio = alloc_rbio(fs_info, bioc, stripe_len);
	if (IS_ERR(rbio)) {
		if (generic_io)
			btrfs_put_bioc(bioc);
		return PTR_ERR(rbio);
	}

	rbio->operation = BTRFS_RBIO_READ_REBUILD;
	bio_list_add(&rbio->bio_list, bio);
	rbio->bio_list_bytes = bio->bi_iter.bi_size;

	rbio->faila = find_logical_bio_stripe(rbio, bio);
	if (rbio->faila == -1) {
		btrfs_warn(fs_info,
"%s could not find the bad stripe in raid56 so that we cannot recover any more (bio has logical %llu len %llu, bioc has map_type %llu)",
			   __func__, bio->bi_iter.bi_sector << 9,
			   (u64)bio->bi_iter.bi_size, bioc->map_type);
		if (generic_io)
			btrfs_put_bioc(bioc);
		kfree(rbio);
		return -EIO;
	}

	if (generic_io) {
		btrfs_bio_counter_inc_noblocked(fs_info);
		rbio->generic_bio_cnt = 1;
	} else {
		btrfs_get_bioc(bioc);
	}

	/*
	 * Loop retry:
	 * for 'mirror == 2', reconstruct from all other stripes.
	 * for 'mirror_num > 2', select a stripe to fail on every retry.
	 */
	if (mirror_num > 2) {
		/*
		 * 'mirror == 3' is to fail the p stripe and
		 * reconstruct from the q stripe.  'mirror > 3' is to
		 * fail a data stripe and reconstruct from p+q stripe.
		 */
		rbio->failb = rbio->real_stripes - (mirror_num - 1);
		ASSERT(rbio->failb > 0);
		if (rbio->failb <= rbio->faila)
			rbio->failb--;
	}

	ret = lock_stripe_add(rbio);

	/*
	 * __raid56_parity_recover will end the bio with
	 * any errors it hits.  We don't want to return
	 * its error value up the stack because our caller
	 * will end up calling bio_endio with any nonzero
	 * return
	 */
	if (ret == 0)
		__raid56_parity_recover(rbio);
	/*
	 * our rbio has been added to the list of
	 * rbios that will be handled after the
	 * current lock owner is done
	 */
	return 0;
}
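
/*
 * mirror_num mapping sketch (illustrative) for a 4-device RAID6
 * (real_stripes == 4, nr_data == 2):
 *
 *	mirror 3: failb = 4 - (3 - 1) = 2 -> fail the P stripe
 *	mirror 4: failb = 4 - (4 - 1) = 1 -> fail a data stripe
 *
 * with failb decremented once more if it collides with faila.
 */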

static void rmw_work(struct btrfs_work *work)
{
	struct btrfs_raid_bio *rbio;

	rbio = container_of(work, struct btrfs_raid_bio, work);
	raid56_rmw_stripe(rbio);
}

static void read_rebuild_work(struct btrfs_work *work)
{
	struct btrfs_raid_bio *rbio;

	rbio = container_of(work, struct btrfs_raid_bio, work);
	__raid56_parity_recover(rbio);
}

/*
 * The following code is used to scrub/replace the parity stripe.
 *
 * Caller must have already increased bio_counter for getting @bioc.
 *
 * Note: We need to make sure all the pages that are added into the
 * scrub/replace raid bio are correct and not changed during the
 * scrub/replace, that is, pages that just hold metadata or file data
 * with checksum.
 */
struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio,
				struct btrfs_io_context *bioc,
				u64 stripe_len, struct btrfs_device *scrub_dev,
				unsigned long *dbitmap, int stripe_nsectors)
{
	struct btrfs_fs_info *fs_info = bioc->fs_info;
	struct btrfs_raid_bio *rbio;
	int i;

	rbio = alloc_rbio(fs_info, bioc, stripe_len);
	if (IS_ERR(rbio))
		return NULL;
	bio_list_add(&rbio->bio_list, bio);
	/*
	 * This is a special bio which is used to hold the completion
	 * handler and keep the scrub rbio shaped like the other types
	 */
	ASSERT(!bio->bi_iter.bi_size);
	rbio->operation = BTRFS_RBIO_PARITY_SCRUB;

	/*
	 * After mapping bioc with BTRFS_MAP_WRITE, parities have been sorted
	 * to the end position, so this search can start from the first parity
	 * stripe.
	 */
	for (i = rbio->nr_data; i < rbio->real_stripes; i++) {
		if (bioc->stripes[i].dev == scrub_dev) {
			rbio->scrubp = i;
			break;
		}
	}
	ASSERT(i < rbio->real_stripes);

	/* Now we just support the sectorsize equals to page size */
	ASSERT(fs_info->sectorsize == PAGE_SIZE);
	ASSERT(rbio->stripe_npages == stripe_nsectors);
	bitmap_copy(rbio->dbitmap, dbitmap, stripe_nsectors);

	/*
	 * We have already increased bio_counter when getting bioc, record it
	 * so we can free it at rbio_orig_end_io().
	 */
	rbio->generic_bio_cnt = 1;

	return rbio;
}

/* Used for both parity scrub and missing. */
void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page,
			    u64 logical)
{
	int stripe_offset;
	int index;

	ASSERT(logical >= rbio->bioc->raid_map[0]);
	ASSERT(logical + PAGE_SIZE <= rbio->bioc->raid_map[0] +
				rbio->stripe_len * rbio->nr_data);
	stripe_offset = (int)(logical - rbio->bioc->raid_map[0]);
	index = stripe_offset >> PAGE_SHIFT;
	rbio->bio_pages[index] = page;
}

/*
 * We only scrub the parity for the horizontal rows where we hold
 * correct data (marked in dbitmap), so we needn't allocate pages for
 * every row of every stripe.
 */
static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio)
{
	int i;
	int bit;
	int index;
	struct page *page;

	for_each_set_bit(bit, rbio->dbitmap, rbio->stripe_npages) {
		for (i = 0; i < rbio->real_stripes; i++) {
			index = i * rbio->stripe_npages + bit;
			if (rbio->stripe_pages[index])
				continue;

			page = alloc_page(GFP_NOFS);
			if (!page)
				return -ENOMEM;
			rbio->stripe_pages[index] = page;
		}
	}
	return 0;
}

static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
					 int need_check)
{
	struct btrfs_io_context *bioc = rbio->bioc;
	void **pointers = rbio->finish_pointers;
	unsigned long *pbitmap = rbio->finish_pbitmap;
	int nr_data = rbio->nr_data;
	int stripe;
	int pagenr;
	bool has_qstripe;
	struct page *p_page = NULL;
	struct page *q_page = NULL;
	struct bio_list bio_list;
	struct bio *bio;
	int is_replace = 0;
	int ret;

	bio_list_init(&bio_list);

	if (rbio->real_stripes - rbio->nr_data == 1)
		has_qstripe = false;
	else if (rbio->real_stripes - rbio->nr_data == 2)
		has_qstripe = true;
	else
		BUG();

	if (bioc->num_tgtdevs && bioc->tgtdev_map[rbio->scrubp]) {
		is_replace = 1;
		bitmap_copy(pbitmap, rbio->dbitmap, rbio->stripe_npages);
	}

	/*
	 * The scrubber is unlikely to use this area of the disk again
	 * soon, so don't cache the pages.
	 */
	clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);

	if (!need_check)
		goto writeback;

	p_page = alloc_page(GFP_NOFS);
	if (!p_page)
		goto cleanup;
	SetPageUptodate(p_page);

	if (has_qstripe) {
		/* RAID6, allocate and map temp space for the Q stripe */
		q_page = alloc_page(GFP_NOFS);
		if (!q_page) {
			__free_page(p_page);
			goto cleanup;
		}
		SetPageUptodate(q_page);
		pointers[rbio->real_stripes - 1] = kmap_local_page(q_page);
	}

	atomic_set(&rbio->error, 0);

	/* Map the parity stripe just once */
	pointers[nr_data] = kmap_local_page(p_page);

	for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) {
		struct page *p;
		void *parity;

		/* first collect one page from each data stripe */
		for (stripe = 0; stripe < nr_data; stripe++) {
			p = page_in_rbio(rbio, stripe, pagenr, 0);
			pointers[stripe] = kmap_local_page(p);
		}

		if (has_qstripe) {
			/* RAID6, call the library function to fill in our P/Q */
			raid6_call.gen_syndrome(rbio->real_stripes, PAGE_SIZE,
						pointers);
		} else {
			/* raid5 */
			copy_page(pointers[nr_data], pointers[0]);
			run_xor(pointers + 1, nr_data - 1, PAGE_SIZE);
		}

		/* Check scrubbing parity and repair it */
		p = rbio_stripe_page(rbio, rbio->scrubp, pagenr);
		parity = kmap_local_page(p);
		if (memcmp(parity, pointers[rbio->scrubp], PAGE_SIZE))
			copy_page(parity, pointers[rbio->scrubp]);
		else
			/* Parity is right, needn't writeback */
			bitmap_clear(rbio->dbitmap, pagenr, 1);
		kunmap_local(parity);

		for (stripe = nr_data - 1; stripe >= 0; stripe--)
			kunmap_local(pointers[stripe]);
	}

	kunmap_local(pointers[nr_data]);
	__free_page(p_page);
	if (q_page) {
		kunmap_local(pointers[rbio->real_stripes - 1]);
		__free_page(q_page);
	}

writeback:
	/*
	 * time to start writing.  Make bios for everything from the
	 * higher layers (the bio_list in our rbio) and our p/q.  Ignore
	 * everything else.
	 */
	for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) {
		struct page *page;

		page = rbio_stripe_page(rbio, rbio->scrubp, pagenr);
		ret = rbio_add_io_page(rbio, &bio_list,
			       page, rbio->scrubp, pagenr, rbio->stripe_len);
		if (ret)
			goto cleanup;
	}

	if (!is_replace)
		goto submit_write;

	for_each_set_bit(pagenr, pbitmap, rbio->stripe_npages) {
		struct page *page;

		page = rbio_stripe_page(rbio, rbio->scrubp, pagenr);
		ret = rbio_add_io_page(rbio, &bio_list, page,
				       bioc->tgtdev_map[rbio->scrubp],
				       pagenr, rbio->stripe_len);
		if (ret)
			goto cleanup;
	}

submit_write:
	nr_data = bio_list_size(&bio_list);
	if (!nr_data) {
		/* Every parity is right */
		rbio_orig_end_io(rbio, BLK_STS_OK);
		return;
	}

	atomic_set(&rbio->stripes_pending, nr_data);

	while ((bio = bio_list_pop(&bio_list))) {
		bio->bi_private = rbio;
		bio->bi_end_io = raid_write_end_io;
		bio->bi_opf = REQ_OP_WRITE;

		submit_bio(bio);
	}
	return;

cleanup:
	rbio_orig_end_io(rbio, BLK_STS_IOERR);

	while ((bio = bio_list_pop(&bio_list)))
		bio_put(bio);
}

static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe)
{
	if (stripe >= 0 && stripe < rbio->nr_data)
		return 1;
	return 0;
}

/*
 * While we're doing the parity check and repair, we could have errors
 * in reading pages off the disk.  This checks for errors and if we're
 * not able to read the page it'll trigger parity reconstruction.  The
 * parity scrub will be finished after we've reconstructed the failed
 * stripes
 */
static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio)
{
	if (atomic_read(&rbio->error) > rbio->bioc->max_errors)
		goto cleanup;

	if (rbio->faila >= 0 || rbio->failb >= 0) {
		int dfail = 0, failp = -1;

		if (is_data_stripe(rbio, rbio->faila))
			dfail++;
		else if (is_parity_stripe(rbio->faila))
			failp = rbio->faila;

		if (is_data_stripe(rbio, rbio->failb))
			dfail++;
		else if (is_parity_stripe(rbio->failb))
			failp = rbio->failb;

		/*
		 * The parity being scrubbed can't be used to repair
		 * data, so one less failure is repairable here.  (In
		 * the case of RAID5 we can't repair any data failure.)
		 */
		if (dfail > rbio->bioc->max_errors - 1)
			goto cleanup;

		/*
		 * If all the data is good, only the parity is wrong;
		 * just repair the parity.
		 */
		if (dfail == 0) {
			finish_parity_scrub(rbio, 0);
			return;
		}

		/*
		 * Here we have one bad data stripe and one bad parity
		 * on RAID6.  If the bad parity is the one being
		 * scrubbed, we can use the other parity to rebuild the
		 * data; otherwise the data stripe can't be repaired.
		 */
		if (failp != rbio->scrubp)
			goto cleanup;

		__raid_recover_end_io(rbio);
	} else {
		finish_parity_scrub(rbio, 1);
	}
	return;

cleanup:
	rbio_orig_end_io(rbio, BLK_STS_IOERR);
}

/*
 * end io for the read phase of the scrub.  All the bios here are
 * physical stripe bios we've read from the disk so we can recalculate
 * the parity of the stripe.
 *
 * This will usually kick off finish_parity_scrub once all the bios are
 * read in, but it may trigger parity reconstruction if we had any
 * errors along the way
 */
static void raid56_parity_scrub_end_io(struct bio *bio)
{
	struct btrfs_raid_bio *rbio = bio->bi_private;

	if (bio->bi_status)
		fail_bio_stripe(rbio, bio);
	else
		set_bio_pages_uptodate(bio);

	bio_put(bio);

	if (!atomic_dec_and_test(&rbio->stripes_pending))
		return;

	/*
	 * this will normally call finish_parity_scrub to start our
	 * write, but if there are any failed stripes we'll reconstruct
	 * from parity first
	 */
	validate_rbio_for_parity_scrub(rbio);
}

static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
{
	int bios_to_read = 0;
	struct bio_list bio_list;
	int ret;
	int pagenr;
	int stripe;
	struct bio *bio;

	bio_list_init(&bio_list);

	ret = alloc_rbio_essential_pages(rbio);
	if (ret)
		goto cleanup;

	atomic_set(&rbio->error, 0);
	/*
	 * build a list of bios to read all the missing parts of this
	 * stripe
	 */
	for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
		for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) {
			struct page *page;
			/*
			 * pages the bio list already holds don't need
			 * to be read from disk
			 */
			page = page_in_rbio(rbio, stripe, pagenr, 1);
			if (page)
				continue;

			page = rbio_stripe_page(rbio, stripe, pagenr);
			/*
			 * the bio cache may have handed us an uptodate
			 * page.  If so, be happy and use it
			 */
			if (PageUptodate(page))
				continue;

			ret = rbio_add_io_page(rbio, &bio_list, page,
				       stripe, pagenr, rbio->stripe_len);
			if (ret)
				goto cleanup;
		}
	}

	bios_to_read = bio_list_size(&bio_list);
	if (!bios_to_read) {
		/*
		 * this can happen if others have merged with
		 * us, it means there is nothing left to read.
		 * But if there are missing devices it may not be
		 * safe to do the full stripe write yet.
		 */
		goto finish;
	}

	/*
	 * the bioc may be freed once we submit the last bio.  Make sure
	 * not to touch it after that
	 */
	atomic_set(&rbio->stripes_pending, bios_to_read);
	while ((bio = bio_list_pop(&bio_list))) {
		bio->bi_private = rbio;
		bio->bi_end_io = raid56_parity_scrub_end_io;
		bio->bi_opf = REQ_OP_READ;

		btrfs_bio_wq_end_io(rbio->bioc->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);

		submit_bio(bio);
	}

	return;

cleanup:
	rbio_orig_end_io(rbio, BLK_STS_IOERR);

	while ((bio = bio_list_pop(&bio_list)))
		bio_put(bio);

	return;

finish:
	validate_rbio_for_parity_scrub(rbio);
}

static void scrub_parity_work(struct btrfs_work *work)
{
	struct btrfs_raid_bio *rbio;

	rbio = container_of(work, struct btrfs_raid_bio, work);
	raid56_parity_scrub_stripe(rbio);
}

void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio)
{
	if (!lock_stripe_add(rbio))
		start_async_work(rbio, scrub_parity_work);
}

/* The following code is used for dev replace of a missing RAID 5/6 device. */

struct btrfs_raid_bio *
raid56_alloc_missing_rbio(struct bio *bio, struct btrfs_io_context *bioc,
			  u64 length)
{
	struct btrfs_fs_info *fs_info = bioc->fs_info;
	struct btrfs_raid_bio *rbio;

	rbio = alloc_rbio(fs_info, bioc, length);
	if (IS_ERR(rbio))
		return NULL;

	rbio->operation = BTRFS_RBIO_REBUILD_MISSING;
	bio_list_add(&rbio->bio_list, bio);
	/*
	 * This is a special bio which is used to hold the completion
	 * handler and keep the rbio shaped like the other types
	 */
	ASSERT(!bio->bi_iter.bi_size);

	rbio->faila = find_logical_bio_stripe(rbio, bio);
	if (rbio->faila == -1) {
		BUG();
		kfree(rbio);
		return NULL;
	}

	/*
	 * When we get bioc, we have already increased bio_counter, record it
	 * so we can free it at rbio_orig_end_io()
	 */
	rbio->generic_bio_cnt = 1;

	return rbio;
}

void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio)
{
	if (!lock_stripe_add(rbio))
		start_async_work(rbio, read_rebuild_work);
}