// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2011, 2012 STRATO.  All rights reserved.
 */

#include <linux/blkdev.h>
#include <linux/ratelimit.h>
#include <linux/sched/mm.h>
#include <crypto/hash.h>
#include "ctree.h"
#include "discard.h"
#include "volumes.h"
#include "disk-io.h"
#include "ordered-data.h"
#include "transaction.h"
#include "backref.h"
#include "extent_io.h"
#include "dev-replace.h"
#include "check-integrity.h"
#include "rcu-string.h"
#include "raid56.h"
#include "block-group.h"
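/*
 * This is only the first step towards a full-featured scrub. It reads all
 * extents and the super block and verifies the checksums. In case a bad
 * checksum is found or the extent cannot be read, good data will be written
 * back if any can be found.
 *
 * Future enhancements:
 *  - In case an unrepairable extent is encountered, track which files are
 *    affected and report them
 *  - track and record media errors, throw out bad devices
 *  - add a mode to also read unallocated space
 */
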
struct scrub_block;
struct scrub_ctx;
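/*
 * The following three values only influence the performance.
 *
 * The last one configures the number of parallel and outstanding I/O
 * operations. The first two values configure an upper limit for the number
 * of (dynamically allocated) pages that are added to a bio.
 */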
#define SCRUB_PAGES_PER_RD_BIO	32	/* 128k per bio */
#define SCRUB_PAGES_PER_WR_BIO	32	/* 128k per bio */
#define SCRUB_BIOS_PER_SCTX	64	/* 8MB per device in flight */
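/*
 * The following value times PAGE_SIZE needs to be large enough to match the
 * largest node/leaf/sector size that shall be supported.
 * Values larger than BTRFS_STRIPE_LEN are not supported.
 */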
#define SCRUB_MAX_PAGES_PER_BLOCK	16	/* 64k per node/leaf/sector */

struct scrub_recover {
	refcount_t		refs;
	struct btrfs_bio	*bbio;
	u64			map_length;
};

struct scrub_page {
	struct scrub_block	*sblock;
	struct page		*page;
	struct btrfs_device	*dev;
	struct list_head	list;
	u64			flags;  /* extent flags */
	u64			generation;
	u64			logical;
	u64			physical;
	u64			physical_for_dev_replace;
	atomic_t		refs;
	struct {
		unsigned int	mirror_num:8;
		unsigned int	have_csum:1;
		unsigned int	io_error:1;
	};
	u8			csum[BTRFS_CSUM_SIZE];

	struct scrub_recover	*recover;
};

struct scrub_bio {
	int			index;
	struct scrub_ctx	*sctx;
	struct btrfs_device	*dev;
	struct bio		*bio;
	blk_status_t		status;
	u64			logical;
	u64			physical;
#if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO
	struct scrub_page	*pagev[SCRUB_PAGES_PER_WR_BIO];
#else
	struct scrub_page	*pagev[SCRUB_PAGES_PER_RD_BIO];
#endif
	int			page_count;
	int			next_free;
	struct btrfs_work	work;
};

struct scrub_block {
	struct scrub_page	*pagev[SCRUB_MAX_PAGES_PER_BLOCK];
	int			page_count;
	atomic_t		outstanding_pages;
	refcount_t		refs; /* free mem on transition to zero */
	struct scrub_ctx	*sctx;
	struct scrub_parity	*sparity;
	struct {
		unsigned int	header_error:1;
		unsigned int	checksum_error:1;
		unsigned int	no_io_error_seen:1;
		unsigned int	generation_error:1; /* also sets header_error */

		/* The following is for the data used to check parity */
		/* It is for the data with checksum */
		unsigned int	data_corrected:1;
	};
	struct btrfs_work	work;
};
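/* Used for the chunks with parity stripe such RAID5/6 */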
struct scrub_parity {
	struct scrub_ctx	*sctx;

	struct btrfs_device	*scrub_dev;

	u64			logic_start;

	u64			logic_end;

	int			nsectors;

	u64			stripe_len;

	refcount_t		refs;

	struct list_head	spages;

	/* Work of parity check and repair */
	struct btrfs_work	work;

	/* Mark the parity blocks which have data */
	unsigned long		*dbitmap;

	/*
	 * Mark the parity blocks which have data, but errors happen when
	 * read data or check data
	 */
	unsigned long		*ebitmap;

	unsigned long		bitmap[];
};

struct scrub_ctx {
	struct scrub_bio	*bios[SCRUB_BIOS_PER_SCTX];
	struct btrfs_fs_info	*fs_info;
	int			first_free;
	int			curr;
	atomic_t		bios_in_flight;
	atomic_t		workers_pending;
	spinlock_t		list_lock;
	wait_queue_head_t	list_wait;
	u16			csum_size;
	struct list_head	csum_list;
	atomic_t		cancel_req;
	int			readonly;
	int			pages_per_rd_bio;

	int			is_dev_replace;

	struct scrub_bio	*wr_curr_bio;
	struct mutex		wr_lock;
	int			pages_per_wr_bio; /* <= SCRUB_PAGES_PER_WR_BIO */
	struct btrfs_device	*wr_tgtdev;
	bool			flush_all_writes;

	/*
	 * statistics
	 */
	struct btrfs_scrub_progress stat;
	spinlock_t		stat_lock;

	/*
	 * Use a ref counter to avoid use-after-free issues. Scrub workers
	 * decrement bios_in_flight and workers_pending and then do a wakeup
	 * on the list_wait wait queue. We must ensure the main scrub task
	 * doesn't free the scrub context before or while the workers are
	 * doing the wakeup() call.
	 */
	refcount_t		refs;
};

struct scrub_warning {
	struct btrfs_path	*path;
	u64			extent_item_size;
	const char		*errstr;
	u64			physical;
	u64			logical;
	struct btrfs_device	*dev;
};

struct full_stripe_lock {
	struct rb_node		node;
	u64			logical;
	u64			refs;
	struct mutex		mutex;
};

static void scrub_pending_bio_inc(struct scrub_ctx *sctx);
static void scrub_pending_bio_dec(struct scrub_ctx *sctx);
static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
				     struct scrub_block *sblocks_for_recheck);
static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
				struct scrub_block *sblock,
				int retry_failed_mirror);
static void scrub_recheck_block_checksum(struct scrub_block *sblock);
static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
					     struct scrub_block *sblock_good);
static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
					    struct scrub_block *sblock_good,
					    int page_num, int force_write);
static void scrub_write_block_to_dev_replace(struct scrub_block *sblock);
static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
					   int page_num);
static int scrub_checksum_data(struct scrub_block *sblock);
static int scrub_checksum_tree_block(struct scrub_block *sblock);
static int scrub_checksum_super(struct scrub_block *sblock);
static void scrub_block_get(struct scrub_block *sblock);
static void scrub_block_put(struct scrub_block *sblock);
static void scrub_page_get(struct scrub_page *spage);
static void scrub_page_put(struct scrub_page *spage);
static void scrub_parity_get(struct scrub_parity *sparity);
static void scrub_parity_put(struct scrub_parity *sparity);
static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
				    struct scrub_page *spage);
static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
		       u64 physical, struct btrfs_device *dev, u64 flags,
		       u64 gen, int mirror_num, u8 *csum, int force,
		       u64 physical_for_dev_replace);
static void scrub_bio_end_io(struct bio *bio);
static void scrub_bio_end_io_worker(struct btrfs_work *work);
static void scrub_block_complete(struct scrub_block *sblock);
static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
			       u64 extent_logical, u64 extent_len,
			       u64 *extent_physical,
			       struct btrfs_device **extent_dev,
			       int *extent_mirror_num);
static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
				    struct scrub_page *spage);
static void scrub_wr_submit(struct scrub_ctx *sctx);
static void scrub_wr_bio_end_io(struct bio *bio);
static void scrub_wr_bio_end_io_worker(struct btrfs_work *work);
static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
static void scrub_put_ctx(struct scrub_ctx *sctx);

static inline int scrub_is_page_on_raid56(struct scrub_page *page)
{
	return page->recover &&
	       (page->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK);
}

static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
{
	refcount_inc(&sctx->refs);
	atomic_inc(&sctx->bios_in_flight);
}

static void scrub_pending_bio_dec(struct scrub_ctx *sctx)
{
	atomic_dec(&sctx->bios_in_flight);
	wake_up(&sctx->list_wait);
	scrub_put_ctx(sctx);
}

static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
{
	while (atomic_read(&fs_info->scrub_pause_req)) {
		mutex_unlock(&fs_info->scrub_lock);
		wait_event(fs_info->scrub_pause_wait,
			   atomic_read(&fs_info->scrub_pause_req) == 0);
		mutex_lock(&fs_info->scrub_lock);
	}
}

static void scrub_pause_on(struct btrfs_fs_info *fs_info)
{
	atomic_inc(&fs_info->scrubs_paused);
	wake_up(&fs_info->scrub_pause_wait);
}

static void scrub_pause_off(struct btrfs_fs_info *fs_info)
{
	mutex_lock(&fs_info->scrub_lock);
	__scrub_blocked_if_needed(fs_info);
	atomic_dec(&fs_info->scrubs_paused);
	mutex_unlock(&fs_info->scrub_lock);

	wake_up(&fs_info->scrub_pause_wait);
}

static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
{
	scrub_pause_on(fs_info);
	scrub_pause_off(fs_info);
}
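/*
 * Insert new full stripe lock into full stripe locks tree
 *
 * Return pointer to existing or newly inserted full_stripe_lock structure if
 * everything works well.
 * Return ERR_PTR(-ENOMEM) if we failed to allocate memory
 *
 * NOTE: caller must hold full_stripe_locks_root->lock before calling this
 * function.
 */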
static struct full_stripe_lock *insert_full_stripe_lock(
		struct btrfs_full_stripe_locks_tree *locks_root,
		u64 fstripe_logical)
{
	struct rb_node **p;
	struct rb_node *parent = NULL;
	struct full_stripe_lock *entry;
	struct full_stripe_lock *ret;

	lockdep_assert_held(&locks_root->lock);

	p = &locks_root->root.rb_node;
	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct full_stripe_lock, node);
		if (fstripe_logical < entry->logical) {
			p = &(*p)->rb_left;
		} else if (fstripe_logical > entry->logical) {
			p = &(*p)->rb_right;
		} else {
			entry->refs++;
			return entry;
		}
	}

	/*
	 * Insert new lock.
	 */
	ret = kmalloc(sizeof(*ret), GFP_KERNEL);
	if (!ret)
		return ERR_PTR(-ENOMEM);
	ret->logical = fstripe_logical;
	ret->refs = 1;
	mutex_init(&ret->mutex);

	rb_link_node(&ret->node, parent, p);
	rb_insert_color(&ret->node, &locks_root->root);
	return ret;
}
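/*
 * Search for a full stripe lock of a block group
 *
 * Return pointer to existing full stripe lock if found
 * Return NULL if not found
 */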
static struct full_stripe_lock *search_full_stripe_lock(
		struct btrfs_full_stripe_locks_tree *locks_root,
		u64 fstripe_logical)
{
	struct rb_node *node;
	struct full_stripe_lock *entry;

	lockdep_assert_held(&locks_root->lock);

	node = locks_root->root.rb_node;
	while (node) {
		entry = rb_entry(node, struct full_stripe_lock, node);
		if (fstripe_logical < entry->logical)
			node = node->rb_left;
		else if (fstripe_logical > entry->logical)
			node = node->rb_right;
		else
			return entry;
	}
	return NULL;
}
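/*
 * Helper to get full stripe logical from a normal bytenr.
 *
 * Caller must ensure @cache is a RAID56 block group.
 */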
static u64 get_full_stripe_logical(struct btrfs_block_group *cache, u64 bytenr)
{
	u64 ret;

	/*
	 * Due to chunk item size limit, full stripe length should not be
	 * larger than U32_MAX. Just a sanity check here.
	 */
	WARN_ON_ONCE(cache->full_stripe_len >= U32_MAX);

	/*
	 * round_down() can only handle power of 2, while RAID56 full stripe
	 * length can be 64KiB * n, so we need to manually round down.
	 */
	ret = div64_u64(bytenr - cache->start, cache->full_stripe_len) *
			cache->full_stripe_len + cache->start;
	return ret;
}
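/*
 * Lock a full stripe to avoid concurrency of recovery and read
 *
 * It's only used for profiles with parities (RAID5/6), for other profiles it
 * does nothing.
 *
 * Return 0 if we locked full stripe covering @bytenr, with a mutex held.
 * So caller must call unlock_full_stripe() at the same context.
 *
 * Return <0 if encounters error.
 */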
static int lock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr,
			    bool *locked_ret)
{
	struct btrfs_block_group *bg_cache;
	struct btrfs_full_stripe_locks_tree *locks_root;
	struct full_stripe_lock *existing;
	u64 fstripe_start;
	int ret = 0;

	*locked_ret = false;
	bg_cache = btrfs_lookup_block_group(fs_info, bytenr);
	if (!bg_cache) {
		ASSERT(0);
		return -ENOENT;
	}

	/* Profiles not based on parity don't need full stripe lock */
	if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK))
		goto out;
	locks_root = &bg_cache->full_stripe_locks_root;

	fstripe_start = get_full_stripe_logical(bg_cache, bytenr);

	/* Now insert the full stripe lock */
	mutex_lock(&locks_root->lock);
	existing = insert_full_stripe_lock(locks_root, fstripe_start);
	mutex_unlock(&locks_root->lock);
	if (IS_ERR(existing)) {
		ret = PTR_ERR(existing);
		goto out;
	}
	mutex_lock(&existing->mutex);
	*locked_ret = true;
out:
	btrfs_put_block_group(bg_cache);
	return ret;
}
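/*
 * Unlock a full stripe.
 *
 * NOTE: Caller must ensure it's the same context calling corresponding
 * lock_full_stripe().
 *
 * Return 0 if we unlock full stripe without problem.
 * Return <0 for error
 */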
static int unlock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr,
			      bool locked)
{
	struct btrfs_block_group *bg_cache;
	struct btrfs_full_stripe_locks_tree *locks_root;
	struct full_stripe_lock *fstripe_lock;
	u64 fstripe_start;
	bool freeit = false;
	int ret = 0;

	/* If we didn't acquire full stripe lock, no need to continue */
	if (!locked)
		return 0;

	bg_cache = btrfs_lookup_block_group(fs_info, bytenr);
	if (!bg_cache) {
		ASSERT(0);
		return -ENOENT;
	}
	if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK))
		goto out;

	locks_root = &bg_cache->full_stripe_locks_root;
	fstripe_start = get_full_stripe_logical(bg_cache, bytenr);

	mutex_lock(&locks_root->lock);
	fstripe_lock = search_full_stripe_lock(locks_root, fstripe_start);
	/* Unpaired unlock_full_stripe() detected */
	if (!fstripe_lock) {
		WARN_ON(1);
		ret = -ENOENT;
		mutex_unlock(&locks_root->lock);
		goto out;
	}

	if (fstripe_lock->refs == 0) {
		WARN_ON(1);
		btrfs_warn(fs_info, "full stripe lock at %llu refcount underflow",
			fstripe_lock->logical);
	} else {
		fstripe_lock->refs--;
	}

	if (fstripe_lock->refs == 0) {
		rb_erase(&fstripe_lock->node, &locks_root->root);
		freeit = true;
	}
	mutex_unlock(&locks_root->lock);

	mutex_unlock(&fstripe_lock->mutex);
	if (freeit)
		kfree(fstripe_lock);
out:
	btrfs_put_block_group(bg_cache);
	return ret;
}

static void scrub_free_csums(struct scrub_ctx *sctx)
{
	while (!list_empty(&sctx->csum_list)) {
		struct btrfs_ordered_sum *sum;
		sum = list_first_entry(&sctx->csum_list,
				       struct btrfs_ordered_sum, list);
		list_del(&sum->list);
		kfree(sum);
	}
}

static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
{
	int i;

	if (!sctx)
		return;

	/* this can happen when scrub is cancelled */
	if (sctx->curr != -1) {
		struct scrub_bio *sbio = sctx->bios[sctx->curr];

		for (i = 0; i < sbio->page_count; i++) {
			WARN_ON(!sbio->pagev[i]->page);
			scrub_block_put(sbio->pagev[i]->sblock);
		}
		bio_put(sbio->bio);
	}

	for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
		struct scrub_bio *sbio = sctx->bios[i];

		if (!sbio)
			break;
		kfree(sbio);
	}

	kfree(sctx->wr_curr_bio);
	scrub_free_csums(sctx);
	kfree(sctx);
}

static void scrub_put_ctx(struct scrub_ctx *sctx)
{
	if (refcount_dec_and_test(&sctx->refs))
		scrub_free_ctx(sctx);
}

static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
		struct btrfs_fs_info *fs_info, int is_dev_replace)
{
	struct scrub_ctx *sctx;
	int i;

	sctx = kzalloc(sizeof(*sctx), GFP_KERNEL);
	if (!sctx)
		goto nomem;
	refcount_set(&sctx->refs, 1);
	sctx->is_dev_replace = is_dev_replace;
	sctx->pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO;
	sctx->curr = -1;
	sctx->fs_info = fs_info;
	INIT_LIST_HEAD(&sctx->csum_list);
	for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
		struct scrub_bio *sbio;

		sbio = kzalloc(sizeof(*sbio), GFP_KERNEL);
		if (!sbio)
			goto nomem;
		sctx->bios[i] = sbio;

		sbio->index = i;
		sbio->sctx = sctx;
		sbio->page_count = 0;
		btrfs_init_work(&sbio->work, scrub_bio_end_io_worker, NULL,
				NULL);

		if (i != SCRUB_BIOS_PER_SCTX - 1)
			sctx->bios[i]->next_free = i + 1;
		else
			sctx->bios[i]->next_free = -1;
	}
	sctx->first_free = 0;
	atomic_set(&sctx->bios_in_flight, 0);
	atomic_set(&sctx->workers_pending, 0);
	atomic_set(&sctx->cancel_req, 0);
	sctx->csum_size = btrfs_super_csum_size(fs_info->super_copy);

	spin_lock_init(&sctx->list_lock);
	spin_lock_init(&sctx->stat_lock);
	init_waitqueue_head(&sctx->list_wait);

	WARN_ON(sctx->wr_curr_bio != NULL);
	mutex_init(&sctx->wr_lock);
	sctx->wr_curr_bio = NULL;
	if (is_dev_replace) {
		WARN_ON(!fs_info->dev_replace.tgtdev);
		sctx->pages_per_wr_bio = SCRUB_PAGES_PER_WR_BIO;
		sctx->wr_tgtdev = fs_info->dev_replace.tgtdev;
		sctx->flush_all_writes = false;
	}

	return sctx;

nomem:
	scrub_free_ctx(sctx);
	return ERR_PTR(-ENOMEM);
}

static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
				     void *warn_ctx)
{
	u64 isize;
	u32 nlink;
	int ret;
	int i;
	unsigned nofs_flag;
	struct extent_buffer *eb;
	struct btrfs_inode_item *inode_item;
	struct scrub_warning *swarn = warn_ctx;
	struct btrfs_fs_info *fs_info = swarn->dev->fs_info;
	struct inode_fs_paths *ipath = NULL;
	struct btrfs_root *local_root;
	struct btrfs_key key;

	local_root = btrfs_get_fs_root(fs_info, root, true);
	if (IS_ERR(local_root)) {
		ret = PTR_ERR(local_root);
		goto err;
	}

	/*
	 * this makes the path point to (inum INODE_ITEM ioff)
	 */
	key.objectid = inum;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, local_root, &key, swarn->path, 0, 0);
	if (ret) {
		btrfs_put_root(local_root);
		btrfs_release_path(swarn->path);
		goto err;
	}

	eb = swarn->path->nodes[0];
	inode_item = btrfs_item_ptr(eb, swarn->path->slots[0],
					struct btrfs_inode_item);
	isize = btrfs_inode_size(eb, inode_item);
	nlink = btrfs_inode_nlink(eb, inode_item);
	btrfs_release_path(swarn->path);

	/*
	 * init_ipath might indirectly call vmalloc, or use GFP_KERNEL. Scrub
	 * uses GFP_NOFS in this context, so we keep it consistent but it does
	 * not seem to be strictly necessary.
	 */
	nofs_flag = memalloc_nofs_save();
	ipath = init_ipath(4096, local_root, swarn->path);
	memalloc_nofs_restore(nofs_flag);
	if (IS_ERR(ipath)) {
		btrfs_put_root(local_root);
		ret = PTR_ERR(ipath);
		ipath = NULL;
		goto err;
	}
	ret = paths_from_inode(inum, ipath);

	if (ret < 0)
		goto err;

	/*
	 * we deliberately ignore the bit ipath might have been too small to
	 * hold all of the paths here
	 */
	for (i = 0; i < ipath->fspath->elem_cnt; ++i)
		btrfs_warn_in_rcu(fs_info,
"%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu, length %llu, links %u (path: %s)",
				  swarn->errstr, swarn->logical,
				  rcu_str_deref(swarn->dev->name),
				  swarn->physical,
				  root, inum, offset,
				  min(isize - offset, (u64)PAGE_SIZE), nlink,
				  (char *)(unsigned long)ipath->fspath->val[i]);

	btrfs_put_root(local_root);
	free_ipath(ipath);
	return 0;

err:
	btrfs_warn_in_rcu(fs_info,
			  "%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu: path resolving failed with ret=%d",
			  swarn->errstr, swarn->logical,
			  rcu_str_deref(swarn->dev->name),
			  swarn->physical,
			  root, inum, offset, ret);

	free_ipath(ipath);
	return 0;
}

static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
{
	struct btrfs_device *dev;
	struct btrfs_fs_info *fs_info;
	struct btrfs_path *path;
	struct btrfs_key found_key;
	struct extent_buffer *eb;
	struct btrfs_extent_item *ei;
	struct scrub_warning swarn;
	unsigned long ptr = 0;
	u64 extent_item_pos;
	u64 flags = 0;
	u64 ref_root;
	u32 item_size;
	u8 ref_level = 0;
	int ret;

	WARN_ON(sblock->page_count < 1);
	dev = sblock->pagev[0]->dev;
	fs_info = sblock->sctx->fs_info;

	path = btrfs_alloc_path();
	if (!path)
		return;

	swarn.physical = sblock->pagev[0]->physical;
	swarn.logical = sblock->pagev[0]->logical;
	swarn.errstr = errstr;
	swarn.dev = NULL;

	ret = extent_from_logical(fs_info, swarn.logical, path, &found_key,
				  &flags);
	if (ret < 0)
		goto out;

	extent_item_pos = swarn.logical - found_key.objectid;
	swarn.extent_item_size = found_key.offset;

	eb = path->nodes[0];
	ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
	item_size = btrfs_item_size_nr(eb, path->slots[0]);

	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		do {
			ret = tree_backref_for_extent(&ptr, eb, &found_key, ei,
						      item_size, &ref_root,
						      &ref_level);
			btrfs_warn_in_rcu(fs_info,
"%s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu",
				errstr, swarn.logical,
				rcu_str_deref(dev->name),
				swarn.physical,
				ref_level ? "node" : "leaf",
				ret < 0 ? -1 : ref_level,
				ret < 0 ? -1 : ref_root);
		} while (ret != 1);
		btrfs_release_path(path);
	} else {
		btrfs_release_path(path);
		swarn.path = path;
		swarn.dev = dev;
		iterate_extent_inodes(fs_info, found_key.objectid,
				      extent_item_pos, 1,
				      scrub_print_warning_inode, &swarn, false);
	}

out:
	btrfs_free_path(path);
}

static inline void scrub_get_recover(struct scrub_recover *recover)
{
	refcount_inc(&recover->refs);
}

static inline void scrub_put_recover(struct btrfs_fs_info *fs_info,
				     struct scrub_recover *recover)
{
	if (refcount_dec_and_test(&recover->refs)) {
		btrfs_bio_counter_dec(fs_info);
		btrfs_put_bbio(recover->bbio);
		kfree(recover);
	}
}
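/*
 * scrub_handle_errored_block gets called when either verification of the
 * pages failed or the bio failed to read, e.g. with EIO. In the latter
 * case, this function handles all pages in the bio, even though only one
 * may be bad.
 * The goal of this function is to repair the errored block by using the
 * contents of one of the mirrors.
 */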
static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
{
	struct scrub_ctx *sctx = sblock_to_check->sctx;
	struct btrfs_device *dev;
	struct btrfs_fs_info *fs_info;
	u64 logical;
	unsigned int failed_mirror_index;
	unsigned int is_metadata;
	unsigned int have_csum;
	struct scrub_block *sblocks_for_recheck; /* holds one for each mirror */
	struct scrub_block *sblock_bad;
	int ret;
	int mirror_index;
	int page_num;
	int success;
	bool full_stripe_locked;
	unsigned int nofs_flag;
	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);

	BUG_ON(sblock_to_check->page_count < 1);
	fs_info = sctx->fs_info;
	if (sblock_to_check->pagev[0]->flags & BTRFS_EXTENT_FLAG_SUPER) {
		/*
		 * if we find an error in a super block, we just report it.
		 * They will get written with the next transaction commit
		 * anyway
		 */
		spin_lock(&sctx->stat_lock);
		++sctx->stat.super_errors;
		spin_unlock(&sctx->stat_lock);
		return 0;
	}
	logical = sblock_to_check->pagev[0]->logical;
	BUG_ON(sblock_to_check->pagev[0]->mirror_num < 1);
	failed_mirror_index = sblock_to_check->pagev[0]->mirror_num - 1;
	is_metadata = !(sblock_to_check->pagev[0]->flags &
			BTRFS_EXTENT_FLAG_DATA);
	have_csum = sblock_to_check->pagev[0]->have_csum;
	dev = sblock_to_check->pagev[0]->dev;

	/*
	 * We must use GFP_NOFS because the scrub task might be waiting for a
	 * worker task executing this function and in turn a transaction
	 * commit might be waiting the scrub task to pause (which needs to
	 * wait for all the worker tasks to complete before pausing).
	 * We do allocations in the workers through insert_full_stripe_lock()
	 * and scrub_add_page_to_wr_bio(), which happens down the call chain of
	 * this function.
	 */
	nofs_flag = memalloc_nofs_save();
	/*
	 * For RAID5/6, race can happen for a different device scrub thread.
	 * For data corruption, Parity and Data threads will both try
	 * to recovery the data.
	 * Race can lead to doubly added csum error, or even unrecoverable
	 * error.
	 */
	ret = lock_full_stripe(fs_info, logical, &full_stripe_locked);
	if (ret < 0) {
		memalloc_nofs_restore(nofs_flag);
		spin_lock(&sctx->stat_lock);
		if (ret == -ENOMEM)
			sctx->stat.malloc_errors++;
		sctx->stat.read_errors++;
		sctx->stat.uncorrectable_errors++;
		spin_unlock(&sctx->stat_lock);
		return ret;
	}

	/*
	 * read all mirrors one after the other. This includes to
	 * re-read the extent or metadata block that failed (that was
	 * the cause that this fixup code is called) another time,
	 * page by page this time in order to know which pages
	 * caused I/O errors and which ones are good (for all mirrors).
	 * It is the goal to handle the situation when more than one
	 * mirror contains I/O errors, but the errors do not
	 * overlap, i.e. the data can be repaired by selecting the
	 * pages from those mirrors without I/O error on the
	 * particular pages. One example (with blocks >= 2 * PAGE_SIZE)
	 * would be that mirror #1 has an I/O error on the first page,
	 * the second page is good, and mirror #2 has an I/O error on
	 * the second page, but the first page is good.
	 * Then the first page of the first mirror can be repaired by
	 * taking the first page of the second mirror, and the
	 * second page of the second mirror can be repaired by
	 * copying the contents of the 2nd page of the 1st mirror.
	 * One more note: if the pages of one mirror contain I/O
	 * errors, the checksum cannot be verified. In order to get
	 * the best data for repairing, the first attempt is to find
	 * a mirror without I/O errors and with a validated checksum.
	 * Only if this is not possible, the pages are picked from
	 * mirrors with I/O errors without considering the checksum.
	 * If the latter is the case, at the end, the checksum of the
	 * repaired area is verified in order to correctly maintain
	 * the statistics.
	 */
	sblocks_for_recheck = kcalloc(BTRFS_MAX_MIRRORS,
				      sizeof(*sblocks_for_recheck), GFP_KERNEL);
	if (!sblocks_for_recheck) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.malloc_errors++;
		sctx->stat.read_errors++;
		sctx->stat.uncorrectable_errors++;
		spin_unlock(&sctx->stat_lock);
		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
		goto out;
	}

	/* setup the context, map the logical blocks and alloc the pages */
	ret = scrub_setup_recheck_block(sblock_to_check, sblocks_for_recheck);
	if (ret) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.read_errors++;
		sctx->stat.uncorrectable_errors++;
		spin_unlock(&sctx->stat_lock);
		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
		goto out;
	}
	BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS);
	sblock_bad = sblocks_for_recheck + failed_mirror_index;

	/* build and submit the bios for the failed mirror, check checksums */
	scrub_recheck_block(fs_info, sblock_bad, 1);

	if (!sblock_bad->header_error && !sblock_bad->checksum_error &&
	    sblock_bad->no_io_error_seen) {
		/*
		 * the error disappeared after reading page by page, or
		 * the area was part of a huge bio and other parts of the
		 * bio caused I/O errors, or the block layer merged several
		 * read requests into one and the error is caused by a
		 * different bio (usually one of the two latter cases is
		 * the cause)
		 */
		spin_lock(&sctx->stat_lock);
		sctx->stat.unverified_errors++;
		sblock_to_check->data_corrected = 1;
		spin_unlock(&sctx->stat_lock);

		if (sctx->is_dev_replace)
			scrub_write_block_to_dev_replace(sblock_bad);
		goto out;
	}

	if (!sblock_bad->no_io_error_seen) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.read_errors++;
		spin_unlock(&sctx->stat_lock);
		if (__ratelimit(&rs))
			scrub_print_warning("i/o error", sblock_to_check);
		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
	} else if (sblock_bad->checksum_error) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.csum_errors++;
		spin_unlock(&sctx->stat_lock);
		if (__ratelimit(&rs))
			scrub_print_warning("checksum error", sblock_to_check);
		btrfs_dev_stat_inc_and_print(dev,
					     BTRFS_DEV_STAT_CORRUPTION_ERRS);
	} else if (sblock_bad->header_error) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.verify_errors++;
		spin_unlock(&sctx->stat_lock);
		if (__ratelimit(&rs))
			scrub_print_warning("checksum/header error",
					    sblock_to_check);
		if (sblock_bad->generation_error)
			btrfs_dev_stat_inc_and_print(dev,
				BTRFS_DEV_STAT_GENERATION_ERRS);
		else
			btrfs_dev_stat_inc_and_print(dev,
				BTRFS_DEV_STAT_CORRUPTION_ERRS);
	}

	if (sctx->readonly) {
		ASSERT(!sctx->is_dev_replace);
		goto out;
	}

	/*
	 * now build and submit the bios for the other mirrors, check
	 * checksums.
	 * First try to pick the mirror which is completely without I/O
	 * errors and also does not have a checksum error.
	 * If one is found, and if a checksum is present, the full block
	 * that is known to contain an error is rewritten. Afterwards a
	 * block that is supposed to have the same contents as the one
	 * read from the first mirror can be repaired by taking the first
	 * page of the second mirror.
	 *
	 * If this is not possible, the pages without I/O errors are
	 * picked (possibly from different mirrors) to rewrite each bad
	 * page of the block that is known to contain an error.
	 */
	for (mirror_index = 0; ;mirror_index++) {
		struct scrub_block *sblock_other;

		if (mirror_index == failed_mirror_index)
			continue;

		/* raid56's mirror can be more than BTRFS_MAX_MIRRORS */
		if (!scrub_is_page_on_raid56(sblock_bad->pagev[0])) {
			if (mirror_index >= BTRFS_MAX_MIRRORS)
				break;
			if (!sblocks_for_recheck[mirror_index].page_count)
				break;

			sblock_other = sblocks_for_recheck + mirror_index;
		} else {
			struct scrub_recover *r = sblock_bad->pagev[0]->recover;
			int max_allowed = r->bbio->num_stripes -
						r->bbio->num_tgtdevs;

			if (mirror_index >= max_allowed)
				break;
			if (!sblocks_for_recheck[1].page_count)
				break;

			ASSERT(failed_mirror_index == 0);
			sblock_other = sblocks_for_recheck + 1;
			sblock_other->pagev[0]->mirror_num = 1 + mirror_index;
		}

		/* build and submit the bios, check checksums */
		scrub_recheck_block(fs_info, sblock_other, 0);

		if (!sblock_other->header_error &&
		    !sblock_other->checksum_error &&
		    sblock_other->no_io_error_seen) {
			if (sctx->is_dev_replace) {
				scrub_write_block_to_dev_replace(sblock_other);
				goto corrected_error;
			} else {
				ret = scrub_repair_block_from_good_copy(
						sblock_bad, sblock_other);
				if (!ret)
					goto corrected_error;
			}
		}
	}

	if (sblock_bad->no_io_error_seen && !sctx->is_dev_replace)
		goto did_not_correct_error;

	/*
	 * In case of I/O errors in the area that is supposed to be
	 * repaired, continue by picking good copies of those pages.
	 * Select the good pages from mirrors to rewrite bad pages from
	 * the area to fix. Afterwards verify the checksum of the block
	 * that is supposed to be repaired. This verification step is
	 * only done for the purpose of statistic counting and for the
	 * final scrub report, whether errors remain.
	 * A perfect algorithm could make use of the checksum and try
	 * all possible combinations of pages from the different mirrors
	 * until the checksum verification succeeds. For example, when
	 * the 2nd page of mirror #1 faces I/O errors, and the 2nd page
	 * of mirror #2 is readable but the final checksum test fails,
	 * then the 2nd page of mirror #3 could be tried, whether now
	 * the page with a validated checksum can be found and the
	 * selection of pages is done again.
	 * In this case, a good page is found and the repair is done,
	 * although the block was not completely repaired by a single
	 * mirror. Of course, the option to use the pages from mirrors
	 * with I/O errors is not used, when a mirror without I/O errors
	 * and with a validated checksum exists.
	 */
	success = 1;
	for (page_num = 0; page_num < sblock_bad->page_count;
	     page_num++) {
		struct scrub_page *page_bad = sblock_bad->pagev[page_num];
		struct scrub_block *sblock_other = NULL;

		/* skip no-io-error page in scrub */
		if (!page_bad->io_error && !sctx->is_dev_replace)
			continue;

		if (scrub_is_page_on_raid56(sblock_bad->pagev[0])) {
			/*
			 * In case of dev replace, if raid56 rebuild process
			 * didn't work out correct data, then copy the content
			 * in sblock_bad to make sure target device is identical
			 * to source device, instead of writing garbage data in
			 * sblock_other to target device.
			 */
			sblock_other = NULL;
		} else if (page_bad->io_error) {
			/* try to find no-io-error page in mirrors */
			for (mirror_index = 0;
			     mirror_index < BTRFS_MAX_MIRRORS &&
			     sblocks_for_recheck[mirror_index].page_count > 0;
			     mirror_index++) {
				if (!sblocks_for_recheck[mirror_index].
				    pagev[page_num]->io_error) {
					sblock_other = sblocks_for_recheck +
						       mirror_index;
					break;
				}
			}
			if (!sblock_other)
				success = 0;
		}

		if (sctx->is_dev_replace) {
			/*
			 * did not find a mirror to fetch the page
			 * from. scrub_write_page_to_dev_replace()
			 * handles this case (page->io_error), by
			 * filling the block with zeros before
			 * submitting the write request
			 */
			if (!sblock_other)
				sblock_other = sblock_bad;

			if (scrub_write_page_to_dev_replace(sblock_other,
							    page_num) != 0) {
				atomic64_inc(
					&fs_info->dev_replace.num_write_errors);
				success = 0;
			}
		} else if (sblock_other) {
			ret = scrub_repair_page_from_good_copy(sblock_bad,
							       sblock_other,
							       page_num, 0);
			if (0 == ret)
				page_bad->io_error = 0;
			else
				success = 0;
		}
	}

	if (success && !sctx->is_dev_replace) {
		if (is_metadata || have_csum) {
			/*
			 * need to verify the checksum now that all
			 * sectors on disk are repaired (the write
			 * request for data to be repaired is on its way).
			 * Just be lazy and use scrub_recheck_block()
			 * which re-reads the data before the checksum
			 * is verified, but most likely the data comes out
			 * of the page cache.
			 */
			scrub_recheck_block(fs_info, sblock_bad, 1);
			if (!sblock_bad->header_error &&
			    !sblock_bad->checksum_error &&
			    sblock_bad->no_io_error_seen)
				goto corrected_error;
			else
				goto did_not_correct_error;
		} else {
corrected_error:
			spin_lock(&sctx->stat_lock);
			sctx->stat.corrected_errors++;
			sblock_to_check->data_corrected = 1;
			spin_unlock(&sctx->stat_lock);
			btrfs_err_rl_in_rcu(fs_info,
				"fixed up error at logical %llu on dev %s",
				logical, rcu_str_deref(dev->name));
		}
	} else {
did_not_correct_error:
		spin_lock(&sctx->stat_lock);
		sctx->stat.uncorrectable_errors++;
		spin_unlock(&sctx->stat_lock);
		btrfs_err_rl_in_rcu(fs_info,
			"unable to fixup (regular) error at logical %llu on dev %s",
			logical, rcu_str_deref(dev->name));
	}

out:
	if (sblocks_for_recheck) {
		for (mirror_index = 0; mirror_index < BTRFS_MAX_MIRRORS;
		     mirror_index++) {
			struct scrub_block *sblock = sblocks_for_recheck +
						     mirror_index;
			struct scrub_recover *recover;
			int page_index;

			for (page_index = 0; page_index < sblock->page_count;
			     page_index++) {
				sblock->pagev[page_index]->sblock = NULL;
				recover = sblock->pagev[page_index]->recover;
				if (recover) {
					scrub_put_recover(fs_info, recover);
					sblock->pagev[page_index]->recover =
									NULL;
				}
				scrub_page_put(sblock->pagev[page_index]);
			}
		}
		kfree(sblocks_for_recheck);
	}

	ret = unlock_full_stripe(fs_info, logical, full_stripe_locked);
	memalloc_nofs_restore(nofs_flag);
	if (ret < 0)
		return ret;
	return 0;
}

static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio)
{
	if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
		return 2;
	else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6)
		return 3;
	else
		return (int)bbio->num_stripes;
}

static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type,
						 u64 *raid_map,
						 u64 mapped_length,
						 int nstripes, int mirror,
						 int *stripe_index,
						 u64 *stripe_offset)
{
	int i;

	if (map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
		/* RAID5/6 */
		for (i = 0; i < nstripes; i++) {
			if (raid_map[i] == RAID6_Q_STRIPE ||
			    raid_map[i] == RAID5_P_STRIPE)
				continue;

			if (logical >= raid_map[i] &&
			    logical < raid_map[i] + mapped_length)
				break;
		}

		*stripe_index = i;
		*stripe_offset = logical - raid_map[i];
	} else {
		/* The other RAID type */
		*stripe_index = mirror;
		*stripe_offset = 0;
	}
}

static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
				     struct scrub_block *sblocks_for_recheck)
{
	struct scrub_ctx *sctx = original_sblock->sctx;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	u64 length = original_sblock->page_count * PAGE_SIZE;
	u64 logical = original_sblock->pagev[0]->logical;
	u64 generation = original_sblock->pagev[0]->generation;
	u64 flags = original_sblock->pagev[0]->flags;
	u64 have_csum = original_sblock->pagev[0]->have_csum;
	struct scrub_recover *recover;
	struct btrfs_bio *bbio;
	u64 sublen;
	u64 mapped_length;
	u64 stripe_offset;
	int stripe_index;
	int page_index = 0;
	int mirror_index;
	int nmirrors;
	int ret;

	/*
	 * note: the two members refs and outstanding_pages
	 * are not used (and not set) in the blocks that are used for
	 * the recheck procedure
	 */
	while (length > 0) {
		sublen = min_t(u64, length, PAGE_SIZE);
		mapped_length = sublen;
		bbio = NULL;

		/*
		 * with a length of PAGE_SIZE, each returned stripe
		 * represents one mirror
		 */
		btrfs_bio_counter_inc_blocked(fs_info);
		ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
				logical, &mapped_length, &bbio);
		if (ret || !bbio || mapped_length < sublen) {
			btrfs_put_bbio(bbio);
			btrfs_bio_counter_dec(fs_info);
			return -EIO;
		}

		recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS);
		if (!recover) {
			btrfs_put_bbio(bbio);
			btrfs_bio_counter_dec(fs_info);
			return -ENOMEM;
		}

		refcount_set(&recover->refs, 1);
		recover->bbio = bbio;
		recover->map_length = mapped_length;

		BUG_ON(page_index >= SCRUB_MAX_PAGES_PER_BLOCK);

		nmirrors = min(scrub_nr_raid_mirrors(bbio), BTRFS_MAX_MIRRORS);

		for (mirror_index = 0; mirror_index < nmirrors;
		     mirror_index++) {
			struct scrub_block *sblock;
			struct scrub_page *page;

			sblock = sblocks_for_recheck + mirror_index;
			sblock->sctx = sctx;

			page = kzalloc(sizeof(*page), GFP_NOFS);
			if (!page) {
leave_nomem:
				spin_lock(&sctx->stat_lock);
				sctx->stat.malloc_errors++;
				spin_unlock(&sctx->stat_lock);
				scrub_put_recover(fs_info, recover);
				return -ENOMEM;
			}
			scrub_page_get(page);
			sblock->pagev[page_index] = page;
			page->sblock = sblock;
			page->flags = flags;
			page->generation = generation;
			page->logical = logical;
			page->have_csum = have_csum;
			if (have_csum)
				memcpy(page->csum,
				       original_sblock->pagev[0]->csum,
				       sctx->csum_size);

			scrub_stripe_index_and_offset(logical,
						      bbio->map_type,
						      bbio->raid_map,
						      mapped_length,
						      bbio->num_stripes -
						      bbio->num_tgtdevs,
						      mirror_index,
						      &stripe_index,
						      &stripe_offset);
			page->physical = bbio->stripes[stripe_index].physical +
					 stripe_offset;
			page->dev = bbio->stripes[stripe_index].dev;

			BUG_ON(page_index >= original_sblock->page_count);
			page->physical_for_dev_replace =
				original_sblock->pagev[page_index]->
				physical_for_dev_replace;
			/* for missing devices, dev->bdev is NULL */
			page->mirror_num = mirror_index + 1;
			sblock->page_count++;
			page->page = alloc_page(GFP_NOFS);
			if (!page->page)
				goto leave_nomem;

			scrub_get_recover(recover);
			page->recover = recover;
		}
		scrub_put_recover(fs_info, recover);
		length -= sublen;
		logical += sublen;
		page_index++;
	}

	return 0;
}

static void scrub_bio_wait_endio(struct bio *bio)
{
	complete(bio->bi_private);
}

static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
					struct bio *bio,
					struct scrub_page *page)
{
	DECLARE_COMPLETION_ONSTACK(done);
	int ret;
	int mirror_num;

	bio->bi_iter.bi_sector = page->logical >> 9;
	bio->bi_private = &done;
	bio->bi_end_io = scrub_bio_wait_endio;

	mirror_num = page->sblock->pagev[0]->mirror_num;
	ret = raid56_parity_recover(fs_info, bio, page->recover->bbio,
				    page->recover->map_length,
				    mirror_num, 0);
	if (ret)
		return ret;

	wait_for_completion_io(&done);
	return blk_status_to_errno(bio->bi_status);
}

static void scrub_recheck_block_on_raid56(struct btrfs_fs_info *fs_info,
					  struct scrub_block *sblock)
{
	struct scrub_page *first_page = sblock->pagev[0];
	struct bio *bio;
	int page_num;

	/* All pages in sblock belong to the same stripe on the same device. */
	ASSERT(first_page->dev);
	if (!first_page->dev->bdev)
		goto out;

	bio = btrfs_io_bio_alloc(BIO_MAX_PAGES);
	bio_set_dev(bio, first_page->dev->bdev);

	for (page_num = 0; page_num < sblock->page_count; page_num++) {
		struct scrub_page *page = sblock->pagev[page_num];

		WARN_ON(!page->page);
		bio_add_page(bio, page->page, PAGE_SIZE, 0);
	}

	if (scrub_submit_raid56_bio_wait(fs_info, bio, first_page)) {
		bio_put(bio);
		goto out;
	}

	bio_put(bio);

	scrub_recheck_block_checksum(sblock);

	return;
out:
	for (page_num = 0; page_num < sblock->page_count; page_num++)
		sblock->pagev[page_num]->io_error = 1;

	sblock->no_io_error_seen = 0;
}
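/*
 * this function will check the on disk data for checksum errors, header
 * errors and read I/O errors. If any I/O errors happen, the exact pages
 * which are errored are marked as being bad. The goal is to enable scrub
 * to take those pages that are not errored from all the mirrors so that
 * the pages that are errored in the just handled mirror can be repaired.
 */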
static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
				struct scrub_block *sblock,
				int retry_failed_mirror)
{
	int page_num;

	sblock->no_io_error_seen = 1;

	/* short cut for raid56 */
	if (!retry_failed_mirror && scrub_is_page_on_raid56(sblock->pagev[0]))
		return scrub_recheck_block_on_raid56(fs_info, sblock);

	for (page_num = 0; page_num < sblock->page_count; page_num++) {
		struct bio *bio;
		struct scrub_page *page = sblock->pagev[page_num];

		if (page->dev->bdev == NULL) {
			page->io_error = 1;
			sblock->no_io_error_seen = 0;
			continue;
		}

		WARN_ON(!page->page);
		bio = btrfs_io_bio_alloc(1);
		bio_set_dev(bio, page->dev->bdev);

		bio_add_page(bio, page->page, PAGE_SIZE, 0);
		bio->bi_iter.bi_sector = page->physical >> 9;
		bio->bi_opf = REQ_OP_READ;

		if (btrfsic_submit_bio_wait(bio)) {
			page->io_error = 1;
			sblock->no_io_error_seen = 0;
		}

		bio_put(bio);
	}

	if (sblock->no_io_error_seen)
		scrub_recheck_block_checksum(sblock);
}

static inline int scrub_check_fsid(u8 fsid[],
				   struct scrub_page *spage)
{
	struct btrfs_fs_devices *fs_devices = spage->dev->fs_devices;
	int ret;

	ret = memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
	return !ret;
}

static void scrub_recheck_block_checksum(struct scrub_block *sblock)
{
	sblock->header_error = 0;
	sblock->checksum_error = 0;
	sblock->generation_error = 0;

	if (sblock->pagev[0]->flags & BTRFS_EXTENT_FLAG_DATA)
		scrub_checksum_data(sblock);
	else
		scrub_checksum_tree_block(sblock);
}

static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
					     struct scrub_block *sblock_good)
{
	int page_num;
	int ret = 0;

	for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
		int ret_sub;

		ret_sub = scrub_repair_page_from_good_copy(sblock_bad,
							   sblock_good,
							   page_num, 1);
		if (ret_sub)
			ret = ret_sub;
	}

	return ret;
}

static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
					    struct scrub_block *sblock_good,
					    int page_num, int force_write)
{
	struct scrub_page *page_bad = sblock_bad->pagev[page_num];
	struct scrub_page *page_good = sblock_good->pagev[page_num];
	struct btrfs_fs_info *fs_info = sblock_bad->sctx->fs_info;

	BUG_ON(page_bad->page == NULL);
	BUG_ON(page_good->page == NULL);
	if (force_write || sblock_bad->header_error ||
	    sblock_bad->checksum_error || page_bad->io_error) {
		struct bio *bio;
		int ret;

		if (!page_bad->dev->bdev) {
			btrfs_warn_rl(fs_info,
				"scrub_repair_page_from_good_copy(bdev == NULL) is unexpected");
			return -EIO;
		}

		bio = btrfs_io_bio_alloc(1);
		bio_set_dev(bio, page_bad->dev->bdev);
		bio->bi_iter.bi_sector = page_bad->physical >> 9;
		bio->bi_opf = REQ_OP_WRITE;

		ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
		if (PAGE_SIZE != ret) {
			bio_put(bio);
			return -EIO;
		}

		if (btrfsic_submit_bio_wait(bio)) {
			btrfs_dev_stat_inc_and_print(page_bad->dev,
				BTRFS_DEV_STAT_WRITE_ERRS);
			atomic64_inc(&fs_info->dev_replace.num_write_errors);
			bio_put(bio);
			return -EIO;
		}
		bio_put(bio);
	}

	return 0;
}

static void scrub_write_block_to_dev_replace(struct scrub_block *sblock)
{
	struct btrfs_fs_info *fs_info = sblock->sctx->fs_info;
	int page_num;

	/*
	 * This block is used for the check of the parity on the source device,
	 * so the data needn't be written into the destination device.
	 */
	if (sblock->sparity)
		return;

	for (page_num = 0; page_num < sblock->page_count; page_num++) {
		int ret;

		ret = scrub_write_page_to_dev_replace(sblock, page_num);
		if (ret)
			atomic64_inc(&fs_info->dev_replace.num_write_errors);
	}
}

static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
					   int page_num)
{
	struct scrub_page *spage = sblock->pagev[page_num];

	BUG_ON(spage->page == NULL);
	if (spage->io_error)
		clear_page(page_address(spage->page));

	return scrub_add_page_to_wr_bio(sblock->sctx, spage);
}

static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
				    struct scrub_page *spage)
{
	struct scrub_bio *sbio;
	int ret;

	mutex_lock(&sctx->wr_lock);
again:
	if (!sctx->wr_curr_bio) {
		sctx->wr_curr_bio = kzalloc(sizeof(*sctx->wr_curr_bio),
					      GFP_KERNEL);
		if (!sctx->wr_curr_bio) {
			mutex_unlock(&sctx->wr_lock);
			return -ENOMEM;
		}
		sctx->wr_curr_bio->sctx = sctx;
		sctx->wr_curr_bio->page_count = 0;
	}
	sbio = sctx->wr_curr_bio;
	if (sbio->page_count == 0) {
		struct bio *bio;

		sbio->physical = spage->physical_for_dev_replace;
		sbio->logical = spage->logical;
		sbio->dev = sctx->wr_tgtdev;
		bio = sbio->bio;
		if (!bio) {
			bio = btrfs_io_bio_alloc(sctx->pages_per_wr_bio);
			sbio->bio = bio;
		}

		bio->bi_private = sbio;
		bio->bi_end_io = scrub_wr_bio_end_io;
		bio_set_dev(bio, sbio->dev->bdev);
		bio->bi_iter.bi_sector = sbio->physical >> 9;
		bio->bi_opf = REQ_OP_WRITE;
		sbio->status = 0;
	} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
		   spage->physical_for_dev_replace ||
		   sbio->logical + sbio->page_count * PAGE_SIZE !=
		   spage->logical) {
		scrub_wr_submit(sctx);
		goto again;
	}

	ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
	if (ret != PAGE_SIZE) {
		if (sbio->page_count < 1) {
			bio_put(sbio->bio);
			sbio->bio = NULL;
			mutex_unlock(&sctx->wr_lock);
			return -EIO;
		}
		scrub_wr_submit(sctx);
		goto again;
	}

	sbio->pagev[sbio->page_count] = spage;
	scrub_page_get(spage);
	sbio->page_count++;
	if (sbio->page_count == sctx->pages_per_wr_bio)
		scrub_wr_submit(sctx);
	mutex_unlock(&sctx->wr_lock);

	return 0;
}

static void scrub_wr_submit(struct scrub_ctx *sctx)
{
	struct scrub_bio *sbio;

	if (!sctx->wr_curr_bio)
		return;

	sbio = sctx->wr_curr_bio;
	sctx->wr_curr_bio = NULL;
	WARN_ON(!sbio->bio->bi_disk);
	scrub_pending_bio_inc(sctx);
	/* process all writes in a single worker thread. Then the block layer
	 * orders the requests before sending them to the driver which
	 * doubled the write performance on spinning disks when fragmented
	 * extents were scrubbed */
	btrfsic_submit_bio(sbio->bio);
}

static void scrub_wr_bio_end_io(struct bio *bio)
{
	struct scrub_bio *sbio = bio->bi_private;
	struct btrfs_fs_info *fs_info = sbio->dev->fs_info;

	sbio->status = bio->bi_status;
	sbio->bio = bio;

	btrfs_init_work(&sbio->work, scrub_wr_bio_end_io_worker, NULL, NULL);
	btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work);
}

static void scrub_wr_bio_end_io_worker(struct btrfs_work *work)
{
	struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
	struct scrub_ctx *sctx = sbio->sctx;
	int i;

	WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO);
	if (sbio->status) {
		struct btrfs_dev_replace *dev_replace =
			&sbio->sctx->fs_info->dev_replace;

		for (i = 0; i < sbio->page_count; i++) {
			struct scrub_page *spage = sbio->pagev[i];

			spage->io_error = 1;
			atomic64_inc(&dev_replace->num_write_errors);
		}
	}

	for (i = 0; i < sbio->page_count; i++)
		scrub_page_put(sbio->pagev[i]);

	bio_put(sbio->bio);
	kfree(sbio);
	scrub_pending_bio_dec(sctx);
}

static int scrub_checksum(struct scrub_block *sblock)
{
	u64 flags;
	int ret;

	/*
	 * No need to initialize these stats currently,
	 * because this function only use return value
	 * instead of these stats value.
	 *
	 * Todo:
	 * always use stats
	 */
	sblock->header_error = 0;
	sblock->generation_error = 0;
	sblock->checksum_error = 0;

	WARN_ON(sblock->page_count < 1);
	flags = sblock->pagev[0]->flags;
	ret = 0;
	if (flags & BTRFS_EXTENT_FLAG_DATA)
		ret = scrub_checksum_data(sblock);
	else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
		ret = scrub_checksum_tree_block(sblock);
	else if (flags & BTRFS_EXTENT_FLAG_SUPER)
		(void)scrub_checksum_super(sblock);
	else
		WARN_ON(1);
	if (ret)
		scrub_handle_errored_block(sblock);

	return ret;
}

static int scrub_checksum_data(struct scrub_block *sblock)
{
	struct scrub_ctx *sctx = sblock->sctx;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
	u8 csum[BTRFS_CSUM_SIZE];
	struct scrub_page *spage;
	char *kaddr;

	BUG_ON(sblock->page_count < 1);
	spage = sblock->pagev[0];
	if (!spage->have_csum)
		return 0;

	kaddr = page_address(spage->page);

	shash->tfm = fs_info->csum_shash;
	crypto_shash_init(shash);
	crypto_shash_digest(shash, kaddr, PAGE_SIZE, csum);

	if (memcmp(csum, spage->csum, sctx->csum_size))
		sblock->checksum_error = 1;

	return sblock->checksum_error;
}

static int scrub_checksum_tree_block(struct scrub_block *sblock)
{
	struct scrub_ctx *sctx = sblock->sctx;
	struct btrfs_header *h;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
	u8 calculated_csum[BTRFS_CSUM_SIZE];
	u8 on_disk_csum[BTRFS_CSUM_SIZE];
	const int num_pages = sctx->fs_info->nodesize >> PAGE_SHIFT;
	int i;
	struct scrub_page *spage;
	char *kaddr;

	BUG_ON(sblock->page_count < 1);
	spage = sblock->pagev[0];
	kaddr = page_address(spage->page);
	h = (struct btrfs_header *)kaddr;
	memcpy(on_disk_csum, h->csum, sctx->csum_size);

	/*
	 * we don't use the getter functions here, as we
	 * a) don't have an extent buffer and
	 * b) the page is already kmapped
	 */
	if (spage->logical != btrfs_stack_header_bytenr(h))
		sblock->header_error = 1;

	if (spage->generation != btrfs_stack_header_generation(h)) {
		sblock->header_error = 1;
		sblock->generation_error = 1;
	}

	if (!scrub_check_fsid(h->fsid, spage))
		sblock->header_error = 1;

	if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
		   BTRFS_UUID_SIZE))
		sblock->header_error = 1;

	shash->tfm = fs_info->csum_shash;
	crypto_shash_init(shash);
	crypto_shash_update(shash, kaddr + BTRFS_CSUM_SIZE,
			    PAGE_SIZE - BTRFS_CSUM_SIZE);

	for (i = 1; i < num_pages; i++) {
		kaddr = page_address(sblock->pagev[i]->page);
		crypto_shash_update(shash, kaddr, PAGE_SIZE);
	}

	crypto_shash_final(shash, calculated_csum);
	if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
		sblock->checksum_error = 1;

	return sblock->header_error || sblock->checksum_error;
}

static int scrub_checksum_super(struct scrub_block *sblock)
{
	struct btrfs_super_block *s;
	struct scrub_ctx *sctx = sblock->sctx;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
	u8 calculated_csum[BTRFS_CSUM_SIZE];
	struct scrub_page *spage;
	char *kaddr;
	int fail_gen = 0;
	int fail_cor = 0;

	BUG_ON(sblock->page_count < 1);
	spage = sblock->pagev[0];
	kaddr = page_address(spage->page);
	s = (struct btrfs_super_block *)kaddr;

	if (spage->logical != btrfs_super_bytenr(s))
		++fail_cor;

	if (spage->generation != btrfs_super_generation(s))
		++fail_gen;

	if (!scrub_check_fsid(s->fsid, spage))
		++fail_cor;

	shash->tfm = fs_info->csum_shash;
	crypto_shash_init(shash);
	crypto_shash_digest(shash, kaddr + BTRFS_CSUM_SIZE,
			BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE, calculated_csum);

	if (memcmp(calculated_csum, s->csum, sctx->csum_size))
		++fail_cor;

	if (fail_cor + fail_gen) {
		/*
		 * if we find an error in a super block, we just report it.
		 * They will get written with the next transaction commit
		 * anyway
		 */
		spin_lock(&sctx->stat_lock);
		++sctx->stat.super_errors;
		spin_unlock(&sctx->stat_lock);
		if (fail_cor)
			btrfs_dev_stat_inc_and_print(spage->dev,
				BTRFS_DEV_STAT_CORRUPTION_ERRS);
		else
			btrfs_dev_stat_inc_and_print(spage->dev,
				BTRFS_DEV_STAT_GENERATION_ERRS);
	}

	return fail_cor + fail_gen;
}

static void scrub_block_get(struct scrub_block *sblock)
{
	refcount_inc(&sblock->refs);
}

static void scrub_block_put(struct scrub_block *sblock)
{
	if (refcount_dec_and_test(&sblock->refs)) {
		int i;

		if (sblock->sparity)
			scrub_parity_put(sblock->sparity);

		for (i = 0; i < sblock->page_count; i++)
			scrub_page_put(sblock->pagev[i]);
		kfree(sblock);
	}
}

static void scrub_page_get(struct scrub_page *spage)
{
	atomic_inc(&spage->refs);
}

static void scrub_page_put(struct scrub_page *spage)
{
	if (atomic_dec_and_test(&spage->refs)) {
		if (spage->page)
			__free_page(spage->page);
		kfree(spage);
	}
}

static void scrub_submit(struct scrub_ctx *sctx)
{
	struct scrub_bio *sbio;

	if (sctx->curr == -1)
		return;

	sbio = sctx->bios[sctx->curr];
	sctx->curr = -1;
	scrub_pending_bio_inc(sctx);
	btrfsic_submit_bio(sbio->bio);
}

static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
				    struct scrub_page *spage)
{
	struct scrub_block *sblock = spage->sblock;
	struct scrub_bio *sbio;
	int ret;

again:
	/*
	 * grab a fresh bio or wait for one to become available
	 */
	while (sctx->curr == -1) {
		spin_lock(&sctx->list_lock);
		sctx->curr = sctx->first_free;
		if (sctx->curr != -1) {
			sctx->first_free = sctx->bios[sctx->curr]->next_free;
			sctx->bios[sctx->curr]->next_free = -1;
			sctx->bios[sctx->curr]->page_count = 0;
			spin_unlock(&sctx->list_lock);
		} else {
			spin_unlock(&sctx->list_lock);
			wait_event(sctx->list_wait, sctx->first_free != -1);
		}
	}
	sbio = sctx->bios[sctx->curr];
	if (sbio->page_count == 0) {
		struct bio *bio;

		sbio->physical = spage->physical;
		sbio->logical = spage->logical;
		sbio->dev = spage->dev;
		bio = sbio->bio;
		if (!bio) {
			bio = btrfs_io_bio_alloc(sctx->pages_per_rd_bio);
			sbio->bio = bio;
		}

		bio->bi_private = sbio;
		bio->bi_end_io = scrub_bio_end_io;
		bio_set_dev(bio, sbio->dev->bdev);
		bio->bi_iter.bi_sector = sbio->physical >> 9;
		bio->bi_opf = REQ_OP_READ;
		sbio->status = 0;
	} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
		   spage->physical ||
		   sbio->logical + sbio->page_count * PAGE_SIZE !=
		   spage->logical ||
		   sbio->dev != spage->dev) {
		scrub_submit(sctx);
		goto again;
	}

	sbio->pagev[sbio->page_count] = spage;
	ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
	if (ret != PAGE_SIZE) {
		if (sbio->page_count < 1) {
			bio_put(sbio->bio);
			sbio->bio = NULL;
			return -EIO;
		}
		scrub_submit(sctx);
		goto again;
	}

	scrub_block_get(sblock); /* one for the page added to the bio */
	atomic_inc(&sblock->outstanding_pages);
	sbio->page_count++;
	if (sbio->page_count == sctx->pages_per_rd_bio)
		scrub_submit(sctx);

	return 0;
}

static void scrub_missing_raid56_end_io(struct bio *bio)
{
	struct scrub_block *sblock = bio->bi_private;
	struct btrfs_fs_info *fs_info = sblock->sctx->fs_info;

	if (bio->bi_status)
		sblock->no_io_error_seen = 0;

	bio_put(bio);

	btrfs_queue_work(fs_info->scrub_workers, &sblock->work);
}

static void scrub_missing_raid56_worker(struct btrfs_work *work)
{
	struct scrub_block *sblock = container_of(work, struct scrub_block, work);
	struct scrub_ctx *sctx = sblock->sctx;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	u64 logical;
	struct btrfs_device *dev;

	logical = sblock->pagev[0]->logical;
	dev = sblock->pagev[0]->dev;

	if (sblock->no_io_error_seen)
		scrub_recheck_block_checksum(sblock);

	if (!sblock->no_io_error_seen) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.read_errors++;
		spin_unlock(&sctx->stat_lock);
		btrfs_err_rl_in_rcu(fs_info,
			"IO error rebuilding logical %llu for dev %s",
			logical, rcu_str_deref(dev->name));
	} else if (sblock->header_error || sblock->checksum_error) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.uncorrectable_errors++;
		spin_unlock(&sctx->stat_lock);
		btrfs_err_rl_in_rcu(fs_info,
			"failed to rebuild valid logical %llu for dev %s",
			logical, rcu_str_deref(dev->name));
	} else {
		scrub_write_block_to_dev_replace(sblock);
	}

	if (sctx->is_dev_replace && sctx->flush_all_writes) {
		mutex_lock(&sctx->wr_lock);
		scrub_wr_submit(sctx);
		mutex_unlock(&sctx->wr_lock);
	}

	scrub_block_put(sblock);
	scrub_pending_bio_dec(sctx);
}

static void scrub_missing_raid56_pages(struct scrub_block *sblock)
{
	struct scrub_ctx *sctx = sblock->sctx;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	u64 length = sblock->page_count * PAGE_SIZE;
	u64 logical = sblock->pagev[0]->logical;
	struct btrfs_bio *bbio = NULL;
	struct bio *bio;
	struct btrfs_raid_bio *rbio;
	int ret;
	int i;

	btrfs_bio_counter_inc_blocked(fs_info);
	ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
			&length, &bbio);
	if (ret || !bbio || !bbio->raid_map)
		goto bbio_out;

	if (WARN_ON(!sctx->is_dev_replace ||
		    !(bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK))) {
		/*
		 * We shouldn't be scrubbing a missing device. Even for dev
		 * replace, we should only get here for RAID 5/6. We either
		 * managed to mount something with no mirrors remaining or
		 * there's a bug in scrub_remap_extent()/btrfs_map_block().
		 */
		goto bbio_out;
	}

	bio = btrfs_io_bio_alloc(0);
	bio->bi_iter.bi_sector = logical >> 9;
	bio->bi_private = sblock;
	bio->bi_end_io = scrub_missing_raid56_end_io;

	rbio = raid56_alloc_missing_rbio(fs_info, bio, bbio, length);
	if (!rbio)
		goto rbio_out;

	for (i = 0; i < sblock->page_count; i++) {
		struct scrub_page *spage = sblock->pagev[i];

		raid56_add_scrub_pages(rbio, spage->page, spage->logical);
	}

	btrfs_init_work(&sblock->work, scrub_missing_raid56_worker, NULL, NULL);
	scrub_block_get(sblock);
	scrub_pending_bio_inc(sctx);
	raid56_submit_missing_rbio(rbio);
	return;

rbio_out:
	bio_put(bio);
bbio_out:
	btrfs_bio_counter_dec(fs_info);
	btrfs_put_bbio(bbio);
	spin_lock(&sctx->stat_lock);
	sctx->stat.malloc_errors++;
	spin_unlock(&sctx->stat_lock);
}

static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
		       u64 physical, struct btrfs_device *dev, u64 flags,
		       u64 gen, int mirror_num, u8 *csum, int force,
		       u64 physical_for_dev_replace)
{
	struct scrub_block *sblock;
	int index;

	sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);
	if (!sblock) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.malloc_errors++;
		spin_unlock(&sctx->stat_lock);
		return -ENOMEM;
	}

	/* one ref inside this function, plus one for each page added to
	 * a bio later on */
	refcount_set(&sblock->refs, 1);
	sblock->sctx = sctx;
	sblock->no_io_error_seen = 1;

	for (index = 0; len > 0; index++) {
		struct scrub_page *spage;
		u64 l = min_t(u64, len, PAGE_SIZE);

		spage = kzalloc(sizeof(*spage), GFP_KERNEL);
		if (!spage) {
leave_nomem:
			spin_lock(&sctx->stat_lock);
			sctx->stat.malloc_errors++;
			spin_unlock(&sctx->stat_lock);
			scrub_block_put(sblock);
			return -ENOMEM;
		}
		BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
		scrub_page_get(spage);
		sblock->pagev[index] = spage;
		spage->sblock = sblock;
		spage->dev = dev;
		spage->flags = flags;
		spage->generation = gen;
		spage->logical = logical;
		spage->physical = physical;
		spage->physical_for_dev_replace = physical_for_dev_replace;
		spage->mirror_num = mirror_num;
		if (csum) {
			spage->have_csum = 1;
			memcpy(spage->csum, csum, sctx->csum_size);
		} else {
			spage->have_csum = 0;
		}
		sblock->page_count++;
		spage->page = alloc_page(GFP_KERNEL);
		if (!spage->page)
			goto leave_nomem;
		len -= l;
		logical += l;
		physical += l;
		physical_for_dev_replace += l;
	}

	WARN_ON(sblock->page_count == 0);
	if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) {
		/*
		 * This case should only be hit for RAID 5/6 device replace. See
		 * the comment in scrub_missing_raid56_pages() for details.
		 */
		scrub_missing_raid56_pages(sblock);
	} else {
		for (index = 0; index < sblock->page_count; index++) {
			struct scrub_page *spage = sblock->pagev[index];
			int ret;

			ret = scrub_add_page_to_rd_bio(sctx, spage);
			if (ret) {
				scrub_block_put(sblock);
				return ret;
			}
		}

		if (force)
			scrub_submit(sctx);
	}

	/* last one frees, either here or in bio completion for last page */
	scrub_block_put(sblock);
	return 0;
}

static void scrub_bio_end_io(struct bio *bio)
{
	struct scrub_bio *sbio = bio->bi_private;
	struct btrfs_fs_info *fs_info = sbio->dev->fs_info;

	sbio->status = bio->bi_status;
	sbio->bio = bio;

	btrfs_queue_work(fs_info->scrub_workers, &sbio->work);
}

static void scrub_bio_end_io_worker(struct btrfs_work *work)
{
	struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
	struct scrub_ctx *sctx = sbio->sctx;
	int i;

	BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO);
	if (sbio->status) {
		for (i = 0; i < sbio->page_count; i++) {
			struct scrub_page *spage = sbio->pagev[i];

			spage->io_error = 1;
			spage->sblock->no_io_error_seen = 0;
		}
	}

	/* now complete the scrub_block items that have all pages completed */
	for (i = 0; i < sbio->page_count; i++) {
		struct scrub_page *spage = sbio->pagev[i];
		struct scrub_block *sblock = spage->sblock;

		if (atomic_dec_and_test(&sblock->outstanding_pages))
			scrub_block_complete(sblock);
		scrub_block_put(sblock);
	}

	bio_put(sbio->bio);
	sbio->bio = NULL;
	spin_lock(&sctx->list_lock);
	sbio->next_free = sctx->first_free;
	sctx->first_free = sbio->index;
	spin_unlock(&sctx->list_lock);

	if (sctx->is_dev_replace && sctx->flush_all_writes) {
		mutex_lock(&sctx->wr_lock);
		scrub_wr_submit(sctx);
		mutex_unlock(&sctx->wr_lock);
	}

	scrub_pending_bio_dec(sctx);
}

static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
				       unsigned long *bitmap,
				       u64 start, u64 len)
{
	u64 offset;
	u64 nsectors64;
	u32 nsectors;
	int sectorsize = sparity->sctx->fs_info->sectorsize;

	if (len >= sparity->stripe_len) {
		bitmap_set(bitmap, 0, sparity->nsectors);
		return;
	}

	start -= sparity->logic_start;
	start = div64_u64_rem(start, sparity->stripe_len, &offset);
	offset = div_u64(offset, sectorsize);
	nsectors64 = div_u64(len, sectorsize);

	ASSERT(nsectors64 < UINT_MAX);
	nsectors = (u32)nsectors64;

	if (offset + nsectors <= sparity->nsectors) {
		bitmap_set(bitmap, offset, nsectors);
		return;
	}

	bitmap_set(bitmap, offset, sparity->nsectors - offset);
	bitmap_set(bitmap, 0, nsectors - (sparity->nsectors - offset));
}

static inline void scrub_parity_mark_sectors_error(struct scrub_parity *sparity,
						   u64 start, u64 len)
{
	__scrub_mark_bitmap(sparity, sparity->ebitmap, start, len);
}

static inline void scrub_parity_mark_sectors_data(struct scrub_parity *sparity,
						  u64 start, u64 len)
{
	__scrub_mark_bitmap(sparity, sparity->dbitmap, start, len);
}
2338
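/*
 * Called when all pages of a block have completed I/O: kick off repair
 * for errored blocks, otherwise verify checksums (and copy good blocks
 * to the dev-replace target). For RAID5/6 parity scrubs, sectors that
 * remain corrupted are recorded in the parity error bitmap.
 */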
static void scrub_block_complete(struct scrub_block *sblock)
{
	int corrupted = 0;

	if (!sblock->no_io_error_seen) {
		corrupted = 1;
		scrub_handle_errored_block(sblock);
	} else {
		/*
		 * If there is a checksum error, the block is written to the
		 * dev-replace target via the repair mechanism; otherwise,
		 * in the dev-replace case, the good block is copied here.
		 */
		corrupted = scrub_checksum(sblock);
		if (!corrupted && sblock->sctx->is_dev_replace)
			scrub_write_block_to_dev_replace(sblock);
	}

	if (sblock->sparity && corrupted && !sblock->data_corrected) {
		u64 start = sblock->pagev[0]->logical;
		u64 end = sblock->pagev[sblock->page_count - 1]->logical +
			  PAGE_SIZE;

		scrub_parity_mark_sectors_error(sblock->sparity,
						start, end - start);
	}
}

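/*
 * Look up the expected checksum for the data sector at @logical in the
 * sorted sctx->csum_list, discarding entries that end before @logical on
 * the way. Returns 1 and copies the checksum into @csum on success, 0 if
 * no checksum is available.
 */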
static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u8 *csum)
{
	struct btrfs_ordered_sum *sum = NULL;
	unsigned long index;
	unsigned long num_sectors;

	while (!list_empty(&sctx->csum_list)) {
		sum = list_first_entry(&sctx->csum_list,
				       struct btrfs_ordered_sum, list);
		if (sum->bytenr > logical)
			return 0;
		if (sum->bytenr + sum->len > logical)
			break;

		++sctx->stat.csum_discards;
		list_del(&sum->list);
		kfree(sum);
		sum = NULL;
	}
	if (!sum)
		return 0;

	index = div_u64(logical - sum->bytenr, sctx->fs_info->sectorsize);
	ASSERT(index < UINT_MAX);

	num_sectors = sum->len / sctx->fs_info->sectorsize;
	memcpy(csum, sum->sums + index * sctx->csum_size, sctx->csum_size);
	if (index == num_sectors - 1) {
		list_del(&sum->list);
		kfree(sum);
	}
	return 1;
}

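/*
 * Scrub one extent: account the scrubbed bytes, then split the range
 * into blocksize chunks (sectorsize for data, nodesize for metadata, a
 * full stripe on RAID5/6) and queue each chunk via scrub_pages().
 */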
static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
			u64 logical, u64 len,
			u64 physical, struct btrfs_device *dev, u64 flags,
			u64 gen, int mirror_num, u64 physical_for_dev_replace)
{
	int ret;
	u8 csum[BTRFS_CSUM_SIZE];
	u32 blocksize;

	if (flags & BTRFS_EXTENT_FLAG_DATA) {
		if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
			blocksize = map->stripe_len;
		else
			blocksize = sctx->fs_info->sectorsize;
		spin_lock(&sctx->stat_lock);
		sctx->stat.data_extents_scrubbed++;
		sctx->stat.data_bytes_scrubbed += len;
		spin_unlock(&sctx->stat_lock);
	} else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
			blocksize = map->stripe_len;
		else
			blocksize = sctx->fs_info->nodesize;
		spin_lock(&sctx->stat_lock);
		sctx->stat.tree_extents_scrubbed++;
		sctx->stat.tree_bytes_scrubbed += len;
		spin_unlock(&sctx->stat_lock);
	} else {
		blocksize = sctx->fs_info->sectorsize;
		WARN_ON(1);
	}

	while (len) {
		u64 l = min_t(u64, len, blocksize);
		int have_csum = 0;

		if (flags & BTRFS_EXTENT_FLAG_DATA) {
			/* Push csums to sbio. */
			have_csum = scrub_find_csum(sctx, logical, csum);
			if (have_csum == 0)
				++sctx->stat.no_csum;
		}
		ret = scrub_pages(sctx, logical, l, physical, dev, flags, gen,
				  mirror_num, have_csum ? csum : NULL, 0,
				  physical_for_dev_replace);
		if (ret)
			return ret;
		len -= l;
		logical += l;
		physical += l;
		physical_for_dev_replace += l;
	}
	return 0;
}

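/*
 * RAID5/6 variant of scrub_pages(): each page gets an extra reference
 * and is also linked into the parity context so the parity check can
 * find it after the reads complete.
 */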
static int scrub_pages_for_parity(struct scrub_parity *sparity,
				  u64 logical, u64 len,
				  u64 physical, struct btrfs_device *dev,
				  u64 flags, u64 gen, int mirror_num, u8 *csum)
{
	struct scrub_ctx *sctx = sparity->sctx;
	struct scrub_block *sblock;
	int index;

	sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);
	if (!sblock) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.malloc_errors++;
		spin_unlock(&sctx->stat_lock);
		return -ENOMEM;
	}

	/*
	 * One ref inside this function, plus one for each page added to
	 * a bio later on.
	 */
	refcount_set(&sblock->refs, 1);
	sblock->sctx = sctx;
	sblock->no_io_error_seen = 1;
	sblock->sparity = sparity;
	scrub_parity_get(sparity);

	for (index = 0; len > 0; index++) {
		struct scrub_page *spage;
		u64 l = min_t(u64, len, PAGE_SIZE);

		spage = kzalloc(sizeof(*spage), GFP_KERNEL);
		if (!spage) {
leave_nomem:
			spin_lock(&sctx->stat_lock);
			sctx->stat.malloc_errors++;
			spin_unlock(&sctx->stat_lock);
			scrub_block_put(sblock);
			return -ENOMEM;
		}
		BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
		/* For scrub block */
		scrub_page_get(spage);
		sblock->pagev[index] = spage;
		/* For scrub parity */
		scrub_page_get(spage);
		list_add_tail(&spage->list, &sparity->spages);
		spage->sblock = sblock;
		spage->dev = dev;
		spage->flags = flags;
		spage->generation = gen;
		spage->logical = logical;
		spage->physical = physical;
		spage->mirror_num = mirror_num;
		if (csum) {
			spage->have_csum = 1;
			memcpy(spage->csum, csum, sctx->csum_size);
		} else {
			spage->have_csum = 0;
		}
		sblock->page_count++;
		spage->page = alloc_page(GFP_KERNEL);
		if (!spage->page)
			goto leave_nomem;
		len -= l;
		logical += l;
		physical += l;
	}

	WARN_ON(sblock->page_count == 0);
	for (index = 0; index < sblock->page_count; index++) {
		struct scrub_page *spage = sblock->pagev[index];
		int ret;

		ret = scrub_add_page_to_rd_bio(sctx, spage);
		if (ret) {
			scrub_block_put(sblock);
			return ret;
		}
	}

	/* Last one frees, either here or in bio completion for last page. */
	scrub_block_put(sblock);
	return 0;
}

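/*
 * RAID5/6 variant of scrub_extent(). Sectors on missing devices are
 * simply marked as errors; data sectors without a checksum are skipped
 * since they cannot be verified.
 */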
static int scrub_extent_for_parity(struct scrub_parity *sparity,
				   u64 logical, u64 len,
				   u64 physical, struct btrfs_device *dev,
				   u64 flags, u64 gen, int mirror_num)
{
	struct scrub_ctx *sctx = sparity->sctx;
	int ret;
	u8 csum[BTRFS_CSUM_SIZE];
	u32 blocksize;

	if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) {
		scrub_parity_mark_sectors_error(sparity, logical, len);
		return 0;
	}

	if (flags & BTRFS_EXTENT_FLAG_DATA) {
		blocksize = sparity->stripe_len;
	} else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		blocksize = sparity->stripe_len;
	} else {
		blocksize = sctx->fs_info->sectorsize;
		WARN_ON(1);
	}

	while (len) {
		u64 l = min_t(u64, len, blocksize);
		int have_csum = 0;

		if (flags & BTRFS_EXTENT_FLAG_DATA) {
			/* Push csums to sbio. */
			have_csum = scrub_find_csum(sctx, logical, csum);
			if (have_csum == 0)
				goto skip;
		}
		ret = scrub_pages_for_parity(sparity, logical, l, physical, dev,
					     flags, gen, mirror_num,
					     have_csum ? csum : NULL);
		if (ret)
			return ret;
skip:
		len -= l;
		logical += l;
		physical += l;
	}
	return 0;
}

/*
 * Given a physical address, this will calculate its logical offset.
 * If this is a parity stripe, it will return the most left data stripe's
 * logical offset.
 *
 * Return 0 if it is a data stripe, 1 means parity stripe.
 */
static int get_raid56_logic_offset(u64 physical, int num,
				   struct map_lookup *map, u64 *offset,
				   u64 *stripe_start)
{
	int i;
	int j = 0;
	u64 stripe_nr;
	u64 last_offset;
	u32 stripe_index;
	u32 rot;
	const int data_stripes = nr_data_stripes(map);

	last_offset = (physical - map->stripes[num].physical) * data_stripes;
	if (stripe_start)
		*stripe_start = last_offset;

	*offset = last_offset;
	for (i = 0; i < data_stripes; i++) {
		*offset = last_offset + i * map->stripe_len;

		stripe_nr = div64_u64(*offset, map->stripe_len);
		stripe_nr = div_u64(stripe_nr, data_stripes);

		/* Work out the disk rotation on this stripe-set. */
		stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, &rot);
		/* Calculate which stripe this data locates. */
		rot += i;
		stripe_index = rot % map->num_stripes;
		if (stripe_index == num)
			return 0;
		if (stripe_index < num)
			j++;
	}
	*offset = last_offset + j * map->stripe_len;
	return 1;
}

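/*
 * Final teardown of a parity context: account sectors still flagged in
 * the error bitmap as uncorrectable, drop the page references held on
 * the parity list and free the context.
 */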
static void scrub_free_parity(struct scrub_parity *sparity)
{
	struct scrub_ctx *sctx = sparity->sctx;
	struct scrub_page *curr, *next;
	int nbits;

	nbits = bitmap_weight(sparity->ebitmap, sparity->nsectors);
	if (nbits) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.read_errors += nbits;
		sctx->stat.uncorrectable_errors += nbits;
		spin_unlock(&sctx->stat_lock);
	}

	list_for_each_entry_safe(curr, next, &sparity->spages, list) {
		list_del_init(&curr->list);
		scrub_page_put(curr);
	}

	kfree(sparity);
}

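/*
 * Completion handling for the parity check/repair bio. The end_io
 * callback records a failure by marking all checked sectors in the error
 * bitmap, then defers the cleanup to a worker on the parity workqueue.
 */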
static void scrub_parity_bio_endio_worker(struct btrfs_work *work)
{
	struct scrub_parity *sparity = container_of(work, struct scrub_parity,
						    work);
	struct scrub_ctx *sctx = sparity->sctx;

	scrub_free_parity(sparity);
	scrub_pending_bio_dec(sctx);
}

static void scrub_parity_bio_endio(struct bio *bio)
{
	struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private;
	struct btrfs_fs_info *fs_info = sparity->sctx->fs_info;

	if (bio->bi_status)
		bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
			  sparity->nsectors);

	bio_put(bio);

	btrfs_init_work(&sparity->work, scrub_parity_bio_endio_worker, NULL,
			NULL);
	btrfs_queue_work(fs_info->scrub_parity_workers, &sparity->work);
}

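/*
 * Run the actual parity check: remove already-errored sectors from the
 * data bitmap, map the full stripe and hand the remaining sectors to the
 * raid56 layer for parity verification and repair.
 */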
static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
{
	struct scrub_ctx *sctx = sparity->sctx;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	struct bio *bio;
	struct btrfs_raid_bio *rbio;
	struct btrfs_bio *bbio = NULL;
	u64 length;
	int ret;

	if (!bitmap_andnot(sparity->dbitmap, sparity->dbitmap, sparity->ebitmap,
			   sparity->nsectors))
		goto out;

	length = sparity->logic_end - sparity->logic_start;

	btrfs_bio_counter_inc_blocked(fs_info);
	ret = btrfs_map_sblock(fs_info, BTRFS_MAP_WRITE, sparity->logic_start,
			       &length, &bbio);
	if (ret || !bbio || !bbio->raid_map)
		goto bbio_out;

	bio = btrfs_io_bio_alloc(0);
	bio->bi_iter.bi_sector = sparity->logic_start >> 9;
	bio->bi_private = sparity;
	bio->bi_end_io = scrub_parity_bio_endio;

	rbio = raid56_parity_alloc_scrub_rbio(fs_info, bio, bbio,
					      length, sparity->scrub_dev,
					      sparity->dbitmap,
					      sparity->nsectors);
	if (!rbio)
		goto rbio_out;

	scrub_pending_bio_inc(sctx);
	raid56_parity_submit_scrub_rbio(rbio);
	return;

rbio_out:
	bio_put(bio);
bbio_out:
	btrfs_bio_counter_dec(fs_info);
	btrfs_put_bbio(bbio);
	bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
		  sparity->nsectors);
	spin_lock(&sctx->stat_lock);
	sctx->stat.malloc_errors++;
	spin_unlock(&sctx->stat_lock);
out:
	scrub_free_parity(sparity);
}

static inline int scrub_calc_parity_bitmap_len(int nsectors)
{
	return DIV_ROUND_UP(nsectors, BITS_PER_LONG) * sizeof(long);
}

static void scrub_parity_get(struct scrub_parity *sparity)
{
	refcount_inc(&sparity->refs);
}

static void scrub_parity_put(struct scrub_parity *sparity)
{
	if (!refcount_dec_and_test(&sparity->refs))
		return;

	scrub_parity_check_and_repair(sparity);
}

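/*
 * Scrub the parity of one full RAID5/6 stripe: walk the extent tree for
 * [logic_start, logic_end), mark the covered data sectors in the parity
 * bitmap and queue the extents for reading. The parity check itself runs
 * when the last reference to the parity context is dropped.
 */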
static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
						  struct map_lookup *map,
						  struct btrfs_device *sdev,
						  struct btrfs_path *path,
						  u64 logic_start,
						  u64 logic_end)
{
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	struct btrfs_root *root = fs_info->extent_root;
	struct btrfs_root *csum_root = fs_info->csum_root;
	struct btrfs_extent_item *extent;
	struct btrfs_bio *bbio = NULL;
	u64 flags;
	int ret;
	int slot;
	struct extent_buffer *l;
	struct btrfs_key key;
	u64 generation;
	u64 extent_logical;
	u64 extent_physical;
	u64 extent_len;
	u64 mapped_length;
	struct btrfs_device *extent_dev;
	struct scrub_parity *sparity;
	int nsectors;
	int bitmap_len;
	int extent_mirror_num;
	int stop_loop = 0;

	nsectors = div_u64(map->stripe_len, fs_info->sectorsize);
	bitmap_len = scrub_calc_parity_bitmap_len(nsectors);
	sparity = kzalloc(sizeof(struct scrub_parity) + 2 * bitmap_len,
			  GFP_NOFS);
	if (!sparity) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.malloc_errors++;
		spin_unlock(&sctx->stat_lock);
		return -ENOMEM;
	}

	sparity->stripe_len = map->stripe_len;
	sparity->nsectors = nsectors;
	sparity->sctx = sctx;
	sparity->scrub_dev = sdev;
	sparity->logic_start = logic_start;
	sparity->logic_end = logic_end;
	refcount_set(&sparity->refs, 1);
	INIT_LIST_HEAD(&sparity->spages);
	sparity->dbitmap = sparity->bitmap;
	sparity->ebitmap = (void *)sparity->bitmap + bitmap_len;

	ret = 0;
	while (logic_start < logic_end) {
		if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
			key.type = BTRFS_METADATA_ITEM_KEY;
		else
			key.type = BTRFS_EXTENT_ITEM_KEY;
		key.objectid = logic_start;
		key.offset = (u64)-1;

		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0)
			goto out;

		if (ret > 0) {
			ret = btrfs_previous_extent_item(root, path, 0);
			if (ret < 0)
				goto out;
			if (ret > 0) {
				btrfs_release_path(path);
				ret = btrfs_search_slot(NULL, root, &key,
							path, 0, 0);
				if (ret < 0)
					goto out;
			}
		}

		stop_loop = 0;
		while (1) {
			u64 bytes;

			l = path->nodes[0];
			slot = path->slots[0];
			if (slot >= btrfs_header_nritems(l)) {
				ret = btrfs_next_leaf(root, path);
				if (ret == 0)
					continue;
				if (ret < 0)
					goto out;

				stop_loop = 1;
				break;
			}
			btrfs_item_key_to_cpu(l, &key, slot);

			if (key.type != BTRFS_EXTENT_ITEM_KEY &&
			    key.type != BTRFS_METADATA_ITEM_KEY)
				goto next;

			if (key.type == BTRFS_METADATA_ITEM_KEY)
				bytes = fs_info->nodesize;
			else
				bytes = key.offset;

			if (key.objectid + bytes <= logic_start)
				goto next;

			if (key.objectid >= logic_end) {
				stop_loop = 1;
				break;
			}

			while (key.objectid >= logic_start + map->stripe_len)
				logic_start += map->stripe_len;

			extent = btrfs_item_ptr(l, slot,
						struct btrfs_extent_item);
			flags = btrfs_extent_flags(l, extent);
			generation = btrfs_extent_generation(l, extent);

			if ((flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) &&
			    (key.objectid < logic_start ||
			     key.objectid + bytes >
			     logic_start + map->stripe_len)) {
				btrfs_err(fs_info,
					  "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
					  key.objectid, logic_start);
				spin_lock(&sctx->stat_lock);
				sctx->stat.uncorrectable_errors++;
				spin_unlock(&sctx->stat_lock);
				goto next;
			}
again:
			extent_logical = key.objectid;
			extent_len = bytes;

			if (extent_logical < logic_start) {
				extent_len -= logic_start - extent_logical;
				extent_logical = logic_start;
			}

			if (extent_logical + extent_len >
			    logic_start + map->stripe_len)
				extent_len = logic_start + map->stripe_len -
					     extent_logical;

			scrub_parity_mark_sectors_data(sparity, extent_logical,
						       extent_len);

			mapped_length = extent_len;
			bbio = NULL;
			ret = btrfs_map_block(fs_info, BTRFS_MAP_READ,
					      extent_logical, &mapped_length,
					      &bbio, 0);
			if (!ret) {
				if (!bbio || mapped_length < extent_len)
					ret = -EIO;
			}
			if (ret) {
				btrfs_put_bbio(bbio);
				goto out;
			}
			extent_physical = bbio->stripes[0].physical;
			extent_mirror_num = bbio->mirror_num;
			extent_dev = bbio->stripes[0].dev;
			btrfs_put_bbio(bbio);

			ret = btrfs_lookup_csums_range(csum_root,
						extent_logical,
						extent_logical + extent_len - 1,
						&sctx->csum_list, 1);
			if (ret)
				goto out;

			ret = scrub_extent_for_parity(sparity, extent_logical,
						      extent_len,
						      extent_physical,
						      extent_dev, flags,
						      generation,
						      extent_mirror_num);

			scrub_free_csums(sctx);

			if (ret)
				goto out;

			if (extent_logical + extent_len <
			    key.objectid + bytes) {
				logic_start += map->stripe_len;

				if (logic_start >= logic_end) {
					stop_loop = 1;
					break;
				}

				if (logic_start < key.objectid + bytes) {
					cond_resched();
					goto again;
				}
			}
next:
			path->slots[0]++;
		}

		btrfs_release_path(path);

		if (stop_loop)
			break;

		logic_start += map->stripe_len;
	}
out:
	if (ret < 0)
		scrub_parity_mark_sectors_error(sparity, logic_start,
						logic_end - logic_start);
	scrub_parity_put(sparity);
	scrub_submit(sctx);
	mutex_lock(&sctx->wr_lock);
	scrub_wr_submit(sctx);
	mutex_unlock(&sctx->wr_lock);

	btrfs_release_path(path);
	return ret < 0 ? ret : 0;
}

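/*
 * Scrub one stripe of a chunk on the given device: compute the logical
 * range this device stripe maps to, read ahead the extent and csum
 * trees, then walk all extents in the range stripe by stripe, handing
 * full parity stripes off to scrub_raid56_parity() on RAID5/6.
 */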
static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
					   struct map_lookup *map,
					   struct btrfs_device *scrub_dev,
					   int num, u64 base, u64 length,
					   struct btrfs_block_group *cache)
{
	struct btrfs_path *path, *ppath;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	struct btrfs_root *root = fs_info->extent_root;
	struct btrfs_root *csum_root = fs_info->csum_root;
	struct btrfs_extent_item *extent;
	struct blk_plug plug;
	u64 flags;
	int ret;
	int slot;
	u64 nstripes;
	struct extent_buffer *l;
	u64 physical;
	u64 logical;
	u64 logic_end;
	u64 physical_end;
	u64 generation;
	int mirror_num;
	struct reada_control *reada1;
	struct reada_control *reada2;
	struct btrfs_key key;
	struct btrfs_key key_end;
	u64 increment = map->stripe_len;
	u64 offset;
	u64 extent_logical;
	u64 extent_physical;
	u64 extent_len;
	u64 stripe_logical;
	u64 stripe_end;
	struct btrfs_device *extent_dev;
	int extent_mirror_num;
	int stop_loop = 0;

	physical = map->stripes[num].physical;
	offset = 0;
	nstripes = div64_u64(length, map->stripe_len);
	if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
		offset = map->stripe_len * num;
		increment = map->stripe_len * map->num_stripes;
		mirror_num = 1;
	} else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
		int factor = map->num_stripes / map->sub_stripes;
		offset = map->stripe_len * (num / map->sub_stripes);
		increment = map->stripe_len * factor;
		mirror_num = num % map->sub_stripes + 1;
	} else if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) {
		increment = map->stripe_len;
		mirror_num = num % map->num_stripes + 1;
	} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
		increment = map->stripe_len;
		mirror_num = num % map->num_stripes + 1;
	} else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
		get_raid56_logic_offset(physical, num, map, &offset, NULL);
		increment = map->stripe_len * nr_data_stripes(map);
		mirror_num = 1;
	} else {
		increment = map->stripe_len;
		mirror_num = 1;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ppath = btrfs_alloc_path();
	if (!ppath) {
		btrfs_free_path(path);
		return -ENOMEM;
	}

	/*
	 * Work on commit root. The related disk blocks are static as
	 * long as COW is applied. This means, it is safe to rewrite
	 * them to repair disk errors without any race conditions.
	 */
	path->search_commit_root = 1;
	path->skip_locking = 1;

	ppath->search_commit_root = 1;
	ppath->skip_locking = 1;
	/*
	 * Trigger the readahead for extent tree and csum tree and wait for
	 * completion. During readahead, the scrub is officially paused
	 * to not hold off transaction commits.
	 */
	logical = base + offset;
	physical_end = physical + nstripes * map->stripe_len;
	if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
		get_raid56_logic_offset(physical_end, num,
					map, &logic_end, NULL);
		logic_end += base;
	} else {
		logic_end = logical + increment * nstripes;
	}
	wait_event(sctx->list_wait,
		   atomic_read(&sctx->bios_in_flight) == 0);
	scrub_blocked_if_needed(fs_info);

	/* FIXME it might be better to start readahead at commit root */
	key.objectid = logical;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = (u64)0;
	key_end.objectid = logic_end;
	key_end.type = BTRFS_METADATA_ITEM_KEY;
	key_end.offset = (u64)-1;
	reada1 = btrfs_reada_add(root, &key, &key_end);

	key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
	key.type = BTRFS_EXTENT_CSUM_KEY;
	key.offset = logical;
	key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
	key_end.type = BTRFS_EXTENT_CSUM_KEY;
	key_end.offset = logic_end;
	reada2 = btrfs_reada_add(csum_root, &key, &key_end);

	if (!IS_ERR(reada1))
		btrfs_reada_wait(reada1);
	if (!IS_ERR(reada2))
		btrfs_reada_wait(reada2);


	/*
	 * Collect all data csums for the stripe to avoid seeking during
	 * the scrub. This might currently (crc32) end up to be about 1MB.
	 */
	blk_start_plug(&plug);

	/*
	 * Now find all extents for each stripe and scrub them.
	 */
	ret = 0;
	while (physical < physical_end) {
		/*
		 * Canceled?
		 */
		if (atomic_read(&fs_info->scrub_cancel_req) ||
		    atomic_read(&sctx->cancel_req)) {
			ret = -ECANCELED;
			goto out;
		}
		/*
		 * Check to see if we have to pause.
		 */
		if (atomic_read(&fs_info->scrub_pause_req)) {
			/* Push queued extents. */
			sctx->flush_all_writes = true;
			scrub_submit(sctx);
			mutex_lock(&sctx->wr_lock);
			scrub_wr_submit(sctx);
			mutex_unlock(&sctx->wr_lock);
			wait_event(sctx->list_wait,
				   atomic_read(&sctx->bios_in_flight) == 0);
			sctx->flush_all_writes = false;
			scrub_blocked_if_needed(fs_info);
		}

		if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
			ret = get_raid56_logic_offset(physical, num, map,
						      &logical,
						      &stripe_logical);
			logical += base;
			if (ret) {
				/* It is parity strip. */
				stripe_logical += base;
				stripe_end = stripe_logical + increment;
				ret = scrub_raid56_parity(sctx, map, scrub_dev,
							  ppath, stripe_logical,
							  stripe_end);
				if (ret)
					goto out;
				goto skip;
			}
		}

		if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
			key.type = BTRFS_METADATA_ITEM_KEY;
		else
			key.type = BTRFS_EXTENT_ITEM_KEY;
		key.objectid = logical;
		key.offset = (u64)-1;

		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0)
			goto out;

		if (ret > 0) {
			ret = btrfs_previous_extent_item(root, path, 0);
			if (ret < 0)
				goto out;
			if (ret > 0) {
				/* There's no smaller item, so stick with the
				 * larger one. */
				btrfs_release_path(path);
				ret = btrfs_search_slot(NULL, root, &key,
							path, 0, 0);
				if (ret < 0)
					goto out;
			}
		}

		stop_loop = 0;
		while (1) {
			u64 bytes;

			l = path->nodes[0];
			slot = path->slots[0];
			if (slot >= btrfs_header_nritems(l)) {
				ret = btrfs_next_leaf(root, path);
				if (ret == 0)
					continue;
				if (ret < 0)
					goto out;

				stop_loop = 1;
				break;
			}
			btrfs_item_key_to_cpu(l, &key, slot);

			if (key.type != BTRFS_EXTENT_ITEM_KEY &&
			    key.type != BTRFS_METADATA_ITEM_KEY)
				goto next;

			if (key.type == BTRFS_METADATA_ITEM_KEY)
				bytes = fs_info->nodesize;
			else
				bytes = key.offset;

			if (key.objectid + bytes <= logical)
				goto next;

			if (key.objectid >= logical + map->stripe_len) {
				/* Out of this device extent. */
				if (key.objectid >= logic_end)
					stop_loop = 1;
				break;
			}

			/*
			 * If our block group was removed in the meanwhile, just
			 * stop scrubbing since there is no point in continuing.
			 * Continuing would prevent reusing its device extents
			 * for new block groups for a long time.
			 */
			spin_lock(&cache->lock);
			if (cache->removed) {
				spin_unlock(&cache->lock);
				ret = 0;
				goto out;
			}
			spin_unlock(&cache->lock);

			extent = btrfs_item_ptr(l, slot,
						struct btrfs_extent_item);
			flags = btrfs_extent_flags(l, extent);
			generation = btrfs_extent_generation(l, extent);

			if ((flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) &&
			    (key.objectid < logical ||
			     key.objectid + bytes >
			     logical + map->stripe_len)) {
				btrfs_err(fs_info,
					  "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
					  key.objectid, logical);
				spin_lock(&sctx->stat_lock);
				sctx->stat.uncorrectable_errors++;
				spin_unlock(&sctx->stat_lock);
				goto next;
			}

again:
			extent_logical = key.objectid;
			extent_len = bytes;

			/*
			 * Trim extent to this stripe.
			 */
			if (extent_logical < logical) {
				extent_len -= logical - extent_logical;
				extent_logical = logical;
			}
			if (extent_logical + extent_len >
			    logical + map->stripe_len) {
				extent_len = logical + map->stripe_len -
					     extent_logical;
			}

			extent_physical = extent_logical - logical + physical;
			extent_dev = scrub_dev;
			extent_mirror_num = mirror_num;
			if (sctx->is_dev_replace)
				scrub_remap_extent(fs_info, extent_logical,
						   extent_len, &extent_physical,
						   &extent_dev,
						   &extent_mirror_num);

			if (flags & BTRFS_EXTENT_FLAG_DATA) {
				ret = btrfs_lookup_csums_range(csum_root,
						extent_logical,
						extent_logical + extent_len - 1,
						&sctx->csum_list, 1);
				if (ret)
					goto out;
			}

			ret = scrub_extent(sctx, map, extent_logical, extent_len,
					   extent_physical, extent_dev, flags,
					   generation, extent_mirror_num,
					   extent_logical - logical + physical);

			scrub_free_csums(sctx);

			if (ret)
				goto out;

			if (extent_logical + extent_len <
			    key.objectid + bytes) {
				if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
					/*
					 * Loop until we find next data stripe
					 * or we have finished all stripes.
					 */
loop:
					physical += map->stripe_len;
					ret = get_raid56_logic_offset(physical,
							num, map, &logical,
							&stripe_logical);
					logical += base;

					if (ret && physical < physical_end) {
						stripe_logical += base;
						stripe_end = stripe_logical +
							     increment;
						ret = scrub_raid56_parity(sctx,
							map, scrub_dev, ppath,
							stripe_logical,
							stripe_end);
						if (ret)
							goto out;
						goto loop;
					}
				} else {
					physical += map->stripe_len;
					logical += increment;
				}
				if (logical < key.objectid + bytes) {
					cond_resched();
					goto again;
				}

				if (physical >= physical_end) {
					stop_loop = 1;
					break;
				}
			}
next:
			path->slots[0]++;
		}
		btrfs_release_path(path);
skip:
		logical += increment;
		physical += map->stripe_len;
		spin_lock(&sctx->stat_lock);
		if (stop_loop)
			sctx->stat.last_physical = map->stripes[num].physical +
						   length;
		else
			sctx->stat.last_physical = physical;
		spin_unlock(&sctx->stat_lock);
		if (stop_loop)
			break;
	}
out:
	/* Push queued extents. */
	scrub_submit(sctx);
	mutex_lock(&sctx->wr_lock);
	scrub_wr_submit(sctx);
	mutex_unlock(&sctx->wr_lock);

	blk_finish_plug(&plug);
	btrfs_free_path(path);
	btrfs_free_path(ppath);
	return ret < 0 ? ret : 0;
}

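/*
 * Scrub all stripes of a chunk that live on @scrub_dev at @dev_offset.
 */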
static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
					  struct btrfs_device *scrub_dev,
					  u64 chunk_offset, u64 length,
					  u64 dev_offset,
					  struct btrfs_block_group *cache)
{
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	struct extent_map_tree *map_tree = &fs_info->mapping_tree;
	struct map_lookup *map;
	struct extent_map *em;
	int i;
	int ret = 0;

	read_lock(&map_tree->lock);
	em = lookup_extent_mapping(map_tree, chunk_offset, 1);
	read_unlock(&map_tree->lock);

	if (!em) {
		/*
		 * Might have been an unused block group deleted by the cleaner
		 * kthread or relocation.
		 */
		spin_lock(&cache->lock);
		if (!cache->removed)
			ret = -EINVAL;
		spin_unlock(&cache->lock);

		return ret;
	}

	map = em->map_lookup;
	if (em->start != chunk_offset)
		goto out;

	if (em->len < length)
		goto out;

	for (i = 0; i < map->num_stripes; ++i) {
		if (map->stripes[i].dev->bdev == scrub_dev->bdev &&
		    map->stripes[i].physical == dev_offset) {
			ret = scrub_stripe(sctx, map, scrub_dev, i,
					   chunk_offset, length, cache);
			if (ret)
				goto out;
		}
	}
out:
	free_extent_map(em);

	return ret;
}

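/*
 * Walk the device extents of @scrub_dev in [start, end) and scrub the
 * corresponding block groups one by one. Each block group is frozen and,
 * if possible, set read-only for the duration of its scrub.
 */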
static noinline_for_stack
int scrub_enumerate_chunks(struct scrub_ctx *sctx,
			   struct btrfs_device *scrub_dev, u64 start, u64 end)
{
	struct btrfs_dev_extent *dev_extent = NULL;
	struct btrfs_path *path;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	struct btrfs_root *root = fs_info->dev_root;
	u64 length;
	u64 chunk_offset;
	int ret = 0;
	int ro_set;
	int slot;
	struct extent_buffer *l;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_block_group *cache;
	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = READA_FORWARD;
	path->search_commit_root = 1;
	path->skip_locking = 1;

	key.objectid = scrub_dev->devid;
	key.offset = 0ull;
	key.type = BTRFS_DEV_EXTENT_KEY;

	while (1) {
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0)
			break;
		if (ret > 0) {
			if (path->slots[0] >=
			    btrfs_header_nritems(path->nodes[0])) {
				ret = btrfs_next_leaf(root, path);
				if (ret < 0)
					break;
				if (ret > 0) {
					ret = 0;
					break;
				}
			} else {
				ret = 0;
			}
		}

		l = path->nodes[0];
		slot = path->slots[0];

		btrfs_item_key_to_cpu(l, &found_key, slot);

		if (found_key.objectid != scrub_dev->devid)
			break;

		if (found_key.type != BTRFS_DEV_EXTENT_KEY)
			break;

		if (found_key.offset >= end)
			break;

		if (found_key.offset < key.offset)
			break;

		dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
		length = btrfs_dev_extent_length(l, dev_extent);

		if (found_key.offset + length <= start)
			goto skip;

		chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);

		/*
		 * Get a reference on the corresponding block group to prevent
		 * the chunk from going away while we scrub it.
		 */
		cache = btrfs_lookup_block_group(fs_info, chunk_offset);

		/* Some chunks are removed but not committed to disk yet,
		 * continue scrubbing. */
		if (!cache)
			goto skip;

		/*
		 * Make sure that while we are scrubbing the corresponding
		 * block group doesn't get its logical address and its device
		 * extents reused for another block group, which can possibly
		 * be of a different type and different profile. We do this to
		 * prevent falling into races with the async block group
		 * deletion.
		 */
		spin_lock(&cache->lock);
		if (cache->removed) {
			spin_unlock(&cache->lock);
			btrfs_put_block_group(cache);
			goto skip;
		}
		btrfs_freeze_block_group(cache);
		spin_unlock(&cache->lock);

		/*
		 * We need to call btrfs_inc_block_group_ro() with scrubs_paused,
		 * to avoid deadlock caused by:
		 * btrfs_inc_block_group_ro()
		 * -> btrfs_wait_for_commit()
		 * -> btrfs_commit_transaction()
		 * -> btrfs_scrub_pause()
		 */
		scrub_pause_on(fs_info);

		/*
		 * Don't do chunk preallocation for scrub.
		 *
		 * Metadata is always COWed, so a scrub does not need free
		 * space in the block group it sets read-only; only
		 * dev-replace, which must redirect new writes away from the
		 * range being copied, asks btrfs_inc_block_group_ro() below
		 * to allocate a new chunk if necessary.
		 */
		ret = btrfs_inc_block_group_ro(cache, sctx->is_dev_replace);
		if (ret == 0) {
			ro_set = 1;
		} else if (ret == -ENOSPC && !sctx->is_dev_replace) {
			/*
			 * btrfs_inc_block_group_ro() returns -ENOSPC when it
			 * failed in creating new chunk for metadata.
			 * It is not a problem for scrub, because
			 * metadata are always COWed, and our scrub paused
			 * commit_transactions.
			 */
			ro_set = 0;
		} else {
			btrfs_warn(fs_info,
				   "failed setting block group ro: %d", ret);
			btrfs_unfreeze_block_group(cache);
			btrfs_put_block_group(cache);
			scrub_pause_off(fs_info);
			break;
		}

		/*
		 * Now the target block is marked RO, wait for nocow writes to
		 * finish before dev-replace.
		 * COW is fine, as COW never overwrites extents in commit tree.
		 */
		if (sctx->is_dev_replace) {
			btrfs_wait_nocow_writers(cache);
			btrfs_wait_ordered_roots(fs_info, U64_MAX, cache->start,
						 cache->length);
		}

		scrub_pause_off(fs_info);
		down_write(&dev_replace->rwsem);
		dev_replace->cursor_right = found_key.offset + length;
		dev_replace->cursor_left = found_key.offset;
		dev_replace->item_needs_writeback = 1;
		up_write(&dev_replace->rwsem);

		ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length,
				  found_key.offset, cache);

		/*
		 * Flush, submit all pending read and write bios, afterwards
		 * wait for them.
		 * Note that in the dev replace case, a read request causes
		 * write requests that are submitted in the read completion
		 * worker. Therefore in the current situation, it is required
		 * that all write requests are flushed, so that all read and
		 * write requests are really completed when bios_in_flight
		 * changes to 0.
		 */
		sctx->flush_all_writes = true;
		scrub_submit(sctx);
		mutex_lock(&sctx->wr_lock);
		scrub_wr_submit(sctx);
		mutex_unlock(&sctx->wr_lock);

		wait_event(sctx->list_wait,
			   atomic_read(&sctx->bios_in_flight) == 0);

		scrub_pause_on(fs_info);

		/*
		 * Must be called before we decrease @scrub_paused.
		 * Make sure we don't block transaction commit while
		 * we are waiting pending workers finished.
		 */
		wait_event(sctx->list_wait,
			   atomic_read(&sctx->workers_pending) == 0);
		sctx->flush_all_writes = false;

		scrub_pause_off(fs_info);

		down_write(&dev_replace->rwsem);
		dev_replace->cursor_left = dev_replace->cursor_right;
		dev_replace->item_needs_writeback = 1;
		up_write(&dev_replace->rwsem);

		if (ro_set)
			btrfs_dec_block_group_ro(cache);

		/*
		 * We might have prevented the cleaner kthread from deleting
		 * this block group if it was already unused because we raced
		 * and are holding a reference to it. In such a case, we
		 * delete it and mark it as unused again (if still unused)
		 * so that it gets discarded or removed as soon as possible.
		 */
		spin_lock(&cache->lock);
		if (!cache->removed && !cache->ro && cache->reserved == 0 &&
		    cache->used == 0) {
			spin_unlock(&cache->lock);
			if (btrfs_test_opt(fs_info, DISCARD_ASYNC))
				btrfs_discard_queue_work(&fs_info->discard_ctl,
							 cache);
			else
				btrfs_mark_bg_unused(cache);
		} else {
			spin_unlock(&cache->lock);
		}

		btrfs_unfreeze_block_group(cache);
		btrfs_put_block_group(cache);
		if (ret)
			break;
		if (sctx->is_dev_replace &&
		    atomic64_read(&dev_replace->num_write_errors) > 0) {
			ret = -EIO;
			break;
		}
		if (sctx->stat.malloc_errors > 0) {
			ret = -ENOMEM;
			break;
		}
skip:
		key.offset = found_key.offset + length;
		btrfs_release_path(path);
	}

	btrfs_free_path(path);

	return ret;
}

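/*
 * Scrub all superblock copies that fit on the device. Superblocks are
 * not referenced by the extent tree, so they are handled separately from
 * the chunk walk.
 */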
static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
					   struct btrfs_device *scrub_dev)
{
	int i;
	u64 bytenr;
	u64 gen;
	int ret;
	struct btrfs_fs_info *fs_info = sctx->fs_info;

	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
		return -EROFS;

	/* Seed devices of a new filesystem have their own generation. */
	if (scrub_dev->fs_devices != fs_info->fs_devices)
		gen = scrub_dev->generation;
	else
		gen = fs_info->last_trans_committed;

	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
		bytenr = btrfs_sb_offset(i);
		if (bytenr + BTRFS_SUPER_INFO_SIZE >
		    scrub_dev->commit_total_bytes)
			break;

		ret = scrub_pages(sctx, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr,
				  scrub_dev, BTRFS_EXTENT_FLAG_SUPER, gen, i,
				  NULL, 1, bytenr);
		if (ret)
			return ret;
	}
	wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);

	return 0;
}

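/*
 * Drop a reference on the scrub workqueues and tear them down when the
 * last user is gone. The pointers are cleared under scrub_lock, but the
 * workqueues are destroyed only after the lock is dropped.
 */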
static void scrub_workers_put(struct btrfs_fs_info *fs_info)
{
	if (refcount_dec_and_mutex_lock(&fs_info->scrub_workers_refcnt,
					&fs_info->scrub_lock)) {
		struct btrfs_workqueue *scrub_workers = NULL;
		struct btrfs_workqueue *scrub_wr_comp = NULL;
		struct btrfs_workqueue *scrub_parity = NULL;

		scrub_workers = fs_info->scrub_workers;
		scrub_wr_comp = fs_info->scrub_wr_completion_workers;
		scrub_parity = fs_info->scrub_parity_workers;

		fs_info->scrub_workers = NULL;
		fs_info->scrub_wr_completion_workers = NULL;
		fs_info->scrub_parity_workers = NULL;
		mutex_unlock(&fs_info->scrub_lock);

		btrfs_destroy_workqueue(scrub_workers);
		btrfs_destroy_workqueue(scrub_wr_comp);
		btrfs_destroy_workqueue(scrub_parity);
	}
}

/*
 * Get a reference count on fs_info->scrub_workers. Start the workers if
 * necessary.
 */
static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
						int is_dev_replace)
{
	struct btrfs_workqueue *scrub_workers = NULL;
	struct btrfs_workqueue *scrub_wr_comp = NULL;
	struct btrfs_workqueue *scrub_parity = NULL;
	unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND;
	int max_active = fs_info->thread_pool_size;
	int ret = -ENOMEM;

	if (refcount_inc_not_zero(&fs_info->scrub_workers_refcnt))
		return 0;

	scrub_workers = btrfs_alloc_workqueue(fs_info, "scrub", flags,
					      is_dev_replace ? 1 : max_active, 4);
	if (!scrub_workers)
		goto fail_scrub_workers;

	scrub_wr_comp = btrfs_alloc_workqueue(fs_info, "scrubwrc", flags,
					      max_active, 2);
	if (!scrub_wr_comp)
		goto fail_scrub_wr_completion_workers;

	scrub_parity = btrfs_alloc_workqueue(fs_info, "scrubparity", flags,
					     max_active, 2);
	if (!scrub_parity)
		goto fail_scrub_parity_workers;

	mutex_lock(&fs_info->scrub_lock);
	if (refcount_read(&fs_info->scrub_workers_refcnt) == 0) {
		ASSERT(fs_info->scrub_workers == NULL &&
		       fs_info->scrub_wr_completion_workers == NULL &&
		       fs_info->scrub_parity_workers == NULL);
		fs_info->scrub_workers = scrub_workers;
		fs_info->scrub_wr_completion_workers = scrub_wr_comp;
		fs_info->scrub_parity_workers = scrub_parity;
		refcount_set(&fs_info->scrub_workers_refcnt, 1);
		mutex_unlock(&fs_info->scrub_lock);
		return 0;
	}
	/* Another thread raced in and created the workers for us. */
	refcount_inc(&fs_info->scrub_workers_refcnt);
	mutex_unlock(&fs_info->scrub_lock);

	ret = 0;
	btrfs_destroy_workqueue(scrub_parity);
fail_scrub_parity_workers:
	btrfs_destroy_workqueue(scrub_wr_comp);
fail_scrub_wr_completion_workers:
	btrfs_destroy_workqueue(scrub_workers);
fail_scrub_workers:
	return ret;
}

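/*
 * Entry point for both scrub and dev-replace: validate the size
 * assumptions scrub relies on, set up the scrub context and workers,
 * then scrub the superblocks and all chunks of the given device.
 */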
int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
		    u64 end, struct btrfs_scrub_progress *progress,
		    int readonly, int is_dev_replace)
{
	struct scrub_ctx *sctx;
	int ret;
	struct btrfs_device *dev;
	unsigned int nofs_flag;

	if (btrfs_fs_closing(fs_info))
		return -EAGAIN;

	if (fs_info->nodesize > BTRFS_STRIPE_LEN) {
		/*
		 * In this case scrub is unable to calculate the checksum
		 * the way scrub is implemented. Do not handle this
		 * situation at all because it won't ever happen.
		 */
		btrfs_err(fs_info,
			  "scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails",
			  fs_info->nodesize,
			  BTRFS_STRIPE_LEN);
		return -EINVAL;
	}

	if (fs_info->sectorsize != PAGE_SIZE) {
		/* Not supported for data w/o checksums. */
		btrfs_err_rl(fs_info,
			     "scrub: size assumption sectorsize != PAGE_SIZE (%d != %lu) fails",
			     fs_info->sectorsize, PAGE_SIZE);
		return -EINVAL;
	}

	if (fs_info->nodesize >
	    PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK ||
	    fs_info->sectorsize > PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK) {
		/*
		 * Would exhaust the array bounds of pagev member in
		 * struct scrub_block.
		 */
		btrfs_err(fs_info,
			  "scrub: size assumption nodesize and sectorsize <= SCRUB_MAX_PAGES_PER_BLOCK (%d <= %d && %d <= %d) fails",
			  fs_info->nodesize,
			  SCRUB_MAX_PAGES_PER_BLOCK,
			  fs_info->sectorsize,
			  SCRUB_MAX_PAGES_PER_BLOCK);
		return -EINVAL;
	}

	/* Allocate outside of device_list_mutex. */
	sctx = scrub_setup_ctx(fs_info, is_dev_replace);
	if (IS_ERR(sctx))
		return PTR_ERR(sctx);

	ret = scrub_workers_get(fs_info, is_dev_replace);
	if (ret)
		goto out_free_ctx;

	mutex_lock(&fs_info->fs_devices->device_list_mutex);
	dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true);
	if (!dev || (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) &&
		     !is_dev_replace)) {
		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
		ret = -ENODEV;
		goto out;
	}

	if (!is_dev_replace && !readonly &&
	    !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
		btrfs_err_in_rcu(fs_info,
			"scrub on devid %llu: filesystem on %s is not writable",
				 devid, rcu_str_deref(dev->name));
		ret = -EROFS;
		goto out;
	}

	mutex_lock(&fs_info->scrub_lock);
	if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
	    test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &dev->dev_state)) {
		mutex_unlock(&fs_info->scrub_lock);
		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
		ret = -EIO;
		goto out;
	}

	down_read(&fs_info->dev_replace.rwsem);
	if (dev->scrub_ctx ||
	    (!is_dev_replace &&
	     btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) {
		up_read(&fs_info->dev_replace.rwsem);
		mutex_unlock(&fs_info->scrub_lock);
		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
		ret = -EINPROGRESS;
		goto out;
	}
	up_read(&fs_info->dev_replace.rwsem);

	sctx->readonly = readonly;
	dev->scrub_ctx = sctx;
	mutex_unlock(&fs_info->fs_devices->device_list_mutex);

	/*
	 * Checking @scrub_pause_req here, we can avoid
	 * race between committing transaction and scrubbing.
	 */
	__scrub_blocked_if_needed(fs_info);
	atomic_inc(&fs_info->scrubs_running);
	mutex_unlock(&fs_info->scrub_lock);

	/*
	 * In order to avoid deadlock with reclaim when there is a transaction
	 * trying to pause scrub, make sure we use GFP_NOFS for all the
	 * allocations done at scrub_pages() and scrub_pages_for_parity()
	 * invoked by our callees. The pausing request is done when the
	 * transaction commit starts, and it blocks the transaction until
	 * scrub is paused (done at specific points at scrub_stripe() or
	 * right above before incrementing scrub_workers_refcnt).
	 */
	nofs_flag = memalloc_nofs_save();
	if (!is_dev_replace) {
		btrfs_info(fs_info, "scrub: started on devid %llu", devid);
		/*
		 * By holding device list mutex, we can
		 * kick off writing super in log tree sync.
		 */
		mutex_lock(&fs_info->fs_devices->device_list_mutex);
		ret = scrub_supers(sctx, dev);
		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
	}

	if (!ret)
		ret = scrub_enumerate_chunks(sctx, dev, start, end);
	memalloc_nofs_restore(nofs_flag);

	wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
	atomic_dec(&fs_info->scrubs_running);
	wake_up(&fs_info->scrub_pause_wait);

	wait_event(sctx->list_wait, atomic_read(&sctx->workers_pending) == 0);

	if (progress)
		memcpy(progress, &sctx->stat, sizeof(*progress));

	if (!is_dev_replace)
		btrfs_info(fs_info, "scrub: %s on devid %llu with status: %d",
			ret ? "not finished" : "finished", devid, ret);

	mutex_lock(&fs_info->scrub_lock);
	dev->scrub_ctx = NULL;
	mutex_unlock(&fs_info->scrub_lock);

	scrub_workers_put(fs_info);
	scrub_put_ctx(sctx);

	return ret;
out:
	scrub_workers_put(fs_info);
out_free_ctx:
	scrub_free_ctx(sctx);

	return ret;
}

void btrfs_scrub_pause(struct btrfs_fs_info *fs_info)
{
	mutex_lock(&fs_info->scrub_lock);
	atomic_inc(&fs_info->scrub_pause_req);
	while (atomic_read(&fs_info->scrubs_paused) !=
	       atomic_read(&fs_info->scrubs_running)) {
		mutex_unlock(&fs_info->scrub_lock);
		wait_event(fs_info->scrub_pause_wait,
			   atomic_read(&fs_info->scrubs_paused) ==
			   atomic_read(&fs_info->scrubs_running));
		mutex_lock(&fs_info->scrub_lock);
	}
	mutex_unlock(&fs_info->scrub_lock);
}

void btrfs_scrub_continue(struct btrfs_fs_info *fs_info)
{
	atomic_dec(&fs_info->scrub_pause_req);
	wake_up(&fs_info->scrub_pause_wait);
}

int btrfs_scrub_cancel(struct btrfs_fs_info *fs_info)
{
	mutex_lock(&fs_info->scrub_lock);
	if (!atomic_read(&fs_info->scrubs_running)) {
		mutex_unlock(&fs_info->scrub_lock);
		return -ENOTCONN;
	}

	atomic_inc(&fs_info->scrub_cancel_req);
	while (atomic_read(&fs_info->scrubs_running)) {
		mutex_unlock(&fs_info->scrub_lock);
		wait_event(fs_info->scrub_pause_wait,
			   atomic_read(&fs_info->scrubs_running) == 0);
		mutex_lock(&fs_info->scrub_lock);
	}
	atomic_dec(&fs_info->scrub_cancel_req);
	mutex_unlock(&fs_info->scrub_lock);

	return 0;
}

int btrfs_scrub_cancel_dev(struct btrfs_device *dev)
{
	struct btrfs_fs_info *fs_info = dev->fs_info;
	struct scrub_ctx *sctx;

	mutex_lock(&fs_info->scrub_lock);
	sctx = dev->scrub_ctx;
	if (!sctx) {
		mutex_unlock(&fs_info->scrub_lock);
		return -ENOTCONN;
	}
	atomic_inc(&sctx->cancel_req);
	while (dev->scrub_ctx) {
		mutex_unlock(&fs_info->scrub_lock);
		wait_event(fs_info->scrub_pause_wait,
			   dev->scrub_ctx == NULL);
		mutex_lock(&fs_info->scrub_lock);
	}
	mutex_unlock(&fs_info->scrub_lock);

	return 0;
}

int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
			 struct btrfs_scrub_progress *progress)
{
	struct btrfs_device *dev;
	struct scrub_ctx *sctx = NULL;

	mutex_lock(&fs_info->fs_devices->device_list_mutex);
	dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true);
	if (dev)
		sctx = dev->scrub_ctx;
	if (sctx)
		memcpy(progress, &sctx->stat, sizeof(*progress));
	mutex_unlock(&fs_info->fs_devices->device_list_mutex);

	return dev ? (sctx ? 0 : -ENOTCONN) : -ENODEV;
}

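/*
 * Used by the dev-replace path to redirect an extent read: map the
 * logical range and return the first stripe's physical address, device
 * and mirror number.
 */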
static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
			       u64 extent_logical, u64 extent_len,
			       u64 *extent_physical,
			       struct btrfs_device **extent_dev,
			       int *extent_mirror_num)
{
	u64 mapped_length;
	struct btrfs_bio *bbio = NULL;
	int ret;

	mapped_length = extent_len;
	ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, extent_logical,
			      &mapped_length, &bbio, 0);
	if (ret || !bbio || mapped_length < extent_len ||
	    !bbio->stripes[0].dev->bdev) {
		btrfs_put_bbio(bbio);
		return;
	}

	*extent_physical = bbio->stripes[0].physical;
	*extent_mirror_num = bbio->mirror_num;
	*extent_dev = bbio->stripes[0].dev;
	btrfs_put_bbio(bbio);
}