// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2011, 2012 STRATO.  All rights reserved.
 */

#include <linux/blkdev.h>
#include <linux/ratelimit.h>
#include <linux/sched/mm.h>
#include <crypto/hash.h>
#include "ctree.h"
#include "discard.h"
#include "volumes.h"
#include "disk-io.h"
#include "ordered-data.h"
#include "transaction.h"
#include "backref.h"
#include "extent_io.h"
#include "dev-replace.h"
#include "check-integrity.h"
#include "rcu-string.h"
#include "raid56.h"
#include "block-group.h"

/*
 * This is only the first step towards a full-featured scrub. It reads all
 * extents and super blocks and verifies the checksums. In case a bad checksum
 * is found or the extent cannot be read, good data will be written back if
 * any copy can be found.
 *
 * Future enhancements:
 *  - In case an unrepairable extent is encountered, track which files are
 *    affected and report them
 *  - track and record media errors, throw out bad devices
 *  - add a mode to also read unallocated space
 */

struct scrub_block;
struct scrub_ctx;

/*
 * The following three values only influence the performance.
 *
 * The last one configures the number of parallel and outstanding I/O
 * operations. The first two values configure an upper limit for the number
 * of (dynamically allocated) pages that are added to a bio.
 */
#define SCRUB_PAGES_PER_RD_BIO	32	/* 128KiB per bio */
#define SCRUB_PAGES_PER_WR_BIO	32	/* 128KiB per bio */
#define SCRUB_BIOS_PER_SCTX	64	/* 8MiB per device in flight */

/*
 * The following value times PAGE_SIZE needs to be large enough to match the
 * largest node/leaf/sector size that shall be supported.
 * Values larger than BTRFS_STRIPE_LEN are not supported.
 */
#define SCRUB_MAX_PAGES_PER_BLOCK	16	/* 64KiB with 4KiB pages */
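
/*
 * With the common 4KiB PAGE_SIZE the limits above work out as:
 *
 *	SCRUB_PAGES_PER_RD_BIO * PAGE_SIZE      = 32 * 4KiB = 128KiB per bio
 *	SCRUB_BIOS_PER_SCTX * 128KiB            = 8MiB of reads in flight
 *	SCRUB_MAX_PAGES_PER_BLOCK * PAGE_SIZE   = 64KiB, the largest node,
 *						  leaf or sector size supported
 */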

struct scrub_recover {
	refcount_t		refs;
	struct btrfs_bio	*bbio;
	u64			map_length;
};

struct scrub_page {
	struct scrub_block	*sblock;
	struct page		*page;
	struct btrfs_device	*dev;
	struct list_head	list;
	u64			flags;  /* extent flags */
	u64			generation;
	u64			logical;
	u64			physical;
	u64			physical_for_dev_replace;
	atomic_t		refs;
	struct {
		unsigned int	mirror_num:8;
		unsigned int	have_csum:1;
		unsigned int	io_error:1;
	};
	u8			csum[BTRFS_CSUM_SIZE];

	struct scrub_recover	*recover;
};

struct scrub_bio {
	int			index;
	struct scrub_ctx	*sctx;
	struct btrfs_device	*dev;
	struct bio		*bio;
	blk_status_t		status;
	u64			logical;
	u64			physical;
#if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO
	struct scrub_page	*pagev[SCRUB_PAGES_PER_WR_BIO];
#else
	struct scrub_page	*pagev[SCRUB_PAGES_PER_RD_BIO];
#endif
	int			page_count;
	int			next_free;
	struct btrfs_work	work;
};

struct scrub_block {
	struct scrub_page	*pagev[SCRUB_MAX_PAGES_PER_BLOCK];
	int			page_count;
	atomic_t		outstanding_pages;
	refcount_t		refs; /* free mem on transition to zero */
	struct scrub_ctx	*sctx;
	struct scrub_parity	*sparity;
	struct {
		unsigned int	header_error:1;
		unsigned int	checksum_error:1;
		unsigned int	no_io_error_seen:1;
		unsigned int	generation_error:1; /* also sets header_error */

		/* The following is for the data used to check parity */
		/* It is for the data with checksum */
		unsigned int	data_corrected:1;
	};
	struct btrfs_work	work;
};

/* Used for the chunks with parity stripe such as RAID5/6 */
struct scrub_parity {
	struct scrub_ctx	*sctx;

	struct btrfs_device	*scrub_dev;

	u64			logic_start;

	u64			logic_end;

	int			nsectors;

	u64			stripe_len;

	refcount_t		refs;

	struct list_head	spages;

	/* Work of parity check and repair */
	struct btrfs_work	work;

	/* Mark the parity blocks which have data */
	unsigned long		*dbitmap;

	/*
	 * Mark the parity blocks which have data, but errors happen when
	 * reading data or checking data
	 */
	unsigned long		*ebitmap;

	/* Both bitmaps point into this trailing storage */
	unsigned long		bitmap[0];
};

struct scrub_ctx {
	struct scrub_bio	*bios[SCRUB_BIOS_PER_SCTX];
	struct btrfs_fs_info	*fs_info;
	int			first_free;
	int			curr;
	atomic_t		bios_in_flight;
	atomic_t		workers_pending;
	spinlock_t		list_lock;
	wait_queue_head_t	list_wait;
	u16			csum_size;
	struct list_head	csum_list;
	atomic_t		cancel_req;
	int			readonly;
	int			pages_per_rd_bio;

	int			is_dev_replace;

	struct scrub_bio	*wr_curr_bio;
	struct mutex		wr_lock;
	int			pages_per_wr_bio; /* <= SCRUB_PAGES_PER_WR_BIO */
	struct btrfs_device	*wr_tgtdev;
	bool			flush_all_writes;

	/*
	 * statistics
	 */
	struct btrfs_scrub_progress stat;
	spinlock_t		stat_lock;

	/*
	 * Use a ref counter to avoid use-after-free issues. Scrub workers
	 * decrement bios_in_flight and workers_pending and then do a wakeup
	 * on the list_wait wait queue. We must ensure the main scrub task
	 * doesn't free the scrub context before or while the workers are
	 * doing the wakeup() call.
	 */
	refcount_t		refs;
};

struct scrub_warning {
	struct btrfs_path	*path;
	u64			extent_item_size;
	const char		*errstr;
	u64			physical;
	u64			logical;
	struct btrfs_device	*dev;
};

struct full_stripe_lock {
	struct rb_node		node;
	u64			logical;
	u64			refs;
	struct mutex		mutex;
};

static void scrub_pending_bio_inc(struct scrub_ctx *sctx);
static void scrub_pending_bio_dec(struct scrub_ctx *sctx);
static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
				     struct scrub_block *sblocks_for_recheck);
static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
				struct scrub_block *sblock,
				int retry_failed_mirror);
static void scrub_recheck_block_checksum(struct scrub_block *sblock);
static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
					     struct scrub_block *sblock_good);
static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
					    struct scrub_block *sblock_good,
					    int page_num, int force_write);
static void scrub_write_block_to_dev_replace(struct scrub_block *sblock);
static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
					   int page_num);
static int scrub_checksum_data(struct scrub_block *sblock);
static int scrub_checksum_tree_block(struct scrub_block *sblock);
static int scrub_checksum_super(struct scrub_block *sblock);
static void scrub_block_get(struct scrub_block *sblock);
static void scrub_block_put(struct scrub_block *sblock);
static void scrub_page_get(struct scrub_page *spage);
static void scrub_page_put(struct scrub_page *spage);
static void scrub_parity_get(struct scrub_parity *sparity);
static void scrub_parity_put(struct scrub_parity *sparity);
static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
				    struct scrub_page *spage);
static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
		       u64 physical, struct btrfs_device *dev, u64 flags,
		       u64 gen, int mirror_num, u8 *csum, int force,
		       u64 physical_for_dev_replace);
static void scrub_bio_end_io(struct bio *bio);
static void scrub_bio_end_io_worker(struct btrfs_work *work);
static void scrub_block_complete(struct scrub_block *sblock);
static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
			       u64 extent_logical, u64 extent_len,
			       u64 *extent_physical,
			       struct btrfs_device **extent_dev,
			       int *extent_mirror_num);
static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
				    struct scrub_page *spage);
static void scrub_wr_submit(struct scrub_ctx *sctx);
static void scrub_wr_bio_end_io(struct bio *bio);
static void scrub_wr_bio_end_io_worker(struct btrfs_work *work);
static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
static void scrub_put_ctx(struct scrub_ctx *sctx);

static inline int scrub_is_page_on_raid56(struct scrub_page *page)
{
	return page->recover &&
	       (page->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK);
}

static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
{
	refcount_inc(&sctx->refs);
	atomic_inc(&sctx->bios_in_flight);
}

static void scrub_pending_bio_dec(struct scrub_ctx *sctx)
{
	atomic_dec(&sctx->bios_in_flight);
	wake_up(&sctx->list_wait);
	scrub_put_ctx(sctx);
}

static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
{
	while (atomic_read(&fs_info->scrub_pause_req)) {
		mutex_unlock(&fs_info->scrub_lock);
		wait_event(fs_info->scrub_pause_wait,
			   atomic_read(&fs_info->scrub_pause_req) == 0);
		mutex_lock(&fs_info->scrub_lock);
	}
}

static void scrub_pause_on(struct btrfs_fs_info *fs_info)
{
	atomic_inc(&fs_info->scrubs_paused);
	wake_up(&fs_info->scrub_pause_wait);
}

static void scrub_pause_off(struct btrfs_fs_info *fs_info)
{
	mutex_lock(&fs_info->scrub_lock);
	__scrub_blocked_if_needed(fs_info);
	atomic_dec(&fs_info->scrubs_paused);
	mutex_unlock(&fs_info->scrub_lock);

	wake_up(&fs_info->scrub_pause_wait);
}

static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
{
	scrub_pause_on(fs_info);
	scrub_pause_off(fs_info);
}
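
/*
 * scrub_pause_on() and scrub_pause_off() bracket sections during which the
 * scrub must count itself as paused, e.g. while waiting for a transaction
 * commit.  A sketch of the intended pairing (callers elsewhere in this file
 * follow this pattern):
 *
 *	scrub_pause_on(fs_info);
 *	... wait or do work that may block a commit ...
 *	scrub_pause_off(fs_info);	// blocks here while a pause
 *					// request is still pending
 */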

/*
 * Insert new full stripe lock into full stripe locks tree
 *
 * Return pointer to existing or newly inserted full_stripe_lock structure if
 * everything works well.
 * Return ERR_PTR(-ENOMEM) if we failed to allocate memory.
 *
 * NOTE: caller must hold full_stripe_locks_root->lock before calling this
 * function.
 */
static struct full_stripe_lock *insert_full_stripe_lock(
		struct btrfs_full_stripe_locks_tree *locks_root,
		u64 fstripe_logical)
{
	struct rb_node **p;
	struct rb_node *parent = NULL;
	struct full_stripe_lock *entry;
	struct full_stripe_lock *ret;

	lockdep_assert_held(&locks_root->lock);

	p = &locks_root->root.rb_node;
	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct full_stripe_lock, node);
		if (fstripe_logical < entry->logical) {
			p = &(*p)->rb_left;
		} else if (fstripe_logical > entry->logical) {
			p = &(*p)->rb_right;
		} else {
			entry->refs++;
			return entry;
		}
	}

	/*
	 * Insert new lock.
	 */
	ret = kmalloc(sizeof(*ret), GFP_KERNEL);
	if (!ret)
		return ERR_PTR(-ENOMEM);
	ret->logical = fstripe_logical;
	ret->refs = 1;
	mutex_init(&ret->mutex);

	rb_link_node(&ret->node, parent, p);
	rb_insert_color(&ret->node, &locks_root->root);
	return ret;
}

/*
 * Search for a full stripe lock of a block group
 *
 * Return pointer to existing full stripe lock if found.
 * Return NULL if not found.
 */
static struct full_stripe_lock *search_full_stripe_lock(
		struct btrfs_full_stripe_locks_tree *locks_root,
		u64 fstripe_logical)
{
	struct rb_node *node;
	struct full_stripe_lock *entry;

	lockdep_assert_held(&locks_root->lock);

	node = locks_root->root.rb_node;
	while (node) {
		entry = rb_entry(node, struct full_stripe_lock, node);
		if (fstripe_logical < entry->logical)
			node = node->rb_left;
		else if (fstripe_logical > entry->logical)
			node = node->rb_right;
		else
			return entry;
	}
	return NULL;
}

/*
 * Helper to get full stripe logical from a normal bytenr.
 *
 * Caller must ensure @cache is a RAID56 block group.
 */
static u64 get_full_stripe_logical(struct btrfs_block_group *cache, u64 bytenr)
{
	u64 ret;

	/*
	 * Due to chunk item size limit, full stripe length should not be
	 * larger than U32_MAX. Just a sanity check here.
	 */
	WARN_ON_ONCE(cache->full_stripe_len >= U32_MAX);

	/*
	 * round_down() can only handle power of 2, while RAID56 full
	 * stripe length can be 64KiB * n, so we need to manually round down.
	 */
	ret = div64_u64(bytenr - cache->start, cache->full_stripe_len) *
			cache->full_stripe_len + cache->start;
	return ret;
}
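
/*
 * Worked example for get_full_stripe_logical(): with a block group starting
 * at 1GiB and a full stripe length of 192KiB (e.g. three 64KiB data
 * stripes), a bytenr of 1GiB + 500KiB lies in full stripe number
 * 500KiB / 192KiB = 2, so the returned full stripe start is
 * 1GiB + 2 * 192KiB = 1GiB + 384KiB.
 */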

/*
 * Lock a full stripe to avoid concurrency of recovery and read
 *
 * It's only used for profiles with parities (RAID5/6), for other profiles it
 * does nothing.
 *
 * Return 0 if we locked the full stripe covering @bytenr, with a mutex held.
 * So caller must call unlock_full_stripe() in the same context.
 *
 * Return <0 if we encounter an error.
 */
static int lock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr,
			    bool *locked_ret)
{
	struct btrfs_block_group *bg_cache;
	struct btrfs_full_stripe_locks_tree *locks_root;
	struct full_stripe_lock *existing;
	u64 fstripe_start;
	int ret = 0;

	*locked_ret = false;
	bg_cache = btrfs_lookup_block_group(fs_info, bytenr);
	if (!bg_cache) {
		ASSERT(0);
		return -ENOENT;
	}

	/* Profiles not based on parity don't need full stripe lock */
	if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK))
		goto out;
	locks_root = &bg_cache->full_stripe_locks_root;

	fstripe_start = get_full_stripe_logical(bg_cache, bytenr);

	/* Now insert the full stripe lock */
	mutex_lock(&locks_root->lock);
	existing = insert_full_stripe_lock(locks_root, fstripe_start);
	mutex_unlock(&locks_root->lock);
	if (IS_ERR(existing)) {
		ret = PTR_ERR(existing);
		goto out;
	}
	mutex_lock(&existing->mutex);
	*locked_ret = true;
out:
	btrfs_put_block_group(bg_cache);
	return ret;
}

/*
 * Unlock a full stripe.
 *
 * NOTE: Caller must ensure it's the same context calling the corresponding
 * lock_full_stripe().
 *
 * Return 0 if we unlock the full stripe without problem.
 * Return <0 for error.
 */
static int unlock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr,
			      bool locked)
{
	struct btrfs_block_group *bg_cache;
	struct btrfs_full_stripe_locks_tree *locks_root;
	struct full_stripe_lock *fstripe_lock;
	u64 fstripe_start;
	bool freeit = false;
	int ret = 0;

	/* If we didn't acquire the full stripe lock, no need to continue */
	if (!locked)
		return 0;

	bg_cache = btrfs_lookup_block_group(fs_info, bytenr);
	if (!bg_cache) {
		ASSERT(0);
		return -ENOENT;
	}
	if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK))
		goto out;

	locks_root = &bg_cache->full_stripe_locks_root;
	fstripe_start = get_full_stripe_logical(bg_cache, bytenr);

	mutex_lock(&locks_root->lock);
	fstripe_lock = search_full_stripe_lock(locks_root, fstripe_start);
	/* Unpaired unlock_full_stripe() detected */
	if (!fstripe_lock) {
		WARN_ON(1);
		ret = -ENOENT;
		mutex_unlock(&locks_root->lock);
		goto out;
	}

	if (fstripe_lock->refs == 0) {
		WARN_ON(1);
		btrfs_warn(fs_info, "full stripe lock at %llu refcount underflow",
			fstripe_lock->logical);
	} else {
		fstripe_lock->refs--;
	}

	if (fstripe_lock->refs == 0) {
		rb_erase(&fstripe_lock->node, &locks_root->root);
		freeit = true;
	}
	mutex_unlock(&locks_root->lock);

	mutex_unlock(&fstripe_lock->mutex);
	if (freeit)
		kfree(fstripe_lock);
out:
	btrfs_put_block_group(bg_cache);
	return ret;
}
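
/*
 * The expected pairing for the two helpers above; callers such as
 * scrub_handle_errored_block() follow this shape:
 *
 *	bool locked;
 *
 *	ret = lock_full_stripe(fs_info, logical, &locked);
 *	if (ret < 0)
 *		return ret;
 *	... exclusive work on the RAID5/6 full stripe ...
 *	ret = unlock_full_stripe(fs_info, logical, locked);
 *
 * Passing @locked back unchanged makes the unlock a no-op for non-RAID56
 * profiles, where lock_full_stripe() never takes the mutex.
 */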

static void scrub_free_csums(struct scrub_ctx *sctx)
{
	while (!list_empty(&sctx->csum_list)) {
		struct btrfs_ordered_sum *sum;
		sum = list_first_entry(&sctx->csum_list,
				       struct btrfs_ordered_sum, list);
		list_del(&sum->list);
		kfree(sum);
	}
}

static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
{
	int i;

	if (!sctx)
		return;

	/* this can happen when scrub is cancelled */
	if (sctx->curr != -1) {
		struct scrub_bio *sbio = sctx->bios[sctx->curr];

		for (i = 0; i < sbio->page_count; i++) {
			WARN_ON(!sbio->pagev[i]->page);
			scrub_block_put(sbio->pagev[i]->sblock);
		}
		bio_put(sbio->bio);
	}

	for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
		struct scrub_bio *sbio = sctx->bios[i];

		if (!sbio)
			break;
		kfree(sbio);
	}

	kfree(sctx->wr_curr_bio);
	scrub_free_csums(sctx);
	kfree(sctx);
}

static void scrub_put_ctx(struct scrub_ctx *sctx)
{
	if (refcount_dec_and_test(&sctx->refs))
		scrub_free_ctx(sctx);
}

static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
		struct btrfs_fs_info *fs_info, int is_dev_replace)
{
	struct scrub_ctx *sctx;
	int i;

	sctx = kzalloc(sizeof(*sctx), GFP_KERNEL);
	if (!sctx)
		goto nomem;
	refcount_set(&sctx->refs, 1);
	sctx->is_dev_replace = is_dev_replace;
	sctx->pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO;
	sctx->curr = -1;
	sctx->fs_info = fs_info;
	INIT_LIST_HEAD(&sctx->csum_list);
	for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
		struct scrub_bio *sbio;

		sbio = kzalloc(sizeof(*sbio), GFP_KERNEL);
		if (!sbio)
			goto nomem;
		sctx->bios[i] = sbio;

		sbio->index = i;
		sbio->sctx = sctx;
		sbio->page_count = 0;
		btrfs_init_work(&sbio->work, scrub_bio_end_io_worker, NULL,
				NULL);

		if (i != SCRUB_BIOS_PER_SCTX - 1)
			sctx->bios[i]->next_free = i + 1;
		else
			sctx->bios[i]->next_free = -1;
	}
	sctx->first_free = 0;
	atomic_set(&sctx->bios_in_flight, 0);
	atomic_set(&sctx->workers_pending, 0);
	atomic_set(&sctx->cancel_req, 0);
	sctx->csum_size = btrfs_super_csum_size(fs_info->super_copy);

	spin_lock_init(&sctx->list_lock);
	spin_lock_init(&sctx->stat_lock);
	init_waitqueue_head(&sctx->list_wait);

	WARN_ON(sctx->wr_curr_bio != NULL);
	mutex_init(&sctx->wr_lock);
	sctx->wr_curr_bio = NULL;
	if (is_dev_replace) {
		WARN_ON(!fs_info->dev_replace.tgtdev);
		sctx->pages_per_wr_bio = SCRUB_PAGES_PER_WR_BIO;
		sctx->wr_tgtdev = fs_info->dev_replace.tgtdev;
		sctx->flush_all_writes = false;
	}

	return sctx;

nomem:
	scrub_free_ctx(sctx);
	return ERR_PTR(-ENOMEM);
}
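
/*
 * The bios[] array doubles as a free list: next_free chains unused entries
 * and first_free points at the head, so claiming a scrub_bio is O(1) under
 * list_lock.  After scrub_setup_ctx() the chain is simply
 *
 *	first_free = 0,  bios[0]->next_free = 1,  ...,
 *	bios[SCRUB_BIOS_PER_SCTX - 1]->next_free = -1
 *
 * and scrub_bio_end_io_worker() pushes completed bios back onto the head.
 */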

static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
				     void *warn_ctx)
{
	u64 isize;
	u32 nlink;
	int ret;
	int i;
	unsigned nofs_flag;
	struct extent_buffer *eb;
	struct btrfs_inode_item *inode_item;
	struct scrub_warning *swarn = warn_ctx;
	struct btrfs_fs_info *fs_info = swarn->dev->fs_info;
	struct inode_fs_paths *ipath = NULL;
	struct btrfs_root *local_root;
	struct btrfs_key root_key;
	struct btrfs_key key;

	root_key.objectid = root;
	root_key.type = BTRFS_ROOT_ITEM_KEY;
	root_key.offset = (u64)-1;
	local_root = btrfs_read_fs_root_no_name(fs_info, &root_key);
	if (IS_ERR(local_root)) {
		ret = PTR_ERR(local_root);
		goto err;
	}

	/*
	 * this makes the path point to (inum INODE_ITEM ioff)
	 */
	key.objectid = inum;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, local_root, &key, swarn->path, 0, 0);
	if (ret) {
		btrfs_release_path(swarn->path);
		goto err;
	}

	eb = swarn->path->nodes[0];
	inode_item = btrfs_item_ptr(eb, swarn->path->slots[0],
					struct btrfs_inode_item);
	isize = btrfs_inode_size(eb, inode_item);
	nlink = btrfs_inode_nlink(eb, inode_item);
	btrfs_release_path(swarn->path);

	/*
	 * init_ipath() may allocate memory and trigger reclaim; we are
	 * already deep in I/O context here, so stay in the NOFS allocation
	 * scope to avoid deadlocks.
	 */
	nofs_flag = memalloc_nofs_save();
	ipath = init_ipath(4096, local_root, swarn->path);
	memalloc_nofs_restore(nofs_flag);
	if (IS_ERR(ipath)) {
		ret = PTR_ERR(ipath);
		ipath = NULL;
		goto err;
	}
	ret = paths_from_inode(inum, ipath);

	if (ret < 0)
		goto err;

	/*
	 * we deliberately ignore the bit ipath might have been too small to
	 * hold all of the paths here
	 */
	for (i = 0; i < ipath->fspath->elem_cnt; ++i)
		btrfs_warn_in_rcu(fs_info,
"%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu, length %llu, links %u (path: %s)",
				  swarn->errstr, swarn->logical,
				  rcu_str_deref(swarn->dev->name),
				  swarn->physical,
				  root, inum, offset,
				  min(isize - offset, (u64)PAGE_SIZE), nlink,
				  (char *)(unsigned long)ipath->fspath->val[i]);

	free_ipath(ipath);
	return 0;

err:
	btrfs_warn_in_rcu(fs_info,
			  "%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu: path resolving failed with ret=%d",
			  swarn->errstr, swarn->logical,
			  rcu_str_deref(swarn->dev->name),
			  swarn->physical,
			  root, inum, offset, ret);

	free_ipath(ipath);
	return 0;
}

static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
{
	struct btrfs_device *dev;
	struct btrfs_fs_info *fs_info;
	struct btrfs_path *path;
	struct btrfs_key found_key;
	struct extent_buffer *eb;
	struct btrfs_extent_item *ei;
	struct scrub_warning swarn;
	unsigned long ptr = 0;
	u64 extent_item_pos;
	u64 flags = 0;
	u64 ref_root;
	u32 item_size;
	u8 ref_level = 0;
	int ret;

	WARN_ON(sblock->page_count < 1);
	dev = sblock->pagev[0]->dev;
	fs_info = sblock->sctx->fs_info;

	path = btrfs_alloc_path();
	if (!path)
		return;

	swarn.physical = sblock->pagev[0]->physical;
	swarn.logical = sblock->pagev[0]->logical;
	swarn.errstr = errstr;
	swarn.dev = NULL;

	ret = extent_from_logical(fs_info, swarn.logical, path, &found_key,
				  &flags);
	if (ret < 0)
		goto out;

	extent_item_pos = swarn.logical - found_key.objectid;
	swarn.extent_item_size = found_key.offset;

	eb = path->nodes[0];
	ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
	item_size = btrfs_item_size_nr(eb, path->slots[0]);

	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		do {
			ret = tree_backref_for_extent(&ptr, eb, &found_key, ei,
						      item_size, &ref_root,
						      &ref_level);
			btrfs_warn_in_rcu(fs_info,
"%s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu",
				errstr, swarn.logical,
				rcu_str_deref(dev->name),
				swarn.physical,
				ref_level ? "node" : "leaf",
				ret < 0 ? -1 : ref_level,
				ret < 0 ? -1 : ref_root);
		} while (ret != 1);
		btrfs_release_path(path);
	} else {
		btrfs_release_path(path);
		swarn.path = path;
		swarn.dev = dev;
		iterate_extent_inodes(fs_info, found_key.objectid,
				      extent_item_pos, 1,
				      scrub_print_warning_inode, &swarn, false);
	}

out:
	btrfs_free_path(path);
}

static inline void scrub_get_recover(struct scrub_recover *recover)
{
	refcount_inc(&recover->refs);
}

static inline void scrub_put_recover(struct btrfs_fs_info *fs_info,
				     struct scrub_recover *recover)
{
	if (refcount_dec_and_test(&recover->refs)) {
		btrfs_bio_counter_dec(fs_info);
		btrfs_put_bbio(recover->bbio);
		kfree(recover);
	}
}

/*
 * scrub_handle_errored_block gets called when either verification of the
 * pages failed or the bio failed to read, e.g. with EIO. In the latter
 * case, this function handles all pages in the bio, even though only one
 * may be bad.
 * The goal of this function is to repair the errored block by using the
 * contents of one of the mirrors.
 */
static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
{
	struct scrub_ctx *sctx = sblock_to_check->sctx;
	struct btrfs_device *dev;
	struct btrfs_fs_info *fs_info;
	u64 logical;
	unsigned int failed_mirror_index;
	unsigned int is_metadata;
	unsigned int have_csum;
	struct scrub_block *sblocks_for_recheck; /* holds one for each mirror */
	struct scrub_block *sblock_bad;
	int ret;
	int mirror_index;
	int page_num;
	int success;
	bool full_stripe_locked;
	unsigned int nofs_flag;
	static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);

	BUG_ON(sblock_to_check->page_count < 1);
	fs_info = sctx->fs_info;
	if (sblock_to_check->pagev[0]->flags & BTRFS_EXTENT_FLAG_SUPER) {
		/*
		 * if we find an error in a super block, we just report it.
		 * They will get written with the next transaction commit
		 * anyway
		 */
		spin_lock(&sctx->stat_lock);
		++sctx->stat.super_errors;
		spin_unlock(&sctx->stat_lock);
		return 0;
	}
	logical = sblock_to_check->pagev[0]->logical;
	BUG_ON(sblock_to_check->pagev[0]->mirror_num < 1);
	failed_mirror_index = sblock_to_check->pagev[0]->mirror_num - 1;
	is_metadata = !(sblock_to_check->pagev[0]->flags &
			BTRFS_EXTENT_FLAG_DATA);
	have_csum = sblock_to_check->pagev[0]->have_csum;
	dev = sblock_to_check->pagev[0]->dev;

	/*
	 * We must use GFP_NOFS because the scrub task might be waiting for a
	 * worker task executing this function and in turn a transaction
	 * commit might be waiting for the scrub task to pause (which needs to
	 * wait for all the worker tasks to complete before pausing).
	 * We do allocations in the workers through insert_full_stripe_lock()
	 * and scrub_add_page_to_wr_bio(), which happens down the call chain
	 * of this function.
	 */
	nofs_flag = memalloc_nofs_save();

	/*
	 * For RAID5/6, a race can happen for different device scrub threads:
	 * on data corruption the parity and data threads will both try to
	 * recover the data. The race can lead to doubly counted csum errors,
	 * or even an unrecoverable error, hence the full stripe lock.
	 */
	ret = lock_full_stripe(fs_info, logical, &full_stripe_locked);
	if (ret < 0) {
		memalloc_nofs_restore(nofs_flag);
		spin_lock(&sctx->stat_lock);
		if (ret == -ENOMEM)
			sctx->stat.malloc_errors++;
		sctx->stat.read_errors++;
		sctx->stat.uncorrectable_errors++;
		spin_unlock(&sctx->stat_lock);
		return ret;
	}

	/*
	 * read all mirrors one after the other. This includes to
	 * re-read the extent or metadata block that failed (that was
	 * the cause that this fixup code is called) another time,
	 * page by page this time in order to know which pages
	 * caused I/O errors and which ones are good (for all mirrors).
	 * It is the goal to handle the situation when more than one
	 * mirror contains I/O errors, but the errors do not
	 * overlap, i.e. the data can be repaired by selecting the
	 * pages from those mirrors without I/O error on the
	 * particular pages. One example (with blocks >= 2 * PAGE_SIZE)
	 * would be that mirror #1 has an I/O error on the first page,
	 * the second page is good, and mirror #2 has an I/O error on
	 * the second page, but the first page is good.
	 * Then the first page of the first mirror can be repaired by
	 * taking the first page of the second mirror, and the
	 * second page of the second mirror can be repaired by
	 * copying the contents of the 2nd page of the 1st mirror.
	 * One more note: if the pages of one mirror contain I/O
	 * errors, the checksum cannot be verified. In order to get
	 * the best data for repairing, the first attempt is to find
	 * a mirror without I/O errors and with a validated checksum.
	 * Only if this is not possible, the pages are picked from
	 * mirrors with I/O errors without considering the checksum.
	 * If the latter is the case, at the end, the checksum of the
	 * repaired area is verified in order to correctly maintain
	 * the statistics.
	 */
	sblocks_for_recheck = kcalloc(BTRFS_MAX_MIRRORS,
				      sizeof(*sblocks_for_recheck), GFP_KERNEL);
	if (!sblocks_for_recheck) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.malloc_errors++;
		sctx->stat.read_errors++;
		sctx->stat.uncorrectable_errors++;
		spin_unlock(&sctx->stat_lock);
		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
		goto out;
	}

	/* setup the context, map the logical blocks and alloc the pages */
	ret = scrub_setup_recheck_block(sblock_to_check, sblocks_for_recheck);
	if (ret) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.read_errors++;
		sctx->stat.uncorrectable_errors++;
		spin_unlock(&sctx->stat_lock);
		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
		goto out;
	}
	BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS);
	sblock_bad = sblocks_for_recheck + failed_mirror_index;

	/* build and submit the bios for the failed mirror, check checksums */
	scrub_recheck_block(fs_info, sblock_bad, 1);

	if (!sblock_bad->header_error && !sblock_bad->checksum_error &&
	    sblock_bad->no_io_error_seen) {
		/*
		 * the error disappeared after reading page by page, or
		 * the area was part of a huge bio and other parts of the
		 * bio caused I/O errors, or the block layer merged several
		 * read requests into one and the error is caused by a
		 * different bio (usually one of the two latter cases is
		 * the cause)
		 */
		spin_lock(&sctx->stat_lock);
		sctx->stat.unverified_errors++;
		sblock_to_check->data_corrected = 1;
		spin_unlock(&sctx->stat_lock);

		if (sctx->is_dev_replace)
			scrub_write_block_to_dev_replace(sblock_bad);
		goto out;
	}

	if (!sblock_bad->no_io_error_seen) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.read_errors++;
		spin_unlock(&sctx->stat_lock);
		if (__ratelimit(&_rs))
			scrub_print_warning("i/o error", sblock_to_check);
		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
	} else if (sblock_bad->checksum_error) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.csum_errors++;
		spin_unlock(&sctx->stat_lock);
		if (__ratelimit(&_rs))
			scrub_print_warning("checksum error", sblock_to_check);
		btrfs_dev_stat_inc_and_print(dev,
					     BTRFS_DEV_STAT_CORRUPTION_ERRS);
	} else if (sblock_bad->header_error) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.verify_errors++;
		spin_unlock(&sctx->stat_lock);
		if (__ratelimit(&_rs))
			scrub_print_warning("checksum/header error",
					    sblock_to_check);
		if (sblock_bad->generation_error)
			btrfs_dev_stat_inc_and_print(dev,
				BTRFS_DEV_STAT_GENERATION_ERRS);
		else
			btrfs_dev_stat_inc_and_print(dev,
				BTRFS_DEV_STAT_CORRUPTION_ERRS);
	}

	if (sctx->readonly) {
		ASSERT(!sctx->is_dev_replace);
		goto out;
	}

	/*
	 * now build and submit the bios for the other mirrors, check
	 * checksums.
	 * First try to pick the mirror which is completely without I/O
	 * errors and also does not have a checksum error.
	 * If one is found, and if a checksum is present, the full block
	 * that is known to contain an error is rewritten. Afterwards a
	 * block that is supposed to have the identical contents is
	 * checked by reading the block pagewise and checking the
	 * checksums (the I/O is done in the scrub context).
	 *
	 * If a mirror is found which is completely correct, and no
	 * checksum is present, only those pages are rewritten that had
	 * an I/O error in the block to be repaired, since it cannot be
	 * determined which copy of the other pages is better (and it
	 * could happen otherwise that a correct page would be
	 * overwritten by a bad one).
	 */
	for (mirror_index = 0; ; mirror_index++) {
		struct scrub_block *sblock_other;

		if (mirror_index == failed_mirror_index)
			continue;

		/* raid56's mirror can be more than BTRFS_MAX_MIRRORS */
		if (!scrub_is_page_on_raid56(sblock_bad->pagev[0])) {
			if (mirror_index >= BTRFS_MAX_MIRRORS)
				break;
			if (!sblocks_for_recheck[mirror_index].page_count)
				break;

			sblock_other = sblocks_for_recheck + mirror_index;
		} else {
			struct scrub_recover *r = sblock_bad->pagev[0]->recover;
			int max_allowed = r->bbio->num_stripes -
						r->bbio->num_tgtdevs;

			if (mirror_index >= max_allowed)
				break;
			if (!sblocks_for_recheck[1].page_count)
				break;

			ASSERT(failed_mirror_index == 0);
			sblock_other = sblocks_for_recheck + 1;
			sblock_other->pagev[0]->mirror_num = 1 + mirror_index;
		}

		/* build and submit the bios, check checksums */
		scrub_recheck_block(fs_info, sblock_other, 0);

		if (!sblock_other->header_error &&
		    !sblock_other->checksum_error &&
		    sblock_other->no_io_error_seen) {
			if (sctx->is_dev_replace) {
				scrub_write_block_to_dev_replace(sblock_other);
				goto corrected_error;
			} else {
				ret = scrub_repair_block_from_good_copy(
						sblock_bad, sblock_other);
				if (!ret)
					goto corrected_error;
			}
		}
	}

	if (sblock_bad->no_io_error_seen && !sctx->is_dev_replace)
		goto did_not_correct_error;

	/*
	 * In case of I/O errors in the area that is supposed to be
	 * repaired, continue by picking good copies of those pages.
	 * Select the good pages from mirrors to rewrite bad pages from
	 * the area to fix. Afterwards verify the checksum of the block
	 * that is supposed to be repaired. This verification step is
	 * only done for the purpose of statistic counting and for the
	 * final scrub report, whether errors remain.
	 * A perfect fix would be to combine the pages of one mirror plus
	 * the pages of another mirror that can provide data correctly
	 * and repair with such a combination, but this is not implemented;
	 * a good copy is searched page by page instead.
	 */
	success = 1;
	for (page_num = 0; page_num < sblock_bad->page_count;
	     page_num++) {
		struct scrub_page *page_bad = sblock_bad->pagev[page_num];
		struct scrub_block *sblock_other = NULL;

		/* skip no-io-error pages in scrub */
		if (!page_bad->io_error && !sctx->is_dev_replace)
			continue;

		if (scrub_is_page_on_raid56(sblock_bad->pagev[0])) {
			/*
			 * In case of dev replace, if the raid56 rebuild
			 * process didn't work out correct data, then copy
			 * the content in sblock_bad to make sure the target
			 * device is identical to the source device, instead
			 * of writing garbage data in sblock_other to the
			 * target device.
			 */
			sblock_other = NULL;
		} else if (page_bad->io_error) {
			/* try to find a no-io-error page in mirrors */
			for (mirror_index = 0;
			     mirror_index < BTRFS_MAX_MIRRORS &&
			     sblocks_for_recheck[mirror_index].page_count > 0;
			     mirror_index++) {
				if (!sblocks_for_recheck[mirror_index].
				    pagev[page_num]->io_error) {
					sblock_other = sblocks_for_recheck +
						       mirror_index;
					break;
				}
			}
			if (!sblock_other)
				success = 0;
		}

		if (sctx->is_dev_replace) {
			/*
			 * did not find a mirror to fetch the page
			 * from. scrub_write_page_to_dev_replace()
			 * handles this case (page->io_error), by
			 * filling the block with zeros before
			 * submitting the write request
			 */
			if (!sblock_other)
				sblock_other = sblock_bad;

			if (scrub_write_page_to_dev_replace(sblock_other,
							    page_num) != 0) {
				atomic64_inc(
					&fs_info->dev_replace.num_write_errors);
				success = 0;
			}
		} else if (sblock_other) {
			ret = scrub_repair_page_from_good_copy(sblock_bad,
							       sblock_other,
							       page_num, 0);
			if (!ret)
				page_bad->io_error = 0;
			else
				success = 0;
		}
	}

	if (success && !sctx->is_dev_replace) {
		if (is_metadata || have_csum) {
			/*
			 * need to verify the checksum now that all
			 * sectors on disk are repaired (the write
			 * request for data to be repaired is on its way).
			 * Just be lazy and use scrub_recheck_block()
			 * which re-reads the data before the checksum
			 * is verified, but most likely the data comes out
			 * of the page cache.
			 */
			scrub_recheck_block(fs_info, sblock_bad, 1);
			if (!sblock_bad->header_error &&
			    !sblock_bad->checksum_error &&
			    sblock_bad->no_io_error_seen)
				goto corrected_error;
			else
				goto did_not_correct_error;
		} else {
corrected_error:
			spin_lock(&sctx->stat_lock);
			sctx->stat.corrected_errors++;
			sblock_to_check->data_corrected = 1;
			spin_unlock(&sctx->stat_lock);
			btrfs_err_rl_in_rcu(fs_info,
				"fixed up error at logical %llu on dev %s",
				logical, rcu_str_deref(dev->name));
		}
	} else {
did_not_correct_error:
		spin_lock(&sctx->stat_lock);
		sctx->stat.uncorrectable_errors++;
		spin_unlock(&sctx->stat_lock);
		btrfs_err_rl_in_rcu(fs_info,
			"unable to fixup (regular) error at logical %llu on dev %s",
			logical, rcu_str_deref(dev->name));
	}

out:
	if (sblocks_for_recheck) {
		for (mirror_index = 0; mirror_index < BTRFS_MAX_MIRRORS;
		     mirror_index++) {
			struct scrub_block *sblock = sblocks_for_recheck +
						     mirror_index;
			struct scrub_recover *recover;
			int page_index;

			for (page_index = 0; page_index < sblock->page_count;
			     page_index++) {
				sblock->pagev[page_index]->sblock = NULL;
				recover = sblock->pagev[page_index]->recover;
				if (recover) {
					scrub_put_recover(fs_info, recover);
					sblock->pagev[page_index]->recover =
									NULL;
				}
				scrub_page_put(sblock->pagev[page_index]);
			}
		}
		kfree(sblocks_for_recheck);
	}

	ret = unlock_full_stripe(fs_info, logical, full_stripe_locked);
	memalloc_nofs_restore(nofs_flag);
	if (ret < 0)
		return ret;
	return 0;
}

static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio)
{
	if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
		return 2;
	else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6)
		return 3;
	else
		return (int)bbio->num_stripes;
}
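
/*
 * The "mirror" counts for RAID5/6 above do not refer to real duplicate
 * copies: mirror 1 means reading the data stripe directly, while the higher
 * mirror numbers select reconstruction from parity (one way for RAID5, two
 * for RAID6 with its P and Q stripes), hence 2 and 3 here.
 */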

static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type,
						 u64 *raid_map,
						 u64 mapped_length,
						 int nstripes, int mirror,
						 int *stripe_index,
						 u64 *stripe_offset)
{
	int i;

	if (map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
		/* RAID5/6 */
		for (i = 0; i < nstripes; i++) {
			if (raid_map[i] == RAID6_Q_STRIPE ||
			    raid_map[i] == RAID5_P_STRIPE)
				continue;

			if (logical >= raid_map[i] &&
			    logical < raid_map[i] + mapped_length)
				break;
		}

		*stripe_index = i;
		*stripe_offset = logical - raid_map[i];
	} else {
		/* The other RAID types */
		*stripe_index = mirror;
		*stripe_offset = 0;
	}
}
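
/*
 * Example: for a RAID5 full stripe whose raid_map is
 * { 1GiB, 1GiB + 64KiB, RAID5_P_STRIPE } and a mapped_length of 64KiB, a
 * logical address of 1GiB + 68KiB skips the parity slot, matches slot 1 and
 * yields *stripe_index = 1, *stripe_offset = 4KiB.
 */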

static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
				     struct scrub_block *sblocks_for_recheck)
{
	struct scrub_ctx *sctx = original_sblock->sctx;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	u64 length = original_sblock->page_count * PAGE_SIZE;
	u64 logical = original_sblock->pagev[0]->logical;
	u64 generation = original_sblock->pagev[0]->generation;
	u64 flags = original_sblock->pagev[0]->flags;
	u64 have_csum = original_sblock->pagev[0]->have_csum;
	struct scrub_recover *recover;
	struct btrfs_bio *bbio;
	u64 sublen;
	u64 mapped_length;
	u64 stripe_offset;
	int stripe_index;
	int page_index = 0;
	int mirror_index;
	int nmirrors;
	int ret;

	/*
	 * note: the two members refs and outstanding_pages
	 * are not used (and not set) in the blocks that are used for
	 * the recheck procedure
	 */
	while (length > 0) {
		sublen = min_t(u64, length, PAGE_SIZE);
		mapped_length = sublen;
		bbio = NULL;

		/*
		 * with a length of PAGE_SIZE, each returned stripe
		 * represents one mirror
		 */
		btrfs_bio_counter_inc_blocked(fs_info);
		ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
				       logical, &mapped_length, &bbio);
		if (ret || !bbio || mapped_length < sublen) {
			btrfs_put_bbio(bbio);
			btrfs_bio_counter_dec(fs_info);
			return -EIO;
		}

		recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS);
		if (!recover) {
			btrfs_put_bbio(bbio);
			btrfs_bio_counter_dec(fs_info);
			return -ENOMEM;
		}

		refcount_set(&recover->refs, 1);
		recover->bbio = bbio;
		recover->map_length = mapped_length;

		BUG_ON(page_index >= SCRUB_MAX_PAGES_PER_BLOCK);

		nmirrors = min(scrub_nr_raid_mirrors(bbio), BTRFS_MAX_MIRRORS);

		for (mirror_index = 0; mirror_index < nmirrors;
		     mirror_index++) {
			struct scrub_block *sblock;
			struct scrub_page *page;

			sblock = sblocks_for_recheck + mirror_index;
			sblock->sctx = sctx;

			page = kzalloc(sizeof(*page), GFP_NOFS);
			if (!page) {
leave_nomem:
				spin_lock(&sctx->stat_lock);
				sctx->stat.malloc_errors++;
				spin_unlock(&sctx->stat_lock);
				scrub_put_recover(fs_info, recover);
				return -ENOMEM;
			}
			scrub_page_get(page);
			sblock->pagev[page_index] = page;
			page->sblock = sblock;
			page->flags = flags;
			page->generation = generation;
			page->logical = logical;
			page->have_csum = have_csum;
			if (have_csum)
				memcpy(page->csum,
				       original_sblock->pagev[0]->csum,
				       sctx->csum_size);

			scrub_stripe_index_and_offset(logical,
						      bbio->map_type,
						      bbio->raid_map,
						      mapped_length,
						      bbio->num_stripes -
						      bbio->num_tgtdevs,
						      mirror_index,
						      &stripe_index,
						      &stripe_offset);
			page->physical = bbio->stripes[stripe_index].physical +
					 stripe_offset;
			page->dev = bbio->stripes[stripe_index].dev;

			BUG_ON(page_index >= original_sblock->page_count);
			page->physical_for_dev_replace =
				original_sblock->pagev[page_index]->
				physical_for_dev_replace;

			page->mirror_num = mirror_index + 1;
			sblock->page_count++;
			page->page = alloc_page(GFP_NOFS);
			if (!page->page)
				goto leave_nomem;

			scrub_get_recover(recover);
			page->recover = recover;
		}
		scrub_put_recover(fs_info, recover);
		length -= sublen;
		logical += sublen;
		page_index++;
	}

	return 0;
}

static void scrub_bio_wait_endio(struct bio *bio)
{
	complete(bio->bi_private);
}

static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
					struct bio *bio,
					struct scrub_page *page)
{
	DECLARE_COMPLETION_ONSTACK(done);
	int ret;
	int mirror_num;

	bio->bi_iter.bi_sector = page->logical >> 9;
	bio->bi_private = &done;
	bio->bi_end_io = scrub_bio_wait_endio;

	mirror_num = page->sblock->pagev[0]->mirror_num;
	ret = raid56_parity_recover(fs_info, bio, page->recover->bbio,
				    page->recover->map_length,
				    mirror_num, 0);
	if (ret)
		return ret;

	wait_for_completion_io(&done);
	return blk_status_to_errno(bio->bi_status);
}
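
/*
 * The >> 9 above converts a byte address into a 512-byte sector number, as
 * bi_sector is always in units of 512 bytes regardless of the device's
 * logical block size: e.g. logical 1GiB becomes sector 2097152.
 */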

static void scrub_recheck_block_on_raid56(struct btrfs_fs_info *fs_info,
					  struct scrub_block *sblock)
{
	struct scrub_page *first_page = sblock->pagev[0];
	struct bio *bio;
	int page_num;

	/* All pages in sblock belong to the same stripe on the same device */
	ASSERT(first_page->dev);
	if (!first_page->dev->bdev)
		goto out;

	bio = btrfs_io_bio_alloc(BIO_MAX_PAGES);
	bio_set_dev(bio, first_page->dev->bdev);

	for (page_num = 0; page_num < sblock->page_count; page_num++) {
		struct scrub_page *page = sblock->pagev[page_num];

		WARN_ON(!page->page);
		bio_add_page(bio, page->page, PAGE_SIZE, 0);
	}

	if (scrub_submit_raid56_bio_wait(fs_info, bio, first_page)) {
		bio_put(bio);
		goto out;
	}

	bio_put(bio);

	scrub_recheck_block_checksum(sblock);

	return;
out:
	for (page_num = 0; page_num < sblock->page_count; page_num++)
		sblock->pagev[page_num]->io_error = 1;

	sblock->no_io_error_seen = 0;
}

/*
 * this function will check the on disk data for checksum errors, header
 * errors and read I/O errors. If any I/O errors happen, the exact pages
 * which are errored are marked as being bad. The goal of this function
 * is to enable the caller to determine whether a block that is fully
 * correct can be found somewhere.
 */
static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
				struct scrub_block *sblock,
				int retry_failed_mirror)
{
	int page_num;

	sblock->no_io_error_seen = 1;

	/* short cut for raid56 */
	if (!retry_failed_mirror && scrub_is_page_on_raid56(sblock->pagev[0]))
		return scrub_recheck_block_on_raid56(fs_info, sblock);

	for (page_num = 0; page_num < sblock->page_count; page_num++) {
		struct bio *bio;
		struct scrub_page *page = sblock->pagev[page_num];

		if (page->dev->bdev == NULL) {
			page->io_error = 1;
			sblock->no_io_error_seen = 0;
			continue;
		}

		WARN_ON(!page->page);
		bio = btrfs_io_bio_alloc(1);
		bio_set_dev(bio, page->dev->bdev);

		bio_add_page(bio, page->page, PAGE_SIZE, 0);
		bio->bi_iter.bi_sector = page->physical >> 9;
		bio->bi_opf = REQ_OP_READ;

		if (btrfsic_submit_bio_wait(bio)) {
			page->io_error = 1;
			sblock->no_io_error_seen = 0;
		}

		bio_put(bio);
	}

	if (sblock->no_io_error_seen)
		scrub_recheck_block_checksum(sblock);
}

static inline int scrub_check_fsid(u8 fsid[],
				   struct scrub_page *spage)
{
	struct btrfs_fs_devices *fs_devices = spage->dev->fs_devices;
	int ret;

	ret = memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
	return !ret;
}

static void scrub_recheck_block_checksum(struct scrub_block *sblock)
{
	sblock->header_error = 0;
	sblock->checksum_error = 0;
	sblock->generation_error = 0;

	if (sblock->pagev[0]->flags & BTRFS_EXTENT_FLAG_DATA)
		scrub_checksum_data(sblock);
	else
		scrub_checksum_tree_block(sblock);
}

static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
					     struct scrub_block *sblock_good)
{
	int page_num;
	int ret = 0;

	for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
		int ret_sub;

		ret_sub = scrub_repair_page_from_good_copy(sblock_bad,
							   sblock_good,
							   page_num, 1);
		if (ret_sub)
			ret = ret_sub;
	}

	return ret;
}

static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
					    struct scrub_block *sblock_good,
					    int page_num, int force_write)
{
	struct scrub_page *page_bad = sblock_bad->pagev[page_num];
	struct scrub_page *page_good = sblock_good->pagev[page_num];
	struct btrfs_fs_info *fs_info = sblock_bad->sctx->fs_info;

	BUG_ON(page_bad->page == NULL);
	BUG_ON(page_good->page == NULL);
	if (force_write || sblock_bad->header_error ||
	    sblock_bad->checksum_error || page_bad->io_error) {
		struct bio *bio;
		int ret;

		if (!page_bad->dev->bdev) {
			btrfs_warn_rl(fs_info,
				"scrub_repair_page_from_good_copy(bdev == NULL) is unexpected");
			return -EIO;
		}

		bio = btrfs_io_bio_alloc(1);
		bio_set_dev(bio, page_bad->dev->bdev);
		bio->bi_iter.bi_sector = page_bad->physical >> 9;
		bio->bi_opf = REQ_OP_WRITE;

		ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
		if (PAGE_SIZE != ret) {
			bio_put(bio);
			return -EIO;
		}

		if (btrfsic_submit_bio_wait(bio)) {
			btrfs_dev_stat_inc_and_print(page_bad->dev,
				BTRFS_DEV_STAT_WRITE_ERRS);
			atomic64_inc(&fs_info->dev_replace.num_write_errors);
			bio_put(bio);
			return -EIO;
		}
		bio_put(bio);
	}

	return 0;
}

static void scrub_write_block_to_dev_replace(struct scrub_block *sblock)
{
	struct btrfs_fs_info *fs_info = sblock->sctx->fs_info;
	int page_num;

	/*
	 * This block is used for the check of the parity on the source
	 * device, so the data needn't be written into the destination device.
	 */
	if (sblock->sparity)
		return;

	for (page_num = 0; page_num < sblock->page_count; page_num++) {
		int ret;

		ret = scrub_write_page_to_dev_replace(sblock, page_num);
		if (ret)
			atomic64_inc(&fs_info->dev_replace.num_write_errors);
	}
}

static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
					   int page_num)
{
	struct scrub_page *spage = sblock->pagev[page_num];

	BUG_ON(spage->page == NULL);
	if (spage->io_error) {
		void *mapped_buffer = kmap_atomic(spage->page);

		clear_page(mapped_buffer);
		flush_dcache_page(spage->page);
		kunmap_atomic(mapped_buffer);
	}
	return scrub_add_page_to_wr_bio(sblock->sctx, spage);
}

static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
				    struct scrub_page *spage)
{
	struct scrub_bio *sbio;
	int ret;

	mutex_lock(&sctx->wr_lock);
again:
	if (!sctx->wr_curr_bio) {
		sctx->wr_curr_bio = kzalloc(sizeof(*sctx->wr_curr_bio),
					    GFP_KERNEL);
		if (!sctx->wr_curr_bio) {
			mutex_unlock(&sctx->wr_lock);
			return -ENOMEM;
		}
		sctx->wr_curr_bio->sctx = sctx;
		sctx->wr_curr_bio->page_count = 0;
	}
	sbio = sctx->wr_curr_bio;
	if (sbio->page_count == 0) {
		struct bio *bio;

		sbio->physical = spage->physical_for_dev_replace;
		sbio->logical = spage->logical;
		sbio->dev = sctx->wr_tgtdev;
		bio = sbio->bio;
		if (!bio) {
			bio = btrfs_io_bio_alloc(sctx->pages_per_wr_bio);
			sbio->bio = bio;
		}

		bio->bi_private = sbio;
		bio->bi_end_io = scrub_wr_bio_end_io;
		bio_set_dev(bio, sbio->dev->bdev);
		bio->bi_iter.bi_sector = sbio->physical >> 9;
		bio->bi_opf = REQ_OP_WRITE;
		sbio->status = 0;
	} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
		   spage->physical_for_dev_replace ||
		   sbio->logical + sbio->page_count * PAGE_SIZE !=
		   spage->logical) {
		scrub_wr_submit(sctx);
		goto again;
	}

	ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
	if (ret != PAGE_SIZE) {
		if (sbio->page_count < 1) {
			bio_put(sbio->bio);
			sbio->bio = NULL;
			mutex_unlock(&sctx->wr_lock);
			return -EIO;
		}
		scrub_wr_submit(sctx);
		goto again;
	}

	sbio->pagev[sbio->page_count] = spage;
	scrub_page_get(spage);
	sbio->page_count++;
	if (sbio->page_count == sctx->pages_per_wr_bio)
		scrub_wr_submit(sctx);
	mutex_unlock(&sctx->wr_lock);

	return 0;
}
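
/*
 * The contiguity check above batches pages into one write bio only while
 * both the physical address on the replace target and the logical address
 * advance page by page.  For example, with a bio holding 3 pages that
 * started at physical P, the next page is accepted only if its physical
 * address is exactly P + 3 * PAGE_SIZE (and likewise for the logical
 * address); any hole or jump submits the current bio and starts a new one.
 */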

static void scrub_wr_submit(struct scrub_ctx *sctx)
{
	struct scrub_bio *sbio;

	if (!sctx->wr_curr_bio)
		return;

	sbio = sctx->wr_curr_bio;
	sctx->wr_curr_bio = NULL;
	WARN_ON(!sbio->bio->bi_disk);
	scrub_pending_bio_inc(sctx);
	/*
	 * process all writes in a single worker thread, so the block layer
	 * can merge and order them before they reach the driver
	 */
	btrfsic_submit_bio(sbio->bio);
}

static void scrub_wr_bio_end_io(struct bio *bio)
{
	struct scrub_bio *sbio = bio->bi_private;
	struct btrfs_fs_info *fs_info = sbio->dev->fs_info;

	sbio->status = bio->bi_status;
	sbio->bio = bio;

	btrfs_init_work(&sbio->work, scrub_wr_bio_end_io_worker, NULL, NULL);
	btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work);
}

static void scrub_wr_bio_end_io_worker(struct btrfs_work *work)
{
	struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
	struct scrub_ctx *sctx = sbio->sctx;
	int i;

	WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO);
	if (sbio->status) {
		struct btrfs_dev_replace *dev_replace =
			&sbio->sctx->fs_info->dev_replace;

		for (i = 0; i < sbio->page_count; i++) {
			struct scrub_page *spage = sbio->pagev[i];

			spage->io_error = 1;
			atomic64_inc(&dev_replace->num_write_errors);
		}
	}

	for (i = 0; i < sbio->page_count; i++)
		scrub_page_put(sbio->pagev[i]);

	bio_put(sbio->bio);
	kfree(sbio);
	scrub_pending_bio_dec(sctx);
}

static int scrub_checksum(struct scrub_block *sblock)
{
	u64 flags;
	int ret;

	/*
	 * No need to initialize these stats currently, because this function
	 * only uses the return value instead of these stats values.
	 *
	 * Todo:
	 * check these stats and update them later.
	 */
	sblock->header_error = 0;
	sblock->generation_error = 0;
	sblock->checksum_error = 0;

	WARN_ON(sblock->page_count < 1);
	flags = sblock->pagev[0]->flags;
	ret = 0;
	if (flags & BTRFS_EXTENT_FLAG_DATA)
		ret = scrub_checksum_data(sblock);
	else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
		ret = scrub_checksum_tree_block(sblock);
	else if (flags & BTRFS_EXTENT_FLAG_SUPER)
		(void)scrub_checksum_super(sblock);
	else
		WARN_ON(1);
	if (ret)
		scrub_handle_errored_block(sblock);

	return ret;
}

static int scrub_checksum_data(struct scrub_block *sblock)
{
	struct scrub_ctx *sctx = sblock->sctx;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
	u8 csum[BTRFS_CSUM_SIZE];
	u8 *on_disk_csum;
	struct page *page;
	void *buffer;
	u64 len;
	int index;

	BUG_ON(sblock->page_count < 1);
	if (!sblock->pagev[0]->have_csum)
		return 0;

	shash->tfm = fs_info->csum_shash;
	crypto_shash_init(shash);

	on_disk_csum = sblock->pagev[0]->csum;
	page = sblock->pagev[0]->page;
	buffer = kmap_atomic(page);

	len = sctx->fs_info->sectorsize;
	index = 0;
	for (;;) {
		u64 l = min_t(u64, len, PAGE_SIZE);

		crypto_shash_update(shash, buffer, l);
		kunmap_atomic(buffer);
		len -= l;
		if (len == 0)
			break;
		index++;
		BUG_ON(index >= sblock->page_count);
		BUG_ON(!sblock->pagev[index]->page);
		page = sblock->pagev[index]->page;
		buffer = kmap_atomic(page);
	}

	crypto_shash_final(shash, csum);
	if (memcmp(csum, on_disk_csum, sctx->csum_size))
		sblock->checksum_error = 1;

	return sblock->checksum_error;
}

static int scrub_checksum_tree_block(struct scrub_block *sblock)
{
	struct scrub_ctx *sctx = sblock->sctx;
	struct btrfs_header *h;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
	u8 calculated_csum[BTRFS_CSUM_SIZE];
	u8 on_disk_csum[BTRFS_CSUM_SIZE];
	struct page *page;
	void *mapped_buffer;
	u64 mapped_size;
	void *p;
	u64 len;
	int index;

	shash->tfm = fs_info->csum_shash;
	crypto_shash_init(shash);

	BUG_ON(sblock->page_count < 1);
	page = sblock->pagev[0]->page;
	mapped_buffer = kmap_atomic(page);
	h = (struct btrfs_header *)mapped_buffer;
	memcpy(on_disk_csum, h->csum, sctx->csum_size);

	/*
	 * we don't use the getter functions here, as we
	 * a) don't have an extent buffer and
	 * b) the page is already kmapped
	 */
	if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h))
		sblock->header_error = 1;

	if (sblock->pagev[0]->generation != btrfs_stack_header_generation(h)) {
		sblock->header_error = 1;
		sblock->generation_error = 1;
	}

	if (!scrub_check_fsid(h->fsid, sblock->pagev[0]))
		sblock->header_error = 1;

	if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
		   BTRFS_UUID_SIZE))
		sblock->header_error = 1;

	len = sctx->fs_info->nodesize - BTRFS_CSUM_SIZE;
	mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
	p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
	index = 0;
	for (;;) {
		u64 l = min_t(u64, len, mapped_size);

		crypto_shash_update(shash, p, l);
		kunmap_atomic(mapped_buffer);
		len -= l;
		if (len == 0)
			break;
		index++;
		BUG_ON(index >= sblock->page_count);
		BUG_ON(!sblock->pagev[index]->page);
		page = sblock->pagev[index]->page;
		mapped_buffer = kmap_atomic(page);
		mapped_size = PAGE_SIZE;
		p = mapped_buffer;
	}

	crypto_shash_final(shash, calculated_csum);
	if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
		sblock->checksum_error = 1;

	return sblock->header_error || sblock->checksum_error;
}
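
/*
 * Note the asymmetry with scrub_checksum_data(), which hashes exactly one
 * sectorsize of data: a metadata checksum covers the whole node except the
 * leading csum field itself, so with 4KiB pages the first update hashes
 * PAGE_SIZE - BTRFS_CSUM_SIZE bytes and the total hashed length is
 * nodesize - BTRFS_CSUM_SIZE (16384 - 32 = 16352 bytes for the default
 * 16KiB nodesize).
 */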

static int scrub_checksum_super(struct scrub_block *sblock)
{
	struct btrfs_super_block *s;
	struct scrub_ctx *sctx = sblock->sctx;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
	u8 calculated_csum[BTRFS_CSUM_SIZE];
	u8 on_disk_csum[BTRFS_CSUM_SIZE];
	struct page *page;
	void *mapped_buffer;
	u64 mapped_size;
	void *p;
	int fail_gen = 0;
	int fail_cor = 0;
	u64 len;
	int index;

	shash->tfm = fs_info->csum_shash;
	crypto_shash_init(shash);

	BUG_ON(sblock->page_count < 1);
	page = sblock->pagev[0]->page;
	mapped_buffer = kmap_atomic(page);
	s = (struct btrfs_super_block *)mapped_buffer;
	memcpy(on_disk_csum, s->csum, sctx->csum_size);

	if (sblock->pagev[0]->logical != btrfs_super_bytenr(s))
		++fail_cor;

	if (sblock->pagev[0]->generation != btrfs_super_generation(s))
		++fail_gen;

	if (!scrub_check_fsid(s->fsid, sblock->pagev[0]))
		++fail_cor;

	len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE;
	mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
	p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
	index = 0;
	for (;;) {
		u64 l = min_t(u64, len, mapped_size);

		crypto_shash_update(shash, p, l);
		kunmap_atomic(mapped_buffer);
		len -= l;
		if (len == 0)
			break;
		index++;
		BUG_ON(index >= sblock->page_count);
		BUG_ON(!sblock->pagev[index]->page);
		page = sblock->pagev[index]->page;
		mapped_buffer = kmap_atomic(page);
		mapped_size = PAGE_SIZE;
		p = mapped_buffer;
	}

	crypto_shash_final(shash, calculated_csum);
	if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
		++fail_cor;

	if (fail_cor + fail_gen) {
		/*
		 * if we find an error in a super block, we just report it.
		 * They will get written with the next transaction commit
		 * anyway
		 */
		spin_lock(&sctx->stat_lock);
		++sctx->stat.super_errors;
		spin_unlock(&sctx->stat_lock);
		if (fail_cor)
			btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev,
				BTRFS_DEV_STAT_CORRUPTION_ERRS);
		else
			btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev,
				BTRFS_DEV_STAT_GENERATION_ERRS);
	}

	return fail_cor + fail_gen;
}

static void scrub_block_get(struct scrub_block *sblock)
{
	refcount_inc(&sblock->refs);
}

static void scrub_block_put(struct scrub_block *sblock)
{
	if (refcount_dec_and_test(&sblock->refs)) {
		int i;

		if (sblock->sparity)
			scrub_parity_put(sblock->sparity);

		for (i = 0; i < sblock->page_count; i++)
			scrub_page_put(sblock->pagev[i]);
		kfree(sblock);
	}
}

static void scrub_page_get(struct scrub_page *spage)
{
	atomic_inc(&spage->refs);
}

static void scrub_page_put(struct scrub_page *spage)
{
	if (atomic_dec_and_test(&spage->refs)) {
		if (spage->page)
			__free_page(spage->page);
		kfree(spage);
	}
}

static void scrub_submit(struct scrub_ctx *sctx)
{
	struct scrub_bio *sbio;

	if (sctx->curr == -1)
		return;

	sbio = sctx->bios[sctx->curr];
	sctx->curr = -1;
	scrub_pending_bio_inc(sctx);
	btrfsic_submit_bio(sbio->bio);
}

static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
				    struct scrub_page *spage)
{
	struct scrub_block *sblock = spage->sblock;
	struct scrub_bio *sbio;
	int ret;

again:
	/*
	 * grab a fresh bio or wait for one to become available
	 */
	while (sctx->curr == -1) {
		spin_lock(&sctx->list_lock);
		sctx->curr = sctx->first_free;
		if (sctx->curr != -1) {
			sctx->first_free = sctx->bios[sctx->curr]->next_free;
			sctx->bios[sctx->curr]->next_free = -1;
			sctx->bios[sctx->curr]->page_count = 0;
			spin_unlock(&sctx->list_lock);
		} else {
			spin_unlock(&sctx->list_lock);
			wait_event(sctx->list_wait, sctx->first_free != -1);
		}
	}
	sbio = sctx->bios[sctx->curr];
	if (sbio->page_count == 0) {
		struct bio *bio;

		sbio->physical = spage->physical;
		sbio->logical = spage->logical;
		sbio->dev = spage->dev;
		bio = sbio->bio;
		if (!bio) {
			bio = btrfs_io_bio_alloc(sctx->pages_per_rd_bio);
			sbio->bio = bio;
		}

		bio->bi_private = sbio;
		bio->bi_end_io = scrub_bio_end_io;
		bio_set_dev(bio, sbio->dev->bdev);
		bio->bi_iter.bi_sector = sbio->physical >> 9;
		bio->bi_opf = REQ_OP_READ;
		sbio->status = 0;
	} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
		   spage->physical ||
		   sbio->logical + sbio->page_count * PAGE_SIZE !=
		   spage->logical ||
		   sbio->dev != spage->dev) {
		scrub_submit(sctx);
		goto again;
	}

	sbio->pagev[sbio->page_count] = spage;
	ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
	if (ret != PAGE_SIZE) {
		if (sbio->page_count < 1) {
			bio_put(sbio->bio);
			sbio->bio = NULL;
			return -EIO;
		}
		scrub_submit(sctx);
		goto again;
	}

	scrub_block_get(sblock); /* one for the page added to the bio */
	atomic_inc(&sblock->outstanding_pages);
	sbio->page_count++;
	if (sbio->page_count == sctx->pages_per_rd_bio)
		scrub_submit(sctx);

	return 0;
}

static void scrub_missing_raid56_end_io(struct bio *bio)
{
	struct scrub_block *sblock = bio->bi_private;
	struct btrfs_fs_info *fs_info = sblock->sctx->fs_info;

	if (bio->bi_status)
		sblock->no_io_error_seen = 0;

	bio_put(bio);

	btrfs_queue_work(fs_info->scrub_workers, &sblock->work);
}

static void scrub_missing_raid56_worker(struct btrfs_work *work)
{
	struct scrub_block *sblock = container_of(work, struct scrub_block, work);
	struct scrub_ctx *sctx = sblock->sctx;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	u64 logical;
	struct btrfs_device *dev;

	logical = sblock->pagev[0]->logical;
	dev = sblock->pagev[0]->dev;

	if (sblock->no_io_error_seen)
		scrub_recheck_block_checksum(sblock);

	if (!sblock->no_io_error_seen) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.read_errors++;
		spin_unlock(&sctx->stat_lock);
		btrfs_err_rl_in_rcu(fs_info,
			"IO error rebuilding logical %llu for dev %s",
			logical, rcu_str_deref(dev->name));
	} else if (sblock->header_error || sblock->checksum_error) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.uncorrectable_errors++;
		spin_unlock(&sctx->stat_lock);
		btrfs_err_rl_in_rcu(fs_info,
			"failed to rebuild valid logical %llu for dev %s",
			logical, rcu_str_deref(dev->name));
	} else {
		scrub_write_block_to_dev_replace(sblock);
	}

	if (sctx->is_dev_replace && sctx->flush_all_writes) {
		mutex_lock(&sctx->wr_lock);
		scrub_wr_submit(sctx);
		mutex_unlock(&sctx->wr_lock);
	}

	scrub_block_put(sblock);
	scrub_pending_bio_dec(sctx);
}

static void scrub_missing_raid56_pages(struct scrub_block *sblock)
{
	struct scrub_ctx *sctx = sblock->sctx;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	u64 length = sblock->page_count * PAGE_SIZE;
	u64 logical = sblock->pagev[0]->logical;
	struct btrfs_bio *bbio = NULL;
	struct bio *bio;
	struct btrfs_raid_bio *rbio;
	int ret;
	int i;

	btrfs_bio_counter_inc_blocked(fs_info);
	ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
			       &length, &bbio);
	if (ret || !bbio || !bbio->raid_map)
		goto bbio_out;

	if (WARN_ON(!sctx->is_dev_replace ||
		    !(bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK))) {
		/*
		 * We shouldn't be scrubbing a missing device. Even for dev
		 * replace, we should only get here for RAID 5/6. We either
		 * managed to mount something with no mirrors remaining or
		 * there's a bug in scrub_remap_extent()/btrfs_map_block().
		 */
		goto bbio_out;
	}

	bio = btrfs_io_bio_alloc(0);
	bio->bi_iter.bi_sector = logical >> 9;
	bio->bi_private = sblock;
	bio->bi_end_io = scrub_missing_raid56_end_io;

	rbio = raid56_alloc_missing_rbio(fs_info, bio, bbio, length);
	if (!rbio)
		goto rbio_out;

	for (i = 0; i < sblock->page_count; i++) {
		struct scrub_page *spage = sblock->pagev[i];

		raid56_add_scrub_pages(rbio, spage->page, spage->logical);
	}

	btrfs_init_work(&sblock->work, scrub_missing_raid56_worker, NULL, NULL);
	scrub_block_get(sblock);
	scrub_pending_bio_inc(sctx);
	raid56_submit_missing_rbio(rbio);
	return;

rbio_out:
	bio_put(bio);
bbio_out:
	btrfs_bio_counter_dec(fs_info);
	btrfs_put_bbio(bbio);
	spin_lock(&sctx->stat_lock);
	sctx->stat.malloc_errors++;
	spin_unlock(&sctx->stat_lock);
}

static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
		       u64 physical, struct btrfs_device *dev, u64 flags,
		       u64 gen, int mirror_num, u8 *csum, int force,
		       u64 physical_for_dev_replace)
{
	struct scrub_block *sblock;
	int index;

	sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);
	if (!sblock) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.malloc_errors++;
		spin_unlock(&sctx->stat_lock);
		return -ENOMEM;
	}

	/* one ref inside this function, plus one for each page added to
	 * a bio later on */
	refcount_set(&sblock->refs, 1);
	sblock->sctx = sctx;
	sblock->no_io_error_seen = 1;

	for (index = 0; len > 0; index++) {
		struct scrub_page *spage;
		u64 l = min_t(u64, len, PAGE_SIZE);

		spage = kzalloc(sizeof(*spage), GFP_KERNEL);
		if (!spage) {
leave_nomem:
			spin_lock(&sctx->stat_lock);
			sctx->stat.malloc_errors++;
			spin_unlock(&sctx->stat_lock);
			scrub_block_put(sblock);
			return -ENOMEM;
		}
		BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
		scrub_page_get(spage);
		sblock->pagev[index] = spage;
		spage->sblock = sblock;
		spage->dev = dev;
		spage->flags = flags;
		spage->generation = gen;
		spage->logical = logical;
		spage->physical = physical;
		spage->physical_for_dev_replace = physical_for_dev_replace;
		spage->mirror_num = mirror_num;
		if (csum) {
			spage->have_csum = 1;
			memcpy(spage->csum, csum, sctx->csum_size);
		} else {
			spage->have_csum = 0;
		}
		sblock->page_count++;
		spage->page = alloc_page(GFP_KERNEL);
		if (!spage->page)
			goto leave_nomem;
		len -= l;
		logical += l;
		physical += l;
		physical_for_dev_replace += l;
	}

	WARN_ON(sblock->page_count == 0);
	if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) {
		/*
		 * This case should only be hit for RAID 5/6 device replace.
		 * See the comment in scrub_missing_raid56_pages() for details.
		 */
		scrub_missing_raid56_pages(sblock);
	} else {
		for (index = 0; index < sblock->page_count; index++) {
			struct scrub_page *spage = sblock->pagev[index];
			int ret;

			ret = scrub_add_page_to_rd_bio(sctx, spage);
			if (ret) {
				scrub_block_put(sblock);
				return ret;
			}
		}

		if (force)
			scrub_submit(sctx);
	}

	/* last one frees, either here or in bio completion for last page */
	scrub_block_put(sblock);
	return 0;
}
2310
static void scrub_bio_end_io(struct bio *bio)
{
	struct scrub_bio *sbio = bio->bi_private;
	struct btrfs_fs_info *fs_info = sbio->dev->fs_info;

	sbio->status = bio->bi_status;
	sbio->bio = bio;

	btrfs_queue_work(fs_info->scrub_workers, &sbio->work);
}

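/*
 * Worker counterpart of scrub_bio_end_io(): propagate an I/O error to all
 * pages of the bio, complete every scrub_block whose pages have all finished,
 * put the scrub_bio back on the free list and, in the dev-replace case,
 * resubmit any pending writes.
 */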
static void scrub_bio_end_io_worker(struct btrfs_work *work)
{
	struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
	struct scrub_ctx *sctx = sbio->sctx;
	int i;

	BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO);
	if (sbio->status) {
		for (i = 0; i < sbio->page_count; i++) {
			struct scrub_page *spage = sbio->pagev[i];

			spage->io_error = 1;
			spage->sblock->no_io_error_seen = 0;
		}
	}

	/* now complete the scrub_block items that have all pages completed */
	for (i = 0; i < sbio->page_count; i++) {
		struct scrub_page *spage = sbio->pagev[i];
		struct scrub_block *sblock = spage->sblock;

		if (atomic_dec_and_test(&sblock->outstanding_pages))
			scrub_block_complete(sblock);
		scrub_block_put(sblock);
	}

	bio_put(sbio->bio);
	sbio->bio = NULL;
	spin_lock(&sctx->list_lock);
	sbio->next_free = sctx->first_free;
	sctx->first_free = sbio->index;
	spin_unlock(&sctx->list_lock);

	if (sctx->is_dev_replace && sctx->flush_all_writes) {
		mutex_lock(&sctx->wr_lock);
		scrub_wr_submit(sctx);
		mutex_unlock(&sctx->wr_lock);
	}

	scrub_pending_bio_dec(sctx);
}

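/*
 * Mark the sectors covered by [start, start + len) in the given per-stripe
 * bitmap (the data or error bitmap of a scrub_parity). The bit offset is
 * computed relative to sparity->logic_start; a range that extends past the
 * end of the stripe wraps around to bit 0.
 */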
static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
				       unsigned long *bitmap,
				       u64 start, u64 len)
{
	u64 offset;
	u64 nsectors64;
	u32 nsectors;
	int sectorsize = sparity->sctx->fs_info->sectorsize;

	if (len >= sparity->stripe_len) {
		bitmap_set(bitmap, 0, sparity->nsectors);
		return;
	}

	start -= sparity->logic_start;
	start = div64_u64_rem(start, sparity->stripe_len, &offset);
	offset = div_u64(offset, sectorsize);
	nsectors64 = div_u64(len, sectorsize);

	ASSERT(nsectors64 < UINT_MAX);
	nsectors = (u32)nsectors64;

	if (offset + nsectors <= sparity->nsectors) {
		bitmap_set(bitmap, offset, nsectors);
		return;
	}

	/* the range wraps past the end of the stripe, set it in two steps */
	bitmap_set(bitmap, offset, sparity->nsectors - offset);
	bitmap_set(bitmap, 0, nsectors - (sparity->nsectors - offset));
}

static inline void scrub_parity_mark_sectors_error(struct scrub_parity *sparity,
						   u64 start, u64 len)
{
	__scrub_mark_bitmap(sparity, sparity->ebitmap, start, len);
}

static inline void scrub_parity_mark_sectors_data(struct scrub_parity *sparity,
						  u64 start, u64 len)
{
	__scrub_mark_bitmap(sparity, sparity->dbitmap, start, len);
}

static void scrub_block_complete(struct scrub_block *sblock)
{
	int corrupted = 0;

	if (!sblock->no_io_error_seen) {
		corrupted = 1;
		scrub_handle_errored_block(sblock);
	} else {
		/*
		 * If the block has a checksum error it goes through the
		 * repair machinery, which also takes care of writing the
		 * repaired data to the dev-replace target. Only clean
		 * blocks are written out here in the dev-replace case.
		 */
		corrupted = scrub_checksum(sblock);
		if (!corrupted && sblock->sctx->is_dev_replace)
			scrub_write_block_to_dev_replace(sblock);
	}

	if (sblock->sparity && corrupted && !sblock->data_corrected) {
		u64 start = sblock->pagev[0]->logical;
		u64 end = sblock->pagev[sblock->page_count - 1]->logical +
			  PAGE_SIZE;

		scrub_parity_mark_sectors_error(sblock->sparity,
						start, end - start);
	}
}

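/*
 * Find the csum for the sector at @logical and copy it into @csum.
 *
 * The search source is sctx->csum_list, which must have been pre-populated
 * with the csums for the range being scrubbed; entries that end before
 * @logical are stale and are dropped from the list on the way.
 *
 * Return 0 if no csum covers @logical, 1 if a csum was copied.
 */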
static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u8 *csum)
{
	struct btrfs_ordered_sum *sum = NULL;
	unsigned long index;
	unsigned long num_sectors;

	while (!list_empty(&sctx->csum_list)) {
		sum = list_first_entry(&sctx->csum_list,
				       struct btrfs_ordered_sum, list);
		/* the list is sorted, no entry can cover @logical anymore */
		if (sum->bytenr > logical)
			return 0;
		if (sum->bytenr + sum->len > logical)
			break;

		/* this entry ends before @logical, it is stale, drop it */
		++sctx->stat.csum_discards;
		list_del(&sum->list);
		kfree(sum);
		sum = NULL;
	}
	if (!sum)
		return 0;

	index = div_u64(logical - sum->bytenr, sctx->fs_info->sectorsize);
	ASSERT(index < UINT_MAX);

	num_sectors = sum->len / sctx->fs_info->sectorsize;
	memcpy(csum, sum->sums + index * sctx->csum_size, sctx->csum_size);
	if (index == num_sectors - 1) {
		/* the last sector of this entry was consumed, free it */
		list_del(&sum->list);
		kfree(sum);
	}
	return 1;
}

/* scrub extent tries to collect up to 64 kB for each bio */
static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
			u64 logical, u64 len,
			u64 physical, struct btrfs_device *dev, u64 flags,
			u64 gen, int mirror_num, u64 physical_for_dev_replace)
{
	int ret;
	u8 csum[BTRFS_CSUM_SIZE];
	u32 blocksize;

	if (flags & BTRFS_EXTENT_FLAG_DATA) {
		if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
			blocksize = map->stripe_len;
		else
			blocksize = sctx->fs_info->sectorsize;
		spin_lock(&sctx->stat_lock);
		sctx->stat.data_extents_scrubbed++;
		sctx->stat.data_bytes_scrubbed += len;
		spin_unlock(&sctx->stat_lock);
	} else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
			blocksize = map->stripe_len;
		else
			blocksize = sctx->fs_info->nodesize;
		spin_lock(&sctx->stat_lock);
		sctx->stat.tree_extents_scrubbed++;
		sctx->stat.tree_bytes_scrubbed += len;
		spin_unlock(&sctx->stat_lock);
	} else {
		blocksize = sctx->fs_info->sectorsize;
		WARN_ON(1);
	}

	while (len) {
		u64 l = min_t(u64, len, blocksize);
		int have_csum = 0;

		if (flags & BTRFS_EXTENT_FLAG_DATA) {
			/* push csums to sbio */
			have_csum = scrub_find_csum(sctx, logical, csum);
			if (have_csum == 0)
				++sctx->stat.no_csum;
		}
		ret = scrub_pages(sctx, logical, l, physical, dev, flags, gen,
				  mirror_num, have_csum ? csum : NULL, 0,
				  physical_for_dev_replace);
		if (ret)
			return ret;
		len -= l;
		logical += l;
		physical += l;
		physical_for_dev_replace += l;
	}
	return 0;
}

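/*
 * Like scrub_pages(), but for blocks that belong to a parity stripe being
 * scrubbed: every page gets an extra reference and is also linked into
 * sparity->spages so that the parity check can find it later.
 */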
static int scrub_pages_for_parity(struct scrub_parity *sparity,
				  u64 logical, u64 len,
				  u64 physical, struct btrfs_device *dev,
				  u64 flags, u64 gen, int mirror_num, u8 *csum)
{
	struct scrub_ctx *sctx = sparity->sctx;
	struct scrub_block *sblock;
	int index;

	sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);
	if (!sblock) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.malloc_errors++;
		spin_unlock(&sctx->stat_lock);
		return -ENOMEM;
	}

	/* one ref inside this function, plus one for each page added to
	 * a bio later on */
	refcount_set(&sblock->refs, 1);
	sblock->sctx = sctx;
	sblock->no_io_error_seen = 1;
	sblock->sparity = sparity;
	scrub_parity_get(sparity);

	for (index = 0; len > 0; index++) {
		struct scrub_page *spage;
		u64 l = min_t(u64, len, PAGE_SIZE);

		spage = kzalloc(sizeof(*spage), GFP_KERNEL);
		if (!spage) {
leave_nomem:
			spin_lock(&sctx->stat_lock);
			sctx->stat.malloc_errors++;
			spin_unlock(&sctx->stat_lock);
			scrub_block_put(sblock);
			return -ENOMEM;
		}
		BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
		/* For scrub block */
		scrub_page_get(spage);
		sblock->pagev[index] = spage;
		/* For scrub parity */
		scrub_page_get(spage);
		list_add_tail(&spage->list, &sparity->spages);
		spage->sblock = sblock;
		spage->dev = dev;
		spage->flags = flags;
		spage->generation = gen;
		spage->logical = logical;
		spage->physical = physical;
		spage->mirror_num = mirror_num;
		if (csum) {
			spage->have_csum = 1;
			memcpy(spage->csum, csum, sctx->csum_size);
		} else {
			spage->have_csum = 0;
		}
		sblock->page_count++;
		spage->page = alloc_page(GFP_KERNEL);
		if (!spage->page)
			goto leave_nomem;
		len -= l;
		logical += l;
		physical += l;
	}

	WARN_ON(sblock->page_count == 0);
	for (index = 0; index < sblock->page_count; index++) {
		struct scrub_page *spage = sblock->pagev[index];
		int ret;

		ret = scrub_add_page_to_rd_bio(sctx, spage);
		if (ret) {
			scrub_block_put(sblock);
			return ret;
		}
	}

	/* last one frees, either here or in bio completion for last page */
	scrub_block_put(sblock);
	return 0;
}

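/*
 * Like scrub_extent(), but for extents inside a parity stripe. Sectors on a
 * missing device are simply marked as errors, and data sectors without a
 * checksum are skipped instead of being read.
 */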
static int scrub_extent_for_parity(struct scrub_parity *sparity,
				   u64 logical, u64 len,
				   u64 physical, struct btrfs_device *dev,
				   u64 flags, u64 gen, int mirror_num)
{
	struct scrub_ctx *sctx = sparity->sctx;
	int ret;
	u8 csum[BTRFS_CSUM_SIZE];
	u32 blocksize;

	if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) {
		scrub_parity_mark_sectors_error(sparity, logical, len);
		return 0;
	}

	if (flags & BTRFS_EXTENT_FLAG_DATA) {
		blocksize = sparity->stripe_len;
	} else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		blocksize = sparity->stripe_len;
	} else {
		blocksize = sctx->fs_info->sectorsize;
		WARN_ON(1);
	}

	while (len) {
		u64 l = min_t(u64, len, blocksize);
		int have_csum = 0;

		if (flags & BTRFS_EXTENT_FLAG_DATA) {
			/* push csums to sbio */
			have_csum = scrub_find_csum(sctx, logical, csum);
			if (have_csum == 0)
				goto skip;
		}
		ret = scrub_pages_for_parity(sparity, logical, l, physical, dev,
					     flags, gen, mirror_num,
					     have_csum ? csum : NULL);
		if (ret)
			return ret;
skip:
		len -= l;
		logical += l;
		physical += l;
	}
	return 0;
}

/*
 * Given a physical address, calculate its logical offset. If this is a
 * parity stripe, return the logical offset of the leftmost data stripe
 * instead.
 *
 * Return 0 if it is a data stripe, 1 if it is a parity stripe.
 */
static int get_raid56_logic_offset(u64 physical, int num,
				   struct map_lookup *map, u64 *offset,
				   u64 *stripe_start)
{
	int i;
	int j = 0;
	u64 stripe_nr;
	u64 last_offset;
	u32 stripe_index;
	u32 rot;
	const int data_stripes = nr_data_stripes(map);

	last_offset = (physical - map->stripes[num].physical) * data_stripes;
	if (stripe_start)
		*stripe_start = last_offset;

	*offset = last_offset;
	for (i = 0; i < data_stripes; i++) {
		*offset = last_offset + i * map->stripe_len;

		stripe_nr = div64_u64(*offset, map->stripe_len);
		stripe_nr = div_u64(stripe_nr, data_stripes);

		/* Work out the disk rotation on this stripe-set */
		stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, &rot);
		/* calculate which stripe this data lands on */
		rot += i;
		stripe_index = rot % map->num_stripes;
		if (stripe_index == num)
			return 0;
		if (stripe_index < num)
			j++;
	}
	*offset = last_offset + j * map->stripe_len;
	return 1;
}

static void scrub_free_parity(struct scrub_parity *sparity)
{
	struct scrub_ctx *sctx = sparity->sctx;
	struct scrub_page *curr, *next;
	int nbits;

	/* every bit left in ebitmap is a sector that could not be repaired */
	nbits = bitmap_weight(sparity->ebitmap, sparity->nsectors);
	if (nbits) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.read_errors += nbits;
		sctx->stat.uncorrectable_errors += nbits;
		spin_unlock(&sctx->stat_lock);
	}

	list_for_each_entry_safe(curr, next, &sparity->spages, list) {
		list_del_init(&curr->list);
		scrub_page_put(curr);
	}

	kfree(sparity);
}

static void scrub_parity_bio_endio_worker(struct btrfs_work *work)
{
	struct scrub_parity *sparity = container_of(work, struct scrub_parity,
						    work);
	struct scrub_ctx *sctx = sparity->sctx;

	scrub_free_parity(sparity);
	scrub_pending_bio_dec(sctx);
}

static void scrub_parity_bio_endio(struct bio *bio)
{
	struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private;
	struct btrfs_fs_info *fs_info = sparity->sctx->fs_info;

	/* treat all data sectors as errored if the parity check I/O failed */
	if (bio->bi_status)
		bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
			  sparity->nsectors);

	bio_put(bio);

	btrfs_init_work(&sparity->work, scrub_parity_bio_endio_worker, NULL,
			NULL);
	btrfs_queue_work(fs_info->scrub_parity_workers, &sparity->work);
}

static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
{
	struct scrub_ctx *sctx = sparity->sctx;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	struct bio *bio;
	struct btrfs_raid_bio *rbio;
	struct btrfs_bio *bbio = NULL;
	u64 length;
	int ret;

	/* nothing to verify if every data sector is already errored out */
	if (!bitmap_andnot(sparity->dbitmap, sparity->dbitmap, sparity->ebitmap,
			   sparity->nsectors))
		goto out;

	length = sparity->logic_end - sparity->logic_start;

	btrfs_bio_counter_inc_blocked(fs_info);
	ret = btrfs_map_sblock(fs_info, BTRFS_MAP_WRITE, sparity->logic_start,
			       &length, &bbio);
	if (ret || !bbio || !bbio->raid_map)
		goto bbio_out;

	bio = btrfs_io_bio_alloc(0);
	bio->bi_iter.bi_sector = sparity->logic_start >> 9;
	bio->bi_private = sparity;
	bio->bi_end_io = scrub_parity_bio_endio;

	rbio = raid56_parity_alloc_scrub_rbio(fs_info, bio, bbio,
					      length, sparity->scrub_dev,
					      sparity->dbitmap,
					      sparity->nsectors);
	if (!rbio)
		goto rbio_out;

	scrub_pending_bio_inc(sctx);
	raid56_parity_submit_scrub_rbio(rbio);
	return;

rbio_out:
	bio_put(bio);
bbio_out:
	btrfs_bio_counter_dec(fs_info);
	btrfs_put_bbio(bbio);
	bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
		  sparity->nsectors);
	spin_lock(&sctx->stat_lock);
	sctx->stat.malloc_errors++;
	spin_unlock(&sctx->stat_lock);
out:
	scrub_free_parity(sparity);
}

static inline int scrub_calc_parity_bitmap_len(int nsectors)
{
	return DIV_ROUND_UP(nsectors, BITS_PER_LONG) * sizeof(long);
}

static void scrub_parity_get(struct scrub_parity *sparity)
{
	refcount_inc(&sparity->refs);
}

static void scrub_parity_put(struct scrub_parity *sparity)
{
	if (!refcount_dec_and_test(&sparity->refs))
		return;

	scrub_parity_check_and_repair(sparity);
}

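/*
 * Scrub one full stripe of a RAID 5/6 chunk: walk the extent tree for all
 * extents in [logic_start, logic_end), mark their sectors in the data
 * bitmap, read and verify them, and finally (via the last scrub_parity_put())
 * check and repair the parity itself.
 */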
static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
						  struct map_lookup *map,
						  struct btrfs_device *sdev,
						  struct btrfs_path *path,
						  u64 logic_start,
						  u64 logic_end)
{
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	struct btrfs_root *root = fs_info->extent_root;
	struct btrfs_root *csum_root = fs_info->csum_root;
	struct btrfs_extent_item *extent;
	struct btrfs_bio *bbio = NULL;
	u64 flags;
	int ret;
	int slot;
	struct extent_buffer *l;
	struct btrfs_key key;
	u64 generation;
	u64 extent_logical;
	u64 extent_physical;
	u64 extent_len;
	u64 mapped_length;
	struct btrfs_device *extent_dev;
	struct scrub_parity *sparity;
	int nsectors;
	int bitmap_len;
	int extent_mirror_num;
	int stop_loop = 0;

	nsectors = div_u64(map->stripe_len, fs_info->sectorsize);
	bitmap_len = scrub_calc_parity_bitmap_len(nsectors);
	sparity = kzalloc(sizeof(struct scrub_parity) + 2 * bitmap_len,
			  GFP_NOFS);
	if (!sparity) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.malloc_errors++;
		spin_unlock(&sctx->stat_lock);
		return -ENOMEM;
	}

	sparity->stripe_len = map->stripe_len;
	sparity->nsectors = nsectors;
	sparity->sctx = sctx;
	sparity->scrub_dev = sdev;
	sparity->logic_start = logic_start;
	sparity->logic_end = logic_end;
	refcount_set(&sparity->refs, 1);
	INIT_LIST_HEAD(&sparity->spages);
	/* both bitmaps live in the flexible array at the end of sparity */
	sparity->dbitmap = sparity->bitmap;
	sparity->ebitmap = (void *)sparity->bitmap + bitmap_len;

	ret = 0;
	while (logic_start < logic_end) {
		if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
			key.type = BTRFS_METADATA_ITEM_KEY;
		else
			key.type = BTRFS_EXTENT_ITEM_KEY;
		key.objectid = logic_start;
		key.offset = (u64)-1;

		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0)
			goto out;

		if (ret > 0) {
			ret = btrfs_previous_extent_item(root, path, 0);
			if (ret < 0)
				goto out;
			if (ret > 0) {
				btrfs_release_path(path);
				ret = btrfs_search_slot(NULL, root, &key,
							path, 0, 0);
				if (ret < 0)
					goto out;
			}
		}

		stop_loop = 0;
		while (1) {
			u64 bytes;

			l = path->nodes[0];
			slot = path->slots[0];
			if (slot >= btrfs_header_nritems(l)) {
				ret = btrfs_next_leaf(root, path);
				if (ret == 0)
					continue;
				if (ret < 0)
					goto out;

				stop_loop = 1;
				break;
			}
			btrfs_item_key_to_cpu(l, &key, slot);

			if (key.type != BTRFS_EXTENT_ITEM_KEY &&
			    key.type != BTRFS_METADATA_ITEM_KEY)
				goto next;

			if (key.type == BTRFS_METADATA_ITEM_KEY)
				bytes = fs_info->nodesize;
			else
				bytes = key.offset;

			if (key.objectid + bytes <= logic_start)
				goto next;

			if (key.objectid >= logic_end) {
				stop_loop = 1;
				break;
			}

			while (key.objectid >= logic_start + map->stripe_len)
				logic_start += map->stripe_len;

			extent = btrfs_item_ptr(l, slot,
						struct btrfs_extent_item);
			flags = btrfs_extent_flags(l, extent);
			generation = btrfs_extent_generation(l, extent);

			if ((flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) &&
			    (key.objectid < logic_start ||
			     key.objectid + bytes >
			     logic_start + map->stripe_len)) {
				btrfs_err(fs_info,
					  "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
					  key.objectid, logic_start);
				spin_lock(&sctx->stat_lock);
				sctx->stat.uncorrectable_errors++;
				spin_unlock(&sctx->stat_lock);
				goto next;
			}
again:
			extent_logical = key.objectid;
			extent_len = bytes;

			/* trim the extent to the current stripe */
			if (extent_logical < logic_start) {
				extent_len -= logic_start - extent_logical;
				extent_logical = logic_start;
			}

			if (extent_logical + extent_len >
			    logic_start + map->stripe_len)
				extent_len = logic_start + map->stripe_len -
					     extent_logical;

			scrub_parity_mark_sectors_data(sparity, extent_logical,
						       extent_len);

			mapped_length = extent_len;
			bbio = NULL;
			ret = btrfs_map_block(fs_info, BTRFS_MAP_READ,
					      extent_logical, &mapped_length,
					      &bbio, 0);
			if (!ret) {
				if (!bbio || mapped_length < extent_len)
					ret = -EIO;
			}
			if (ret) {
				btrfs_put_bbio(bbio);
				goto out;
			}
			extent_physical = bbio->stripes[0].physical;
			extent_mirror_num = bbio->mirror_num;
			extent_dev = bbio->stripes[0].dev;
			btrfs_put_bbio(bbio);

			ret = btrfs_lookup_csums_range(csum_root,
						       extent_logical,
						       extent_logical + extent_len - 1,
						       &sctx->csum_list, 1);
			if (ret)
				goto out;

			ret = scrub_extent_for_parity(sparity, extent_logical,
						      extent_len,
						      extent_physical,
						      extent_dev, flags,
						      generation,
						      extent_mirror_num);

			scrub_free_csums(sctx);

			if (ret)
				goto out;

			if (extent_logical + extent_len <
			    key.objectid + bytes) {
				logic_start += map->stripe_len;

				if (logic_start >= logic_end) {
					stop_loop = 1;
					break;
				}

				if (logic_start < key.objectid + bytes) {
					cond_resched();
					goto again;
				}
			}
next:
			path->slots[0]++;
		}

		btrfs_release_path(path);

		if (stop_loop)
			break;

		logic_start += map->stripe_len;
	}
out:
	if (ret < 0)
		scrub_parity_mark_sectors_error(sparity, logic_start,
						logic_end - logic_start);
	scrub_parity_put(sparity);
	scrub_submit(sctx);
	mutex_lock(&sctx->wr_lock);
	scrub_wr_submit(sctx);
	mutex_unlock(&sctx->wr_lock);

	btrfs_release_path(path);
	return ret < 0 ? ret : 0;
}

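/*
 * Scrub the portion of a chunk that lives on stripe @num of @scrub_dev.
 * @base is the chunk's logical start and @length the device extent length.
 * The per-RAID-profile setup below determines the logical offset of the
 * first stripe, the logical distance between consecutive stripes on this
 * device (the increment) and the mirror number to read from.
 */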
static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
					   struct map_lookup *map,
					   struct btrfs_device *scrub_dev,
					   int num, u64 base, u64 length)
{
	struct btrfs_path *path, *ppath;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	struct btrfs_root *root = fs_info->extent_root;
	struct btrfs_root *csum_root = fs_info->csum_root;
	struct btrfs_extent_item *extent;
	struct blk_plug plug;
	u64 flags;
	int ret;
	int slot;
	u64 nstripes;
	struct extent_buffer *l;
	u64 physical;
	u64 logical;
	u64 logic_end;
	u64 physical_end;
	u64 generation;
	int mirror_num;
	struct reada_control *reada1;
	struct reada_control *reada2;
	struct btrfs_key key;
	struct btrfs_key key_end;
	u64 increment = map->stripe_len;
	u64 offset;
	u64 extent_logical;
	u64 extent_physical;
	u64 extent_len;
	u64 stripe_logical;
	u64 stripe_end;
	struct btrfs_device *extent_dev;
	int extent_mirror_num;
	int stop_loop = 0;

	physical = map->stripes[num].physical;
	offset = 0;
	nstripes = div64_u64(length, map->stripe_len);
	if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
		offset = map->stripe_len * num;
		increment = map->stripe_len * map->num_stripes;
		mirror_num = 1;
	} else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
		int factor = map->num_stripes / map->sub_stripes;
		offset = map->stripe_len * (num / map->sub_stripes);
		increment = map->stripe_len * factor;
		mirror_num = num % map->sub_stripes + 1;
	} else if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) {
		increment = map->stripe_len;
		mirror_num = num % map->num_stripes + 1;
	} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
		increment = map->stripe_len;
		mirror_num = num % map->num_stripes + 1;
	} else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
		get_raid56_logic_offset(physical, num, map, &offset, NULL);
		increment = map->stripe_len * nr_data_stripes(map);
		mirror_num = 1;
	} else {
		increment = map->stripe_len;
		mirror_num = 1;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ppath = btrfs_alloc_path();
	if (!ppath) {
		btrfs_free_path(path);
		return -ENOMEM;
	}

	/*
	 * Work on commit root. The related disk blocks are static as long as
	 * COW is applied. This means it is safe to rewrite them in order to
	 * repair disk errors without any race conditions.
	 */
	path->search_commit_root = 1;
	path->skip_locking = 1;

	ppath->search_commit_root = 1;
	ppath->skip_locking = 1;

	/*
	 * Trigger the readahead for the extent tree and csum tree and wait
	 * for completion. During readahead, the scrub is officially paused
	 * to not hold off transaction commits.
	 */
	logical = base + offset;
	physical_end = physical + nstripes * map->stripe_len;
	if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
		get_raid56_logic_offset(physical_end, num,
					map, &logic_end, NULL);
		logic_end += base;
	} else {
		logic_end = logical + increment * nstripes;
	}
	wait_event(sctx->list_wait,
		   atomic_read(&sctx->bios_in_flight) == 0);
	scrub_blocked_if_needed(fs_info);

	/* FIXME it might be better to start readahead at commit root */
	key.objectid = logical;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = (u64)0;
	key_end.objectid = logic_end;
	key_end.type = BTRFS_METADATA_ITEM_KEY;
	key_end.offset = (u64)-1;
	reada1 = btrfs_reada_add(root, &key, &key_end);

	key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
	key.type = BTRFS_EXTENT_CSUM_KEY;
	key.offset = logical;
	key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
	key_end.type = BTRFS_EXTENT_CSUM_KEY;
	key_end.offset = logic_end;
	reada2 = btrfs_reada_add(csum_root, &key, &key_end);

	if (!IS_ERR(reada1))
		btrfs_reada_wait(reada1);
	if (!IS_ERR(reada2))
		btrfs_reada_wait(reada2);

	/*
	 * Collect all data csums for the stripe to avoid seeking during
	 * the scrub. This might currently (crc32) end up being about 1MB.
	 */
	blk_start_plug(&plug);

	/*
	 * now find all extents for each stripe and scrub them
	 */
	ret = 0;
	while (physical < physical_end) {
		/*
		 * canceled?
		 */
		if (atomic_read(&fs_info->scrub_cancel_req) ||
		    atomic_read(&sctx->cancel_req)) {
			ret = -ECANCELED;
			goto out;
		}
		/*
		 * check to see if we have to pause
		 */
		if (atomic_read(&fs_info->scrub_pause_req)) {
			/* push queued extents */
			sctx->flush_all_writes = true;
			scrub_submit(sctx);
			mutex_lock(&sctx->wr_lock);
			scrub_wr_submit(sctx);
			mutex_unlock(&sctx->wr_lock);
			wait_event(sctx->list_wait,
				   atomic_read(&sctx->bios_in_flight) == 0);
			sctx->flush_all_writes = false;
			scrub_blocked_if_needed(fs_info);
		}

		if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
			ret = get_raid56_logic_offset(physical, num, map,
						      &logical,
						      &stripe_logical);
			logical += base;
			if (ret) {
				/* it is a parity stripe */
				stripe_logical += base;
				stripe_end = stripe_logical + increment;
				ret = scrub_raid56_parity(sctx, map, scrub_dev,
							  ppath, stripe_logical,
							  stripe_end);
				if (ret)
					goto out;
				goto skip;
			}
		}

		if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
			key.type = BTRFS_METADATA_ITEM_KEY;
		else
			key.type = BTRFS_EXTENT_ITEM_KEY;
		key.objectid = logical;
		key.offset = (u64)-1;

		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0)
			goto out;

		if (ret > 0) {
			ret = btrfs_previous_extent_item(root, path, 0);
			if (ret < 0)
				goto out;
			if (ret > 0) {
				/* there's no smaller item, so stick with the
				 * larger one */
				btrfs_release_path(path);
				ret = btrfs_search_slot(NULL, root, &key,
							path, 0, 0);
				if (ret < 0)
					goto out;
			}
		}

		stop_loop = 0;
		while (1) {
			u64 bytes;

			l = path->nodes[0];
			slot = path->slots[0];
			if (slot >= btrfs_header_nritems(l)) {
				ret = btrfs_next_leaf(root, path);
				if (ret == 0)
					continue;
				if (ret < 0)
					goto out;

				stop_loop = 1;
				break;
			}
			btrfs_item_key_to_cpu(l, &key, slot);

			if (key.type != BTRFS_EXTENT_ITEM_KEY &&
			    key.type != BTRFS_METADATA_ITEM_KEY)
				goto next;

			if (key.type == BTRFS_METADATA_ITEM_KEY)
				bytes = fs_info->nodesize;
			else
				bytes = key.offset;

			if (key.objectid + bytes <= logical)
				goto next;

			if (key.objectid >= logical + map->stripe_len) {
				/* out of this device extent */
				if (key.objectid >= logic_end)
					stop_loop = 1;
				break;
			}

			extent = btrfs_item_ptr(l, slot,
						struct btrfs_extent_item);
			flags = btrfs_extent_flags(l, extent);
			generation = btrfs_extent_generation(l, extent);

			if ((flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) &&
			    (key.objectid < logical ||
			     key.objectid + bytes >
			     logical + map->stripe_len)) {
				btrfs_err(fs_info,
					  "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
					  key.objectid, logical);
				spin_lock(&sctx->stat_lock);
				sctx->stat.uncorrectable_errors++;
				spin_unlock(&sctx->stat_lock);
				goto next;
			}

again:
			extent_logical = key.objectid;
			extent_len = bytes;

			/*
			 * trim extent to this stripe
			 */
			if (extent_logical < logical) {
				extent_len -= logical - extent_logical;
				extent_logical = logical;
			}
			if (extent_logical + extent_len >
			    logical + map->stripe_len) {
				extent_len = logical + map->stripe_len -
					     extent_logical;
			}

			extent_physical = extent_logical - logical + physical;
			extent_dev = scrub_dev;
			extent_mirror_num = mirror_num;
			if (sctx->is_dev_replace)
				scrub_remap_extent(fs_info, extent_logical,
						   extent_len, &extent_physical,
						   &extent_dev,
						   &extent_mirror_num);

			ret = btrfs_lookup_csums_range(csum_root,
						       extent_logical,
						       extent_logical +
						       extent_len - 1,
						       &sctx->csum_list, 1);
			if (ret)
				goto out;

			ret = scrub_extent(sctx, map, extent_logical, extent_len,
					   extent_physical, extent_dev, flags,
					   generation, extent_mirror_num,
					   extent_logical - logical + physical);

			scrub_free_csums(sctx);

			if (ret)
				goto out;

			if (extent_logical + extent_len <
			    key.objectid + bytes) {
				if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
					/*
					 * loop until we find next data stripe
					 * or we have finished all stripes.
					 */
loop:
					physical += map->stripe_len;
					ret = get_raid56_logic_offset(physical,
							num, map, &logical,
							&stripe_logical);
					logical += base;

					if (ret && physical < physical_end) {
						stripe_logical += base;
						stripe_end = stripe_logical +
								increment;
						ret = scrub_raid56_parity(sctx,
							map, scrub_dev, ppath,
							stripe_logical,
							stripe_end);
						if (ret)
							goto out;
						goto loop;
					}
				} else {
					physical += map->stripe_len;
					logical += increment;
				}
				if (logical < key.objectid + bytes) {
					cond_resched();
					goto again;
				}

				if (physical >= physical_end) {
					stop_loop = 1;
					break;
				}
			}
next:
			path->slots[0]++;
		}
		btrfs_release_path(path);
skip:
		logical += increment;
		physical += map->stripe_len;
		spin_lock(&sctx->stat_lock);
		if (stop_loop)
			sctx->stat.last_physical = map->stripes[num].physical +
						   length;
		else
			sctx->stat.last_physical = physical;
		spin_unlock(&sctx->stat_lock);
		if (stop_loop)
			break;
	}
out:
	/* push queued extents */
	scrub_submit(sctx);
	mutex_lock(&sctx->wr_lock);
	scrub_wr_submit(sctx);
	mutex_unlock(&sctx->wr_lock);

	blk_finish_plug(&plug);
	btrfs_free_path(path);
	btrfs_free_path(ppath);
	return ret < 0 ? ret : 0;
}

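/*
 * Scrub the stripes of one chunk that live on @scrub_dev. The extent map is
 * looked up by the chunk's logical offset; a missing map is only an error if
 * the block group has not been removed concurrently.
 */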
static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
					  struct btrfs_device *scrub_dev,
					  u64 chunk_offset, u64 length,
					  u64 dev_offset,
					  struct btrfs_block_group *cache)
{
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	struct extent_map_tree *map_tree = &fs_info->mapping_tree;
	struct map_lookup *map;
	struct extent_map *em;
	int i;
	int ret = 0;

	read_lock(&map_tree->lock);
	em = lookup_extent_mapping(map_tree, chunk_offset, 1);
	read_unlock(&map_tree->lock);

	if (!em) {
		/*
		 * Might have been an unused block group deleted by the
		 * cleaner kthread or relocation.
		 */
		spin_lock(&cache->lock);
		if (!cache->removed)
			ret = -EINVAL;
		spin_unlock(&cache->lock);

		return ret;
	}

	map = em->map_lookup;
	if (em->start != chunk_offset)
		goto out;

	if (em->len < length)
		goto out;

	for (i = 0; i < map->num_stripes; ++i) {
		if (map->stripes[i].dev->bdev == scrub_dev->bdev &&
		    map->stripes[i].physical == dev_offset) {
			ret = scrub_stripe(sctx, map, scrub_dev, i,
					   chunk_offset, length);
			if (ret)
				goto out;
		}
	}
out:
	free_extent_map(em);

	return ret;
}

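/*
 * Walk all device extents of @scrub_dev in the range [start, end): for each
 * one, look up the owning block group, mark it read-only for the duration of
 * the scrub, scrub the chunk, then drop the RO marker again. In the
 * dev-replace case the copy cursor is advanced as chunks complete.
 */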
static noinline_for_stack
int scrub_enumerate_chunks(struct scrub_ctx *sctx,
			   struct btrfs_device *scrub_dev, u64 start, u64 end)
{
	struct btrfs_dev_extent *dev_extent = NULL;
	struct btrfs_path *path;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	struct btrfs_root *root = fs_info->dev_root;
	u64 length;
	u64 chunk_offset;
	int ret = 0;
	int ro_set;
	int slot;
	struct extent_buffer *l;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_block_group *cache;
	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = READA_FORWARD;
	path->search_commit_root = 1;
	path->skip_locking = 1;

	key.objectid = scrub_dev->devid;
	key.offset = 0ull;
	key.type = BTRFS_DEV_EXTENT_KEY;

	while (1) {
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0)
			break;
		if (ret > 0) {
			if (path->slots[0] >=
			    btrfs_header_nritems(path->nodes[0])) {
				ret = btrfs_next_leaf(root, path);
				if (ret < 0)
					break;
				if (ret > 0) {
					ret = 0;
					break;
				}
			} else {
				ret = 0;
			}
		}

		l = path->nodes[0];
		slot = path->slots[0];

		btrfs_item_key_to_cpu(l, &found_key, slot);

		if (found_key.objectid != scrub_dev->devid)
			break;

		if (found_key.type != BTRFS_DEV_EXTENT_KEY)
			break;

		if (found_key.offset >= end)
			break;

		if (found_key.offset < key.offset)
			break;

		dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
		length = btrfs_dev_extent_length(l, dev_extent);

		if (found_key.offset + length <= start)
			goto skip;

		chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);

		/*
		 * get a reference on the corresponding block group to prevent
		 * the chunk from going away while we scrub it
		 */
		cache = btrfs_lookup_block_group(fs_info, chunk_offset);

		/* some chunks are removed but not committed to disk yet,
		 * continue scrubbing */
		if (!cache)
			goto skip;

		/*
		 * btrfs_inc_block_group_ro() must be called with the scrub
		 * counted as paused, otherwise it can deadlock:
		 * btrfs_inc_block_group_ro()
		 * -> btrfs_wait_for_commit()
		 * -> btrfs_commit_transaction()
		 * -> btrfs_scrub_pause()
		 */
		scrub_pause_on(fs_info);

		/*
		 * Mark the block group RO so that no new extents are
		 * allocated in it while it is scrubbed. For a plain scrub no
		 * replacement chunk is preallocated (the second argument);
		 * failing to set the block group RO is then tolerable, see
		 * the -ENOSPC handling below.
		 */
		ret = btrfs_inc_block_group_ro(cache, sctx->is_dev_replace);
		if (ret == 0) {
			ro_set = 1;
		} else if (ret == -ENOSPC && !sctx->is_dev_replace) {
			/*
			 * btrfs_inc_block_group_ro() returns -ENOSPC when it
			 * failed to create a new chunk for metadata. That is
			 * not a problem for a plain scrub: metadata is
			 * always COWed and the scrub is paused across
			 * transaction commits, so continue with the block
			 * group left writable.
			 */
			ro_set = 0;
		} else {
			btrfs_warn(fs_info,
				   "failed setting block group ro: %d", ret);
			btrfs_put_block_group(cache);
			scrub_pause_off(fs_info);
			break;
		}

		/*
		 * Now the target block group is marked RO, wait for nocow
		 * writes to finish before dev-replace.
		 * COW is fine, as COW never overwrites extents in commit tree.
		 */
		if (sctx->is_dev_replace) {
			btrfs_wait_nocow_writers(cache);
			btrfs_wait_ordered_roots(fs_info, U64_MAX, cache->start,
					cache->length);
		}

		scrub_pause_off(fs_info);
		down_write(&dev_replace->rwsem);
		dev_replace->cursor_right = found_key.offset + length;
		dev_replace->cursor_left = found_key.offset;
		dev_replace->item_needs_writeback = 1;
		up_write(&dev_replace->rwsem);

		ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length,
				  found_key.offset, cache);

		/*
		 * Flush, submit all pending read and write bios, afterwards
		 * wait for them.
		 * Note that in the dev replace case, a read request causes
		 * write requests that are submitted in the read completion
		 * worker. Therefore in the current situation, it is required
		 * that all write requests are flushed, so that all read and
		 * write requests are really completed when bios_in_flight
		 * changes to 0.
		 */
		sctx->flush_all_writes = true;
		scrub_submit(sctx);
		mutex_lock(&sctx->wr_lock);
		scrub_wr_submit(sctx);
		mutex_unlock(&sctx->wr_lock);

		wait_event(sctx->list_wait,
			   atomic_read(&sctx->bios_in_flight) == 0);

		scrub_pause_on(fs_info);

		/*
		 * Must be done before we decrease @scrub_paused: make sure we
		 * don't block transaction commit while we are waiting for
		 * pending workers to finish.
		 */
		wait_event(sctx->list_wait,
			   atomic_read(&sctx->workers_pending) == 0);
		sctx->flush_all_writes = false;

		scrub_pause_off(fs_info);

		down_write(&dev_replace->rwsem);
		dev_replace->cursor_left = dev_replace->cursor_right;
		dev_replace->item_needs_writeback = 1;
		up_write(&dev_replace->rwsem);

		if (ro_set)
			btrfs_dec_block_group_ro(cache);

		/*
		 * We might have prevented the cleaner kthread from deleting
		 * this block group if it was already unused because we raced
		 * and set it to RO mode first. So add it back to the unused
		 * list, otherwise it might not ever be deleted unless a manual
		 * balance is triggered or it becomes used and unused again.
		 */
		spin_lock(&cache->lock);
		if (!cache->removed && !cache->ro && cache->reserved == 0 &&
		    cache->used == 0) {
			spin_unlock(&cache->lock);
			if (btrfs_test_opt(fs_info, DISCARD_ASYNC))
				btrfs_discard_queue_work(&fs_info->discard_ctl,
							 cache);
			else
				btrfs_mark_bg_unused(cache);
		} else {
			spin_unlock(&cache->lock);
		}

		btrfs_put_block_group(cache);
		if (ret)
			break;
		if (sctx->is_dev_replace &&
		    atomic64_read(&dev_replace->num_write_errors) > 0) {
			ret = -EIO;
			break;
		}
		if (sctx->stat.malloc_errors > 0) {
			ret = -ENOMEM;
			break;
		}
skip:
		key.offset = found_key.offset + length;
		btrfs_release_path(path);
	}

	btrfs_free_path(path);

	return ret;
}

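/*
 * Scrub all super block copies located on @scrub_dev; copies that would lie
 * beyond commit_total_bytes are skipped.
 */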
static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
					   struct btrfs_device *scrub_dev)
{
	int i;
	u64 bytenr;
	u64 gen;
	int ret;
	struct btrfs_fs_info *fs_info = sctx->fs_info;

	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
		return -EIO;

	/* Seed devices of a new filesystem have their own generation. */
	if (scrub_dev->fs_devices != fs_info->fs_devices)
		gen = scrub_dev->generation;
	else
		gen = fs_info->last_trans_committed;

	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
		bytenr = btrfs_sb_offset(i);
		if (bytenr + BTRFS_SUPER_INFO_SIZE >
		    scrub_dev->commit_total_bytes)
			break;

		ret = scrub_pages(sctx, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr,
				  scrub_dev, BTRFS_EXTENT_FLAG_SUPER, gen, i,
				  NULL, 1, bytenr);
		if (ret)
			return ret;
	}
	wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);

	return 0;
}

/*
 * Get a reference count on fs_info->scrub_workers. Start the worker threads
 * in all cases; if they are already started, they are guaranteed to be
 * running.
 */
static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
						int is_dev_replace)
{
	unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND;
	int max_active = fs_info->thread_pool_size;

	lockdep_assert_held(&fs_info->scrub_lock);

	if (refcount_read(&fs_info->scrub_workers_refcnt) == 0) {
		ASSERT(fs_info->scrub_workers == NULL);
		fs_info->scrub_workers = btrfs_alloc_workqueue(fs_info, "scrub",
				flags, is_dev_replace ? 1 : max_active, 4);
		if (!fs_info->scrub_workers)
			goto fail_scrub_workers;

		ASSERT(fs_info->scrub_wr_completion_workers == NULL);
		fs_info->scrub_wr_completion_workers =
			btrfs_alloc_workqueue(fs_info, "scrubwrc", flags,
					      max_active, 2);
		if (!fs_info->scrub_wr_completion_workers)
			goto fail_scrub_wr_completion_workers;

		ASSERT(fs_info->scrub_parity_workers == NULL);
		fs_info->scrub_parity_workers =
			btrfs_alloc_workqueue(fs_info, "scrubparity", flags,
					      max_active, 2);
		if (!fs_info->scrub_parity_workers)
			goto fail_scrub_parity_workers;

		refcount_set(&fs_info->scrub_workers_refcnt, 1);
	} else {
		refcount_inc(&fs_info->scrub_workers_refcnt);
	}
	return 0;

fail_scrub_parity_workers:
	btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers);
fail_scrub_wr_completion_workers:
	btrfs_destroy_workqueue(fs_info->scrub_workers);
fail_scrub_workers:
	return -ENOMEM;
}

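/*
 * Entry point for scrub and device replace: validate the size assumptions
 * baked into the scrub code, look up the device, set up the scrub context
 * and worker threads, then scrub the super blocks (plain scrub only) and all
 * chunks of the device. The main loop runs in NOFS allocation context to
 * avoid deadlocking against a transaction commit that pauses the scrub.
 */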
int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
		    u64 end, struct btrfs_scrub_progress *progress,
		    int readonly, int is_dev_replace)
{
	struct scrub_ctx *sctx;
	int ret;
	struct btrfs_device *dev;
	unsigned int nofs_flag;
	struct btrfs_workqueue *scrub_workers = NULL;
	struct btrfs_workqueue *scrub_wr_comp = NULL;
	struct btrfs_workqueue *scrub_parity = NULL;

	if (btrfs_fs_closing(fs_info))
		return -EAGAIN;

	if (fs_info->nodesize > BTRFS_STRIPE_LEN) {
		/*
		 * In this case scrub is unable to calculate the checksum the
		 * way it is implemented. Do not handle this situation at all
		 * because it won't ever happen.
		 */
		btrfs_err(fs_info,
			   "scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails",
			  fs_info->nodesize,
			  BTRFS_STRIPE_LEN);
		return -EINVAL;
	}

	if (fs_info->sectorsize != PAGE_SIZE) {
		/* not supported for data w/o checksums */
		btrfs_err_rl(fs_info,
			   "scrub: size assumption sectorsize != PAGE_SIZE (%d != %lu) fails",
			  fs_info->sectorsize, PAGE_SIZE);
		return -EINVAL;
	}

	if (fs_info->nodesize >
	    PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK ||
	    fs_info->sectorsize > PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK) {
		/*
		 * Would exhaust the array bounds of the pagev member in
		 * struct scrub_block.
		 */
		btrfs_err(fs_info,
			  "scrub: size assumption nodesize and sectorsize <= SCRUB_MAX_PAGES_PER_BLOCK (%d <= %d && %d <= %d) fails",
			  fs_info->nodesize,
			  SCRUB_MAX_PAGES_PER_BLOCK,
			  fs_info->sectorsize,
			  SCRUB_MAX_PAGES_PER_BLOCK);
		return -EINVAL;
	}

	/* Allocate outside of device_list_mutex */
	sctx = scrub_setup_ctx(fs_info, is_dev_replace);
	if (IS_ERR(sctx))
		return PTR_ERR(sctx);

	mutex_lock(&fs_info->fs_devices->device_list_mutex);
	dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true);
	if (!dev || (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) &&
		     !is_dev_replace)) {
		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
		ret = -ENODEV;
		goto out_free_ctx;
	}

	if (!is_dev_replace && !readonly &&
	    !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
		btrfs_err_in_rcu(fs_info, "scrub: device %s is not writable",
				 rcu_str_deref(dev->name));
		ret = -EROFS;
		goto out_free_ctx;
	}

	mutex_lock(&fs_info->scrub_lock);
	if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
	    test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &dev->dev_state)) {
		mutex_unlock(&fs_info->scrub_lock);
		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
		ret = -EIO;
		goto out_free_ctx;
	}

	down_read(&fs_info->dev_replace.rwsem);
	if (dev->scrub_ctx ||
	    (!is_dev_replace &&
	     btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) {
		up_read(&fs_info->dev_replace.rwsem);
		mutex_unlock(&fs_info->scrub_lock);
		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
		ret = -EINPROGRESS;
		goto out_free_ctx;
	}
	up_read(&fs_info->dev_replace.rwsem);

	ret = scrub_workers_get(fs_info, is_dev_replace);
	if (ret) {
		mutex_unlock(&fs_info->scrub_lock);
		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
		goto out_free_ctx;
	}

	sctx->readonly = readonly;
	dev->scrub_ctx = sctx;
	mutex_unlock(&fs_info->fs_devices->device_list_mutex);

	/*
	 * By checking @scrub_pause_req here, we can avoid a race between
	 * committing the transaction and scrubbing.
	 */
	__scrub_blocked_if_needed(fs_info);
	atomic_inc(&fs_info->scrubs_running);
	mutex_unlock(&fs_info->scrub_lock);

	/*
	 * In order to avoid deadlock with reclaim when there is a transaction
	 * trying to pause scrub, make sure we use GFP_NOFS for all the
	 * allocations done at scrub_pages() and scrub_pages_for_parity()
	 * invoked by our callees. The pausing request is done when the
	 * transaction commit starts, and it blocks the transaction until
	 * scrub is paused (done at specific points at scrub_stripe() or
	 * right above before incrementing fs_info->scrubs_running).
	 */
	nofs_flag = memalloc_nofs_save();
	if (!is_dev_replace) {
		btrfs_info(fs_info, "scrub: started on devid %llu", devid);
		/*
		 * by holding device list mutex, we can
		 * kick off writing super in log tree sync.
		 */
		mutex_lock(&fs_info->fs_devices->device_list_mutex);
		ret = scrub_supers(sctx, dev);
		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
	}

	if (!ret)
		ret = scrub_enumerate_chunks(sctx, dev, start, end);
	memalloc_nofs_restore(nofs_flag);

	wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
	atomic_dec(&fs_info->scrubs_running);
	wake_up(&fs_info->scrub_pause_wait);

	wait_event(sctx->list_wait, atomic_read(&sctx->workers_pending) == 0);

	if (progress)
		memcpy(progress, &sctx->stat, sizeof(*progress));

	if (!is_dev_replace)
		btrfs_info(fs_info, "scrub: %s on devid %llu with status: %d",
			ret ? "not finished" : "finished", devid, ret);

	mutex_lock(&fs_info->scrub_lock);
	dev->scrub_ctx = NULL;
	if (refcount_dec_and_test(&fs_info->scrub_workers_refcnt)) {
		scrub_workers = fs_info->scrub_workers;
		scrub_wr_comp = fs_info->scrub_wr_completion_workers;
		scrub_parity = fs_info->scrub_parity_workers;

		fs_info->scrub_workers = NULL;
		fs_info->scrub_wr_completion_workers = NULL;
		fs_info->scrub_parity_workers = NULL;
	}
	mutex_unlock(&fs_info->scrub_lock);

	btrfs_destroy_workqueue(scrub_workers);
	btrfs_destroy_workqueue(scrub_wr_comp);
	btrfs_destroy_workqueue(scrub_parity);
	scrub_put_ctx(sctx);

	return ret;

out_free_ctx:
	scrub_free_ctx(sctx);

	return ret;
}

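/*
 * Pause all running scrubs: raise the pause request and wait until every
 * scrub has reached one of its pause points. Called when committing a
 * transaction; resumed again via btrfs_scrub_continue().
 */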
void btrfs_scrub_pause(struct btrfs_fs_info *fs_info)
{
	mutex_lock(&fs_info->scrub_lock);
	atomic_inc(&fs_info->scrub_pause_req);
	while (atomic_read(&fs_info->scrubs_paused) !=
	       atomic_read(&fs_info->scrubs_running)) {
		mutex_unlock(&fs_info->scrub_lock);
		wait_event(fs_info->scrub_pause_wait,
			   atomic_read(&fs_info->scrubs_paused) ==
			   atomic_read(&fs_info->scrubs_running));
		mutex_lock(&fs_info->scrub_lock);
	}
	mutex_unlock(&fs_info->scrub_lock);
}

void btrfs_scrub_continue(struct btrfs_fs_info *fs_info)
{
	atomic_dec(&fs_info->scrub_pause_req);
	wake_up(&fs_info->scrub_pause_wait);
}

int btrfs_scrub_cancel(struct btrfs_fs_info *fs_info)
{
	mutex_lock(&fs_info->scrub_lock);
	if (!atomic_read(&fs_info->scrubs_running)) {
		mutex_unlock(&fs_info->scrub_lock);
		return -ENOTCONN;
	}

	atomic_inc(&fs_info->scrub_cancel_req);
	while (atomic_read(&fs_info->scrubs_running)) {
		mutex_unlock(&fs_info->scrub_lock);
		wait_event(fs_info->scrub_pause_wait,
			   atomic_read(&fs_info->scrubs_running) == 0);
		mutex_lock(&fs_info->scrub_lock);
	}
	atomic_dec(&fs_info->scrub_cancel_req);
	mutex_unlock(&fs_info->scrub_lock);

	return 0;
}

int btrfs_scrub_cancel_dev(struct btrfs_device *dev)
{
	struct btrfs_fs_info *fs_info = dev->fs_info;
	struct scrub_ctx *sctx;

	mutex_lock(&fs_info->scrub_lock);
	sctx = dev->scrub_ctx;
	if (!sctx) {
		mutex_unlock(&fs_info->scrub_lock);
		return -ENOTCONN;
	}
	atomic_inc(&sctx->cancel_req);
	while (dev->scrub_ctx) {
		mutex_unlock(&fs_info->scrub_lock);
		wait_event(fs_info->scrub_pause_wait,
			   dev->scrub_ctx == NULL);
		mutex_lock(&fs_info->scrub_lock);
	}
	mutex_unlock(&fs_info->scrub_lock);

	return 0;
}

int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
			 struct btrfs_scrub_progress *progress)
{
	struct btrfs_device *dev;
	struct scrub_ctx *sctx = NULL;

	mutex_lock(&fs_info->fs_devices->device_list_mutex);
	dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true);
	if (dev)
		sctx = dev->scrub_ctx;
	if (sctx)
		memcpy(progress, &sctx->stat, sizeof(*progress));
	mutex_unlock(&fs_info->fs_devices->device_list_mutex);

	return dev ? (sctx ? 0 : -ENOTCONN) : -ENODEV;
}

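/*
 * In the dev-replace case, remap a logical extent to the physical location
 * of its first stripe so that reads are directed at a live copy. If the
 * mapping fails or the stripe's device has no bdev, the outputs are left
 * untouched and the caller's defaults are used.
 */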
static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
			       u64 extent_logical, u64 extent_len,
			       u64 *extent_physical,
			       struct btrfs_device **extent_dev,
			       int *extent_mirror_num)
{
	u64 mapped_length;
	struct btrfs_bio *bbio = NULL;
	int ret;

	mapped_length = extent_len;
	ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, extent_logical,
			      &mapped_length, &bbio, 0);
	if (ret || !bbio || mapped_length < extent_len ||
	    !bbio->stripes[0].dev->bdev) {
		btrfs_put_bbio(bbio);
		return;
	}

	*extent_physical = bbio->stripes[0].physical;
	*extent_mirror_num = bbio->mirror_num;
	*extent_dev = bbio->stripes[0].dev;
	btrfs_put_bbio(bbio);
}