// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2011, 2012 STRATO.  All rights reserved.
 */

6#include <linux/blkdev.h>
7#include <linux/ratelimit.h>
8#include <linux/sched/mm.h>
9#include <crypto/hash.h>
10#include "ctree.h"
11#include "volumes.h"
12#include "disk-io.h"
13#include "ordered-data.h"
14#include "transaction.h"
15#include "backref.h"
16#include "extent_io.h"
17#include "dev-replace.h"
18#include "check-integrity.h"
19#include "rcu-string.h"
20#include "raid56.h"
21#include "block-group.h"

/*
 * Scrub reads every allocated extent and super block copy and verifies the
 * checksums.  If a bad checksum is found, or an extent cannot be read at all,
 * a good copy is looked up on another mirror and written back over the bad
 * one (unless the scrub was started read-only).
 */

36struct scrub_block;
37struct scrub_ctx;

/*
 * The following three values only influence performance.  The last one
 * configures the number of parallel and outstanding I/O operations.  The
 * first two values configure an upper limit for the number of (dynamically
 * allocated) pages that are added to a bio.
 */
45#define SCRUB_PAGES_PER_RD_BIO 32
46#define SCRUB_PAGES_PER_WR_BIO 32
47#define SCRUB_BIOS_PER_SCTX 64

/*
 * The following value times PAGE_SIZE needs to be large enough to match the
 * largest node/leaf/sector size that shall be supported.
 * Values larger than BTRFS_STRIPE_LEN are not supported.
 */
54#define SCRUB_MAX_PAGES_PER_BLOCK 16
55
56struct scrub_recover {
57 refcount_t refs;
58 struct btrfs_bio *bbio;
59 u64 map_length;
60};
61
62struct scrub_page {
63 struct scrub_block *sblock;
64 struct page *page;
65 struct btrfs_device *dev;
66 struct list_head list;
67 u64 flags;
68 u64 generation;
69 u64 logical;
70 u64 physical;
71 u64 physical_for_dev_replace;
72 atomic_t refs;
73 struct {
74 unsigned int mirror_num:8;
75 unsigned int have_csum:1;
76 unsigned int io_error:1;
77 };
78 u8 csum[BTRFS_CSUM_SIZE];
79
80 struct scrub_recover *recover;
81};
82
83struct scrub_bio {
84 int index;
85 struct scrub_ctx *sctx;
86 struct btrfs_device *dev;
87 struct bio *bio;
88 blk_status_t status;
89 u64 logical;
90 u64 physical;
91#if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO
92 struct scrub_page *pagev[SCRUB_PAGES_PER_WR_BIO];
93#else
94 struct scrub_page *pagev[SCRUB_PAGES_PER_RD_BIO];
95#endif
96 int page_count;
97 int next_free;
98 struct btrfs_work work;
99};
100
101struct scrub_block {
102 struct scrub_page *pagev[SCRUB_MAX_PAGES_PER_BLOCK];
103 int page_count;
104 atomic_t outstanding_pages;
105 refcount_t refs;
106 struct scrub_ctx *sctx;
107 struct scrub_parity *sparity;
108 struct {
109 unsigned int header_error:1;
110 unsigned int checksum_error:1;
111 unsigned int no_io_error_seen:1;
112 unsigned int generation_error:1;

		/* Set when the block's data was successfully corrected */
116 unsigned int data_corrected:1;
117 };
118 struct btrfs_work work;
119};
120
/* Used for the chunks with parity stripe such RAID5/6 */
122struct scrub_parity {
123 struct scrub_ctx *sctx;
124
125 struct btrfs_device *scrub_dev;
126
127 u64 logic_start;
128
129 u64 logic_end;
130
131 int nsectors;
132
133 u64 stripe_len;
134
135 refcount_t refs;
136
137 struct list_head spages;

	/* Work of parity check and repair */
140 struct btrfs_work work;

	/* Mark the parity blocks which have data */
143 unsigned long *dbitmap;

	/*
	 * Mark the parity blocks which have data, but errors happened when
	 * reading the data or checking it against the checksum
	 */
149 unsigned long *ebitmap;
150
151 unsigned long bitmap[0];
152};
153
154struct scrub_ctx {
155 struct scrub_bio *bios[SCRUB_BIOS_PER_SCTX];
156 struct btrfs_fs_info *fs_info;
157 int first_free;
158 int curr;
159 atomic_t bios_in_flight;
160 atomic_t workers_pending;
161 spinlock_t list_lock;
162 wait_queue_head_t list_wait;
163 u16 csum_size;
164 struct list_head csum_list;
165 atomic_t cancel_req;
166 int readonly;
167 int pages_per_rd_bio;
168
169 int is_dev_replace;
170
171 struct scrub_bio *wr_curr_bio;
172 struct mutex wr_lock;
173 int pages_per_wr_bio;
174 struct btrfs_device *wr_tgtdev;
175 bool flush_all_writes;

	/*
	 * statistics
	 */
180 struct btrfs_scrub_progress stat;
181 spinlock_t stat_lock;

	/*
	 * Use a ref counter to avoid use-after-free issues. Scrub workers
	 * decrement bios_in_flight and workers_pending and then do a wakeup
	 * on the list_wait wait queue. We must ensure the main scrub task
	 * does not free the scrub context before or while the workers are
	 * finishing all bios.
	 */
190 refcount_t refs;
191};
192
193struct scrub_warning {
194 struct btrfs_path *path;
195 u64 extent_item_size;
196 const char *errstr;
197 u64 physical;
198 u64 logical;
199 struct btrfs_device *dev;
200};
201
202struct full_stripe_lock {
203 struct rb_node node;
204 u64 logical;
205 u64 refs;
206 struct mutex mutex;
207};
208
209static void scrub_pending_bio_inc(struct scrub_ctx *sctx);
210static void scrub_pending_bio_dec(struct scrub_ctx *sctx);
211static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
212static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
213 struct scrub_block *sblocks_for_recheck);
214static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
215 struct scrub_block *sblock,
216 int retry_failed_mirror);
217static void scrub_recheck_block_checksum(struct scrub_block *sblock);
218static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
219 struct scrub_block *sblock_good);
220static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
221 struct scrub_block *sblock_good,
222 int page_num, int force_write);
223static void scrub_write_block_to_dev_replace(struct scrub_block *sblock);
224static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
225 int page_num);
226static int scrub_checksum_data(struct scrub_block *sblock);
227static int scrub_checksum_tree_block(struct scrub_block *sblock);
228static int scrub_checksum_super(struct scrub_block *sblock);
229static void scrub_block_get(struct scrub_block *sblock);
230static void scrub_block_put(struct scrub_block *sblock);
231static void scrub_page_get(struct scrub_page *spage);
232static void scrub_page_put(struct scrub_page *spage);
233static void scrub_parity_get(struct scrub_parity *sparity);
234static void scrub_parity_put(struct scrub_parity *sparity);
235static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
236 struct scrub_page *spage);
237static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
238 u64 physical, struct btrfs_device *dev, u64 flags,
239 u64 gen, int mirror_num, u8 *csum, int force,
240 u64 physical_for_dev_replace);
241static void scrub_bio_end_io(struct bio *bio);
242static void scrub_bio_end_io_worker(struct btrfs_work *work);
243static void scrub_block_complete(struct scrub_block *sblock);
244static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
245 u64 extent_logical, u64 extent_len,
246 u64 *extent_physical,
247 struct btrfs_device **extent_dev,
248 int *extent_mirror_num);
249static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
250 struct scrub_page *spage);
251static void scrub_wr_submit(struct scrub_ctx *sctx);
252static void scrub_wr_bio_end_io(struct bio *bio);
253static void scrub_wr_bio_end_io_worker(struct btrfs_work *work);
254static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
255static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
256static void scrub_put_ctx(struct scrub_ctx *sctx);
257
258static inline int scrub_is_page_on_raid56(struct scrub_page *page)
259{
260 return page->recover &&
261 (page->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK);
262}
263
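/*
 * Each bio submitted by scrub pins the scrub context: scrub_pending_bio_inc()
 * takes a context reference together with the bios_in_flight count, and
 * scrub_pending_bio_dec() drops both and wakes up waiters on list_wait.
 */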
264static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
265{
266 refcount_inc(&sctx->refs);
267 atomic_inc(&sctx->bios_in_flight);
268}
269
270static void scrub_pending_bio_dec(struct scrub_ctx *sctx)
271{
272 atomic_dec(&sctx->bios_in_flight);
273 wake_up(&sctx->list_wait);
274 scrub_put_ctx(sctx);
275}
276
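/*
 * Wait until any pause request is withdrawn.  The scrub_lock is dropped while
 * waiting and re-taken before returning; the caller must hold it.
 */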
277static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
278{
279 while (atomic_read(&fs_info->scrub_pause_req)) {
280 mutex_unlock(&fs_info->scrub_lock);
281 wait_event(fs_info->scrub_pause_wait,
282 atomic_read(&fs_info->scrub_pause_req) == 0);
283 mutex_lock(&fs_info->scrub_lock);
284 }
285}
286
287static void scrub_pause_on(struct btrfs_fs_info *fs_info)
288{
289 atomic_inc(&fs_info->scrubs_paused);
290 wake_up(&fs_info->scrub_pause_wait);
291}
292
293static void scrub_pause_off(struct btrfs_fs_info *fs_info)
294{
295 mutex_lock(&fs_info->scrub_lock);
296 __scrub_blocked_if_needed(fs_info);
297 atomic_dec(&fs_info->scrubs_paused);
298 mutex_unlock(&fs_info->scrub_lock);
299
300 wake_up(&fs_info->scrub_pause_wait);
301}
302
303static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
304{
305 scrub_pause_on(fs_info);
306 scrub_pause_off(fs_info);
307}
308
/*
 * Insert new full stripe lock into full stripe locks tree
 *
 * Return pointer to existing or newly inserted full_stripe_lock structure if
 * everything works well.
 * Return ERR_PTR(-ENOMEM) if we failed to allocate memory
 *
 * NOTE: caller must hold full_stripe_locks_root->lock before calling this
 * function
 */
319static struct full_stripe_lock *insert_full_stripe_lock(
320 struct btrfs_full_stripe_locks_tree *locks_root,
321 u64 fstripe_logical)
322{
323 struct rb_node **p;
324 struct rb_node *parent = NULL;
325 struct full_stripe_lock *entry;
326 struct full_stripe_lock *ret;
327
328 lockdep_assert_held(&locks_root->lock);
329
330 p = &locks_root->root.rb_node;
331 while (*p) {
332 parent = *p;
333 entry = rb_entry(parent, struct full_stripe_lock, node);
334 if (fstripe_logical < entry->logical) {
335 p = &(*p)->rb_left;
336 } else if (fstripe_logical > entry->logical) {
337 p = &(*p)->rb_right;
338 } else {
339 entry->refs++;
340 return entry;
341 }
342 }
343
	/*
	 * Insert new lock.
	 */
347 ret = kmalloc(sizeof(*ret), GFP_KERNEL);
348 if (!ret)
349 return ERR_PTR(-ENOMEM);
350 ret->logical = fstripe_logical;
351 ret->refs = 1;
352 mutex_init(&ret->mutex);
353
354 rb_link_node(&ret->node, parent, p);
355 rb_insert_color(&ret->node, &locks_root->root);
356 return ret;
357}
358

/*
 * Search for a full stripe lock of a block group
 *
 * Return pointer to existing full stripe lock if found
 * Return NULL if not found
 */
365static struct full_stripe_lock *search_full_stripe_lock(
366 struct btrfs_full_stripe_locks_tree *locks_root,
367 u64 fstripe_logical)
368{
369 struct rb_node *node;
370 struct full_stripe_lock *entry;
371
372 lockdep_assert_held(&locks_root->lock);
373
374 node = locks_root->root.rb_node;
375 while (node) {
376 entry = rb_entry(node, struct full_stripe_lock, node);
377 if (fstripe_logical < entry->logical)
378 node = node->rb_left;
379 else if (fstripe_logical > entry->logical)
380 node = node->rb_right;
381 else
382 return entry;
383 }
384 return NULL;
385}
386

/*
 * Helper to get full stripe logical from a normal bytenr.
 *
 * Caller must ensure @cache is a RAID56 block group.
 */
392static u64 get_full_stripe_logical(struct btrfs_block_group *cache, u64 bytenr)
393{
394 u64 ret;

	/*
	 * Due to chunk item size limit, full stripe length should not be
	 * larger than U32_MAX.  Just a sanity check here.
	 */
400 WARN_ON_ONCE(cache->full_stripe_len >= U32_MAX);

	/*
	 * round_down() can only handle power of 2, while RAID56 full
	 * stripe length can be 64KiB * n, so we need to manually round down.
	 */
406 ret = div64_u64(bytenr - cache->start, cache->full_stripe_len) *
407 cache->full_stripe_len + cache->start;
408 return ret;
409}
410

/*
 * Lock a full stripe to avoid concurrency of recovery and read
 *
 * It's only used for profiles with parities (RAID5/6), for other profiles it
 * does nothing.
 *
 * Return 0 if we locked the full stripe covering @bytenr, with a mutex held.
 * So caller must call unlock_full_stripe() after using this function.
 *
 * Return <0 if we encounter an error.
 */
422static int lock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr,
423 bool *locked_ret)
424{
425 struct btrfs_block_group *bg_cache;
426 struct btrfs_full_stripe_locks_tree *locks_root;
427 struct full_stripe_lock *existing;
428 u64 fstripe_start;
429 int ret = 0;
430
431 *locked_ret = false;
432 bg_cache = btrfs_lookup_block_group(fs_info, bytenr);
433 if (!bg_cache) {
434 ASSERT(0);
435 return -ENOENT;
436 }

	/* Profiles not based on parity don't need full stripe lock */
439 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK))
440 goto out;
441 locks_root = &bg_cache->full_stripe_locks_root;
442
443 fstripe_start = get_full_stripe_logical(bg_cache, bytenr);

	/* Now insert the full stripe lock */
446 mutex_lock(&locks_root->lock);
447 existing = insert_full_stripe_lock(locks_root, fstripe_start);
448 mutex_unlock(&locks_root->lock);
449 if (IS_ERR(existing)) {
450 ret = PTR_ERR(existing);
451 goto out;
452 }
453 mutex_lock(&existing->mutex);
454 *locked_ret = true;
455out:
456 btrfs_put_block_group(bg_cache);
457 return ret;
458}
459

/*
 * Unlock a full stripe.
 *
 * NOTE: Caller must ensure it's the same context calling the corresponding
 * lock_full_stripe().
 *
 * Return 0 if we unlock the full stripe without problem.
 * Return <0 for error
 */
469static int unlock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr,
470 bool locked)
471{
472 struct btrfs_block_group *bg_cache;
473 struct btrfs_full_stripe_locks_tree *locks_root;
474 struct full_stripe_lock *fstripe_lock;
475 u64 fstripe_start;
476 bool freeit = false;
477 int ret = 0;

	/* If we didn't acquire the full stripe lock, no need to continue */
480 if (!locked)
481 return 0;
482
483 bg_cache = btrfs_lookup_block_group(fs_info, bytenr);
484 if (!bg_cache) {
485 ASSERT(0);
486 return -ENOENT;
487 }
488 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK))
489 goto out;
490
491 locks_root = &bg_cache->full_stripe_locks_root;
492 fstripe_start = get_full_stripe_logical(bg_cache, bytenr);
493
494 mutex_lock(&locks_root->lock);
495 fstripe_lock = search_full_stripe_lock(locks_root, fstripe_start);
	/* Unpaired unlock_full_stripe() detected */
497 if (!fstripe_lock) {
498 WARN_ON(1);
499 ret = -ENOENT;
500 mutex_unlock(&locks_root->lock);
501 goto out;
502 }
503
504 if (fstripe_lock->refs == 0) {
505 WARN_ON(1);
506 btrfs_warn(fs_info, "full stripe lock at %llu refcount underflow",
507 fstripe_lock->logical);
508 } else {
509 fstripe_lock->refs--;
510 }
511
512 if (fstripe_lock->refs == 0) {
513 rb_erase(&fstripe_lock->node, &locks_root->root);
514 freeit = true;
515 }
516 mutex_unlock(&locks_root->lock);
517
518 mutex_unlock(&fstripe_lock->mutex);
519 if (freeit)
520 kfree(fstripe_lock);
521out:
522 btrfs_put_block_group(bg_cache);
523 return ret;
524}
525
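/* Free all checksums still queued on the per-context csum list. */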
526static void scrub_free_csums(struct scrub_ctx *sctx)
527{
528 while (!list_empty(&sctx->csum_list)) {
529 struct btrfs_ordered_sum *sum;
530 sum = list_first_entry(&sctx->csum_list,
531 struct btrfs_ordered_sum, list);
532 list_del(&sum->list);
533 kfree(sum);
534 }
535}
536
537static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
538{
539 int i;
540
541 if (!sctx)
542 return;

	/* this can happen when scrub is cancelled */
545 if (sctx->curr != -1) {
546 struct scrub_bio *sbio = sctx->bios[sctx->curr];
547
548 for (i = 0; i < sbio->page_count; i++) {
549 WARN_ON(!sbio->pagev[i]->page);
550 scrub_block_put(sbio->pagev[i]->sblock);
551 }
552 bio_put(sbio->bio);
553 }
554
555 for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
556 struct scrub_bio *sbio = sctx->bios[i];
557
558 if (!sbio)
559 break;
560 kfree(sbio);
561 }
562
563 kfree(sctx->wr_curr_bio);
564 scrub_free_csums(sctx);
565 kfree(sctx);
566}
567
568static void scrub_put_ctx(struct scrub_ctx *sctx)
569{
570 if (refcount_dec_and_test(&sctx->refs))
571 scrub_free_ctx(sctx);
572}
573
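/*
 * Allocate a scrub context and its pool of SCRUB_BIOS_PER_SCTX scrub bios,
 * chained through next_free so scrub_add_page_to_rd_bio() can grab a free
 * one without allocating.
 */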
574static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
575 struct btrfs_fs_info *fs_info, int is_dev_replace)
576{
577 struct scrub_ctx *sctx;
578 int i;
579
580 sctx = kzalloc(sizeof(*sctx), GFP_KERNEL);
581 if (!sctx)
582 goto nomem;
583 refcount_set(&sctx->refs, 1);
584 sctx->is_dev_replace = is_dev_replace;
585 sctx->pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO;
586 sctx->curr = -1;
587 sctx->fs_info = fs_info;
588 INIT_LIST_HEAD(&sctx->csum_list);
589 for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
590 struct scrub_bio *sbio;
591
592 sbio = kzalloc(sizeof(*sbio), GFP_KERNEL);
593 if (!sbio)
594 goto nomem;
595 sctx->bios[i] = sbio;
596
597 sbio->index = i;
598 sbio->sctx = sctx;
599 sbio->page_count = 0;
600 btrfs_init_work(&sbio->work, scrub_bio_end_io_worker, NULL,
601 NULL);
602
603 if (i != SCRUB_BIOS_PER_SCTX - 1)
604 sctx->bios[i]->next_free = i + 1;
605 else
606 sctx->bios[i]->next_free = -1;
607 }
608 sctx->first_free = 0;
609 atomic_set(&sctx->bios_in_flight, 0);
610 atomic_set(&sctx->workers_pending, 0);
611 atomic_set(&sctx->cancel_req, 0);
612 sctx->csum_size = btrfs_super_csum_size(fs_info->super_copy);
613
614 spin_lock_init(&sctx->list_lock);
615 spin_lock_init(&sctx->stat_lock);
616 init_waitqueue_head(&sctx->list_wait);
617
618 WARN_ON(sctx->wr_curr_bio != NULL);
619 mutex_init(&sctx->wr_lock);
620 sctx->wr_curr_bio = NULL;
621 if (is_dev_replace) {
622 WARN_ON(!fs_info->dev_replace.tgtdev);
623 sctx->pages_per_wr_bio = SCRUB_PAGES_PER_WR_BIO;
624 sctx->wr_tgtdev = fs_info->dev_replace.tgtdev;
625 sctx->flush_all_writes = false;
626 }
627
628 return sctx;
629
630nomem:
631 scrub_free_ctx(sctx);
632 return ERR_PTR(-ENOMEM);
633}
634
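/*
 * Backref-walk callback: resolve (root, inode, offset) to file paths and
 * print one warning line per path that references the errored extent.
 */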
635static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
636 void *warn_ctx)
637{
638 u64 isize;
639 u32 nlink;
640 int ret;
641 int i;
642 unsigned nofs_flag;
643 struct extent_buffer *eb;
644 struct btrfs_inode_item *inode_item;
645 struct scrub_warning *swarn = warn_ctx;
646 struct btrfs_fs_info *fs_info = swarn->dev->fs_info;
647 struct inode_fs_paths *ipath = NULL;
648 struct btrfs_root *local_root;
649 struct btrfs_key root_key;
650 struct btrfs_key key;
651
652 root_key.objectid = root;
653 root_key.type = BTRFS_ROOT_ITEM_KEY;
654 root_key.offset = (u64)-1;
655 local_root = btrfs_read_fs_root_no_name(fs_info, &root_key);
656 if (IS_ERR(local_root)) {
657 ret = PTR_ERR(local_root);
658 goto err;
659 }

	/*
	 * this makes the path point to (inum INODE_ITEM ioff)
	 */
664 key.objectid = inum;
665 key.type = BTRFS_INODE_ITEM_KEY;
666 key.offset = 0;
667
668 ret = btrfs_search_slot(NULL, local_root, &key, swarn->path, 0, 0);
669 if (ret) {
670 btrfs_release_path(swarn->path);
671 goto err;
672 }
673
674 eb = swarn->path->nodes[0];
675 inode_item = btrfs_item_ptr(eb, swarn->path->slots[0],
676 struct btrfs_inode_item);
677 isize = btrfs_inode_size(eb, inode_item);
678 nlink = btrfs_inode_nlink(eb, inode_item);
679 btrfs_release_path(swarn->path);

	/*
	 * init_ipath() allocates memory internally; do it under
	 * memalloc_nofs_save() so the allocation cannot recurse back into the
	 * filesystem from this scrub context.
	 */
686 nofs_flag = memalloc_nofs_save();
687 ipath = init_ipath(4096, local_root, swarn->path);
688 memalloc_nofs_restore(nofs_flag);
689 if (IS_ERR(ipath)) {
690 ret = PTR_ERR(ipath);
691 ipath = NULL;
692 goto err;
693 }
694 ret = paths_from_inode(inum, ipath);
695
696 if (ret < 0)
697 goto err;
698
	/*
	 * We deliberately ignore the bit ipath might have been too small to
	 * hold all of the paths here.
	 */
703 for (i = 0; i < ipath->fspath->elem_cnt; ++i)
704 btrfs_warn_in_rcu(fs_info,
705"%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu, length %llu, links %u (path: %s)",
706 swarn->errstr, swarn->logical,
707 rcu_str_deref(swarn->dev->name),
708 swarn->physical,
709 root, inum, offset,
710 min(isize - offset, (u64)PAGE_SIZE), nlink,
711 (char *)(unsigned long)ipath->fspath->val[i]);
712
713 free_ipath(ipath);
714 return 0;
715
716err:
717 btrfs_warn_in_rcu(fs_info,
718 "%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu: path resolving failed with ret=%d",
719 swarn->errstr, swarn->logical,
720 rcu_str_deref(swarn->dev->name),
721 swarn->physical,
722 root, inum, offset, ret);
723
724 free_ipath(ipath);
725 return 0;
726}
727
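/*
 * Report an error found during scrub.  For metadata, the tree backrefs are
 * printed; for data, the affected inodes/paths are resolved via
 * scrub_print_warning_inode().
 */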
728static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
729{
730 struct btrfs_device *dev;
731 struct btrfs_fs_info *fs_info;
732 struct btrfs_path *path;
733 struct btrfs_key found_key;
734 struct extent_buffer *eb;
735 struct btrfs_extent_item *ei;
736 struct scrub_warning swarn;
737 unsigned long ptr = 0;
738 u64 extent_item_pos;
739 u64 flags = 0;
740 u64 ref_root;
741 u32 item_size;
742 u8 ref_level = 0;
743 int ret;
744
745 WARN_ON(sblock->page_count < 1);
746 dev = sblock->pagev[0]->dev;
747 fs_info = sblock->sctx->fs_info;
748
749 path = btrfs_alloc_path();
750 if (!path)
751 return;
752
753 swarn.physical = sblock->pagev[0]->physical;
754 swarn.logical = sblock->pagev[0]->logical;
755 swarn.errstr = errstr;
756 swarn.dev = NULL;
757
758 ret = extent_from_logical(fs_info, swarn.logical, path, &found_key,
759 &flags);
760 if (ret < 0)
761 goto out;
762
763 extent_item_pos = swarn.logical - found_key.objectid;
764 swarn.extent_item_size = found_key.offset;
765
766 eb = path->nodes[0];
767 ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
768 item_size = btrfs_item_size_nr(eb, path->slots[0]);
769
770 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
771 do {
772 ret = tree_backref_for_extent(&ptr, eb, &found_key, ei,
773 item_size, &ref_root,
774 &ref_level);
775 btrfs_warn_in_rcu(fs_info,
776"%s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu",
777 errstr, swarn.logical,
778 rcu_str_deref(dev->name),
779 swarn.physical,
780 ref_level ? "node" : "leaf",
781 ret < 0 ? -1 : ref_level,
782 ret < 0 ? -1 : ref_root);
783 } while (ret != 1);
784 btrfs_release_path(path);
785 } else {
786 btrfs_release_path(path);
787 swarn.path = path;
788 swarn.dev = dev;
789 iterate_extent_inodes(fs_info, found_key.objectid,
790 extent_item_pos, 1,
791 scrub_print_warning_inode, &swarn, false);
792 }
793
794out:
795 btrfs_free_path(path);
796}
797
798static inline void scrub_get_recover(struct scrub_recover *recover)
799{
800 refcount_inc(&recover->refs);
801}
802
803static inline void scrub_put_recover(struct btrfs_fs_info *fs_info,
804 struct scrub_recover *recover)
805{
806 if (refcount_dec_and_test(&recover->refs)) {
807 btrfs_bio_counter_dec(fs_info);
808 btrfs_put_bbio(recover->bbio);
809 kfree(recover);
810 }
811}
812
/*
 * scrub_handle_errored_block gets called when either verification of the
 * pages failed or the bio failed to read, e.g. with EIO. In the latter
 * case, this function handles all pages in the bio, even though only one
 * may be bad.
 * The goal of this function is to repair the errored block by using the
 * contents of one of the mirrors.
 */
821static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
822{
823 struct scrub_ctx *sctx = sblock_to_check->sctx;
824 struct btrfs_device *dev;
825 struct btrfs_fs_info *fs_info;
826 u64 logical;
827 unsigned int failed_mirror_index;
828 unsigned int is_metadata;
829 unsigned int have_csum;
830 struct scrub_block *sblocks_for_recheck;
831 struct scrub_block *sblock_bad;
832 int ret;
833 int mirror_index;
834 int page_num;
835 int success;
836 bool full_stripe_locked;
837 unsigned int nofs_flag;
838 static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
839 DEFAULT_RATELIMIT_BURST);
840
841 BUG_ON(sblock_to_check->page_count < 1);
842 fs_info = sctx->fs_info;
843 if (sblock_to_check->pagev[0]->flags & BTRFS_EXTENT_FLAG_SUPER) {
		/*
		 * if we find an error in a super block, we just report it.
		 * They will get written with the next transaction commit
		 * anyway
		 */
849 spin_lock(&sctx->stat_lock);
850 ++sctx->stat.super_errors;
851 spin_unlock(&sctx->stat_lock);
852 return 0;
853 }
854 logical = sblock_to_check->pagev[0]->logical;
855 BUG_ON(sblock_to_check->pagev[0]->mirror_num < 1);
856 failed_mirror_index = sblock_to_check->pagev[0]->mirror_num - 1;
857 is_metadata = !(sblock_to_check->pagev[0]->flags &
858 BTRFS_EXTENT_FLAG_DATA);
859 have_csum = sblock_to_check->pagev[0]->have_csum;
860 dev = sblock_to_check->pagev[0]->dev;
861
	/*
	 * We must use GFP_NOFS because the scrub task might be waiting for a
	 * worker task executing this function and in turn a transaction commit
	 * might be waiting the scrub task to pause (which needs to wait for all
	 * the worker tasks to complete before pausing).
	 * We do allocations in the workers through insert_full_stripe_lock()
	 * and scrub_add_page_to_wr_bio(), which allocate memory with GFP_NOFS.
	 */
871 nofs_flag = memalloc_nofs_save();
	/*
	 * For RAID5/6, a race can happen with a scrub thread of a different
	 * device: on data corruption, the parity and data threads will both
	 * try to recover the data.
	 * The race can lead to a doubly added csum error, or even an
	 * unrecoverable error.
	 */
879 ret = lock_full_stripe(fs_info, logical, &full_stripe_locked);
880 if (ret < 0) {
881 memalloc_nofs_restore(nofs_flag);
882 spin_lock(&sctx->stat_lock);
883 if (ret == -ENOMEM)
884 sctx->stat.malloc_errors++;
885 sctx->stat.read_errors++;
886 sctx->stat.uncorrectable_errors++;
887 spin_unlock(&sctx->stat_lock);
888 return ret;
889 }

	/*
	 * Read all mirrors one after the other.  This includes re-reading the
	 * block that failed (page by page this time) in order to learn which
	 * pages caused I/O errors and which ones are good, for every mirror.
	 * That way a block can still be repaired when more than one mirror
	 * has I/O errors, as long as the errors do not overlap: the good
	 * pages of the different mirrors are combined into a correct copy.
	 * If the pages of one mirror contain I/O errors, its checksum cannot
	 * be verified; in that case a fully intact mirror is preferred and
	 * the per-page combination is only used as a fallback.
	 */
920 sblocks_for_recheck = kcalloc(BTRFS_MAX_MIRRORS,
921 sizeof(*sblocks_for_recheck), GFP_KERNEL);
922 if (!sblocks_for_recheck) {
923 spin_lock(&sctx->stat_lock);
924 sctx->stat.malloc_errors++;
925 sctx->stat.read_errors++;
926 sctx->stat.uncorrectable_errors++;
927 spin_unlock(&sctx->stat_lock);
928 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
929 goto out;
930 }

	/* setup the context, map the logical blocks and alloc the pages */
933 ret = scrub_setup_recheck_block(sblock_to_check, sblocks_for_recheck);
934 if (ret) {
935 spin_lock(&sctx->stat_lock);
936 sctx->stat.read_errors++;
937 sctx->stat.uncorrectable_errors++;
938 spin_unlock(&sctx->stat_lock);
939 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
940 goto out;
941 }
942 BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS);
943 sblock_bad = sblocks_for_recheck + failed_mirror_index;

	/* build and submit the bios for the failed mirror, check checksums */
946 scrub_recheck_block(fs_info, sblock_bad, 1);
947
948 if (!sblock_bad->header_error && !sblock_bad->checksum_error &&
949 sblock_bad->no_io_error_seen) {
		/*
		 * the error disappeared after reading page by page, or
		 * the area was part of a huge bio and other parts of the
		 * bio caused I/O errors, or the block layer merged several
		 * read requests into one and the error is caused by a
		 * different bio (usually one of the two latter cases)
		 */
958 spin_lock(&sctx->stat_lock);
959 sctx->stat.unverified_errors++;
960 sblock_to_check->data_corrected = 1;
961 spin_unlock(&sctx->stat_lock);
962
963 if (sctx->is_dev_replace)
964 scrub_write_block_to_dev_replace(sblock_bad);
965 goto out;
966 }
967
968 if (!sblock_bad->no_io_error_seen) {
969 spin_lock(&sctx->stat_lock);
970 sctx->stat.read_errors++;
971 spin_unlock(&sctx->stat_lock);
972 if (__ratelimit(&_rs))
973 scrub_print_warning("i/o error", sblock_to_check);
974 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
975 } else if (sblock_bad->checksum_error) {
976 spin_lock(&sctx->stat_lock);
977 sctx->stat.csum_errors++;
978 spin_unlock(&sctx->stat_lock);
979 if (__ratelimit(&_rs))
980 scrub_print_warning("checksum error", sblock_to_check);
981 btrfs_dev_stat_inc_and_print(dev,
982 BTRFS_DEV_STAT_CORRUPTION_ERRS);
983 } else if (sblock_bad->header_error) {
984 spin_lock(&sctx->stat_lock);
985 sctx->stat.verify_errors++;
986 spin_unlock(&sctx->stat_lock);
987 if (__ratelimit(&_rs))
988 scrub_print_warning("checksum/header error",
989 sblock_to_check);
990 if (sblock_bad->generation_error)
991 btrfs_dev_stat_inc_and_print(dev,
992 BTRFS_DEV_STAT_GENERATION_ERRS);
993 else
994 btrfs_dev_stat_inc_and_print(dev,
995 BTRFS_DEV_STAT_CORRUPTION_ERRS);
996 }
997
998 if (sctx->readonly) {
999 ASSERT(!sctx->is_dev_replace);
1000 goto out;
1001 }
1002
	/*
	 * Now build and submit the bios for the other mirrors and check their
	 * checksums.  First try to pick a mirror that is completely free of
	 * I/O and checksum errors; if one is found, the whole bad block is
	 * rewritten from it (repair) or written to the replace target
	 * (dev-replace).  Only if no such mirror exists is the block repaired
	 * page by page further below.
	 */
1018 for (mirror_index = 0; ;mirror_index++) {
1019 struct scrub_block *sblock_other;
1020
1021 if (mirror_index == failed_mirror_index)
1022 continue;

		/* raid56's mirror can be more than BTRFS_MAX_MIRRORS */
1025 if (!scrub_is_page_on_raid56(sblock_bad->pagev[0])) {
1026 if (mirror_index >= BTRFS_MAX_MIRRORS)
1027 break;
1028 if (!sblocks_for_recheck[mirror_index].page_count)
1029 break;
1030
1031 sblock_other = sblocks_for_recheck + mirror_index;
1032 } else {
1033 struct scrub_recover *r = sblock_bad->pagev[0]->recover;
1034 int max_allowed = r->bbio->num_stripes -
1035 r->bbio->num_tgtdevs;
1036
1037 if (mirror_index >= max_allowed)
1038 break;
1039 if (!sblocks_for_recheck[1].page_count)
1040 break;
1041
1042 ASSERT(failed_mirror_index == 0);
1043 sblock_other = sblocks_for_recheck + 1;
1044 sblock_other->pagev[0]->mirror_num = 1 + mirror_index;
1045 }

		/* build and submit the bios, check checksums */
1048 scrub_recheck_block(fs_info, sblock_other, 0);
1049
1050 if (!sblock_other->header_error &&
1051 !sblock_other->checksum_error &&
1052 sblock_other->no_io_error_seen) {
1053 if (sctx->is_dev_replace) {
1054 scrub_write_block_to_dev_replace(sblock_other);
1055 goto corrected_error;
1056 } else {
1057 ret = scrub_repair_block_from_good_copy(
1058 sblock_bad, sblock_other);
1059 if (!ret)
1060 goto corrected_error;
1061 }
1062 }
1063 }
1064
1065 if (sblock_bad->no_io_error_seen && !sctx->is_dev_replace)
1066 goto did_not_correct_error;

	/*
	 * In case of I/O errors in the area that is supposed to be repaired,
	 * continue by picking good pages from the other mirrors: for every
	 * bad page, select a mirror whose copy of that page read without
	 * error and rewrite the bad page from it.  Afterwards the checksum
	 * of the repaired block is verified again (further below), which
	 * decides whether the error is counted as corrected or
	 * uncorrectable.
	 * In dev-replace mode every page is written to the target device,
	 * falling back to the (possibly zeroed) bad page when no good copy
	 * exists.
	 */
1092 success = 1;
1093 for (page_num = 0; page_num < sblock_bad->page_count;
1094 page_num++) {
1095 struct scrub_page *page_bad = sblock_bad->pagev[page_num];
1096 struct scrub_block *sblock_other = NULL;
1097
		/* skip pages without I/O error when not doing dev-replace */
1099 if (!page_bad->io_error && !sctx->is_dev_replace)
1100 continue;
1101
1102 if (scrub_is_page_on_raid56(sblock_bad->pagev[0])) {
			/*
			 * In case of dev replace, if the raid56 rebuild
			 * process didn't work out correct data, then copy the
			 * content in sblock_bad to make sure the target
			 * device is identical to the source device, instead
			 * of writing garbage data in sblock_good to the
			 * target device.
			 */
1110 sblock_other = NULL;
1111 } else if (page_bad->io_error) {
			/* try to find a mirror without an I/O error on this page */
1113 for (mirror_index = 0;
1114 mirror_index < BTRFS_MAX_MIRRORS &&
1115 sblocks_for_recheck[mirror_index].page_count > 0;
1116 mirror_index++) {
1117 if (!sblocks_for_recheck[mirror_index].
1118 pagev[page_num]->io_error) {
1119 sblock_other = sblocks_for_recheck +
1120 mirror_index;
1121 break;
1122 }
1123 }
1124 if (!sblock_other)
1125 success = 0;
1126 }
1127
1128 if (sctx->is_dev_replace) {
			/*
			 * did not find a mirror to fetch the page
			 * from. scrub_write_page_to_dev_replace()
			 * handles this case (page->io_error), by
			 * filling the block with zeros before
			 * submitting the write request
			 */
1136 if (!sblock_other)
1137 sblock_other = sblock_bad;
1138
1139 if (scrub_write_page_to_dev_replace(sblock_other,
1140 page_num) != 0) {
1141 atomic64_inc(
1142 &fs_info->dev_replace.num_write_errors);
1143 success = 0;
1144 }
1145 } else if (sblock_other) {
1146 ret = scrub_repair_page_from_good_copy(sblock_bad,
1147 sblock_other,
1148 page_num, 0);
1149 if (0 == ret)
1150 page_bad->io_error = 0;
1151 else
1152 success = 0;
1153 }
1154 }
1155
1156 if (success && !sctx->is_dev_replace) {
1157 if (is_metadata || have_csum) {
			/*
			 * need to verify the checksum now that all
			 * sectors on disk are repaired (the write
			 * request for data to be repaired is on its way).
			 * Just be lazy and use scrub_recheck_block()
			 * which re-reads the data before the checksum
			 * is verified, but most likely the data comes out
			 * of the page cache.
			 */
1167 scrub_recheck_block(fs_info, sblock_bad, 1);
1168 if (!sblock_bad->header_error &&
1169 !sblock_bad->checksum_error &&
1170 sblock_bad->no_io_error_seen)
1171 goto corrected_error;
1172 else
1173 goto did_not_correct_error;
1174 } else {
1175corrected_error:
1176 spin_lock(&sctx->stat_lock);
1177 sctx->stat.corrected_errors++;
1178 sblock_to_check->data_corrected = 1;
1179 spin_unlock(&sctx->stat_lock);
1180 btrfs_err_rl_in_rcu(fs_info,
1181 "fixed up error at logical %llu on dev %s",
1182 logical, rcu_str_deref(dev->name));
1183 }
1184 } else {
1185did_not_correct_error:
1186 spin_lock(&sctx->stat_lock);
1187 sctx->stat.uncorrectable_errors++;
1188 spin_unlock(&sctx->stat_lock);
1189 btrfs_err_rl_in_rcu(fs_info,
1190 "unable to fixup (regular) error at logical %llu on dev %s",
1191 logical, rcu_str_deref(dev->name));
1192 }
1193
1194out:
1195 if (sblocks_for_recheck) {
1196 for (mirror_index = 0; mirror_index < BTRFS_MAX_MIRRORS;
1197 mirror_index++) {
1198 struct scrub_block *sblock = sblocks_for_recheck +
1199 mirror_index;
1200 struct scrub_recover *recover;
1201 int page_index;
1202
1203 for (page_index = 0; page_index < sblock->page_count;
1204 page_index++) {
1205 sblock->pagev[page_index]->sblock = NULL;
1206 recover = sblock->pagev[page_index]->recover;
1207 if (recover) {
1208 scrub_put_recover(fs_info, recover);
1209 sblock->pagev[page_index]->recover =
1210 NULL;
1211 }
1212 scrub_page_put(sblock->pagev[page_index]);
1213 }
1214 }
1215 kfree(sblocks_for_recheck);
1216 }
1217
1218 ret = unlock_full_stripe(fs_info, logical, full_stripe_locked);
1219 memalloc_nofs_restore(nofs_flag);
1220 if (ret < 0)
1221 return ret;
1222 return 0;
1223}
1224
1225static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio)
1226{
1227 if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
1228 return 2;
1229 else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6)
1230 return 3;
1231 else
1232 return (int)bbio->num_stripes;
1233}
1234
1235static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type,
1236 u64 *raid_map,
1237 u64 mapped_length,
1238 int nstripes, int mirror,
1239 int *stripe_index,
1240 u64 *stripe_offset)
1241{
1242 int i;
1243
1244 if (map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
		/* RAID5/6 */
1246 for (i = 0; i < nstripes; i++) {
1247 if (raid_map[i] == RAID6_Q_STRIPE ||
1248 raid_map[i] == RAID5_P_STRIPE)
1249 continue;
1250
1251 if (logical >= raid_map[i] &&
1252 logical < raid_map[i] + mapped_length)
1253 break;
1254 }
1255
1256 *stripe_index = i;
1257 *stripe_offset = logical - raid_map[i];
1258 } else {
		/* The other RAID types */
1260 *stripe_index = mirror;
1261 *stripe_offset = 0;
1262 }
1263}
1264
1265static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
1266 struct scrub_block *sblocks_for_recheck)
1267{
1268 struct scrub_ctx *sctx = original_sblock->sctx;
1269 struct btrfs_fs_info *fs_info = sctx->fs_info;
1270 u64 length = original_sblock->page_count * PAGE_SIZE;
1271 u64 logical = original_sblock->pagev[0]->logical;
1272 u64 generation = original_sblock->pagev[0]->generation;
1273 u64 flags = original_sblock->pagev[0]->flags;
1274 u64 have_csum = original_sblock->pagev[0]->have_csum;
1275 struct scrub_recover *recover;
1276 struct btrfs_bio *bbio;
1277 u64 sublen;
1278 u64 mapped_length;
1279 u64 stripe_offset;
1280 int stripe_index;
1281 int page_index = 0;
1282 int mirror_index;
1283 int nmirrors;
1284 int ret;
1285
	/*
	 * note: the two members refs and outstanding_pages
	 * are not used (and not set) in the blocks that are used for
	 * the recheck procedure
	 */
1292 while (length > 0) {
1293 sublen = min_t(u64, length, PAGE_SIZE);
1294 mapped_length = sublen;
1295 bbio = NULL;
1296
		/*
		 * with a length of PAGE_SIZE, each returned stripe
		 * represents one mirror
		 */
1301 btrfs_bio_counter_inc_blocked(fs_info);
1302 ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
1303 logical, &mapped_length, &bbio);
1304 if (ret || !bbio || mapped_length < sublen) {
1305 btrfs_put_bbio(bbio);
1306 btrfs_bio_counter_dec(fs_info);
1307 return -EIO;
1308 }
1309
1310 recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS);
1311 if (!recover) {
1312 btrfs_put_bbio(bbio);
1313 btrfs_bio_counter_dec(fs_info);
1314 return -ENOMEM;
1315 }
1316
1317 refcount_set(&recover->refs, 1);
1318 recover->bbio = bbio;
1319 recover->map_length = mapped_length;
1320
1321 BUG_ON(page_index >= SCRUB_MAX_PAGES_PER_BLOCK);
1322
1323 nmirrors = min(scrub_nr_raid_mirrors(bbio), BTRFS_MAX_MIRRORS);
1324
1325 for (mirror_index = 0; mirror_index < nmirrors;
1326 mirror_index++) {
1327 struct scrub_block *sblock;
1328 struct scrub_page *page;
1329
1330 sblock = sblocks_for_recheck + mirror_index;
1331 sblock->sctx = sctx;
1332
1333 page = kzalloc(sizeof(*page), GFP_NOFS);
1334 if (!page) {
1335leave_nomem:
1336 spin_lock(&sctx->stat_lock);
1337 sctx->stat.malloc_errors++;
1338 spin_unlock(&sctx->stat_lock);
1339 scrub_put_recover(fs_info, recover);
1340 return -ENOMEM;
1341 }
1342 scrub_page_get(page);
1343 sblock->pagev[page_index] = page;
1344 page->sblock = sblock;
1345 page->flags = flags;
1346 page->generation = generation;
1347 page->logical = logical;
1348 page->have_csum = have_csum;
1349 if (have_csum)
1350 memcpy(page->csum,
1351 original_sblock->pagev[0]->csum,
1352 sctx->csum_size);
1353
1354 scrub_stripe_index_and_offset(logical,
1355 bbio->map_type,
1356 bbio->raid_map,
1357 mapped_length,
1358 bbio->num_stripes -
1359 bbio->num_tgtdevs,
1360 mirror_index,
1361 &stripe_index,
1362 &stripe_offset);
1363 page->physical = bbio->stripes[stripe_index].physical +
1364 stripe_offset;
1365 page->dev = bbio->stripes[stripe_index].dev;
1366
1367 BUG_ON(page_index >= original_sblock->page_count);
1368 page->physical_for_dev_replace =
1369 original_sblock->pagev[page_index]->
1370 physical_for_dev_replace;
1371
1372 page->mirror_num = mirror_index + 1;
1373 sblock->page_count++;
1374 page->page = alloc_page(GFP_NOFS);
1375 if (!page->page)
1376 goto leave_nomem;
1377
1378 scrub_get_recover(recover);
1379 page->recover = recover;
1380 }
1381 scrub_put_recover(fs_info, recover);
1382 length -= sublen;
1383 logical += sublen;
1384 page_index++;
1385 }
1386
1387 return 0;
1388}
1389
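/*
 * Synchronous RAID56 recovery: scrub_submit_raid56_bio_wait() hands the bio
 * to raid56_parity_recover() and sleeps until the completion fires in
 * scrub_bio_wait_endio().
 */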
1390static void scrub_bio_wait_endio(struct bio *bio)
1391{
1392 complete(bio->bi_private);
1393}
1394
1395static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
1396 struct bio *bio,
1397 struct scrub_page *page)
1398{
1399 DECLARE_COMPLETION_ONSTACK(done);
1400 int ret;
1401 int mirror_num;
1402
1403 bio->bi_iter.bi_sector = page->logical >> 9;
1404 bio->bi_private = &done;
1405 bio->bi_end_io = scrub_bio_wait_endio;
1406
1407 mirror_num = page->sblock->pagev[0]->mirror_num;
1408 ret = raid56_parity_recover(fs_info, bio, page->recover->bbio,
1409 page->recover->map_length,
1410 mirror_num, 0);
1411 if (ret)
1412 return ret;
1413
1414 wait_for_completion_io(&done);
1415 return blk_status_to_errno(bio->bi_status);
1416}
1417
1418static void scrub_recheck_block_on_raid56(struct btrfs_fs_info *fs_info,
1419 struct scrub_block *sblock)
1420{
1421 struct scrub_page *first_page = sblock->pagev[0];
1422 struct bio *bio;
1423 int page_num;

	/* All pages in sblock belong to the same stripe on the same device. */
1426 ASSERT(first_page->dev);
1427 if (!first_page->dev->bdev)
1428 goto out;
1429
1430 bio = btrfs_io_bio_alloc(BIO_MAX_PAGES);
1431 bio_set_dev(bio, first_page->dev->bdev);
1432
1433 for (page_num = 0; page_num < sblock->page_count; page_num++) {
1434 struct scrub_page *page = sblock->pagev[page_num];
1435
1436 WARN_ON(!page->page);
1437 bio_add_page(bio, page->page, PAGE_SIZE, 0);
1438 }
1439
1440 if (scrub_submit_raid56_bio_wait(fs_info, bio, first_page)) {
1441 bio_put(bio);
1442 goto out;
1443 }
1444
1445 bio_put(bio);
1446
1447 scrub_recheck_block_checksum(sblock);
1448
1449 return;
1450out:
1451 for (page_num = 0; page_num < sblock->page_count; page_num++)
1452 sblock->pagev[page_num]->io_error = 1;
1453
1454 sblock->no_io_error_seen = 0;
1455}
1456
/*
 * this function will check the on disk data for checksum errors, header
 * errors and read I/O errors. If any I/O errors happen, the exact pages
 * which are errored are marked as being bad. The goal of this function
 * is to know which pages are errored in the blocks that are requested
 * (the request is implicit in the sblock).
 */
1464static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
1465 struct scrub_block *sblock,
1466 int retry_failed_mirror)
1467{
1468 int page_num;
1469
1470 sblock->no_io_error_seen = 1;

	/* short cut for raid56 */
1473 if (!retry_failed_mirror && scrub_is_page_on_raid56(sblock->pagev[0]))
1474 return scrub_recheck_block_on_raid56(fs_info, sblock);
1475
1476 for (page_num = 0; page_num < sblock->page_count; page_num++) {
1477 struct bio *bio;
1478 struct scrub_page *page = sblock->pagev[page_num];
1479
1480 if (page->dev->bdev == NULL) {
1481 page->io_error = 1;
1482 sblock->no_io_error_seen = 0;
1483 continue;
1484 }
1485
1486 WARN_ON(!page->page);
1487 bio = btrfs_io_bio_alloc(1);
1488 bio_set_dev(bio, page->dev->bdev);
1489
1490 bio_add_page(bio, page->page, PAGE_SIZE, 0);
1491 bio->bi_iter.bi_sector = page->physical >> 9;
1492 bio->bi_opf = REQ_OP_READ;
1493
1494 if (btrfsic_submit_bio_wait(bio)) {
1495 page->io_error = 1;
1496 sblock->no_io_error_seen = 0;
1497 }
1498
1499 bio_put(bio);
1500 }
1501
1502 if (sblock->no_io_error_seen)
1503 scrub_recheck_block_checksum(sblock);
1504}
1505
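/* Returns 1 if @fsid matches the fsid of the device the page was read from. */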
1506static inline int scrub_check_fsid(u8 fsid[],
1507 struct scrub_page *spage)
1508{
1509 struct btrfs_fs_devices *fs_devices = spage->dev->fs_devices;
1510 int ret;
1511
1512 ret = memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
1513 return !ret;
1514}
1515
1516static void scrub_recheck_block_checksum(struct scrub_block *sblock)
1517{
1518 sblock->header_error = 0;
1519 sblock->checksum_error = 0;
1520 sblock->generation_error = 0;
1521
1522 if (sblock->pagev[0]->flags & BTRFS_EXTENT_FLAG_DATA)
1523 scrub_checksum_data(sblock);
1524 else
1525 scrub_checksum_tree_block(sblock);
1526}
1527
1528static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
1529 struct scrub_block *sblock_good)
1530{
1531 int page_num;
1532 int ret = 0;
1533
1534 for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
1535 int ret_sub;
1536
1537 ret_sub = scrub_repair_page_from_good_copy(sblock_bad,
1538 sblock_good,
1539 page_num, 1);
1540 if (ret_sub)
1541 ret = ret_sub;
1542 }
1543
1544 return ret;
1545}
1546
1547static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
1548 struct scrub_block *sblock_good,
1549 int page_num, int force_write)
1550{
1551 struct scrub_page *page_bad = sblock_bad->pagev[page_num];
1552 struct scrub_page *page_good = sblock_good->pagev[page_num];
1553 struct btrfs_fs_info *fs_info = sblock_bad->sctx->fs_info;
1554
1555 BUG_ON(page_bad->page == NULL);
1556 BUG_ON(page_good->page == NULL);
1557 if (force_write || sblock_bad->header_error ||
1558 sblock_bad->checksum_error || page_bad->io_error) {
1559 struct bio *bio;
1560 int ret;
1561
1562 if (!page_bad->dev->bdev) {
1563 btrfs_warn_rl(fs_info,
1564 "scrub_repair_page_from_good_copy(bdev == NULL) is unexpected");
1565 return -EIO;
1566 }
1567
1568 bio = btrfs_io_bio_alloc(1);
1569 bio_set_dev(bio, page_bad->dev->bdev);
1570 bio->bi_iter.bi_sector = page_bad->physical >> 9;
1571 bio->bi_opf = REQ_OP_WRITE;
1572
1573 ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
1574 if (PAGE_SIZE != ret) {
1575 bio_put(bio);
1576 return -EIO;
1577 }
1578
1579 if (btrfsic_submit_bio_wait(bio)) {
1580 btrfs_dev_stat_inc_and_print(page_bad->dev,
1581 BTRFS_DEV_STAT_WRITE_ERRS);
1582 atomic64_inc(&fs_info->dev_replace.num_write_errors);
1583 bio_put(bio);
1584 return -EIO;
1585 }
1586 bio_put(bio);
1587 }
1588
1589 return 0;
1590}
1591
1592static void scrub_write_block_to_dev_replace(struct scrub_block *sblock)
1593{
1594 struct btrfs_fs_info *fs_info = sblock->sctx->fs_info;
1595 int page_num;
1596
	/*
	 * This block is used for the check of the parity on the source device,
	 * so the data needn't be written into the destination device.
	 */
1601 if (sblock->sparity)
1602 return;
1603
1604 for (page_num = 0; page_num < sblock->page_count; page_num++) {
1605 int ret;
1606
1607 ret = scrub_write_page_to_dev_replace(sblock, page_num);
1608 if (ret)
1609 atomic64_inc(&fs_info->dev_replace.num_write_errors);
1610 }
1611}
1612
1613static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
1614 int page_num)
1615{
1616 struct scrub_page *spage = sblock->pagev[page_num];
1617
1618 BUG_ON(spage->page == NULL);
1619 if (spage->io_error) {
1620 void *mapped_buffer = kmap_atomic(spage->page);
1621
1622 clear_page(mapped_buffer);
1623 flush_dcache_page(spage->page);
1624 kunmap_atomic(mapped_buffer);
1625 }
1626 return scrub_add_page_to_wr_bio(sblock->sctx, spage);
1627}
1628
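/*
 * Queue @spage for writing to the dev-replace target.  Pages are batched into
 * wr_curr_bio as long as they are physically and logically contiguous; a full
 * or non-contiguous bio is submitted via scrub_wr_submit().
 */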
1629static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
1630 struct scrub_page *spage)
1631{
1632 struct scrub_bio *sbio;
1633 int ret;
1634
1635 mutex_lock(&sctx->wr_lock);
1636again:
1637 if (!sctx->wr_curr_bio) {
1638 sctx->wr_curr_bio = kzalloc(sizeof(*sctx->wr_curr_bio),
1639 GFP_KERNEL);
1640 if (!sctx->wr_curr_bio) {
1641 mutex_unlock(&sctx->wr_lock);
1642 return -ENOMEM;
1643 }
1644 sctx->wr_curr_bio->sctx = sctx;
1645 sctx->wr_curr_bio->page_count = 0;
1646 }
1647 sbio = sctx->wr_curr_bio;
1648 if (sbio->page_count == 0) {
1649 struct bio *bio;
1650
1651 sbio->physical = spage->physical_for_dev_replace;
1652 sbio->logical = spage->logical;
1653 sbio->dev = sctx->wr_tgtdev;
1654 bio = sbio->bio;
1655 if (!bio) {
1656 bio = btrfs_io_bio_alloc(sctx->pages_per_wr_bio);
1657 sbio->bio = bio;
1658 }
1659
1660 bio->bi_private = sbio;
1661 bio->bi_end_io = scrub_wr_bio_end_io;
1662 bio_set_dev(bio, sbio->dev->bdev);
1663 bio->bi_iter.bi_sector = sbio->physical >> 9;
1664 bio->bi_opf = REQ_OP_WRITE;
1665 sbio->status = 0;
1666 } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
1667 spage->physical_for_dev_replace ||
1668 sbio->logical + sbio->page_count * PAGE_SIZE !=
1669 spage->logical) {
1670 scrub_wr_submit(sctx);
1671 goto again;
1672 }
1673
1674 ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
1675 if (ret != PAGE_SIZE) {
1676 if (sbio->page_count < 1) {
1677 bio_put(sbio->bio);
1678 sbio->bio = NULL;
1679 mutex_unlock(&sctx->wr_lock);
1680 return -EIO;
1681 }
1682 scrub_wr_submit(sctx);
1683 goto again;
1684 }
1685
1686 sbio->pagev[sbio->page_count] = spage;
1687 scrub_page_get(spage);
1688 sbio->page_count++;
1689 if (sbio->page_count == sctx->pages_per_wr_bio)
1690 scrub_wr_submit(sctx);
1691 mutex_unlock(&sctx->wr_lock);
1692
1693 return 0;
1694}
1695
1696static void scrub_wr_submit(struct scrub_ctx *sctx)
1697{
1698 struct scrub_bio *sbio;
1699
1700 if (!sctx->wr_curr_bio)
1701 return;
1702
1703 sbio = sctx->wr_curr_bio;
1704 sctx->wr_curr_bio = NULL;
1705 WARN_ON(!sbio->bio->bi_disk);
1706 scrub_pending_bio_inc(sctx);
	/* process all writes in a single worker thread. Then the block layer
	 * orders the requests before sending them to the driver which
	 * doubled the write performance on spinning disks when measured
	 * with Linux 3.5 */
1711 btrfsic_submit_bio(sbio->bio);
1712}
1713
1714static void scrub_wr_bio_end_io(struct bio *bio)
1715{
1716 struct scrub_bio *sbio = bio->bi_private;
1717 struct btrfs_fs_info *fs_info = sbio->dev->fs_info;
1718
1719 sbio->status = bio->bi_status;
1720 sbio->bio = bio;
1721
1722 btrfs_init_work(&sbio->work, scrub_wr_bio_end_io_worker, NULL, NULL);
1723 btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work);
1724}
1725
1726static void scrub_wr_bio_end_io_worker(struct btrfs_work *work)
1727{
1728 struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
1729 struct scrub_ctx *sctx = sbio->sctx;
1730 int i;
1731
1732 WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO);
1733 if (sbio->status) {
1734 struct btrfs_dev_replace *dev_replace =
1735 &sbio->sctx->fs_info->dev_replace;
1736
1737 for (i = 0; i < sbio->page_count; i++) {
1738 struct scrub_page *spage = sbio->pagev[i];
1739
1740 spage->io_error = 1;
1741 atomic64_inc(&dev_replace->num_write_errors);
1742 }
1743 }
1744
1745 for (i = 0; i < sbio->page_count; i++)
1746 scrub_page_put(sbio->pagev[i]);
1747
1748 bio_put(sbio->bio);
1749 kfree(sbio);
1750 scrub_pending_bio_dec(sctx);
1751}
1752
1753static int scrub_checksum(struct scrub_block *sblock)
1754{
1755 u64 flags;
1756 int ret;
1757
	/*
	 * No need to initialize these stats currently,
	 * because this function only uses the return value
	 * instead of these stats values.
	 *
	 * Todo:
	 * check these stats and remove them.
	 */
1766 sblock->header_error = 0;
1767 sblock->generation_error = 0;
1768 sblock->checksum_error = 0;
1769
1770 WARN_ON(sblock->page_count < 1);
1771 flags = sblock->pagev[0]->flags;
1772 ret = 0;
1773 if (flags & BTRFS_EXTENT_FLAG_DATA)
1774 ret = scrub_checksum_data(sblock);
1775 else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
1776 ret = scrub_checksum_tree_block(sblock);
1777 else if (flags & BTRFS_EXTENT_FLAG_SUPER)
1778 (void)scrub_checksum_super(sblock);
1779 else
1780 WARN_ON(1);
1781 if (ret)
1782 scrub_handle_errored_block(sblock);
1783
1784 return ret;
1785}
1786
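/*
 * If the block carries a csum, recompute the checksum over its data and
 * compare with the csum stored in pagev[0]; checksum_error is set and
 * returned on mismatch.
 */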
1787static int scrub_checksum_data(struct scrub_block *sblock)
1788{
1789 struct scrub_ctx *sctx = sblock->sctx;
1790 struct btrfs_fs_info *fs_info = sctx->fs_info;
1791 SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
1792 u8 csum[BTRFS_CSUM_SIZE];
1793 u8 *on_disk_csum;
1794 struct page *page;
1795 void *buffer;
1796 u64 len;
1797 int index;
1798
1799 BUG_ON(sblock->page_count < 1);
1800 if (!sblock->pagev[0]->have_csum)
1801 return 0;
1802
1803 shash->tfm = fs_info->csum_shash;
1804 crypto_shash_init(shash);
1805
1806 on_disk_csum = sblock->pagev[0]->csum;
1807 page = sblock->pagev[0]->page;
1808 buffer = kmap_atomic(page);
1809
1810 len = sctx->fs_info->sectorsize;
1811 index = 0;
1812 for (;;) {
1813 u64 l = min_t(u64, len, PAGE_SIZE);
1814
1815 crypto_shash_update(shash, buffer, l);
1816 kunmap_atomic(buffer);
1817 len -= l;
1818 if (len == 0)
1819 break;
1820 index++;
1821 BUG_ON(index >= sblock->page_count);
1822 BUG_ON(!sblock->pagev[index]->page);
1823 page = sblock->pagev[index]->page;
1824 buffer = kmap_atomic(page);
1825 }
1826
1827 crypto_shash_final(shash, csum);
1828 if (memcmp(csum, on_disk_csum, sctx->csum_size))
1829 sblock->checksum_error = 1;
1830
1831 return sblock->checksum_error;
1832}
1833
1834static int scrub_checksum_tree_block(struct scrub_block *sblock)
1835{
1836 struct scrub_ctx *sctx = sblock->sctx;
1837 struct btrfs_header *h;
1838 struct btrfs_fs_info *fs_info = sctx->fs_info;
1839 SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
1840 u8 calculated_csum[BTRFS_CSUM_SIZE];
1841 u8 on_disk_csum[BTRFS_CSUM_SIZE];
1842 struct page *page;
1843 void *mapped_buffer;
1844 u64 mapped_size;
1845 void *p;
1846 u64 len;
1847 int index;
1848
1849 shash->tfm = fs_info->csum_shash;
1850 crypto_shash_init(shash);
1851
1852 BUG_ON(sblock->page_count < 1);
1853 page = sblock->pagev[0]->page;
1854 mapped_buffer = kmap_atomic(page);
1855 h = (struct btrfs_header *)mapped_buffer;
1856 memcpy(on_disk_csum, h->csum, sctx->csum_size);
1857
	/*
	 * we don't use the getter functions here, as we
	 * a) don't have an extent buffer and
	 * b) the page is already kmapped
	 */
1863 if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h))
1864 sblock->header_error = 1;
1865
1866 if (sblock->pagev[0]->generation != btrfs_stack_header_generation(h)) {
1867 sblock->header_error = 1;
1868 sblock->generation_error = 1;
1869 }
1870
1871 if (!scrub_check_fsid(h->fsid, sblock->pagev[0]))
1872 sblock->header_error = 1;
1873
1874 if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
1875 BTRFS_UUID_SIZE))
1876 sblock->header_error = 1;
1877
1878 len = sctx->fs_info->nodesize - BTRFS_CSUM_SIZE;
1879 mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
1880 p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
1881 index = 0;
1882 for (;;) {
1883 u64 l = min_t(u64, len, mapped_size);
1884
1885 crypto_shash_update(shash, p, l);
1886 kunmap_atomic(mapped_buffer);
1887 len -= l;
1888 if (len == 0)
1889 break;
1890 index++;
1891 BUG_ON(index >= sblock->page_count);
1892 BUG_ON(!sblock->pagev[index]->page);
1893 page = sblock->pagev[index]->page;
1894 mapped_buffer = kmap_atomic(page);
1895 mapped_size = PAGE_SIZE;
1896 p = mapped_buffer;
1897 }
1898
1899 crypto_shash_final(shash, calculated_csum);
1900 if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
1901 sblock->checksum_error = 1;
1902
1903 return sblock->header_error || sblock->checksum_error;
1904}
1905
1906static int scrub_checksum_super(struct scrub_block *sblock)
1907{
1908 struct btrfs_super_block *s;
1909 struct scrub_ctx *sctx = sblock->sctx;
1910 struct btrfs_fs_info *fs_info = sctx->fs_info;
1911 SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
1912 u8 calculated_csum[BTRFS_CSUM_SIZE];
1913 u8 on_disk_csum[BTRFS_CSUM_SIZE];
1914 struct page *page;
1915 void *mapped_buffer;
1916 u64 mapped_size;
1917 void *p;
1918 int fail_gen = 0;
1919 int fail_cor = 0;
1920 u64 len;
1921 int index;
1922
1923 shash->tfm = fs_info->csum_shash;
1924 crypto_shash_init(shash);
1925
1926 BUG_ON(sblock->page_count < 1);
1927 page = sblock->pagev[0]->page;
1928 mapped_buffer = kmap_atomic(page);
1929 s = (struct btrfs_super_block *)mapped_buffer;
1930 memcpy(on_disk_csum, s->csum, sctx->csum_size);
1931
1932 if (sblock->pagev[0]->logical != btrfs_super_bytenr(s))
1933 ++fail_cor;
1934
1935 if (sblock->pagev[0]->generation != btrfs_super_generation(s))
1936 ++fail_gen;
1937
1938 if (!scrub_check_fsid(s->fsid, sblock->pagev[0]))
1939 ++fail_cor;
1940
1941 len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE;
1942 mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
1943 p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
1944 index = 0;
1945 for (;;) {
1946 u64 l = min_t(u64, len, mapped_size);
1947
1948 crypto_shash_update(shash, p, l);
1949 kunmap_atomic(mapped_buffer);
1950 len -= l;
1951 if (len == 0)
1952 break;
1953 index++;
1954 BUG_ON(index >= sblock->page_count);
1955 BUG_ON(!sblock->pagev[index]->page);
1956 page = sblock->pagev[index]->page;
1957 mapped_buffer = kmap_atomic(page);
1958 mapped_size = PAGE_SIZE;
1959 p = mapped_buffer;
1960 }
1961
1962 crypto_shash_final(shash, calculated_csum);
1963 if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
1964 ++fail_cor;
1965
1966 if (fail_cor + fail_gen) {
		/*
		 * if we find an error in a super block, we just report it.
		 * They will get written with the next transaction commit
		 * anyway
		 */
1972 spin_lock(&sctx->stat_lock);
1973 ++sctx->stat.super_errors;
1974 spin_unlock(&sctx->stat_lock);
1975 if (fail_cor)
1976 btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev,
1977 BTRFS_DEV_STAT_CORRUPTION_ERRS);
1978 else
1979 btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev,
1980 BTRFS_DEV_STAT_GENERATION_ERRS);
1981 }
1982
1983 return fail_cor + fail_gen;
1984}
1985
1986static void scrub_block_get(struct scrub_block *sblock)
1987{
1988 refcount_inc(&sblock->refs);
1989}
1990
1991static void scrub_block_put(struct scrub_block *sblock)
1992{
1993 if (refcount_dec_and_test(&sblock->refs)) {
1994 int i;
1995
1996 if (sblock->sparity)
1997 scrub_parity_put(sblock->sparity);
1998
1999 for (i = 0; i < sblock->page_count; i++)
2000 scrub_page_put(sblock->pagev[i]);
2001 kfree(sblock);
2002 }
2003}
2004
2005static void scrub_page_get(struct scrub_page *spage)
2006{
2007 atomic_inc(&spage->refs);
2008}
2009
2010static void scrub_page_put(struct scrub_page *spage)
2011{
2012 if (atomic_dec_and_test(&spage->refs)) {
2013 if (spage->page)
2014 __free_page(spage->page);
2015 kfree(spage);
2016 }
2017}
2018
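/*
 * Submit the currently accumulated read bio, if any; sctx->curr is reset so
 * the next page grabs a fresh scrub_bio from the free list.
 */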
2019static void scrub_submit(struct scrub_ctx *sctx)
2020{
2021 struct scrub_bio *sbio;
2022
2023 if (sctx->curr == -1)
2024 return;
2025
2026 sbio = sctx->bios[sctx->curr];
2027 sctx->curr = -1;
2028 scrub_pending_bio_inc(sctx);
2029 btrfsic_submit_bio(sbio->bio);
2030}
2031
2032static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
2033 struct scrub_page *spage)
2034{
2035 struct scrub_block *sblock = spage->sblock;
2036 struct scrub_bio *sbio;
2037 int ret;
2038
2039again:
	/*
	 * grab a fresh bio or wait for one to become available
	 */
2043 while (sctx->curr == -1) {
2044 spin_lock(&sctx->list_lock);
2045 sctx->curr = sctx->first_free;
2046 if (sctx->curr != -1) {
2047 sctx->first_free = sctx->bios[sctx->curr]->next_free;
2048 sctx->bios[sctx->curr]->next_free = -1;
2049 sctx->bios[sctx->curr]->page_count = 0;
2050 spin_unlock(&sctx->list_lock);
2051 } else {
2052 spin_unlock(&sctx->list_lock);
2053 wait_event(sctx->list_wait, sctx->first_free != -1);
2054 }
2055 }
2056 sbio = sctx->bios[sctx->curr];
2057 if (sbio->page_count == 0) {
2058 struct bio *bio;
2059
2060 sbio->physical = spage->physical;
2061 sbio->logical = spage->logical;
2062 sbio->dev = spage->dev;
2063 bio = sbio->bio;
2064 if (!bio) {
2065 bio = btrfs_io_bio_alloc(sctx->pages_per_rd_bio);
2066 sbio->bio = bio;
2067 }
2068
2069 bio->bi_private = sbio;
2070 bio->bi_end_io = scrub_bio_end_io;
2071 bio_set_dev(bio, sbio->dev->bdev);
2072 bio->bi_iter.bi_sector = sbio->physical >> 9;
2073 bio->bi_opf = REQ_OP_READ;
2074 sbio->status = 0;
2075 } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
2076 spage->physical ||
2077 sbio->logical + sbio->page_count * PAGE_SIZE !=
2078 spage->logical ||
2079 sbio->dev != spage->dev) {
2080 scrub_submit(sctx);
2081 goto again;
2082 }
2083
2084 sbio->pagev[sbio->page_count] = spage;
2085 ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
2086 if (ret != PAGE_SIZE) {
2087 if (sbio->page_count < 1) {
2088 bio_put(sbio->bio);
2089 sbio->bio = NULL;
2090 return -EIO;
2091 }
2092 scrub_submit(sctx);
2093 goto again;
2094 }
2095
2096 scrub_block_get(sblock);
2097 atomic_inc(&sblock->outstanding_pages);
2098 sbio->page_count++;
2099 if (sbio->page_count == sctx->pages_per_rd_bio)
2100 scrub_submit(sctx);
2101
2102 return 0;
2103}
2104
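/*
 * Completion of the rebuild-from-parity read for a block on a missing device;
 * the rest of the work is deferred to scrub_missing_raid56_worker().
 */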
2105static void scrub_missing_raid56_end_io(struct bio *bio)
2106{
2107 struct scrub_block *sblock = bio->bi_private;
2108 struct btrfs_fs_info *fs_info = sblock->sctx->fs_info;
2109
2110 if (bio->bi_status)
2111 sblock->no_io_error_seen = 0;
2112
2113 bio_put(bio);
2114
2115 btrfs_queue_work(fs_info->scrub_workers, &sblock->work);
2116}
2117
2118static void scrub_missing_raid56_worker(struct btrfs_work *work)
2119{
2120 struct scrub_block *sblock = container_of(work, struct scrub_block, work);
2121 struct scrub_ctx *sctx = sblock->sctx;
2122 struct btrfs_fs_info *fs_info = sctx->fs_info;
2123 u64 logical;
2124 struct btrfs_device *dev;
2125
2126 logical = sblock->pagev[0]->logical;
2127 dev = sblock->pagev[0]->dev;
2128
2129 if (sblock->no_io_error_seen)
2130 scrub_recheck_block_checksum(sblock);
2131
2132 if (!sblock->no_io_error_seen) {
2133 spin_lock(&sctx->stat_lock);
2134 sctx->stat.read_errors++;
2135 spin_unlock(&sctx->stat_lock);
2136 btrfs_err_rl_in_rcu(fs_info,
2137 "IO error rebuilding logical %llu for dev %s",
2138 logical, rcu_str_deref(dev->name));
2139 } else if (sblock->header_error || sblock->checksum_error) {
2140 spin_lock(&sctx->stat_lock);
2141 sctx->stat.uncorrectable_errors++;
2142 spin_unlock(&sctx->stat_lock);
2143 btrfs_err_rl_in_rcu(fs_info,
2144 "failed to rebuild valid logical %llu for dev %s",
2145 logical, rcu_str_deref(dev->name));
2146 } else {
2147 scrub_write_block_to_dev_replace(sblock);
2148 }
2149
2150 if (sctx->is_dev_replace && sctx->flush_all_writes) {
2151 mutex_lock(&sctx->wr_lock);
2152 scrub_wr_submit(sctx);
2153 mutex_unlock(&sctx->wr_lock);
2154 }
2155
2156 scrub_block_put(sblock);
2157 scrub_pending_bio_dec(sctx);
2158}
2159
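/*
 * A block on a missing device cannot be read directly: build a "missing"
 * raid bio so the RAID56 code reconstructs the data from the remaining
 * stripes, then check it and copy it to the replace target in the worker.
 */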
2160static void scrub_missing_raid56_pages(struct scrub_block *sblock)
2161{
2162 struct scrub_ctx *sctx = sblock->sctx;
2163 struct btrfs_fs_info *fs_info = sctx->fs_info;
2164 u64 length = sblock->page_count * PAGE_SIZE;
2165 u64 logical = sblock->pagev[0]->logical;
2166 struct btrfs_bio *bbio = NULL;
2167 struct bio *bio;
2168 struct btrfs_raid_bio *rbio;
2169 int ret;
2170 int i;
2171
2172 btrfs_bio_counter_inc_blocked(fs_info);
2173 ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
2174 &length, &bbio);
2175 if (ret || !bbio || !bbio->raid_map)
2176 goto bbio_out;
2177
2178 if (WARN_ON(!sctx->is_dev_replace ||
2179 !(bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK))) {
		/*
		 * We shouldn't be scrubbing a missing device. Even for dev
		 * replace, we should only get here for RAID 5/6. We either
		 * managed to mount something with no mirrors remaining or
		 * there's a bug in scrub_remap_extent()/btrfs_map_block().
		 */
2186 goto bbio_out;
2187 }
2188
2189 bio = btrfs_io_bio_alloc(0);
2190 bio->bi_iter.bi_sector = logical >> 9;
2191 bio->bi_private = sblock;
2192 bio->bi_end_io = scrub_missing_raid56_end_io;
2193
2194 rbio = raid56_alloc_missing_rbio(fs_info, bio, bbio, length);
2195 if (!rbio)
2196 goto rbio_out;
2197
2198 for (i = 0; i < sblock->page_count; i++) {
2199 struct scrub_page *spage = sblock->pagev[i];
2200
2201 raid56_add_scrub_pages(rbio, spage->page, spage->logical);
2202 }
2203
2204 btrfs_init_work(&sblock->work, scrub_missing_raid56_worker, NULL, NULL);
2205 scrub_block_get(sblock);
2206 scrub_pending_bio_inc(sctx);
2207 raid56_submit_missing_rbio(rbio);
2208 return;
2209
2210rbio_out:
2211 bio_put(bio);
2212bbio_out:
2213 btrfs_bio_counter_dec(fs_info);
2214 btrfs_put_bbio(bbio);
2215 spin_lock(&sctx->stat_lock);
2216 sctx->stat.malloc_errors++;
2217 spin_unlock(&sctx->stat_lock);
2218}
2219
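/*
 * Split the range into PAGE_SIZE pieces, attach them to a new scrub_block and
 * queue them for reading (or hand them to the RAID56 rebuild path when the
 * device is missing).
 */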
2220static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
2221 u64 physical, struct btrfs_device *dev, u64 flags,
2222 u64 gen, int mirror_num, u8 *csum, int force,
2223 u64 physical_for_dev_replace)
2224{
2225 struct scrub_block *sblock;
2226 int index;
2227
2228 sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);
2229 if (!sblock) {
2230 spin_lock(&sctx->stat_lock);
2231 sctx->stat.malloc_errors++;
2232 spin_unlock(&sctx->stat_lock);
2233 return -ENOMEM;
2234 }
2235
	/* one ref inside this function, plus one for each page added to
	 * a bio later on */
2238 refcount_set(&sblock->refs, 1);
2239 sblock->sctx = sctx;
2240 sblock->no_io_error_seen = 1;
2241
2242 for (index = 0; len > 0; index++) {
2243 struct scrub_page *spage;
2244 u64 l = min_t(u64, len, PAGE_SIZE);
2245
2246 spage = kzalloc(sizeof(*spage), GFP_KERNEL);
2247 if (!spage) {
2248leave_nomem:
2249 spin_lock(&sctx->stat_lock);
2250 sctx->stat.malloc_errors++;
2251 spin_unlock(&sctx->stat_lock);
2252 scrub_block_put(sblock);
2253 return -ENOMEM;
2254 }
2255 BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
2256 scrub_page_get(spage);
2257 sblock->pagev[index] = spage;
2258 spage->sblock = sblock;
2259 spage->dev = dev;
2260 spage->flags = flags;
2261 spage->generation = gen;
2262 spage->logical = logical;
2263 spage->physical = physical;
2264 spage->physical_for_dev_replace = physical_for_dev_replace;
2265 spage->mirror_num = mirror_num;
2266 if (csum) {
2267 spage->have_csum = 1;
2268 memcpy(spage->csum, csum, sctx->csum_size);
2269 } else {
2270 spage->have_csum = 0;
2271 }
2272 sblock->page_count++;
2273 spage->page = alloc_page(GFP_KERNEL);
2274 if (!spage->page)
2275 goto leave_nomem;
2276 len -= l;
2277 logical += l;
2278 physical += l;
2279 physical_for_dev_replace += l;
2280 }
2281
2282 WARN_ON(sblock->page_count == 0);
2283 if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) {
		/*
		 * This case should only be hit for RAID 5/6 device replace. See
		 * the comment in scrub_missing_raid56_pages() for details.
		 */
2288 scrub_missing_raid56_pages(sblock);
2289 } else {
2290 for (index = 0; index < sblock->page_count; index++) {
2291 struct scrub_page *spage = sblock->pagev[index];
2292 int ret;
2293
2294 ret = scrub_add_page_to_rd_bio(sctx, spage);
2295 if (ret) {
2296 scrub_block_put(sblock);
2297 return ret;
2298 }
2299 }
2300
2301 if (force)
2302 scrub_submit(sctx);
2303 }
2304
	/* last one frees, either here or in bio completion for last page */
2306 scrub_block_put(sblock);
2307 return 0;
2308}
2309
2310static void scrub_bio_end_io(struct bio *bio)
2311{
2312 struct scrub_bio *sbio = bio->bi_private;
2313 struct btrfs_fs_info *fs_info = sbio->dev->fs_info;
2314
2315 sbio->status = bio->bi_status;
2316 sbio->bio = bio;
2317
2318 btrfs_queue_work(fs_info->scrub_workers, &sbio->work);
2319}
2320
2321static void scrub_bio_end_io_worker(struct btrfs_work *work)
2322{
2323 struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
2324 struct scrub_ctx *sctx = sbio->sctx;
2325 int i;
2326
2327 BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO);
2328 if (sbio->status) {
2329 for (i = 0; i < sbio->page_count; i++) {
2330 struct scrub_page *spage = sbio->pagev[i];
2331
2332 spage->io_error = 1;
2333 spage->sblock->no_io_error_seen = 0;
2334 }
2335 }
2336
	/* Now complete the scrub_block items that have all pages completed */
2338 for (i = 0; i < sbio->page_count; i++) {
2339 struct scrub_page *spage = sbio->pagev[i];
2340 struct scrub_block *sblock = spage->sblock;
2341
2342 if (atomic_dec_and_test(&sblock->outstanding_pages))
2343 scrub_block_complete(sblock);
2344 scrub_block_put(sblock);
2345 }
2346
2347 bio_put(sbio->bio);
2348 sbio->bio = NULL;
2349 spin_lock(&sctx->list_lock);
2350 sbio->next_free = sctx->first_free;
2351 sctx->first_free = sbio->index;
2352 spin_unlock(&sctx->list_lock);
2353
2354 if (sctx->is_dev_replace && sctx->flush_all_writes) {
2355 mutex_lock(&sctx->wr_lock);
2356 scrub_wr_submit(sctx);
2357 mutex_unlock(&sctx->wr_lock);
2358 }
2359
2360 scrub_pending_bio_dec(sctx);
2361}
2362
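/*
 * Mark the sectors covered by [start, start + len) in the given per-stripe
 * bitmap.  If the range wraps past the end of the stripe, the remaining
 * sectors are marked from the beginning of the bitmap.
 */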
2363static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
2364 unsigned long *bitmap,
2365 u64 start, u64 len)
2366{
2367 u64 offset;
2368 u64 nsectors64;
2369 u32 nsectors;
2370 int sectorsize = sparity->sctx->fs_info->sectorsize;
2371
2372 if (len >= sparity->stripe_len) {
2373 bitmap_set(bitmap, 0, sparity->nsectors);
2374 return;
2375 }
2376
2377 start -= sparity->logic_start;
2378 start = div64_u64_rem(start, sparity->stripe_len, &offset);
2379 offset = div_u64(offset, sectorsize);
2380 nsectors64 = div_u64(len, sectorsize);
2381
2382 ASSERT(nsectors64 < UINT_MAX);
2383 nsectors = (u32)nsectors64;
2384
2385 if (offset + nsectors <= sparity->nsectors) {
2386 bitmap_set(bitmap, offset, nsectors);
2387 return;
2388 }
2389
2390 bitmap_set(bitmap, offset, sparity->nsectors - offset);
2391 bitmap_set(bitmap, 0, nsectors - (sparity->nsectors - offset));
2392}
2393
2394static inline void scrub_parity_mark_sectors_error(struct scrub_parity *sparity,
2395 u64 start, u64 len)
2396{
2397 __scrub_mark_bitmap(sparity, sparity->ebitmap, start, len);
2398}
2399
2400static inline void scrub_parity_mark_sectors_data(struct scrub_parity *sparity,
2401 u64 start, u64 len)
2402{
2403 __scrub_mark_bitmap(sparity, sparity->dbitmap, start, len);
2404}
2405
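/*
 * Called once all pages of a scrub_block have completed I/O: kick off repair
 * for I/O errors, verify checksums otherwise, and record bad sectors in the
 * parity error bitmap when scrubbing RAID56 parity.
 */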
2406static void scrub_block_complete(struct scrub_block *sblock)
2407{
2408 int corrupted = 0;
2409
2410 if (!sblock->no_io_error_seen) {
2411 corrupted = 1;
2412 scrub_handle_errored_block(sblock);
2413 } else {
		/*
		 * If there is a checksum error, the block reaches the
		 * dev-replace target via the repair machinery; otherwise,
		 * in the dev-replace case, it is written out here.
		 */
2419 corrupted = scrub_checksum(sblock);
2420 if (!corrupted && sblock->sctx->is_dev_replace)
2421 scrub_write_block_to_dev_replace(sblock);
2422 }
2423
2424 if (sblock->sparity && corrupted && !sblock->data_corrected) {
2425 u64 start = sblock->pagev[0]->logical;
2426 u64 end = sblock->pagev[sblock->page_count - 1]->logical +
2427 PAGE_SIZE;
2428
2429 scrub_parity_mark_sectors_error(sblock->sparity,
2430 start, end - start);
2431 }
2432}
2433
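/*
 * Find the checksum for @logical in sctx->csum_list.  Entries that end before
 * @logical are dropped from the list on the way.  Returns 1 and copies the
 * checksum into @csum when one is found, 0 otherwise.
 */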
2434static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u8 *csum)
2435{
2436 struct btrfs_ordered_sum *sum = NULL;
2437 unsigned long index;
2438 unsigned long num_sectors;
2439
2440 while (!list_empty(&sctx->csum_list)) {
2441 sum = list_first_entry(&sctx->csum_list,
2442 struct btrfs_ordered_sum, list);
2443 if (sum->bytenr > logical)
2444 return 0;
2445 if (sum->bytenr + sum->len > logical)
2446 break;
2447
2448 ++sctx->stat.csum_discards;
2449 list_del(&sum->list);
2450 kfree(sum);
2451 sum = NULL;
2452 }
2453 if (!sum)
2454 return 0;
2455
2456 index = div_u64(logical - sum->bytenr, sctx->fs_info->sectorsize);
2457 ASSERT(index < UINT_MAX);
2458
2459 num_sectors = sum->len / sctx->fs_info->sectorsize;
2460 memcpy(csum, sum->sums + index * sctx->csum_size, sctx->csum_size);
2461 if (index == num_sectors - 1) {
2462 list_del(&sum->list);
2463 kfree(sum);
2464 }
2465 return 1;
2466}
2467
/* Scrub one extent: split it into block sized chunks and queue each of them */
2469static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
2470 u64 logical, u64 len,
2471 u64 physical, struct btrfs_device *dev, u64 flags,
2472 u64 gen, int mirror_num, u64 physical_for_dev_replace)
2473{
2474 int ret;
2475 u8 csum[BTRFS_CSUM_SIZE];
2476 u32 blocksize;
2477
2478 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2479 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
2480 blocksize = map->stripe_len;
2481 else
2482 blocksize = sctx->fs_info->sectorsize;
2483 spin_lock(&sctx->stat_lock);
2484 sctx->stat.data_extents_scrubbed++;
2485 sctx->stat.data_bytes_scrubbed += len;
2486 spin_unlock(&sctx->stat_lock);
2487 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
2488 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
2489 blocksize = map->stripe_len;
2490 else
2491 blocksize = sctx->fs_info->nodesize;
2492 spin_lock(&sctx->stat_lock);
2493 sctx->stat.tree_extents_scrubbed++;
2494 sctx->stat.tree_bytes_scrubbed += len;
2495 spin_unlock(&sctx->stat_lock);
2496 } else {
2497 blocksize = sctx->fs_info->sectorsize;
2498 WARN_ON(1);
2499 }
2500
2501 while (len) {
2502 u64 l = min_t(u64, len, blocksize);
2503 int have_csum = 0;
2504
2505 if (flags & BTRFS_EXTENT_FLAG_DATA) {
			/* Look up the data checksum for this block, if any */
2507 have_csum = scrub_find_csum(sctx, logical, csum);
2508 if (have_csum == 0)
2509 ++sctx->stat.no_csum;
2510 }
2511 ret = scrub_pages(sctx, logical, l, physical, dev, flags, gen,
2512 mirror_num, have_csum ? csum : NULL, 0,
2513 physical_for_dev_replace);
2514 if (ret)
2515 return ret;
2516 len -= l;
2517 logical += l;
2518 physical += l;
2519 physical_for_dev_replace += l;
2520 }
2521 return 0;
2522}
2523
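/*
 * Like scrub_pages(), but for extents inside a RAID56 stripe that is being
 * checked for parity consistency: every page is also linked into the
 * scrub_parity's page list so it can be reused for the parity check.
 */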
2524static int scrub_pages_for_parity(struct scrub_parity *sparity,
2525 u64 logical, u64 len,
2526 u64 physical, struct btrfs_device *dev,
2527 u64 flags, u64 gen, int mirror_num, u8 *csum)
2528{
2529 struct scrub_ctx *sctx = sparity->sctx;
2530 struct scrub_block *sblock;
2531 int index;
2532
2533 sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);
2534 if (!sblock) {
2535 spin_lock(&sctx->stat_lock);
2536 sctx->stat.malloc_errors++;
2537 spin_unlock(&sctx->stat_lock);
2538 return -ENOMEM;
2539 }
2540
	/* one ref inside this function, plus one for each page added to
	 * a bio later on */
2543 refcount_set(&sblock->refs, 1);
2544 sblock->sctx = sctx;
2545 sblock->no_io_error_seen = 1;
2546 sblock->sparity = sparity;
2547 scrub_parity_get(sparity);
2548
2549 for (index = 0; len > 0; index++) {
2550 struct scrub_page *spage;
2551 u64 l = min_t(u64, len, PAGE_SIZE);
2552
2553 spage = kzalloc(sizeof(*spage), GFP_KERNEL);
2554 if (!spage) {
2555leave_nomem:
2556 spin_lock(&sctx->stat_lock);
2557 sctx->stat.malloc_errors++;
2558 spin_unlock(&sctx->stat_lock);
2559 scrub_block_put(sblock);
2560 return -ENOMEM;
2561 }
2562 BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
		/* For scrub block */
2564 scrub_page_get(spage);
2565 sblock->pagev[index] = spage;
		/* For scrub parity */
2567 scrub_page_get(spage);
2568 list_add_tail(&spage->list, &sparity->spages);
2569 spage->sblock = sblock;
2570 spage->dev = dev;
2571 spage->flags = flags;
2572 spage->generation = gen;
2573 spage->logical = logical;
2574 spage->physical = physical;
2575 spage->mirror_num = mirror_num;
2576 if (csum) {
2577 spage->have_csum = 1;
2578 memcpy(spage->csum, csum, sctx->csum_size);
2579 } else {
2580 spage->have_csum = 0;
2581 }
2582 sblock->page_count++;
2583 spage->page = alloc_page(GFP_KERNEL);
2584 if (!spage->page)
2585 goto leave_nomem;
2586 len -= l;
2587 logical += l;
2588 physical += l;
2589 }
2590
2591 WARN_ON(sblock->page_count == 0);
2592 for (index = 0; index < sblock->page_count; index++) {
2593 struct scrub_page *spage = sblock->pagev[index];
2594 int ret;
2595
2596 ret = scrub_add_page_to_rd_bio(sctx, spage);
2597 if (ret) {
2598 scrub_block_put(sblock);
2599 return ret;
2600 }
2601 }
2602
	/* last one frees, either here or in bio completion for last page */
2604 scrub_block_put(sblock);
2605 return 0;
2606}
2607
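/*
 * Scrub one extent that belongs to a RAID56 stripe under parity check.  Data
 * blocks without a checksum are skipped; extents on a missing device are only
 * marked as errors in the parity bitmap.
 */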
2608static int scrub_extent_for_parity(struct scrub_parity *sparity,
2609 u64 logical, u64 len,
2610 u64 physical, struct btrfs_device *dev,
2611 u64 flags, u64 gen, int mirror_num)
2612{
2613 struct scrub_ctx *sctx = sparity->sctx;
2614 int ret;
2615 u8 csum[BTRFS_CSUM_SIZE];
2616 u32 blocksize;
2617
2618 if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) {
2619 scrub_parity_mark_sectors_error(sparity, logical, len);
2620 return 0;
2621 }
2622
2623 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2624 blocksize = sparity->stripe_len;
2625 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
2626 blocksize = sparity->stripe_len;
2627 } else {
2628 blocksize = sctx->fs_info->sectorsize;
2629 WARN_ON(1);
2630 }
2631
2632 while (len) {
2633 u64 l = min_t(u64, len, blocksize);
2634 int have_csum = 0;
2635
2636 if (flags & BTRFS_EXTENT_FLAG_DATA) {
			/* Look up the data checksum for this block, if any */
2638 have_csum = scrub_find_csum(sctx, logical, csum);
2639 if (have_csum == 0)
2640 goto skip;
2641 }
2642 ret = scrub_pages_for_parity(sparity, logical, l, physical, dev,
2643 flags, gen, mirror_num,
2644 have_csum ? csum : NULL);
2645 if (ret)
2646 return ret;
2647skip:
2648 len -= l;
2649 logical += l;
2650 physical += l;
2651 }
2652 return 0;
2653}
2654
/*
 * Given a physical address on stripe @num, work out its logical offset within
 * the RAID5/6 chunk.  If the physical address falls on a parity stripe, the
 * logical offset of the leftmost data stripe of that stripe set is returned.
 *
 * Returns 0 for a data stripe, 1 for a parity stripe.
 */
2662static int get_raid56_logic_offset(u64 physical, int num,
2663 struct map_lookup *map, u64 *offset,
2664 u64 *stripe_start)
2665{
2666 int i;
2667 int j = 0;
2668 u64 stripe_nr;
2669 u64 last_offset;
2670 u32 stripe_index;
2671 u32 rot;
2672 const int data_stripes = nr_data_stripes(map);
2673
2674 last_offset = (physical - map->stripes[num].physical) * data_stripes;
2675 if (stripe_start)
2676 *stripe_start = last_offset;
2677
2678 *offset = last_offset;
2679 for (i = 0; i < data_stripes; i++) {
2680 *offset = last_offset + i * map->stripe_len;
2681
2682 stripe_nr = div64_u64(*offset, map->stripe_len);
2683 stripe_nr = div_u64(stripe_nr, data_stripes);
		/* Work out the disk rotation on this stripe set */
2686 stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, &rot);
		/* Calculate which stripe this data is located on */
2688 rot += i;
2689 stripe_index = rot % map->num_stripes;
2690 if (stripe_index == num)
2691 return 0;
2692 if (stripe_index < num)
2693 j++;
2694 }
2695 *offset = last_offset + j * map->stripe_len;
2696 return 1;
2697}
2698
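/*
 * Release a scrub_parity structure: account any sectors still marked in the
 * error bitmap as unrecoverable read errors and drop the page references
 * held on the parity page list.
 */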
2699static void scrub_free_parity(struct scrub_parity *sparity)
2700{
2701 struct scrub_ctx *sctx = sparity->sctx;
2702 struct scrub_page *curr, *next;
2703 int nbits;
2704
2705 nbits = bitmap_weight(sparity->ebitmap, sparity->nsectors);
2706 if (nbits) {
2707 spin_lock(&sctx->stat_lock);
2708 sctx->stat.read_errors += nbits;
2709 sctx->stat.uncorrectable_errors += nbits;
2710 spin_unlock(&sctx->stat_lock);
2711 }
2712
2713 list_for_each_entry_safe(curr, next, &sparity->spages, list) {
2714 list_del_init(&curr->list);
2715 scrub_page_put(curr);
2716 }
2717
2718 kfree(sparity);
2719}
2720
2721static void scrub_parity_bio_endio_worker(struct btrfs_work *work)
2722{
2723 struct scrub_parity *sparity = container_of(work, struct scrub_parity,
2724 work);
2725 struct scrub_ctx *sctx = sparity->sctx;
2726
2727 scrub_free_parity(sparity);
2728 scrub_pending_bio_dec(sctx);
2729}
2730
2731static void scrub_parity_bio_endio(struct bio *bio)
2732{
2733 struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private;
2734 struct btrfs_fs_info *fs_info = sparity->sctx->fs_info;
2735
2736 if (bio->bi_status)
2737 bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
2738 sparity->nsectors);
2739
2740 bio_put(bio);
2741
2742 btrfs_init_work(&sparity->work, scrub_parity_bio_endio_worker, NULL,
2743 NULL);
2744 btrfs_queue_work(fs_info->scrub_parity_workers, &sparity->work);
2745}
2746
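/*
 * Submit the actual RAID56 parity check/repair for the sectors that were
 * scrubbed without error.  On any failure the whole data bitmap is treated
 * as errored.
 */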
2747static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
2748{
2749 struct scrub_ctx *sctx = sparity->sctx;
2750 struct btrfs_fs_info *fs_info = sctx->fs_info;
2751 struct bio *bio;
2752 struct btrfs_raid_bio *rbio;
2753 struct btrfs_bio *bbio = NULL;
2754 u64 length;
2755 int ret;
2756
2757 if (!bitmap_andnot(sparity->dbitmap, sparity->dbitmap, sparity->ebitmap,
2758 sparity->nsectors))
2759 goto out;
2760
2761 length = sparity->logic_end - sparity->logic_start;
2762
2763 btrfs_bio_counter_inc_blocked(fs_info);
2764 ret = btrfs_map_sblock(fs_info, BTRFS_MAP_WRITE, sparity->logic_start,
2765 &length, &bbio);
2766 if (ret || !bbio || !bbio->raid_map)
2767 goto bbio_out;
2768
2769 bio = btrfs_io_bio_alloc(0);
2770 bio->bi_iter.bi_sector = sparity->logic_start >> 9;
2771 bio->bi_private = sparity;
2772 bio->bi_end_io = scrub_parity_bio_endio;
2773
2774 rbio = raid56_parity_alloc_scrub_rbio(fs_info, bio, bbio,
2775 length, sparity->scrub_dev,
2776 sparity->dbitmap,
2777 sparity->nsectors);
2778 if (!rbio)
2779 goto rbio_out;
2780
2781 scrub_pending_bio_inc(sctx);
2782 raid56_parity_submit_scrub_rbio(rbio);
2783 return;
2784
2785rbio_out:
2786 bio_put(bio);
2787bbio_out:
2788 btrfs_bio_counter_dec(fs_info);
2789 btrfs_put_bbio(bbio);
2790 bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
2791 sparity->nsectors);
2792 spin_lock(&sctx->stat_lock);
2793 sctx->stat.malloc_errors++;
2794 spin_unlock(&sctx->stat_lock);
2795out:
2796 scrub_free_parity(sparity);
2797}
2798
2799static inline int scrub_calc_parity_bitmap_len(int nsectors)
2800{
2801 return DIV_ROUND_UP(nsectors, BITS_PER_LONG) * sizeof(long);
2802}
2803
2804static void scrub_parity_get(struct scrub_parity *sparity)
2805{
2806 refcount_inc(&sparity->refs);
2807}
2808
2809static void scrub_parity_put(struct scrub_parity *sparity)
2810{
2811 if (!refcount_dec_and_test(&sparity->refs))
2812 return;
2813
2814 scrub_parity_check_and_repair(sparity);
2815}
2816
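/*
 * Scrub one full RAID56 stripe set in the range [logic_start, logic_end):
 * walk the extent tree for the data in each stripe, scrub it, and finally
 * verify (and if needed rewrite) the parity through the raid56 code once the
 * last reference on the scrub_parity is dropped.
 */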
2817static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
2818 struct map_lookup *map,
2819 struct btrfs_device *sdev,
2820 struct btrfs_path *path,
2821 u64 logic_start,
2822 u64 logic_end)
2823{
2824 struct btrfs_fs_info *fs_info = sctx->fs_info;
2825 struct btrfs_root *root = fs_info->extent_root;
2826 struct btrfs_root *csum_root = fs_info->csum_root;
2827 struct btrfs_extent_item *extent;
2828 struct btrfs_bio *bbio = NULL;
2829 u64 flags;
2830 int ret;
2831 int slot;
2832 struct extent_buffer *l;
2833 struct btrfs_key key;
2834 u64 generation;
2835 u64 extent_logical;
2836 u64 extent_physical;
2837 u64 extent_len;
2838 u64 mapped_length;
2839 struct btrfs_device *extent_dev;
2840 struct scrub_parity *sparity;
2841 int nsectors;
2842 int bitmap_len;
2843 int extent_mirror_num;
2844 int stop_loop = 0;
2845
2846 nsectors = div_u64(map->stripe_len, fs_info->sectorsize);
2847 bitmap_len = scrub_calc_parity_bitmap_len(nsectors);
2848 sparity = kzalloc(sizeof(struct scrub_parity) + 2 * bitmap_len,
2849 GFP_NOFS);
2850 if (!sparity) {
2851 spin_lock(&sctx->stat_lock);
2852 sctx->stat.malloc_errors++;
2853 spin_unlock(&sctx->stat_lock);
2854 return -ENOMEM;
2855 }
2856
2857 sparity->stripe_len = map->stripe_len;
2858 sparity->nsectors = nsectors;
2859 sparity->sctx = sctx;
2860 sparity->scrub_dev = sdev;
2861 sparity->logic_start = logic_start;
2862 sparity->logic_end = logic_end;
2863 refcount_set(&sparity->refs, 1);
2864 INIT_LIST_HEAD(&sparity->spages);
2865 sparity->dbitmap = sparity->bitmap;
2866 sparity->ebitmap = (void *)sparity->bitmap + bitmap_len;
2867
2868 ret = 0;
2869 while (logic_start < logic_end) {
2870 if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
2871 key.type = BTRFS_METADATA_ITEM_KEY;
2872 else
2873 key.type = BTRFS_EXTENT_ITEM_KEY;
2874 key.objectid = logic_start;
2875 key.offset = (u64)-1;
2876
2877 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2878 if (ret < 0)
2879 goto out;
2880
2881 if (ret > 0) {
2882 ret = btrfs_previous_extent_item(root, path, 0);
2883 if (ret < 0)
2884 goto out;
2885 if (ret > 0) {
2886 btrfs_release_path(path);
2887 ret = btrfs_search_slot(NULL, root, &key,
2888 path, 0, 0);
2889 if (ret < 0)
2890 goto out;
2891 }
2892 }
2893
2894 stop_loop = 0;
2895 while (1) {
2896 u64 bytes;
2897
2898 l = path->nodes[0];
2899 slot = path->slots[0];
2900 if (slot >= btrfs_header_nritems(l)) {
2901 ret = btrfs_next_leaf(root, path);
2902 if (ret == 0)
2903 continue;
2904 if (ret < 0)
2905 goto out;
2906
2907 stop_loop = 1;
2908 break;
2909 }
2910 btrfs_item_key_to_cpu(l, &key, slot);
2911
2912 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
2913 key.type != BTRFS_METADATA_ITEM_KEY)
2914 goto next;
2915
2916 if (key.type == BTRFS_METADATA_ITEM_KEY)
2917 bytes = fs_info->nodesize;
2918 else
2919 bytes = key.offset;
2920
2921 if (key.objectid + bytes <= logic_start)
2922 goto next;
2923
2924 if (key.objectid >= logic_end) {
2925 stop_loop = 1;
2926 break;
2927 }
2928
2929 while (key.objectid >= logic_start + map->stripe_len)
2930 logic_start += map->stripe_len;
2931
2932 extent = btrfs_item_ptr(l, slot,
2933 struct btrfs_extent_item);
2934 flags = btrfs_extent_flags(l, extent);
2935 generation = btrfs_extent_generation(l, extent);
2936
2937 if ((flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) &&
2938 (key.objectid < logic_start ||
2939 key.objectid + bytes >
2940 logic_start + map->stripe_len)) {
2941 btrfs_err(fs_info,
2942 "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
2943 key.objectid, logic_start);
2944 spin_lock(&sctx->stat_lock);
2945 sctx->stat.uncorrectable_errors++;
2946 spin_unlock(&sctx->stat_lock);
2947 goto next;
2948 }
2949again:
2950 extent_logical = key.objectid;
2951 extent_len = bytes;
2952
2953 if (extent_logical < logic_start) {
2954 extent_len -= logic_start - extent_logical;
2955 extent_logical = logic_start;
2956 }
2957
2958 if (extent_logical + extent_len >
2959 logic_start + map->stripe_len)
2960 extent_len = logic_start + map->stripe_len -
2961 extent_logical;
2962
2963 scrub_parity_mark_sectors_data(sparity, extent_logical,
2964 extent_len);
2965
2966 mapped_length = extent_len;
2967 bbio = NULL;
2968 ret = btrfs_map_block(fs_info, BTRFS_MAP_READ,
2969 extent_logical, &mapped_length, &bbio,
2970 0);
2971 if (!ret) {
2972 if (!bbio || mapped_length < extent_len)
2973 ret = -EIO;
2974 }
2975 if (ret) {
2976 btrfs_put_bbio(bbio);
2977 goto out;
2978 }
2979 extent_physical = bbio->stripes[0].physical;
2980 extent_mirror_num = bbio->mirror_num;
2981 extent_dev = bbio->stripes[0].dev;
2982 btrfs_put_bbio(bbio);
2983
2984 ret = btrfs_lookup_csums_range(csum_root,
2985 extent_logical,
2986 extent_logical + extent_len - 1,
2987 &sctx->csum_list, 1);
2988 if (ret)
2989 goto out;
2990
2991 ret = scrub_extent_for_parity(sparity, extent_logical,
2992 extent_len,
2993 extent_physical,
2994 extent_dev, flags,
2995 generation,
2996 extent_mirror_num);
2997
2998 scrub_free_csums(sctx);
2999
3000 if (ret)
3001 goto out;
3002
3003 if (extent_logical + extent_len <
3004 key.objectid + bytes) {
3005 logic_start += map->stripe_len;
3006
3007 if (logic_start >= logic_end) {
3008 stop_loop = 1;
3009 break;
3010 }
3011
3012 if (logic_start < key.objectid + bytes) {
3013 cond_resched();
3014 goto again;
3015 }
3016 }
3017next:
3018 path->slots[0]++;
3019 }
3020
3021 btrfs_release_path(path);
3022
3023 if (stop_loop)
3024 break;
3025
3026 logic_start += map->stripe_len;
3027 }
3028out:
3029 if (ret < 0)
3030 scrub_parity_mark_sectors_error(sparity, logic_start,
3031 logic_end - logic_start);
3032 scrub_parity_put(sparity);
3033 scrub_submit(sctx);
3034 mutex_lock(&sctx->wr_lock);
3035 scrub_wr_submit(sctx);
3036 mutex_unlock(&sctx->wr_lock);
3037
3038 btrfs_release_path(path);
3039 return ret < 0 ? ret : 0;
3040}
3041
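/*
 * Scrub one device stripe of a chunk: walk the extent tree for each stripe
 * position, look up checksums and submit the read bios.  RAID56 parity
 * stripes encountered on the way are handed to scrub_raid56_parity().
 */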
3042static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
3043 struct map_lookup *map,
3044 struct btrfs_device *scrub_dev,
3045 int num, u64 base, u64 length)
3046{
3047 struct btrfs_path *path, *ppath;
3048 struct btrfs_fs_info *fs_info = sctx->fs_info;
3049 struct btrfs_root *root = fs_info->extent_root;
3050 struct btrfs_root *csum_root = fs_info->csum_root;
3051 struct btrfs_extent_item *extent;
3052 struct blk_plug plug;
3053 u64 flags;
3054 int ret;
3055 int slot;
3056 u64 nstripes;
3057 struct extent_buffer *l;
3058 u64 physical;
3059 u64 logical;
3060 u64 logic_end;
3061 u64 physical_end;
3062 u64 generation;
3063 int mirror_num;
3064 struct reada_control *reada1;
3065 struct reada_control *reada2;
3066 struct btrfs_key key;
3067 struct btrfs_key key_end;
3068 u64 increment = map->stripe_len;
3069 u64 offset;
3070 u64 extent_logical;
3071 u64 extent_physical;
3072 u64 extent_len;
3073 u64 stripe_logical;
3074 u64 stripe_end;
3075 struct btrfs_device *extent_dev;
3076 int extent_mirror_num;
3077 int stop_loop = 0;
3078
3079 physical = map->stripes[num].physical;
3080 offset = 0;
3081 nstripes = div64_u64(length, map->stripe_len);
3082 if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
3083 offset = map->stripe_len * num;
3084 increment = map->stripe_len * map->num_stripes;
3085 mirror_num = 1;
3086 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
3087 int factor = map->num_stripes / map->sub_stripes;
3088 offset = map->stripe_len * (num / map->sub_stripes);
3089 increment = map->stripe_len * factor;
3090 mirror_num = num % map->sub_stripes + 1;
3091 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) {
3092 increment = map->stripe_len;
3093 mirror_num = num % map->num_stripes + 1;
3094 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
3095 increment = map->stripe_len;
3096 mirror_num = num % map->num_stripes + 1;
3097 } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3098 get_raid56_logic_offset(physical, num, map, &offset, NULL);
3099 increment = map->stripe_len * nr_data_stripes(map);
3100 mirror_num = 1;
3101 } else {
3102 increment = map->stripe_len;
3103 mirror_num = 1;
3104 }
3105
3106 path = btrfs_alloc_path();
3107 if (!path)
3108 return -ENOMEM;
3109
3110 ppath = btrfs_alloc_path();
3111 if (!ppath) {
3112 btrfs_free_path(path);
3113 return -ENOMEM;
3114 }
3115
	/*
	 * Work on the commit root and skip tree locking: the blocks being
	 * scrubbed are static as long as COW is applied, so the commit root
	 * gives a consistent view without blocking writers.
	 */
3121 path->search_commit_root = 1;
3122 path->skip_locking = 1;
3123
3124 ppath->search_commit_root = 1;
3125 ppath->skip_locking = 1;
3126
	/*
	 * Compute the logical range [logical, logic_end) that corresponds to
	 * the device stripe being scrubbed.
	 */
3131 logical = base + offset;
3132 physical_end = physical + nstripes * map->stripe_len;
3133 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3134 get_raid56_logic_offset(physical_end, num,
3135 map, &logic_end, NULL);
3136 logic_end += base;
3137 } else {
3138 logic_end = logical + increment * nstripes;
3139 }
3140 wait_event(sctx->list_wait,
3141 atomic_read(&sctx->bios_in_flight) == 0);
3142 scrub_blocked_if_needed(fs_info);
3143
	/* Kick off readahead for the extent tree and csum tree of this range */
3145 key.objectid = logical;
3146 key.type = BTRFS_EXTENT_ITEM_KEY;
3147 key.offset = (u64)0;
3148 key_end.objectid = logic_end;
3149 key_end.type = BTRFS_METADATA_ITEM_KEY;
3150 key_end.offset = (u64)-1;
3151 reada1 = btrfs_reada_add(root, &key, &key_end);
3152
3153 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
3154 key.type = BTRFS_EXTENT_CSUM_KEY;
3155 key.offset = logical;
3156 key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
3157 key_end.type = BTRFS_EXTENT_CSUM_KEY;
3158 key_end.offset = logic_end;
3159 reada2 = btrfs_reada_add(csum_root, &key, &key_end);
3160
3161 if (!IS_ERR(reada1))
3162 btrfs_reada_wait(reada1);
3163 if (!IS_ERR(reada2))
3164 btrfs_reada_wait(reada2);
3165
	/*
	 * Plug the block layer so the read bios submitted by the loop below
	 * can be merged before they are issued.
	 */
3171 blk_start_plug(&plug);
3172
	/* Now find all extents for each stripe and scrub them */
3176 ret = 0;
3177 while (physical < physical_end) {
		/* Canceled? */
3181 if (atomic_read(&fs_info->scrub_cancel_req) ||
3182 atomic_read(&sctx->cancel_req)) {
3183 ret = -ECANCELED;
3184 goto out;
3185 }
3186
		/* Check to see if we have to pause */
3189 if (atomic_read(&fs_info->scrub_pause_req)) {
			/* Push queued extents */
3191 sctx->flush_all_writes = true;
3192 scrub_submit(sctx);
3193 mutex_lock(&sctx->wr_lock);
3194 scrub_wr_submit(sctx);
3195 mutex_unlock(&sctx->wr_lock);
3196 wait_event(sctx->list_wait,
3197 atomic_read(&sctx->bios_in_flight) == 0);
3198 sctx->flush_all_writes = false;
3199 scrub_blocked_if_needed(fs_info);
3200 }
3201
3202 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3203 ret = get_raid56_logic_offset(physical, num, map,
3204 &logical,
3205 &stripe_logical);
3206 logical += base;
3207 if (ret) {
				/* This position is a parity stripe, scrub the whole stripe */
3209 stripe_logical += base;
3210 stripe_end = stripe_logical + increment;
3211 ret = scrub_raid56_parity(sctx, map, scrub_dev,
3212 ppath, stripe_logical,
3213 stripe_end);
3214 if (ret)
3215 goto out;
3216 goto skip;
3217 }
3218 }
3219
3220 if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
3221 key.type = BTRFS_METADATA_ITEM_KEY;
3222 else
3223 key.type = BTRFS_EXTENT_ITEM_KEY;
3224 key.objectid = logical;
3225 key.offset = (u64)-1;
3226
3227 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3228 if (ret < 0)
3229 goto out;
3230
3231 if (ret > 0) {
3232 ret = btrfs_previous_extent_item(root, path, 0);
3233 if (ret < 0)
3234 goto out;
3235 if (ret > 0) {
				/* There is no smaller item, so stick with the
				 * larger one */
3238 btrfs_release_path(path);
3239 ret = btrfs_search_slot(NULL, root, &key,
3240 path, 0, 0);
3241 if (ret < 0)
3242 goto out;
3243 }
3244 }
3245
3246 stop_loop = 0;
3247 while (1) {
3248 u64 bytes;
3249
3250 l = path->nodes[0];
3251 slot = path->slots[0];
3252 if (slot >= btrfs_header_nritems(l)) {
3253 ret = btrfs_next_leaf(root, path);
3254 if (ret == 0)
3255 continue;
3256 if (ret < 0)
3257 goto out;
3258
3259 stop_loop = 1;
3260 break;
3261 }
3262 btrfs_item_key_to_cpu(l, &key, slot);
3263
3264 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
3265 key.type != BTRFS_METADATA_ITEM_KEY)
3266 goto next;
3267
3268 if (key.type == BTRFS_METADATA_ITEM_KEY)
3269 bytes = fs_info->nodesize;
3270 else
3271 bytes = key.offset;
3272
3273 if (key.objectid + bytes <= logical)
3274 goto next;
3275
3276 if (key.objectid >= logical + map->stripe_len) {
				/* Past the end of the current stripe */
3278 if (key.objectid >= logic_end)
3279 stop_loop = 1;
3280 break;
3281 }
3282
3283 extent = btrfs_item_ptr(l, slot,
3284 struct btrfs_extent_item);
3285 flags = btrfs_extent_flags(l, extent);
3286 generation = btrfs_extent_generation(l, extent);
3287
3288 if ((flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) &&
3289 (key.objectid < logical ||
3290 key.objectid + bytes >
3291 logical + map->stripe_len)) {
3292 btrfs_err(fs_info,
3293 "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
3294 key.objectid, logical);
3295 spin_lock(&sctx->stat_lock);
3296 sctx->stat.uncorrectable_errors++;
3297 spin_unlock(&sctx->stat_lock);
3298 goto next;
3299 }
3300
3301again:
3302 extent_logical = key.objectid;
3303 extent_len = bytes;
3304
			/*
			 * Trim the extent to the part that lies inside this
			 * stripe.
			 */
3308 if (extent_logical < logical) {
3309 extent_len -= logical - extent_logical;
3310 extent_logical = logical;
3311 }
3312 if (extent_logical + extent_len >
3313 logical + map->stripe_len) {
3314 extent_len = logical + map->stripe_len -
3315 extent_logical;
3316 }
3317
3318 extent_physical = extent_logical - logical + physical;
3319 extent_dev = scrub_dev;
3320 extent_mirror_num = mirror_num;
3321 if (sctx->is_dev_replace)
3322 scrub_remap_extent(fs_info, extent_logical,
3323 extent_len, &extent_physical,
3324 &extent_dev,
3325 &extent_mirror_num);
3326
3327 ret = btrfs_lookup_csums_range(csum_root,
3328 extent_logical,
3329 extent_logical +
3330 extent_len - 1,
3331 &sctx->csum_list, 1);
3332 if (ret)
3333 goto out;
3334
3335 ret = scrub_extent(sctx, map, extent_logical, extent_len,
3336 extent_physical, extent_dev, flags,
3337 generation, extent_mirror_num,
3338 extent_logical - logical + physical);
3339
3340 scrub_free_csums(sctx);
3341
3342 if (ret)
3343 goto out;
3344
3345 if (extent_logical + extent_len <
3346 key.objectid + bytes) {
3347 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
					/*
					 * Loop until we find the next data
					 * stripe or we have finished all the
					 * stripes.
					 */
3352loop:
3353 physical += map->stripe_len;
3354 ret = get_raid56_logic_offset(physical,
3355 num, map, &logical,
3356 &stripe_logical);
3357 logical += base;
3358
3359 if (ret && physical < physical_end) {
3360 stripe_logical += base;
3361 stripe_end = stripe_logical +
3362 increment;
3363 ret = scrub_raid56_parity(sctx,
3364 map, scrub_dev, ppath,
3365 stripe_logical,
3366 stripe_end);
3367 if (ret)
3368 goto out;
3369 goto loop;
3370 }
3371 } else {
3372 physical += map->stripe_len;
3373 logical += increment;
3374 }
3375 if (logical < key.objectid + bytes) {
3376 cond_resched();
3377 goto again;
3378 }
3379
3380 if (physical >= physical_end) {
3381 stop_loop = 1;
3382 break;
3383 }
3384 }
3385next:
3386 path->slots[0]++;
3387 }
3388 btrfs_release_path(path);
3389skip:
3390 logical += increment;
3391 physical += map->stripe_len;
3392 spin_lock(&sctx->stat_lock);
3393 if (stop_loop)
3394 sctx->stat.last_physical = map->stripes[num].physical +
3395 length;
3396 else
3397 sctx->stat.last_physical = physical;
3398 spin_unlock(&sctx->stat_lock);
3399 if (stop_loop)
3400 break;
3401 }
3402out:
	/* Push remaining queued extents */
3404 scrub_submit(sctx);
3405 mutex_lock(&sctx->wr_lock);
3406 scrub_wr_submit(sctx);
3407 mutex_unlock(&sctx->wr_lock);
3408
3409 blk_finish_plug(&plug);
3410 btrfs_free_path(path);
3411 btrfs_free_path(ppath);
3412 return ret < 0 ? ret : 0;
3413}
3414
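/*
 * Scrub the portion of a chunk that lives on @scrub_dev, by finding the
 * matching stripe(s) in the chunk mapping and scrubbing each of them.
 */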
3415static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
3416 struct btrfs_device *scrub_dev,
3417 u64 chunk_offset, u64 length,
3418 u64 dev_offset,
3419 struct btrfs_block_group *cache)
3420{
3421 struct btrfs_fs_info *fs_info = sctx->fs_info;
3422 struct extent_map_tree *map_tree = &fs_info->mapping_tree;
3423 struct map_lookup *map;
3424 struct extent_map *em;
3425 int i;
3426 int ret = 0;
3427
3428 read_lock(&map_tree->lock);
3429 em = lookup_extent_mapping(map_tree, chunk_offset, 1);
3430 read_unlock(&map_tree->lock);
3431
3432 if (!em) {
		/*
		 * The mapping may already be gone, e.g. if the block group
		 * was removed by the cleaner kthread or by relocation.  Only
		 * report an error if the block group still exists.
		 */
3437 spin_lock(&cache->lock);
3438 if (!cache->removed)
3439 ret = -EINVAL;
3440 spin_unlock(&cache->lock);
3441
3442 return ret;
3443 }
3444
3445 map = em->map_lookup;
3446 if (em->start != chunk_offset)
3447 goto out;
3448
3449 if (em->len < length)
3450 goto out;
3451
3452 for (i = 0; i < map->num_stripes; ++i) {
3453 if (map->stripes[i].dev->bdev == scrub_dev->bdev &&
3454 map->stripes[i].physical == dev_offset) {
3455 ret = scrub_stripe(sctx, map, scrub_dev, i,
3456 chunk_offset, length);
3457 if (ret)
3458 goto out;
3459 }
3460 }
3461out:
3462 free_extent_map(em);
3463
3464 return ret;
3465}
3466
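/*
 * Walk all dev extents of @scrub_dev in the range [start, end) and scrub the
 * corresponding block groups one by one, temporarily marking each block group
 * read-only while it is being scrubbed.
 */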
3467static noinline_for_stack
3468int scrub_enumerate_chunks(struct scrub_ctx *sctx,
3469 struct btrfs_device *scrub_dev, u64 start, u64 end)
3470{
3471 struct btrfs_dev_extent *dev_extent = NULL;
3472 struct btrfs_path *path;
3473 struct btrfs_fs_info *fs_info = sctx->fs_info;
3474 struct btrfs_root *root = fs_info->dev_root;
3475 u64 length;
3476 u64 chunk_offset;
3477 int ret = 0;
3478 int ro_set;
3479 int slot;
3480 struct extent_buffer *l;
3481 struct btrfs_key key;
3482 struct btrfs_key found_key;
3483 struct btrfs_block_group *cache;
3484 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
3485
3486 path = btrfs_alloc_path();
3487 if (!path)
3488 return -ENOMEM;
3489
3490 path->reada = READA_FORWARD;
3491 path->search_commit_root = 1;
3492 path->skip_locking = 1;
3493
3494 key.objectid = scrub_dev->devid;
3495 key.offset = 0ull;
3496 key.type = BTRFS_DEV_EXTENT_KEY;
3497
3498 while (1) {
3499 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3500 if (ret < 0)
3501 break;
3502 if (ret > 0) {
3503 if (path->slots[0] >=
3504 btrfs_header_nritems(path->nodes[0])) {
3505 ret = btrfs_next_leaf(root, path);
3506 if (ret < 0)
3507 break;
3508 if (ret > 0) {
3509 ret = 0;
3510 break;
3511 }
3512 } else {
3513 ret = 0;
3514 }
3515 }
3516
3517 l = path->nodes[0];
3518 slot = path->slots[0];
3519
3520 btrfs_item_key_to_cpu(l, &found_key, slot);
3521
3522 if (found_key.objectid != scrub_dev->devid)
3523 break;
3524
3525 if (found_key.type != BTRFS_DEV_EXTENT_KEY)
3526 break;
3527
3528 if (found_key.offset >= end)
3529 break;
3530
3531 if (found_key.offset < key.offset)
3532 break;
3533
3534 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
3535 length = btrfs_dev_extent_length(l, dev_extent);
3536
3537 if (found_key.offset + length <= start)
3538 goto skip;
3539
3540 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
3541
		/*
		 * Take a reference on the corresponding block group so the
		 * chunk cannot go away while we are scrubbing it.
		 */
3546 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
3547
		/* Some chunks are removed but not committed to disk yet,
		 * continue scrubbing */
3550 if (!cache)
3551 goto skip;
3552
		/*
		 * Pause the scrub before setting the block group read-only:
		 * btrfs_inc_block_group_ro() may end up waiting for a
		 * transaction commit, and a transaction commit waits for all
		 * running scrubs to pause, so calling it while paused avoids
		 * a deadlock.
		 */
3561 scrub_pause_on(fs_info);
3562
		/*
		 * Set the block group read-only so its contents do not change
		 * underneath the scrub.  -ENOSPC from
		 * btrfs_inc_block_group_ro() is tolerated for a regular scrub
		 * (handled below); for dev-replace the block group must be
		 * made read-only.
		 */
3593 ret = btrfs_inc_block_group_ro(cache, sctx->is_dev_replace);
3594 if (ret == 0) {
3595 ro_set = 1;
3596 } else if (ret == -ENOSPC && !sctx->is_dev_replace) {
			/*
			 * btrfs_inc_block_group_ro() returns -ENOSPC when it
			 * failed to allocate a new chunk.  That is not a
			 * problem for a regular scrub: metadata is always
			 * COWed, and the scrub pauses around transaction
			 * commits.
			 */
3604 ro_set = 0;
3605 } else {
3606 btrfs_warn(fs_info,
3607 "failed setting block group ro: %d", ret);
3608 btrfs_put_block_group(cache);
3609 scrub_pause_off(fs_info);
3610 break;
3611 }
3612
		/*
		 * Now that the block group is read-only, wait for any
		 * in-flight nocow writes and ordered extents in it to finish
		 * before copying it for dev-replace.  COW writes are fine,
		 * they never overwrite extents in the commit root.
		 */
3618 if (sctx->is_dev_replace) {
3619 btrfs_wait_nocow_writers(cache);
3620 btrfs_wait_ordered_roots(fs_info, U64_MAX, cache->start,
3621 cache->length);
3622 }
3623
3624 scrub_pause_off(fs_info);
3625 down_write(&dev_replace->rwsem);
3626 dev_replace->cursor_right = found_key.offset + length;
3627 dev_replace->cursor_left = found_key.offset;
3628 dev_replace->item_needs_writeback = 1;
3629 up_write(&dev_replace->rwsem);
3630
3631 ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length,
3632 found_key.offset, cache);

		/*
		 * Flush and submit all pending read and write bios, then wait
		 * for them to complete.
		 *
		 * Note that in the dev-replace case, a read request causes
		 * write requests that are submitted in the read completion
		 * worker.  Therefore all writes must be flushed as well, so
		 * that bios_in_flight can really drop to zero.
		 */
3644 sctx->flush_all_writes = true;
3645 scrub_submit(sctx);
3646 mutex_lock(&sctx->wr_lock);
3647 scrub_wr_submit(sctx);
3648 mutex_unlock(&sctx->wr_lock);
3649
3650 wait_event(sctx->list_wait,
3651 atomic_read(&sctx->bios_in_flight) == 0);
3652
3653 scrub_pause_on(fs_info);
3654
		/*
		 * This must happen while the scrub is paused: waiting for the
		 * pending workers to finish must not block a transaction
		 * commit.
		 */
3660 wait_event(sctx->list_wait,
3661 atomic_read(&sctx->workers_pending) == 0);
3662 sctx->flush_all_writes = false;
3663
3664 scrub_pause_off(fs_info);
3665
3666 down_write(&dev_replace->rwsem);
3667 dev_replace->cursor_left = dev_replace->cursor_right;
3668 dev_replace->item_needs_writeback = 1;
3669 up_write(&dev_replace->rwsem);
3670
3671 if (ro_set)
3672 btrfs_dec_block_group_ro(cache);
3673
		/*
		 * If the block group is now empty, put it back on the unused
		 * list: setting it read-only may have prevented the cleaner
		 * kthread from deleting it, and otherwise it might never be
		 * reclaimed until a balance or until it is used again.
		 */
3681 spin_lock(&cache->lock);
3682 if (!cache->removed && !cache->ro && cache->reserved == 0 &&
3683 cache->used == 0) {
3684 spin_unlock(&cache->lock);
3685 btrfs_mark_bg_unused(cache);
3686 } else {
3687 spin_unlock(&cache->lock);
3688 }
3689
3690 btrfs_put_block_group(cache);
3691 if (ret)
3692 break;
3693 if (sctx->is_dev_replace &&
3694 atomic64_read(&dev_replace->num_write_errors) > 0) {
3695 ret = -EIO;
3696 break;
3697 }
3698 if (sctx->stat.malloc_errors > 0) {
3699 ret = -ENOMEM;
3700 break;
3701 }
3702skip:
3703 key.offset = found_key.offset + length;
3704 btrfs_release_path(path);
3705 }
3706
3707 btrfs_free_path(path);
3708
3709 return ret;
3710}
3711
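/*
 * Scrub the super block copies of @scrub_dev that fit into the device's
 * committed size.
 */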
3712static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
3713 struct btrfs_device *scrub_dev)
3714{
3715 int i;
3716 u64 bytenr;
3717 u64 gen;
3718 int ret;
3719 struct btrfs_fs_info *fs_info = sctx->fs_info;
3720
3721 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
3722 return -EIO;
3723
	/* Seed devices keep their own generation */
3725 if (scrub_dev->fs_devices != fs_info->fs_devices)
3726 gen = scrub_dev->generation;
3727 else
3728 gen = fs_info->last_trans_committed;
3729
3730 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
3731 bytenr = btrfs_sb_offset(i);
3732 if (bytenr + BTRFS_SUPER_INFO_SIZE >
3733 scrub_dev->commit_total_bytes)
3734 break;
3735
3736 ret = scrub_pages(sctx, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr,
3737 scrub_dev, BTRFS_EXTENT_FLAG_SUPER, gen, i,
3738 NULL, 1, bytenr);
3739 if (ret)
3740 return ret;
3741 }
3742 wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
3743
3744 return 0;
3745}
3746
/*
 * Get a reference on fs_info->scrub_workers; start the scrub worker threads
 * if they are not running yet.
 */
3750static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
3751 int is_dev_replace)
3752{
3753 unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND;
3754 int max_active = fs_info->thread_pool_size;
3755
3756 lockdep_assert_held(&fs_info->scrub_lock);
3757
3758 if (refcount_read(&fs_info->scrub_workers_refcnt) == 0) {
3759 ASSERT(fs_info->scrub_workers == NULL);
3760 fs_info->scrub_workers = btrfs_alloc_workqueue(fs_info, "scrub",
3761 flags, is_dev_replace ? 1 : max_active, 4);
3762 if (!fs_info->scrub_workers)
3763 goto fail_scrub_workers;
3764
3765 ASSERT(fs_info->scrub_wr_completion_workers == NULL);
3766 fs_info->scrub_wr_completion_workers =
3767 btrfs_alloc_workqueue(fs_info, "scrubwrc", flags,
3768 max_active, 2);
3769 if (!fs_info->scrub_wr_completion_workers)
3770 goto fail_scrub_wr_completion_workers;
3771
3772 ASSERT(fs_info->scrub_parity_workers == NULL);
3773 fs_info->scrub_parity_workers =
3774 btrfs_alloc_workqueue(fs_info, "scrubparity", flags,
3775 max_active, 2);
3776 if (!fs_info->scrub_parity_workers)
3777 goto fail_scrub_parity_workers;
3778
3779 refcount_set(&fs_info->scrub_workers_refcnt, 1);
3780 } else {
3781 refcount_inc(&fs_info->scrub_workers_refcnt);
3782 }
3783 return 0;
3784
3785fail_scrub_parity_workers:
3786 btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers);
3787fail_scrub_wr_completion_workers:
3788 btrfs_destroy_workqueue(fs_info->scrub_workers);
3789fail_scrub_workers:
3790 return -ENOMEM;
3791}
3792
3793int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
3794 u64 end, struct btrfs_scrub_progress *progress,
3795 int readonly, int is_dev_replace)
3796{
3797 struct scrub_ctx *sctx;
3798 int ret;
3799 struct btrfs_device *dev;
3800 unsigned int nofs_flag;
3801 struct btrfs_workqueue *scrub_workers = NULL;
3802 struct btrfs_workqueue *scrub_wr_comp = NULL;
3803 struct btrfs_workqueue *scrub_parity = NULL;
3804
3805 if (btrfs_fs_closing(fs_info))
3806 return -EAGAIN;
3807
3808 if (fs_info->nodesize > BTRFS_STRIPE_LEN) {
		/*
		 * The scrub implementation assumes that a tree block fits
		 * into a single stripe; larger node sizes are not supported.
		 */
3814 btrfs_err(fs_info,
3815 "scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails",
3816 fs_info->nodesize,
3817 BTRFS_STRIPE_LEN);
3818 return -EINVAL;
3819 }
3820
3821 if (fs_info->sectorsize != PAGE_SIZE) {
		/* Sector sizes other than PAGE_SIZE are not supported */
3823 btrfs_err_rl(fs_info,
3824 "scrub: size assumption sectorsize != PAGE_SIZE (%d != %lu) fails",
3825 fs_info->sectorsize, PAGE_SIZE);
3826 return -EINVAL;
3827 }
3828
3829 if (fs_info->nodesize >
3830 PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK ||
3831 fs_info->sectorsize > PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK) {
		/*
		 * A node or sector that large would exhaust the bounds of the
		 * pagev[] array in struct scrub_block.
		 */
3836 btrfs_err(fs_info,
3837 "scrub: size assumption nodesize and sectorsize <= SCRUB_MAX_PAGES_PER_BLOCK (%d <= %d && %d <= %d) fails",
3838 fs_info->nodesize,
3839 SCRUB_MAX_PAGES_PER_BLOCK,
3840 fs_info->sectorsize,
3841 SCRUB_MAX_PAGES_PER_BLOCK);
3842 return -EINVAL;
3843 }
3844
	/* Allocate the scrub context outside of the device_list_mutex */
3846 sctx = scrub_setup_ctx(fs_info, is_dev_replace);
3847 if (IS_ERR(sctx))
3848 return PTR_ERR(sctx);
3849
3850 mutex_lock(&fs_info->fs_devices->device_list_mutex);
3851 dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true);
3852 if (!dev || (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) &&
3853 !is_dev_replace)) {
3854 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3855 ret = -ENODEV;
3856 goto out_free_ctx;
3857 }
3858
3859 if (!is_dev_replace && !readonly &&
3860 !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
3861 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3862 btrfs_err_in_rcu(fs_info, "scrub: device %s is not writable",
3863 rcu_str_deref(dev->name));
3864 ret = -EROFS;
3865 goto out_free_ctx;
3866 }
3867
3868 mutex_lock(&fs_info->scrub_lock);
3869 if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
3870 test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &dev->dev_state)) {
3871 mutex_unlock(&fs_info->scrub_lock);
3872 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3873 ret = -EIO;
3874 goto out_free_ctx;
3875 }
3876
3877 down_read(&fs_info->dev_replace.rwsem);
3878 if (dev->scrub_ctx ||
3879 (!is_dev_replace &&
3880 btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) {
3881 up_read(&fs_info->dev_replace.rwsem);
3882 mutex_unlock(&fs_info->scrub_lock);
3883 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3884 ret = -EINPROGRESS;
3885 goto out_free_ctx;
3886 }
3887 up_read(&fs_info->dev_replace.rwsem);
3888
3889 ret = scrub_workers_get(fs_info, is_dev_replace);
3890 if (ret) {
3891 mutex_unlock(&fs_info->scrub_lock);
3892 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3893 goto out_free_ctx;
3894 }
3895
3896 sctx->readonly = readonly;
3897 dev->scrub_ctx = sctx;
3898 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3899
	/*
	 * Check scrub_pause_req here to avoid racing with a transaction
	 * commit that is waiting for scrubs to pause before we account
	 * ourselves as running.
	 */
3904 __scrub_blocked_if_needed(fs_info);
3905 atomic_inc(&fs_info->scrubs_running);
3906 mutex_unlock(&fs_info->scrub_lock);
3907
	/*
	 * Use GFP_NOFS for all allocations done by the scrub code called
	 * below.  A transaction trying to pause the scrub blocks until the
	 * scrub reaches a pause point; if an allocation in the scrub path
	 * entered direct reclaim and waited on that transaction commit, the
	 * two would deadlock.
	 */
3917 nofs_flag = memalloc_nofs_save();
3918 if (!is_dev_replace) {
3919 btrfs_info(fs_info, "scrub: started on devid %llu", devid);
3920
3921
3922
3923
3924 mutex_lock(&fs_info->fs_devices->device_list_mutex);
3925 ret = scrub_supers(sctx, dev);
3926 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3927 }
3928
3929 if (!ret)
3930 ret = scrub_enumerate_chunks(sctx, dev, start, end);
3931 memalloc_nofs_restore(nofs_flag);
3932
3933 wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
3934 atomic_dec(&fs_info->scrubs_running);
3935 wake_up(&fs_info->scrub_pause_wait);
3936
3937 wait_event(sctx->list_wait, atomic_read(&sctx->workers_pending) == 0);
3938
3939 if (progress)
3940 memcpy(progress, &sctx->stat, sizeof(*progress));
3941
3942 if (!is_dev_replace)
3943 btrfs_info(fs_info, "scrub: %s on devid %llu with status: %d",
3944 ret ? "not finished" : "finished", devid, ret);
3945
3946 mutex_lock(&fs_info->scrub_lock);
3947 dev->scrub_ctx = NULL;
3948 if (refcount_dec_and_test(&fs_info->scrub_workers_refcnt)) {
3949 scrub_workers = fs_info->scrub_workers;
3950 scrub_wr_comp = fs_info->scrub_wr_completion_workers;
3951 scrub_parity = fs_info->scrub_parity_workers;
3952
3953 fs_info->scrub_workers = NULL;
3954 fs_info->scrub_wr_completion_workers = NULL;
3955 fs_info->scrub_parity_workers = NULL;
3956 }
3957 mutex_unlock(&fs_info->scrub_lock);
3958
3959 btrfs_destroy_workqueue(scrub_workers);
3960 btrfs_destroy_workqueue(scrub_wr_comp);
3961 btrfs_destroy_workqueue(scrub_parity);
3962 scrub_put_ctx(sctx);
3963
3964 return ret;
3965
3966out_free_ctx:
3967 scrub_free_ctx(sctx);
3968
3969 return ret;
3970}
3971
3972void btrfs_scrub_pause(struct btrfs_fs_info *fs_info)
3973{
3974 mutex_lock(&fs_info->scrub_lock);
3975 atomic_inc(&fs_info->scrub_pause_req);
3976 while (atomic_read(&fs_info->scrubs_paused) !=
3977 atomic_read(&fs_info->scrubs_running)) {
3978 mutex_unlock(&fs_info->scrub_lock);
3979 wait_event(fs_info->scrub_pause_wait,
3980 atomic_read(&fs_info->scrubs_paused) ==
3981 atomic_read(&fs_info->scrubs_running));
3982 mutex_lock(&fs_info->scrub_lock);
3983 }
3984 mutex_unlock(&fs_info->scrub_lock);
3985}
3986
3987void btrfs_scrub_continue(struct btrfs_fs_info *fs_info)
3988{
3989 atomic_dec(&fs_info->scrub_pause_req);
3990 wake_up(&fs_info->scrub_pause_wait);
3991}
3992
3993int btrfs_scrub_cancel(struct btrfs_fs_info *fs_info)
3994{
3995 mutex_lock(&fs_info->scrub_lock);
3996 if (!atomic_read(&fs_info->scrubs_running)) {
3997 mutex_unlock(&fs_info->scrub_lock);
3998 return -ENOTCONN;
3999 }
4000
4001 atomic_inc(&fs_info->scrub_cancel_req);
4002 while (atomic_read(&fs_info->scrubs_running)) {
4003 mutex_unlock(&fs_info->scrub_lock);
4004 wait_event(fs_info->scrub_pause_wait,
4005 atomic_read(&fs_info->scrubs_running) == 0);
4006 mutex_lock(&fs_info->scrub_lock);
4007 }
4008 atomic_dec(&fs_info->scrub_cancel_req);
4009 mutex_unlock(&fs_info->scrub_lock);
4010
4011 return 0;
4012}
4013
4014int btrfs_scrub_cancel_dev(struct btrfs_device *dev)
4015{
4016 struct btrfs_fs_info *fs_info = dev->fs_info;
4017 struct scrub_ctx *sctx;
4018
4019 mutex_lock(&fs_info->scrub_lock);
4020 sctx = dev->scrub_ctx;
4021 if (!sctx) {
4022 mutex_unlock(&fs_info->scrub_lock);
4023 return -ENOTCONN;
4024 }
4025 atomic_inc(&sctx->cancel_req);
4026 while (dev->scrub_ctx) {
4027 mutex_unlock(&fs_info->scrub_lock);
4028 wait_event(fs_info->scrub_pause_wait,
4029 dev->scrub_ctx == NULL);
4030 mutex_lock(&fs_info->scrub_lock);
4031 }
4032 mutex_unlock(&fs_info->scrub_lock);
4033
4034 return 0;
4035}
4036
4037int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
4038 struct btrfs_scrub_progress *progress)
4039{
4040 struct btrfs_device *dev;
4041 struct scrub_ctx *sctx = NULL;
4042
4043 mutex_lock(&fs_info->fs_devices->device_list_mutex);
4044 dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true);
4045 if (dev)
4046 sctx = dev->scrub_ctx;
4047 if (sctx)
4048 memcpy(progress, &sctx->stat, sizeof(*progress));
4049 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
4050
4051 return dev ? (sctx ? 0 : -ENOTCONN) : -ENODEV;
4052}
4053
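/*
 * Used in dev-replace: map a logical extent to the physical offset, device
 * and mirror number that the read should actually be issued to.
 */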
4054static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
4055 u64 extent_logical, u64 extent_len,
4056 u64 *extent_physical,
4057 struct btrfs_device **extent_dev,
4058 int *extent_mirror_num)
4059{
4060 u64 mapped_length;
4061 struct btrfs_bio *bbio = NULL;
4062 int ret;
4063
4064 mapped_length = extent_len;
4065 ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, extent_logical,
4066 &mapped_length, &bbio, 0);
4067 if (ret || !bbio || mapped_length < extent_len ||
4068 !bbio->stripes[0].dev->bdev) {
4069 btrfs_put_bbio(bbio);
4070 return;
4071 }
4072
4073 *extent_physical = bbio->stripes[0].physical;
4074 *extent_mirror_num = bbio->mirror_num;
4075 *extent_dev = bbio->stripes[0].dev;
4076 btrfs_put_bbio(bbio);
4077}
4078