// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2011, 2012 STRATO.  All rights reserved.
 */

#include <linux/blkdev.h>
#include <linux/ratelimit.h>
#include <linux/sched/mm.h>
#include "ctree.h"
#include "volumes.h"
#include "disk-io.h"
#include "ordered-data.h"
#include "transaction.h"
#include "backref.h"
#include "extent_io.h"
#include "dev-replace.h"
#include "check-integrity.h"
#include "rcu-string.h"
#include "raid56.h"

/*
 * Scrub reads all extents and super blocks of a device and verifies their
 * checksums. When a bad checksum is found, or an extent cannot be read at
 * all, the data is repaired from a good copy on another mirror if one can
 * be found. In dev-replace mode the verified (or repaired) data is also
 * written out to the replacement target device.
 */

struct scrub_block;
struct scrub_ctx;

/*
 * The following three values only influence the performance.
 *
 * The last one configures the number of parallel and outstanding I/O
 * operations. The first two values configure an upper limit for the number
 * of (dynamically allocated) pages that are added to a bio.
 */
#define SCRUB_PAGES_PER_RD_BIO	32	/* 128k per bio */
#define SCRUB_PAGES_PER_WR_BIO	32	/* 128k per bio */
#define SCRUB_BIOS_PER_SCTX	64	/* 8MB per device in flight */

/*
 * The following value times PAGE_SIZE needs to be large enough to match the
 * largest node/leaf/sector size that shall be supported.
 */
#define SCRUB_MAX_PAGES_PER_BLOCK	16	/* 64k per node/leaf/sector */

struct scrub_recover {
	refcount_t		refs;
	struct btrfs_bio	*bbio;
	u64			map_length;
};

struct scrub_page {
	struct scrub_block	*sblock;
	struct page		*page;
	struct btrfs_device	*dev;
	struct list_head	list;
	u64			flags;  /* extent flags */
	u64			generation;
	u64			logical;
	u64			physical;
	u64			physical_for_dev_replace;
	atomic_t		refs;
	struct {
		unsigned int	mirror_num:8;
		unsigned int	have_csum:1;
		unsigned int	io_error:1;
	};
	u8			csum[BTRFS_CSUM_SIZE];

	struct scrub_recover	*recover;
};

struct scrub_bio {
	int			index;
	struct scrub_ctx	*sctx;
	struct btrfs_device	*dev;
	struct bio		*bio;
	blk_status_t		status;
	u64			logical;
	u64			physical;
#if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO
	struct scrub_page	*pagev[SCRUB_PAGES_PER_WR_BIO];
#else
	struct scrub_page	*pagev[SCRUB_PAGES_PER_RD_BIO];
#endif
	int			page_count;
	int			next_free;
	struct btrfs_work	work;
};

struct scrub_block {
	struct scrub_page	*pagev[SCRUB_MAX_PAGES_PER_BLOCK];
	int			page_count;
	atomic_t		outstanding_pages;
	refcount_t		refs; /* free mem on transition to zero */
	struct scrub_ctx	*sctx;
	struct scrub_parity	*sparity;
	struct {
		unsigned int	header_error:1;
		unsigned int	checksum_error:1;
		unsigned int	no_io_error_seen:1;
		unsigned int	generation_error:1; /* also sets header_error */

		/* Set when the errored block has been repaired or re-verified OK */
		unsigned int	data_corrected:1;
	};
	struct btrfs_work	work;
};

/* Used for the chunks with parity stripes such as RAID5/6 */
struct scrub_parity {
	struct scrub_ctx	*sctx;

	struct btrfs_device	*scrub_dev;

	u64			logic_start;

	u64			logic_end;

	int			nsectors;

	u64			stripe_len;

	refcount_t		refs;

	struct list_head	spages;

	/* Work of parity check and repair */
	struct btrfs_work	work;

	/* Mark the parity blocks which have data */
	unsigned long		*dbitmap;

	/*
	 * Mark the parity blocks which have data, but errors happened when
	 * reading the data or checking the checksums
	 */
	unsigned long		*ebitmap;

	unsigned long		bitmap[0];
};

struct scrub_ctx {
	struct scrub_bio	*bios[SCRUB_BIOS_PER_SCTX];
	struct btrfs_fs_info	*fs_info;
	int			first_free;
	int			curr;
	atomic_t		bios_in_flight;
	atomic_t		workers_pending;
	spinlock_t		list_lock;
	wait_queue_head_t	list_wait;
	u16			csum_size;
	struct list_head	csum_list;
	atomic_t		cancel_req;
	int			readonly;
	int			pages_per_rd_bio;

	int			is_dev_replace;

	struct scrub_bio	*wr_curr_bio;
	struct mutex		wr_lock;
	int			pages_per_wr_bio;
	struct btrfs_device	*wr_tgtdev;
	bool			flush_all_writes;

	/*
	 * statistics
	 */
	struct btrfs_scrub_progress stat;
	spinlock_t		stat_lock;

	/*
	 * Use a ref counter to avoid use-after-free issues. Scrub workers
	 * decrement bios_in_flight and workers_pending and then do a wakeup
	 * on the list_wait wait queue. We must ensure the main scrub task
	 * does not free the scrub context before or while the workers are
	 * finishing. We use a ref count to keep the scrub context alive and
	 * then actually release it when the last reference is dropped.
	 */
	refcount_t		refs;
};

struct scrub_warning {
	struct btrfs_path	*path;
	u64			extent_item_size;
	const char		*errstr;
	u64			physical;
	u64			logical;
	struct btrfs_device	*dev;
};

struct full_stripe_lock {
	struct rb_node		node;
	u64			logical;
	u64			refs;
	struct mutex		mutex;
};

static void scrub_pending_bio_inc(struct scrub_ctx *sctx);
static void scrub_pending_bio_dec(struct scrub_ctx *sctx);
static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
				     struct scrub_block *sblocks_for_recheck);
static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
				struct scrub_block *sblock,
				int retry_failed_mirror);
static void scrub_recheck_block_checksum(struct scrub_block *sblock);
static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
					     struct scrub_block *sblock_good);
static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
					    struct scrub_block *sblock_good,
					    int page_num, int force_write);
static void scrub_write_block_to_dev_replace(struct scrub_block *sblock);
static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
					   int page_num);
static int scrub_checksum_data(struct scrub_block *sblock);
static int scrub_checksum_tree_block(struct scrub_block *sblock);
static int scrub_checksum_super(struct scrub_block *sblock);
static void scrub_block_get(struct scrub_block *sblock);
static void scrub_block_put(struct scrub_block *sblock);
static void scrub_page_get(struct scrub_page *spage);
static void scrub_page_put(struct scrub_page *spage);
static void scrub_parity_get(struct scrub_parity *sparity);
static void scrub_parity_put(struct scrub_parity *sparity);
static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
				    struct scrub_page *spage);
static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
		       u64 physical, struct btrfs_device *dev, u64 flags,
		       u64 gen, int mirror_num, u8 *csum, int force,
		       u64 physical_for_dev_replace);
static void scrub_bio_end_io(struct bio *bio);
static void scrub_bio_end_io_worker(struct btrfs_work *work);
static void scrub_block_complete(struct scrub_block *sblock);
static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
			       u64 extent_logical, u64 extent_len,
			       u64 *extent_physical,
			       struct btrfs_device **extent_dev,
			       int *extent_mirror_num);
static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
				    struct scrub_page *spage);
static void scrub_wr_submit(struct scrub_ctx *sctx);
static void scrub_wr_bio_end_io(struct bio *bio);
static void scrub_wr_bio_end_io_worker(struct btrfs_work *work);
static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
static void scrub_put_ctx(struct scrub_ctx *sctx);

static inline int scrub_is_page_on_raid56(struct scrub_page *page)
{
	return page->recover &&
	       (page->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK);
}

static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
{
	refcount_inc(&sctx->refs);
	atomic_inc(&sctx->bios_in_flight);
}

static void scrub_pending_bio_dec(struct scrub_ctx *sctx)
{
	atomic_dec(&sctx->bios_in_flight);
	wake_up(&sctx->list_wait);
	scrub_put_ctx(sctx);
}

static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
{
	while (atomic_read(&fs_info->scrub_pause_req)) {
		mutex_unlock(&fs_info->scrub_lock);
		wait_event(fs_info->scrub_pause_wait,
			   atomic_read(&fs_info->scrub_pause_req) == 0);
		mutex_lock(&fs_info->scrub_lock);
	}
}

static void scrub_pause_on(struct btrfs_fs_info *fs_info)
{
	atomic_inc(&fs_info->scrubs_paused);
	wake_up(&fs_info->scrub_pause_wait);
}

static void scrub_pause_off(struct btrfs_fs_info *fs_info)
{
	mutex_lock(&fs_info->scrub_lock);
	__scrub_blocked_if_needed(fs_info);
	atomic_dec(&fs_info->scrubs_paused);
	mutex_unlock(&fs_info->scrub_lock);

	wake_up(&fs_info->scrub_pause_wait);
}

static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
{
	scrub_pause_on(fs_info);
	scrub_pause_off(fs_info);
}

/*
 * Insert a new full stripe lock into the full stripe locks tree
 *
 * Return a pointer to the existing or newly inserted full_stripe_lock
 * structure if everything works well.
 * Return ERR_PTR(-ENOMEM) if we failed to allocate memory.
 *
 * NOTE: caller must hold full_stripe_locks_root->lock before calling this
 * function.
 */
static struct full_stripe_lock *insert_full_stripe_lock(
		struct btrfs_full_stripe_locks_tree *locks_root,
		u64 fstripe_logical)
{
	struct rb_node **p;
	struct rb_node *parent = NULL;
	struct full_stripe_lock *entry;
	struct full_stripe_lock *ret;

	lockdep_assert_held(&locks_root->lock);

	p = &locks_root->root.rb_node;
	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct full_stripe_lock, node);
		if (fstripe_logical < entry->logical) {
			p = &(*p)->rb_left;
		} else if (fstripe_logical > entry->logical) {
			p = &(*p)->rb_right;
		} else {
			entry->refs++;
			return entry;
		}
	}

	/* Insert new lock */
	ret = kmalloc(sizeof(*ret), GFP_KERNEL);
	if (!ret)
		return ERR_PTR(-ENOMEM);
	ret->logical = fstripe_logical;
	ret->refs = 1;
	mutex_init(&ret->mutex);

	rb_link_node(&ret->node, parent, p);
	rb_insert_color(&ret->node, &locks_root->root);
	return ret;
}

/*
 * Search for a full stripe lock of a block group
 *
 * Return a pointer to the existing full stripe lock if found.
 * Return NULL if not found.
 */
static struct full_stripe_lock *search_full_stripe_lock(
		struct btrfs_full_stripe_locks_tree *locks_root,
		u64 fstripe_logical)
{
	struct rb_node *node;
	struct full_stripe_lock *entry;

	lockdep_assert_held(&locks_root->lock);

	node = locks_root->root.rb_node;
	while (node) {
		entry = rb_entry(node, struct full_stripe_lock, node);
		if (fstripe_logical < entry->logical)
			node = node->rb_left;
		else if (fstripe_logical > entry->logical)
			node = node->rb_right;
		else
			return entry;
	}
	return NULL;
}

/*
 * Helper to get the full stripe logical address from a normal bytenr.
 *
 * Caller must ensure @cache is a RAID56 block group.
 */
static u64 get_full_stripe_logical(struct btrfs_block_group_cache *cache,
				   u64 bytenr)
{
	u64 ret;

	/*
	 * Due to the chunk item size limit, the full stripe length should
	 * not be larger than U32_MAX. Just a sanity check here.
	 */
	WARN_ON_ONCE(cache->full_stripe_len >= U32_MAX);

	/*
	 * round_down() can only handle powers of 2, while a RAID56 full
	 * stripe length can be 64KiB * n, so we need to round down manually.
	 */
	ret = div64_u64(bytenr - cache->key.objectid, cache->full_stripe_len) *
		cache->full_stripe_len + cache->key.objectid;
	return ret;
}

/*
 * Lock a full stripe to avoid concurrency of recovery and read
 *
 * It's only used for profiles with parities (RAID5/6), for other profiles
 * it does nothing.
 *
 * Return 0 if we locked the full stripe covering @bytenr, with a mutex
 * held. The caller must then call unlock_full_stripe() from the same
 * context.
 *
 * Return <0 on error.
 */
static int lock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr,
			    bool *locked_ret)
{
	struct btrfs_block_group_cache *bg_cache;
	struct btrfs_full_stripe_locks_tree *locks_root;
	struct full_stripe_lock *existing;
	u64 fstripe_start;
	int ret = 0;

	*locked_ret = false;
	bg_cache = btrfs_lookup_block_group(fs_info, bytenr);
	if (!bg_cache) {
		ASSERT(0);
		return -ENOENT;
	}

	/* Profiles not based on parity don't need a full stripe lock */
	if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK))
		goto out;
	locks_root = &bg_cache->full_stripe_locks_root;

	fstripe_start = get_full_stripe_logical(bg_cache, bytenr);

	/* Now insert the full stripe lock */
	mutex_lock(&locks_root->lock);
	existing = insert_full_stripe_lock(locks_root, fstripe_start);
	mutex_unlock(&locks_root->lock);
	if (IS_ERR(existing)) {
		ret = PTR_ERR(existing);
		goto out;
	}
	mutex_lock(&existing->mutex);
	*locked_ret = true;
out:
	btrfs_put_block_group(bg_cache);
	return ret;
}

/*
 * Unlock a full stripe.
 *
 * NOTE: Caller must ensure it's the same context as the corresponding
 * lock_full_stripe().
 *
 * Return 0 if we unlocked the full stripe without problem.
 * Return <0 on error.
 */
static int unlock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr,
			      bool locked)
{
	struct btrfs_block_group_cache *bg_cache;
	struct btrfs_full_stripe_locks_tree *locks_root;
	struct full_stripe_lock *fstripe_lock;
	u64 fstripe_start;
	bool freeit = false;
	int ret = 0;

	/* If we didn't acquire the full stripe lock, no need to continue */
	if (!locked)
		return 0;

	bg_cache = btrfs_lookup_block_group(fs_info, bytenr);
	if (!bg_cache) {
		ASSERT(0);
		return -ENOENT;
	}
	if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK))
		goto out;

	locks_root = &bg_cache->full_stripe_locks_root;
	fstripe_start = get_full_stripe_logical(bg_cache, bytenr);

	mutex_lock(&locks_root->lock);
	fstripe_lock = search_full_stripe_lock(locks_root, fstripe_start);
	/* Unpaired unlock_full_stripe() detected */
	if (!fstripe_lock) {
		WARN_ON(1);
		ret = -ENOENT;
		mutex_unlock(&locks_root->lock);
		goto out;
	}

	if (fstripe_lock->refs == 0) {
		WARN_ON(1);
		btrfs_warn(fs_info, "full stripe lock at %llu refcount underflow",
			fstripe_lock->logical);
	} else {
		fstripe_lock->refs--;
	}

	if (fstripe_lock->refs == 0) {
		rb_erase(&fstripe_lock->node, &locks_root->root);
		freeit = true;
	}
	mutex_unlock(&locks_root->lock);

	mutex_unlock(&fstripe_lock->mutex);
	if (freeit)
		kfree(fstripe_lock);
out:
	btrfs_put_block_group(bg_cache);
	return ret;
}

static void scrub_free_csums(struct scrub_ctx *sctx)
{
	while (!list_empty(&sctx->csum_list)) {
		struct btrfs_ordered_sum *sum;
		sum = list_first_entry(&sctx->csum_list,
				       struct btrfs_ordered_sum, list);
		list_del(&sum->list);
		kfree(sum);
	}
}

static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
{
	int i;

	if (!sctx)
		return;

	/* this can happen when scrub is cancelled */
	if (sctx->curr != -1) {
		struct scrub_bio *sbio = sctx->bios[sctx->curr];

		for (i = 0; i < sbio->page_count; i++) {
			WARN_ON(!sbio->pagev[i]->page);
			scrub_block_put(sbio->pagev[i]->sblock);
		}
		bio_put(sbio->bio);
	}

	for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
		struct scrub_bio *sbio = sctx->bios[i];

		if (!sbio)
			break;
		kfree(sbio);
	}

	kfree(sctx->wr_curr_bio);
	scrub_free_csums(sctx);
	kfree(sctx);
}

static void scrub_put_ctx(struct scrub_ctx *sctx)
{
	if (refcount_dec_and_test(&sctx->refs))
		scrub_free_ctx(sctx);
}

static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
		struct btrfs_fs_info *fs_info, int is_dev_replace)
{
	struct scrub_ctx *sctx;
	int i;

	sctx = kzalloc(sizeof(*sctx), GFP_KERNEL);
	if (!sctx)
		goto nomem;
	refcount_set(&sctx->refs, 1);
	sctx->is_dev_replace = is_dev_replace;
	sctx->pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO;
	sctx->curr = -1;
	sctx->fs_info = fs_info;
	INIT_LIST_HEAD(&sctx->csum_list);
	for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
		struct scrub_bio *sbio;

		sbio = kzalloc(sizeof(*sbio), GFP_KERNEL);
		if (!sbio)
			goto nomem;
		sctx->bios[i] = sbio;

		sbio->index = i;
		sbio->sctx = sctx;
		sbio->page_count = 0;
		btrfs_init_work(&sbio->work, btrfs_scrub_helper,
				scrub_bio_end_io_worker, NULL, NULL);

		if (i != SCRUB_BIOS_PER_SCTX - 1)
			sctx->bios[i]->next_free = i + 1;
		else
			sctx->bios[i]->next_free = -1;
	}
	sctx->first_free = 0;
	atomic_set(&sctx->bios_in_flight, 0);
	atomic_set(&sctx->workers_pending, 0);
	atomic_set(&sctx->cancel_req, 0);
	sctx->csum_size = btrfs_super_csum_size(fs_info->super_copy);

	spin_lock_init(&sctx->list_lock);
	spin_lock_init(&sctx->stat_lock);
	init_waitqueue_head(&sctx->list_wait);

	WARN_ON(sctx->wr_curr_bio != NULL);
	mutex_init(&sctx->wr_lock);
	sctx->wr_curr_bio = NULL;
	if (is_dev_replace) {
		WARN_ON(!fs_info->dev_replace.tgtdev);
		sctx->pages_per_wr_bio = SCRUB_PAGES_PER_WR_BIO;
		sctx->wr_tgtdev = fs_info->dev_replace.tgtdev;
		sctx->flush_all_writes = false;
	}

	return sctx;

nomem:
	scrub_free_ctx(sctx);
	return ERR_PTR(-ENOMEM);
}

static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
				     void *warn_ctx)
{
	u64 isize;
	u32 nlink;
	int ret;
	int i;
	unsigned nofs_flag;
	struct extent_buffer *eb;
	struct btrfs_inode_item *inode_item;
	struct scrub_warning *swarn = warn_ctx;
	struct btrfs_fs_info *fs_info = swarn->dev->fs_info;
	struct inode_fs_paths *ipath = NULL;
	struct btrfs_root *local_root;
	struct btrfs_key root_key;
	struct btrfs_key key;

	root_key.objectid = root;
	root_key.type = BTRFS_ROOT_ITEM_KEY;
	root_key.offset = (u64)-1;
	local_root = btrfs_read_fs_root_no_name(fs_info, &root_key);
	if (IS_ERR(local_root)) {
		ret = PTR_ERR(local_root);
		goto err;
	}

	/*
	 * Look up the INODE_ITEM of this inode to get its size and link
	 * count for the warning message.
	 */
	key.objectid = inum;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, local_root, &key, swarn->path, 0, 0);
	if (ret) {
		btrfs_release_path(swarn->path);
		goto err;
	}

	eb = swarn->path->nodes[0];
	inode_item = btrfs_item_ptr(eb, swarn->path->slots[0],
				    struct btrfs_inode_item);
	isize = btrfs_inode_size(eb, inode_item);
	nlink = btrfs_inode_nlink(eb, inode_item);
	btrfs_release_path(swarn->path);

	/*
	 * init_ipath() allocates memory, so stay in NOFS context here to
	 * make sure the allocation cannot recurse into the filesystem.
	 */
	nofs_flag = memalloc_nofs_save();
	ipath = init_ipath(4096, local_root, swarn->path);
	memalloc_nofs_restore(nofs_flag);
	if (IS_ERR(ipath)) {
		ret = PTR_ERR(ipath);
		ipath = NULL;
		goto err;
	}
	ret = paths_from_inode(inum, ipath);

	if (ret < 0)
		goto err;

	/*
	 * If the fspath buffer was too small to hold all paths, we simply
	 * print the ones that did fit.
	 */
	for (i = 0; i < ipath->fspath->elem_cnt; ++i)
		btrfs_warn_in_rcu(fs_info,
"%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu, length %llu, links %u (path: %s)",
				  swarn->errstr, swarn->logical,
				  rcu_str_deref(swarn->dev->name),
				  swarn->physical,
				  root, inum, offset,
				  min(isize - offset, (u64)PAGE_SIZE), nlink,
				  (char *)(unsigned long)ipath->fspath->val[i]);

	free_ipath(ipath);
	return 0;

err:
	btrfs_warn_in_rcu(fs_info,
			  "%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu: path resolving failed with ret=%d",
			  swarn->errstr, swarn->logical,
			  rcu_str_deref(swarn->dev->name),
			  swarn->physical,
			  root, inum, offset, ret);

	free_ipath(ipath);
	return 0;
}

static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
{
	struct btrfs_device *dev;
	struct btrfs_fs_info *fs_info;
	struct btrfs_path *path;
	struct btrfs_key found_key;
	struct extent_buffer *eb;
	struct btrfs_extent_item *ei;
	struct scrub_warning swarn;
	unsigned long ptr = 0;
	u64 extent_item_pos;
	u64 flags = 0;
	u64 ref_root;
	u32 item_size;
	u8 ref_level = 0;
	int ret;

	WARN_ON(sblock->page_count < 1);
	dev = sblock->pagev[0]->dev;
	fs_info = sblock->sctx->fs_info;

	path = btrfs_alloc_path();
	if (!path)
		return;

	swarn.physical = sblock->pagev[0]->physical;
	swarn.logical = sblock->pagev[0]->logical;
	swarn.errstr = errstr;
	swarn.dev = NULL;

	ret = extent_from_logical(fs_info, swarn.logical, path, &found_key,
				  &flags);
	if (ret < 0)
		goto out;

	extent_item_pos = swarn.logical - found_key.objectid;
	swarn.extent_item_size = found_key.offset;

	eb = path->nodes[0];
	ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
	item_size = btrfs_item_size_nr(eb, path->slots[0]);

	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		do {
			ret = tree_backref_for_extent(&ptr, eb, &found_key, ei,
						      item_size, &ref_root,
						      &ref_level);
			btrfs_warn_in_rcu(fs_info,
"%s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu",
				errstr, swarn.logical,
				rcu_str_deref(dev->name),
				swarn.physical,
				ref_level ? "node" : "leaf",
				ret < 0 ? -1 : ref_level,
				ret < 0 ? -1 : ref_root);
		} while (ret != 1);
		btrfs_release_path(path);
	} else {
		btrfs_release_path(path);
		swarn.path = path;
		swarn.dev = dev;
		iterate_extent_inodes(fs_info, found_key.objectid,
				      extent_item_pos, 1,
				      scrub_print_warning_inode, &swarn, false);
	}

out:
	btrfs_free_path(path);
}

static inline void scrub_get_recover(struct scrub_recover *recover)
{
	refcount_inc(&recover->refs);
}

static inline void scrub_put_recover(struct btrfs_fs_info *fs_info,
				     struct scrub_recover *recover)
{
	if (refcount_dec_and_test(&recover->refs)) {
		btrfs_bio_counter_dec(fs_info);
		btrfs_put_bbio(recover->bbio);
		kfree(recover);
	}
}

/*
 * scrub_handle_errored_block gets called when either verification of the
 * pages failed or the bio failed to read, e.g. with EIO. In the latter
 * case, this function handles all pages in the bio, even though only one
 * of them may be bad.
 * The goal of this function is to repair the errored block by using the
 * contents of one of the mirrors.
 */
820static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
821{
822 struct scrub_ctx *sctx = sblock_to_check->sctx;
823 struct btrfs_device *dev;
824 struct btrfs_fs_info *fs_info;
825 u64 logical;
826 unsigned int failed_mirror_index;
827 unsigned int is_metadata;
828 unsigned int have_csum;
829 struct scrub_block *sblocks_for_recheck;
830 struct scrub_block *sblock_bad;
831 int ret;
832 int mirror_index;
833 int page_num;
834 int success;
835 bool full_stripe_locked;
836 unsigned int nofs_flag;
837 static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
838 DEFAULT_RATELIMIT_BURST);
839
840 BUG_ON(sblock_to_check->page_count < 1);
841 fs_info = sctx->fs_info;
842 if (sblock_to_check->pagev[0]->flags & BTRFS_EXTENT_FLAG_SUPER) {
843
844
845
846
847
848 spin_lock(&sctx->stat_lock);
849 ++sctx->stat.super_errors;
850 spin_unlock(&sctx->stat_lock);
851 return 0;
852 }
853 logical = sblock_to_check->pagev[0]->logical;
854 BUG_ON(sblock_to_check->pagev[0]->mirror_num < 1);
855 failed_mirror_index = sblock_to_check->pagev[0]->mirror_num - 1;
856 is_metadata = !(sblock_to_check->pagev[0]->flags &
857 BTRFS_EXTENT_FLAG_DATA);
858 have_csum = sblock_to_check->pagev[0]->have_csum;
859 dev = sblock_to_check->pagev[0]->dev;
860
861
862
863
864
865
866
867
868
869
870 nofs_flag = memalloc_nofs_save();
871
872
873
874
875
876
877
878 ret = lock_full_stripe(fs_info, logical, &full_stripe_locked);
879 if (ret < 0) {
880 memalloc_nofs_restore(nofs_flag);
881 spin_lock(&sctx->stat_lock);
882 if (ret == -ENOMEM)
883 sctx->stat.malloc_errors++;
884 sctx->stat.read_errors++;
885 sctx->stat.uncorrectable_errors++;
886 spin_unlock(&sctx->stat_lock);
887 return ret;
888 }
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919 sblocks_for_recheck = kcalloc(BTRFS_MAX_MIRRORS,
920 sizeof(*sblocks_for_recheck), GFP_KERNEL);
921 if (!sblocks_for_recheck) {
922 spin_lock(&sctx->stat_lock);
923 sctx->stat.malloc_errors++;
924 sctx->stat.read_errors++;
925 sctx->stat.uncorrectable_errors++;
926 spin_unlock(&sctx->stat_lock);
927 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
928 goto out;
929 }
930
931
932 ret = scrub_setup_recheck_block(sblock_to_check, sblocks_for_recheck);
933 if (ret) {
934 spin_lock(&sctx->stat_lock);
935 sctx->stat.read_errors++;
936 sctx->stat.uncorrectable_errors++;
937 spin_unlock(&sctx->stat_lock);
938 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
939 goto out;
940 }
941 BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS);
942 sblock_bad = sblocks_for_recheck + failed_mirror_index;
943
944
945 scrub_recheck_block(fs_info, sblock_bad, 1);
946
947 if (!sblock_bad->header_error && !sblock_bad->checksum_error &&
948 sblock_bad->no_io_error_seen) {
949
950
951
952
953
954
955
956
957 spin_lock(&sctx->stat_lock);
958 sctx->stat.unverified_errors++;
959 sblock_to_check->data_corrected = 1;
960 spin_unlock(&sctx->stat_lock);
961
962 if (sctx->is_dev_replace)
963 scrub_write_block_to_dev_replace(sblock_bad);
964 goto out;
965 }
966
967 if (!sblock_bad->no_io_error_seen) {
968 spin_lock(&sctx->stat_lock);
969 sctx->stat.read_errors++;
970 spin_unlock(&sctx->stat_lock);
971 if (__ratelimit(&_rs))
972 scrub_print_warning("i/o error", sblock_to_check);
973 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
974 } else if (sblock_bad->checksum_error) {
975 spin_lock(&sctx->stat_lock);
976 sctx->stat.csum_errors++;
977 spin_unlock(&sctx->stat_lock);
978 if (__ratelimit(&_rs))
979 scrub_print_warning("checksum error", sblock_to_check);
980 btrfs_dev_stat_inc_and_print(dev,
981 BTRFS_DEV_STAT_CORRUPTION_ERRS);
982 } else if (sblock_bad->header_error) {
983 spin_lock(&sctx->stat_lock);
984 sctx->stat.verify_errors++;
985 spin_unlock(&sctx->stat_lock);
986 if (__ratelimit(&_rs))
987 scrub_print_warning("checksum/header error",
988 sblock_to_check);
989 if (sblock_bad->generation_error)
990 btrfs_dev_stat_inc_and_print(dev,
991 BTRFS_DEV_STAT_GENERATION_ERRS);
992 else
993 btrfs_dev_stat_inc_and_print(dev,
994 BTRFS_DEV_STAT_CORRUPTION_ERRS);
995 }
996
997 if (sctx->readonly) {
998 ASSERT(!sctx->is_dev_replace);
999 goto out;
1000 }
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017 for (mirror_index = 0; ;mirror_index++) {
1018 struct scrub_block *sblock_other;
1019
1020 if (mirror_index == failed_mirror_index)
1021 continue;
1022
1023
1024 if (!scrub_is_page_on_raid56(sblock_bad->pagev[0])) {
1025 if (mirror_index >= BTRFS_MAX_MIRRORS)
1026 break;
1027 if (!sblocks_for_recheck[mirror_index].page_count)
1028 break;
1029
1030 sblock_other = sblocks_for_recheck + mirror_index;
1031 } else {
1032 struct scrub_recover *r = sblock_bad->pagev[0]->recover;
1033 int max_allowed = r->bbio->num_stripes -
1034 r->bbio->num_tgtdevs;
1035
1036 if (mirror_index >= max_allowed)
1037 break;
1038 if (!sblocks_for_recheck[1].page_count)
1039 break;
1040
1041 ASSERT(failed_mirror_index == 0);
1042 sblock_other = sblocks_for_recheck + 1;
1043 sblock_other->pagev[0]->mirror_num = 1 + mirror_index;
1044 }
1045
1046
1047 scrub_recheck_block(fs_info, sblock_other, 0);
1048
1049 if (!sblock_other->header_error &&
1050 !sblock_other->checksum_error &&
1051 sblock_other->no_io_error_seen) {
1052 if (sctx->is_dev_replace) {
1053 scrub_write_block_to_dev_replace(sblock_other);
1054 goto corrected_error;
1055 } else {
1056 ret = scrub_repair_block_from_good_copy(
1057 sblock_bad, sblock_other);
1058 if (!ret)
1059 goto corrected_error;
1060 }
1061 }
1062 }
1063
1064 if (sblock_bad->no_io_error_seen && !sctx->is_dev_replace)
1065 goto did_not_correct_error;
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091 success = 1;
1092 for (page_num = 0; page_num < sblock_bad->page_count;
1093 page_num++) {
1094 struct scrub_page *page_bad = sblock_bad->pagev[page_num];
1095 struct scrub_block *sblock_other = NULL;
1096
1097
1098 if (!page_bad->io_error && !sctx->is_dev_replace)
1099 continue;
1100
1101 if (scrub_is_page_on_raid56(sblock_bad->pagev[0])) {
1102
1103
1104
1105
1106
1107
1108
1109 sblock_other = NULL;
1110 } else if (page_bad->io_error) {
1111
1112 for (mirror_index = 0;
1113 mirror_index < BTRFS_MAX_MIRRORS &&
1114 sblocks_for_recheck[mirror_index].page_count > 0;
1115 mirror_index++) {
1116 if (!sblocks_for_recheck[mirror_index].
1117 pagev[page_num]->io_error) {
1118 sblock_other = sblocks_for_recheck +
1119 mirror_index;
1120 break;
1121 }
1122 }
1123 if (!sblock_other)
1124 success = 0;
1125 }
1126
1127 if (sctx->is_dev_replace) {
1128
1129
1130
1131
1132
1133
1134
1135 if (!sblock_other)
1136 sblock_other = sblock_bad;
1137
1138 if (scrub_write_page_to_dev_replace(sblock_other,
1139 page_num) != 0) {
1140 atomic64_inc(
1141 &fs_info->dev_replace.num_write_errors);
1142 success = 0;
1143 }
1144 } else if (sblock_other) {
1145 ret = scrub_repair_page_from_good_copy(sblock_bad,
1146 sblock_other,
1147 page_num, 0);
1148 if (0 == ret)
1149 page_bad->io_error = 0;
1150 else
1151 success = 0;
1152 }
1153 }
1154
1155 if (success && !sctx->is_dev_replace) {
1156 if (is_metadata || have_csum) {
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166 scrub_recheck_block(fs_info, sblock_bad, 1);
1167 if (!sblock_bad->header_error &&
1168 !sblock_bad->checksum_error &&
1169 sblock_bad->no_io_error_seen)
1170 goto corrected_error;
1171 else
1172 goto did_not_correct_error;
1173 } else {
1174corrected_error:
1175 spin_lock(&sctx->stat_lock);
1176 sctx->stat.corrected_errors++;
1177 sblock_to_check->data_corrected = 1;
1178 spin_unlock(&sctx->stat_lock);
1179 btrfs_err_rl_in_rcu(fs_info,
1180 "fixed up error at logical %llu on dev %s",
1181 logical, rcu_str_deref(dev->name));
1182 }
1183 } else {
1184did_not_correct_error:
1185 spin_lock(&sctx->stat_lock);
1186 sctx->stat.uncorrectable_errors++;
1187 spin_unlock(&sctx->stat_lock);
1188 btrfs_err_rl_in_rcu(fs_info,
1189 "unable to fixup (regular) error at logical %llu on dev %s",
1190 logical, rcu_str_deref(dev->name));
1191 }
1192
1193out:
1194 if (sblocks_for_recheck) {
1195 for (mirror_index = 0; mirror_index < BTRFS_MAX_MIRRORS;
1196 mirror_index++) {
1197 struct scrub_block *sblock = sblocks_for_recheck +
1198 mirror_index;
1199 struct scrub_recover *recover;
1200 int page_index;
1201
1202 for (page_index = 0; page_index < sblock->page_count;
1203 page_index++) {
1204 sblock->pagev[page_index]->sblock = NULL;
1205 recover = sblock->pagev[page_index]->recover;
1206 if (recover) {
1207 scrub_put_recover(fs_info, recover);
1208 sblock->pagev[page_index]->recover =
1209 NULL;
1210 }
1211 scrub_page_put(sblock->pagev[page_index]);
1212 }
1213 }
1214 kfree(sblocks_for_recheck);
1215 }
1216
1217 ret = unlock_full_stripe(fs_info, logical, full_stripe_locked);
1218 memalloc_nofs_restore(nofs_flag);
1219 if (ret < 0)
1220 return ret;
1221 return 0;
1222}
1223
1224static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio)
1225{
1226 if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
1227 return 2;
1228 else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6)
1229 return 3;
1230 else
1231 return (int)bbio->num_stripes;
1232}
1233
1234static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type,
1235 u64 *raid_map,
1236 u64 mapped_length,
1237 int nstripes, int mirror,
1238 int *stripe_index,
1239 u64 *stripe_offset)
1240{
1241 int i;
1242
1243 if (map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
1244
1245 for (i = 0; i < nstripes; i++) {
1246 if (raid_map[i] == RAID6_Q_STRIPE ||
1247 raid_map[i] == RAID5_P_STRIPE)
1248 continue;
1249
1250 if (logical >= raid_map[i] &&
1251 logical < raid_map[i] + mapped_length)
1252 break;
1253 }
1254
1255 *stripe_index = i;
1256 *stripe_offset = logical - raid_map[i];
1257 } else {
1258
1259 *stripe_index = mirror;
1260 *stripe_offset = 0;
1261 }
1262}
1263
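/*
 * Build one scrub_block per mirror for the range covered by the errored
 * block: map each PAGE_SIZE chunk of the logical range, allocate pages for
 * every mirror and record the physical location and device, so that the
 * mirrors can be re-read and compared later.
 */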
1264static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
1265 struct scrub_block *sblocks_for_recheck)
1266{
1267 struct scrub_ctx *sctx = original_sblock->sctx;
1268 struct btrfs_fs_info *fs_info = sctx->fs_info;
1269 u64 length = original_sblock->page_count * PAGE_SIZE;
1270 u64 logical = original_sblock->pagev[0]->logical;
1271 u64 generation = original_sblock->pagev[0]->generation;
1272 u64 flags = original_sblock->pagev[0]->flags;
1273 u64 have_csum = original_sblock->pagev[0]->have_csum;
1274 struct scrub_recover *recover;
1275 struct btrfs_bio *bbio;
1276 u64 sublen;
1277 u64 mapped_length;
1278 u64 stripe_offset;
1279 int stripe_index;
1280 int page_index = 0;
1281 int mirror_index;
1282 int nmirrors;
1283 int ret;
1284
1285
1286
1287
1288
1289
1290
1291 while (length > 0) {
1292 sublen = min_t(u64, length, PAGE_SIZE);
1293 mapped_length = sublen;
1294 bbio = NULL;
1295
1296
1297
1298
1299
1300 btrfs_bio_counter_inc_blocked(fs_info);
1301 ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
1302 logical, &mapped_length, &bbio);
1303 if (ret || !bbio || mapped_length < sublen) {
1304 btrfs_put_bbio(bbio);
1305 btrfs_bio_counter_dec(fs_info);
1306 return -EIO;
1307 }
1308
1309 recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS);
1310 if (!recover) {
1311 btrfs_put_bbio(bbio);
1312 btrfs_bio_counter_dec(fs_info);
1313 return -ENOMEM;
1314 }
1315
1316 refcount_set(&recover->refs, 1);
1317 recover->bbio = bbio;
1318 recover->map_length = mapped_length;
1319
1320 BUG_ON(page_index >= SCRUB_MAX_PAGES_PER_BLOCK);
1321
1322 nmirrors = min(scrub_nr_raid_mirrors(bbio), BTRFS_MAX_MIRRORS);
1323
1324 for (mirror_index = 0; mirror_index < nmirrors;
1325 mirror_index++) {
1326 struct scrub_block *sblock;
1327 struct scrub_page *page;
1328
1329 sblock = sblocks_for_recheck + mirror_index;
1330 sblock->sctx = sctx;
1331
1332 page = kzalloc(sizeof(*page), GFP_NOFS);
1333 if (!page) {
1334leave_nomem:
1335 spin_lock(&sctx->stat_lock);
1336 sctx->stat.malloc_errors++;
1337 spin_unlock(&sctx->stat_lock);
1338 scrub_put_recover(fs_info, recover);
1339 return -ENOMEM;
1340 }
1341 scrub_page_get(page);
1342 sblock->pagev[page_index] = page;
1343 page->sblock = sblock;
1344 page->flags = flags;
1345 page->generation = generation;
1346 page->logical = logical;
1347 page->have_csum = have_csum;
1348 if (have_csum)
1349 memcpy(page->csum,
1350 original_sblock->pagev[0]->csum,
1351 sctx->csum_size);
1352
1353 scrub_stripe_index_and_offset(logical,
1354 bbio->map_type,
1355 bbio->raid_map,
1356 mapped_length,
1357 bbio->num_stripes -
1358 bbio->num_tgtdevs,
1359 mirror_index,
1360 &stripe_index,
1361 &stripe_offset);
1362 page->physical = bbio->stripes[stripe_index].physical +
1363 stripe_offset;
1364 page->dev = bbio->stripes[stripe_index].dev;
1365
1366 BUG_ON(page_index >= original_sblock->page_count);
1367 page->physical_for_dev_replace =
1368 original_sblock->pagev[page_index]->
1369 physical_for_dev_replace;
1370
1371 page->mirror_num = mirror_index + 1;
1372 sblock->page_count++;
1373 page->page = alloc_page(GFP_NOFS);
1374 if (!page->page)
1375 goto leave_nomem;
1376
1377 scrub_get_recover(recover);
1378 page->recover = recover;
1379 }
1380 scrub_put_recover(fs_info, recover);
1381 length -= sublen;
1382 logical += sublen;
1383 page_index++;
1384 }
1385
1386 return 0;
1387}
1388
1389static void scrub_bio_wait_endio(struct bio *bio)
1390{
1391 complete(bio->bi_private);
1392}
1393
1394static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
1395 struct bio *bio,
1396 struct scrub_page *page)
1397{
1398 DECLARE_COMPLETION_ONSTACK(done);
1399 int ret;
1400 int mirror_num;
1401
1402 bio->bi_iter.bi_sector = page->logical >> 9;
1403 bio->bi_private = &done;
1404 bio->bi_end_io = scrub_bio_wait_endio;
1405
1406 mirror_num = page->sblock->pagev[0]->mirror_num;
1407 ret = raid56_parity_recover(fs_info, bio, page->recover->bbio,
1408 page->recover->map_length,
1409 mirror_num, 0);
1410 if (ret)
1411 return ret;
1412
1413 wait_for_completion_io(&done);
1414 return blk_status_to_errno(bio->bi_status);
1415}
1416
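/*
 * Re-read a whole block through the RAID56 recovery path: build a single
 * bio covering all pages of the block and let raid56_parity_recover()
 * rebuild the data before the checksums are re-verified.
 */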
1417static void scrub_recheck_block_on_raid56(struct btrfs_fs_info *fs_info,
1418 struct scrub_block *sblock)
1419{
1420 struct scrub_page *first_page = sblock->pagev[0];
1421 struct bio *bio;
1422 int page_num;
1423
1424
1425 ASSERT(first_page->dev);
1426 if (!first_page->dev->bdev)
1427 goto out;
1428
1429 bio = btrfs_io_bio_alloc(BIO_MAX_PAGES);
1430 bio_set_dev(bio, first_page->dev->bdev);
1431
1432 for (page_num = 0; page_num < sblock->page_count; page_num++) {
1433 struct scrub_page *page = sblock->pagev[page_num];
1434
1435 WARN_ON(!page->page);
1436 bio_add_page(bio, page->page, PAGE_SIZE, 0);
1437 }
1438
1439 if (scrub_submit_raid56_bio_wait(fs_info, bio, first_page)) {
1440 bio_put(bio);
1441 goto out;
1442 }
1443
1444 bio_put(bio);
1445
1446 scrub_recheck_block_checksum(sblock);
1447
1448 return;
1449out:
1450 for (page_num = 0; page_num < sblock->page_count; page_num++)
1451 sblock->pagev[page_num]->io_error = 1;
1452
1453 sblock->no_io_error_seen = 0;
1454}
1455
1456
1457
1458
1459
1460
1461
1462
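/*
 * Re-read all pages of the given mirror and check for I/O errors, header
 * errors and checksum errors. Pages that fail to read are flagged with
 * io_error so that later repair can pick the good pages from other mirrors.
 */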
1463static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
1464 struct scrub_block *sblock,
1465 int retry_failed_mirror)
1466{
1467 int page_num;
1468
1469 sblock->no_io_error_seen = 1;
1470
1471
1472 if (!retry_failed_mirror && scrub_is_page_on_raid56(sblock->pagev[0]))
1473 return scrub_recheck_block_on_raid56(fs_info, sblock);
1474
1475 for (page_num = 0; page_num < sblock->page_count; page_num++) {
1476 struct bio *bio;
1477 struct scrub_page *page = sblock->pagev[page_num];
1478
1479 if (page->dev->bdev == NULL) {
1480 page->io_error = 1;
1481 sblock->no_io_error_seen = 0;
1482 continue;
1483 }
1484
1485 WARN_ON(!page->page);
1486 bio = btrfs_io_bio_alloc(1);
1487 bio_set_dev(bio, page->dev->bdev);
1488
1489 bio_add_page(bio, page->page, PAGE_SIZE, 0);
1490 bio->bi_iter.bi_sector = page->physical >> 9;
1491 bio->bi_opf = REQ_OP_READ;
1492
1493 if (btrfsic_submit_bio_wait(bio)) {
1494 page->io_error = 1;
1495 sblock->no_io_error_seen = 0;
1496 }
1497
1498 bio_put(bio);
1499 }
1500
1501 if (sblock->no_io_error_seen)
1502 scrub_recheck_block_checksum(sblock);
1503}
1504
1505static inline int scrub_check_fsid(u8 fsid[],
1506 struct scrub_page *spage)
1507{
1508 struct btrfs_fs_devices *fs_devices = spage->dev->fs_devices;
1509 int ret;
1510
1511 ret = memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
1512 return !ret;
1513}
1514
1515static void scrub_recheck_block_checksum(struct scrub_block *sblock)
1516{
1517 sblock->header_error = 0;
1518 sblock->checksum_error = 0;
1519 sblock->generation_error = 0;
1520
1521 if (sblock->pagev[0]->flags & BTRFS_EXTENT_FLAG_DATA)
1522 scrub_checksum_data(sblock);
1523 else
1524 scrub_checksum_tree_block(sblock);
1525}
1526
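/* Copy every page of a good mirror over the corresponding bad page */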
1527static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
1528 struct scrub_block *sblock_good)
1529{
1530 int page_num;
1531 int ret = 0;
1532
1533 for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
1534 int ret_sub;
1535
1536 ret_sub = scrub_repair_page_from_good_copy(sblock_bad,
1537 sblock_good,
1538 page_num, 1);
1539 if (ret_sub)
1540 ret = ret_sub;
1541 }
1542
1543 return ret;
1544}
1545
1546static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
1547 struct scrub_block *sblock_good,
1548 int page_num, int force_write)
1549{
1550 struct scrub_page *page_bad = sblock_bad->pagev[page_num];
1551 struct scrub_page *page_good = sblock_good->pagev[page_num];
1552 struct btrfs_fs_info *fs_info = sblock_bad->sctx->fs_info;
1553
1554 BUG_ON(page_bad->page == NULL);
1555 BUG_ON(page_good->page == NULL);
1556 if (force_write || sblock_bad->header_error ||
1557 sblock_bad->checksum_error || page_bad->io_error) {
1558 struct bio *bio;
1559 int ret;
1560
1561 if (!page_bad->dev->bdev) {
1562 btrfs_warn_rl(fs_info,
1563 "scrub_repair_page_from_good_copy(bdev == NULL) is unexpected");
1564 return -EIO;
1565 }
1566
1567 bio = btrfs_io_bio_alloc(1);
1568 bio_set_dev(bio, page_bad->dev->bdev);
1569 bio->bi_iter.bi_sector = page_bad->physical >> 9;
1570 bio->bi_opf = REQ_OP_WRITE;
1571
1572 ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
1573 if (PAGE_SIZE != ret) {
1574 bio_put(bio);
1575 return -EIO;
1576 }
1577
1578 if (btrfsic_submit_bio_wait(bio)) {
1579 btrfs_dev_stat_inc_and_print(page_bad->dev,
1580 BTRFS_DEV_STAT_WRITE_ERRS);
1581 atomic64_inc(&fs_info->dev_replace.num_write_errors);
1582 bio_put(bio);
1583 return -EIO;
1584 }
1585 bio_put(bio);
1586 }
1587
1588 return 0;
1589}
1590
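/*
 * Queue all pages of the block for writing to the dev-replace target;
 * blocks attached to a scrub_parity context are skipped here.
 */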
1591static void scrub_write_block_to_dev_replace(struct scrub_block *sblock)
1592{
1593 struct btrfs_fs_info *fs_info = sblock->sctx->fs_info;
1594 int page_num;
1595
1596
1597
1598
1599
1600 if (sblock->sparity)
1601 return;
1602
1603 for (page_num = 0; page_num < sblock->page_count; page_num++) {
1604 int ret;
1605
1606 ret = scrub_write_page_to_dev_replace(sblock, page_num);
1607 if (ret)
1608 atomic64_inc(&fs_info->dev_replace.num_write_errors);
1609 }
1610}
1611
1612static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
1613 int page_num)
1614{
1615 struct scrub_page *spage = sblock->pagev[page_num];
1616
1617 BUG_ON(spage->page == NULL);
1618 if (spage->io_error) {
1619 void *mapped_buffer = kmap_atomic(spage->page);
1620
1621 clear_page(mapped_buffer);
1622 flush_dcache_page(spage->page);
1623 kunmap_atomic(mapped_buffer);
1624 }
1625 return scrub_add_page_to_wr_bio(sblock->sctx, spage);
1626}
1627
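/*
 * Add one page to the write bio for the dev-replace target. A new bio is
 * started when none is open, and the current bio is submitted whenever the
 * next page is not physically/logically contiguous or the bio is full.
 */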
1628static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
1629 struct scrub_page *spage)
1630{
1631 struct scrub_bio *sbio;
1632 int ret;
1633
1634 mutex_lock(&sctx->wr_lock);
1635again:
1636 if (!sctx->wr_curr_bio) {
1637 sctx->wr_curr_bio = kzalloc(sizeof(*sctx->wr_curr_bio),
1638 GFP_KERNEL);
1639 if (!sctx->wr_curr_bio) {
1640 mutex_unlock(&sctx->wr_lock);
1641 return -ENOMEM;
1642 }
1643 sctx->wr_curr_bio->sctx = sctx;
1644 sctx->wr_curr_bio->page_count = 0;
1645 }
1646 sbio = sctx->wr_curr_bio;
1647 if (sbio->page_count == 0) {
1648 struct bio *bio;
1649
1650 sbio->physical = spage->physical_for_dev_replace;
1651 sbio->logical = spage->logical;
1652 sbio->dev = sctx->wr_tgtdev;
1653 bio = sbio->bio;
1654 if (!bio) {
1655 bio = btrfs_io_bio_alloc(sctx->pages_per_wr_bio);
1656 sbio->bio = bio;
1657 }
1658
1659 bio->bi_private = sbio;
1660 bio->bi_end_io = scrub_wr_bio_end_io;
1661 bio_set_dev(bio, sbio->dev->bdev);
1662 bio->bi_iter.bi_sector = sbio->physical >> 9;
1663 bio->bi_opf = REQ_OP_WRITE;
1664 sbio->status = 0;
1665 } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
1666 spage->physical_for_dev_replace ||
1667 sbio->logical + sbio->page_count * PAGE_SIZE !=
1668 spage->logical) {
1669 scrub_wr_submit(sctx);
1670 goto again;
1671 }
1672
1673 ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
1674 if (ret != PAGE_SIZE) {
1675 if (sbio->page_count < 1) {
1676 bio_put(sbio->bio);
1677 sbio->bio = NULL;
1678 mutex_unlock(&sctx->wr_lock);
1679 return -EIO;
1680 }
1681 scrub_wr_submit(sctx);
1682 goto again;
1683 }
1684
1685 sbio->pagev[sbio->page_count] = spage;
1686 scrub_page_get(spage);
1687 sbio->page_count++;
1688 if (sbio->page_count == sctx->pages_per_wr_bio)
1689 scrub_wr_submit(sctx);
1690 mutex_unlock(&sctx->wr_lock);
1691
1692 return 0;
1693}
1694
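/* Submit the currently open write bio to the dev-replace target device */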
1695static void scrub_wr_submit(struct scrub_ctx *sctx)
1696{
1697 struct scrub_bio *sbio;
1698
1699 if (!sctx->wr_curr_bio)
1700 return;
1701
1702 sbio = sctx->wr_curr_bio;
1703 sctx->wr_curr_bio = NULL;
1704 WARN_ON(!sbio->bio->bi_disk);
1705 scrub_pending_bio_inc(sctx);
1706
1707
1708
1709
1710 btrfsic_submit_bio(sbio->bio);
1711}
1712
1713static void scrub_wr_bio_end_io(struct bio *bio)
1714{
1715 struct scrub_bio *sbio = bio->bi_private;
1716 struct btrfs_fs_info *fs_info = sbio->dev->fs_info;
1717
1718 sbio->status = bio->bi_status;
1719 sbio->bio = bio;
1720
1721 btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper,
1722 scrub_wr_bio_end_io_worker, NULL, NULL);
1723 btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work);
1724}
1725
1726static void scrub_wr_bio_end_io_worker(struct btrfs_work *work)
1727{
1728 struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
1729 struct scrub_ctx *sctx = sbio->sctx;
1730 int i;
1731
1732 WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO);
1733 if (sbio->status) {
1734 struct btrfs_dev_replace *dev_replace =
1735 &sbio->sctx->fs_info->dev_replace;
1736
1737 for (i = 0; i < sbio->page_count; i++) {
1738 struct scrub_page *spage = sbio->pagev[i];
1739
1740 spage->io_error = 1;
1741 atomic64_inc(&dev_replace->num_write_errors);
1742 }
1743 }
1744
1745 for (i = 0; i < sbio->page_count; i++)
1746 scrub_page_put(sbio->pagev[i]);
1747
1748 bio_put(sbio->bio);
1749 kfree(sbio);
1750 scrub_pending_bio_dec(sctx);
1751}
1752
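/*
 * Verify the block that was just read: dispatch to the data, tree block or
 * super block checksum helper based on the extent flags, and kick off the
 * error handling if verification fails.
 */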
1753static int scrub_checksum(struct scrub_block *sblock)
1754{
1755 u64 flags;
1756 int ret;
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766 sblock->header_error = 0;
1767 sblock->generation_error = 0;
1768 sblock->checksum_error = 0;
1769
1770 WARN_ON(sblock->page_count < 1);
1771 flags = sblock->pagev[0]->flags;
1772 ret = 0;
1773 if (flags & BTRFS_EXTENT_FLAG_DATA)
1774 ret = scrub_checksum_data(sblock);
1775 else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
1776 ret = scrub_checksum_tree_block(sblock);
1777 else if (flags & BTRFS_EXTENT_FLAG_SUPER)
1778 (void)scrub_checksum_super(sblock);
1779 else
1780 WARN_ON(1);
1781 if (ret)
1782 scrub_handle_errored_block(sblock);
1783
1784 return ret;
1785}
1786
1787static int scrub_checksum_data(struct scrub_block *sblock)
1788{
1789 struct scrub_ctx *sctx = sblock->sctx;
1790 u8 csum[BTRFS_CSUM_SIZE];
1791 u8 *on_disk_csum;
1792 struct page *page;
1793 void *buffer;
1794 u32 crc = ~(u32)0;
1795 u64 len;
1796 int index;
1797
1798 BUG_ON(sblock->page_count < 1);
1799 if (!sblock->pagev[0]->have_csum)
1800 return 0;
1801
1802 on_disk_csum = sblock->pagev[0]->csum;
1803 page = sblock->pagev[0]->page;
1804 buffer = kmap_atomic(page);
1805
1806 len = sctx->fs_info->sectorsize;
1807 index = 0;
1808 for (;;) {
1809 u64 l = min_t(u64, len, PAGE_SIZE);
1810
1811 crc = btrfs_csum_data(buffer, crc, l);
1812 kunmap_atomic(buffer);
1813 len -= l;
1814 if (len == 0)
1815 break;
1816 index++;
1817 BUG_ON(index >= sblock->page_count);
1818 BUG_ON(!sblock->pagev[index]->page);
1819 page = sblock->pagev[index]->page;
1820 buffer = kmap_atomic(page);
1821 }
1822
1823 btrfs_csum_final(crc, csum);
1824 if (memcmp(csum, on_disk_csum, sctx->csum_size))
1825 sblock->checksum_error = 1;
1826
1827 return sblock->checksum_error;
1828}
1829
1830static int scrub_checksum_tree_block(struct scrub_block *sblock)
1831{
1832 struct scrub_ctx *sctx = sblock->sctx;
1833 struct btrfs_header *h;
1834 struct btrfs_fs_info *fs_info = sctx->fs_info;
1835 u8 calculated_csum[BTRFS_CSUM_SIZE];
1836 u8 on_disk_csum[BTRFS_CSUM_SIZE];
1837 struct page *page;
1838 void *mapped_buffer;
1839 u64 mapped_size;
1840 void *p;
1841 u32 crc = ~(u32)0;
1842 u64 len;
1843 int index;
1844
1845 BUG_ON(sblock->page_count < 1);
1846 page = sblock->pagev[0]->page;
1847 mapped_buffer = kmap_atomic(page);
1848 h = (struct btrfs_header *)mapped_buffer;
1849 memcpy(on_disk_csum, h->csum, sctx->csum_size);
1850
1851
1852
1853
1854
1855
1856 if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h))
1857 sblock->header_error = 1;
1858
1859 if (sblock->pagev[0]->generation != btrfs_stack_header_generation(h)) {
1860 sblock->header_error = 1;
1861 sblock->generation_error = 1;
1862 }
1863
1864 if (!scrub_check_fsid(h->fsid, sblock->pagev[0]))
1865 sblock->header_error = 1;
1866
1867 if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
1868 BTRFS_UUID_SIZE))
1869 sblock->header_error = 1;
1870
1871 len = sctx->fs_info->nodesize - BTRFS_CSUM_SIZE;
1872 mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
1873 p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
1874 index = 0;
1875 for (;;) {
1876 u64 l = min_t(u64, len, mapped_size);
1877
1878 crc = btrfs_csum_data(p, crc, l);
1879 kunmap_atomic(mapped_buffer);
1880 len -= l;
1881 if (len == 0)
1882 break;
1883 index++;
1884 BUG_ON(index >= sblock->page_count);
1885 BUG_ON(!sblock->pagev[index]->page);
1886 page = sblock->pagev[index]->page;
1887 mapped_buffer = kmap_atomic(page);
1888 mapped_size = PAGE_SIZE;
1889 p = mapped_buffer;
1890 }
1891
1892 btrfs_csum_final(crc, calculated_csum);
1893 if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
1894 sblock->checksum_error = 1;
1895
1896 return sblock->header_error || sblock->checksum_error;
1897}
1898
1899static int scrub_checksum_super(struct scrub_block *sblock)
1900{
1901 struct btrfs_super_block *s;
1902 struct scrub_ctx *sctx = sblock->sctx;
1903 u8 calculated_csum[BTRFS_CSUM_SIZE];
1904 u8 on_disk_csum[BTRFS_CSUM_SIZE];
1905 struct page *page;
1906 void *mapped_buffer;
1907 u64 mapped_size;
1908 void *p;
1909 u32 crc = ~(u32)0;
1910 int fail_gen = 0;
1911 int fail_cor = 0;
1912 u64 len;
1913 int index;
1914
1915 BUG_ON(sblock->page_count < 1);
1916 page = sblock->pagev[0]->page;
1917 mapped_buffer = kmap_atomic(page);
1918 s = (struct btrfs_super_block *)mapped_buffer;
1919 memcpy(on_disk_csum, s->csum, sctx->csum_size);
1920
1921 if (sblock->pagev[0]->logical != btrfs_super_bytenr(s))
1922 ++fail_cor;
1923
1924 if (sblock->pagev[0]->generation != btrfs_super_generation(s))
1925 ++fail_gen;
1926
1927 if (!scrub_check_fsid(s->fsid, sblock->pagev[0]))
1928 ++fail_cor;
1929
1930 len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE;
1931 mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
1932 p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
1933 index = 0;
1934 for (;;) {
1935 u64 l = min_t(u64, len, mapped_size);
1936
1937 crc = btrfs_csum_data(p, crc, l);
1938 kunmap_atomic(mapped_buffer);
1939 len -= l;
1940 if (len == 0)
1941 break;
1942 index++;
1943 BUG_ON(index >= sblock->page_count);
1944 BUG_ON(!sblock->pagev[index]->page);
1945 page = sblock->pagev[index]->page;
1946 mapped_buffer = kmap_atomic(page);
1947 mapped_size = PAGE_SIZE;
1948 p = mapped_buffer;
1949 }
1950
1951 btrfs_csum_final(crc, calculated_csum);
1952 if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
1953 ++fail_cor;
1954
1955 if (fail_cor + fail_gen) {
1956
1957
1958
1959
1960
1961 spin_lock(&sctx->stat_lock);
1962 ++sctx->stat.super_errors;
1963 spin_unlock(&sctx->stat_lock);
1964 if (fail_cor)
1965 btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev,
1966 BTRFS_DEV_STAT_CORRUPTION_ERRS);
1967 else
1968 btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev,
1969 BTRFS_DEV_STAT_GENERATION_ERRS);
1970 }
1971
1972 return fail_cor + fail_gen;
1973}
1974
1975static void scrub_block_get(struct scrub_block *sblock)
1976{
1977 refcount_inc(&sblock->refs);
1978}
1979
1980static void scrub_block_put(struct scrub_block *sblock)
1981{
1982 if (refcount_dec_and_test(&sblock->refs)) {
1983 int i;
1984
1985 if (sblock->sparity)
1986 scrub_parity_put(sblock->sparity);
1987
1988 for (i = 0; i < sblock->page_count; i++)
1989 scrub_page_put(sblock->pagev[i]);
1990 kfree(sblock);
1991 }
1992}
1993
1994static void scrub_page_get(struct scrub_page *spage)
1995{
1996 atomic_inc(&spage->refs);
1997}
1998
1999static void scrub_page_put(struct scrub_page *spage)
2000{
2001 if (atomic_dec_and_test(&spage->refs)) {
2002 if (spage->page)
2003 __free_page(spage->page);
2004 kfree(spage);
2005 }
2006}
2007
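/* Submit the currently filled read bio and hand it over to the end_io path */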
2008static void scrub_submit(struct scrub_ctx *sctx)
2009{
2010 struct scrub_bio *sbio;
2011
2012 if (sctx->curr == -1)
2013 return;
2014
2015 sbio = sctx->bios[sctx->curr];
2016 sctx->curr = -1;
2017 scrub_pending_bio_inc(sctx);
2018 btrfsic_submit_bio(sbio->bio);
2019}
2020
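/*
 * Add one page to the current read bio, grabbing a free scrub_bio first if
 * necessary. The bio is submitted early when the page is not contiguous
 * with the previous one, targets a different device, or the bio is full.
 */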
2021static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
2022 struct scrub_page *spage)
2023{
2024 struct scrub_block *sblock = spage->sblock;
2025 struct scrub_bio *sbio;
2026 int ret;
2027
2028again:
2029
2030
2031
2032 while (sctx->curr == -1) {
2033 spin_lock(&sctx->list_lock);
2034 sctx->curr = sctx->first_free;
2035 if (sctx->curr != -1) {
2036 sctx->first_free = sctx->bios[sctx->curr]->next_free;
2037 sctx->bios[sctx->curr]->next_free = -1;
2038 sctx->bios[sctx->curr]->page_count = 0;
2039 spin_unlock(&sctx->list_lock);
2040 } else {
2041 spin_unlock(&sctx->list_lock);
2042 wait_event(sctx->list_wait, sctx->first_free != -1);
2043 }
2044 }
2045 sbio = sctx->bios[sctx->curr];
2046 if (sbio->page_count == 0) {
2047 struct bio *bio;
2048
2049 sbio->physical = spage->physical;
2050 sbio->logical = spage->logical;
2051 sbio->dev = spage->dev;
2052 bio = sbio->bio;
2053 if (!bio) {
2054 bio = btrfs_io_bio_alloc(sctx->pages_per_rd_bio);
2055 sbio->bio = bio;
2056 }
2057
2058 bio->bi_private = sbio;
2059 bio->bi_end_io = scrub_bio_end_io;
2060 bio_set_dev(bio, sbio->dev->bdev);
2061 bio->bi_iter.bi_sector = sbio->physical >> 9;
2062 bio->bi_opf = REQ_OP_READ;
2063 sbio->status = 0;
2064 } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
2065 spage->physical ||
2066 sbio->logical + sbio->page_count * PAGE_SIZE !=
2067 spage->logical ||
2068 sbio->dev != spage->dev) {
2069 scrub_submit(sctx);
2070 goto again;
2071 }
2072
2073 sbio->pagev[sbio->page_count] = spage;
2074 ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
2075 if (ret != PAGE_SIZE) {
2076 if (sbio->page_count < 1) {
2077 bio_put(sbio->bio);
2078 sbio->bio = NULL;
2079 return -EIO;
2080 }
2081 scrub_submit(sctx);
2082 goto again;
2083 }
2084
2085 scrub_block_get(sblock);
2086 atomic_inc(&sblock->outstanding_pages);
2087 sbio->page_count++;
2088 if (sbio->page_count == sctx->pages_per_rd_bio)
2089 scrub_submit(sctx);
2090
2091 return 0;
2092}
2093
2094static void scrub_missing_raid56_end_io(struct bio *bio)
2095{
2096 struct scrub_block *sblock = bio->bi_private;
2097 struct btrfs_fs_info *fs_info = sblock->sctx->fs_info;
2098
2099 if (bio->bi_status)
2100 sblock->no_io_error_seen = 0;
2101
2102 bio_put(bio);
2103
2104 btrfs_queue_work(fs_info->scrub_workers, &sblock->work);
2105}
2106
2107static void scrub_missing_raid56_worker(struct btrfs_work *work)
2108{
2109 struct scrub_block *sblock = container_of(work, struct scrub_block, work);
2110 struct scrub_ctx *sctx = sblock->sctx;
2111 struct btrfs_fs_info *fs_info = sctx->fs_info;
2112 u64 logical;
2113 struct btrfs_device *dev;
2114
2115 logical = sblock->pagev[0]->logical;
2116 dev = sblock->pagev[0]->dev;
2117
2118 if (sblock->no_io_error_seen)
2119 scrub_recheck_block_checksum(sblock);
2120
2121 if (!sblock->no_io_error_seen) {
2122 spin_lock(&sctx->stat_lock);
2123 sctx->stat.read_errors++;
2124 spin_unlock(&sctx->stat_lock);
2125 btrfs_err_rl_in_rcu(fs_info,
2126 "IO error rebuilding logical %llu for dev %s",
2127 logical, rcu_str_deref(dev->name));
2128 } else if (sblock->header_error || sblock->checksum_error) {
2129 spin_lock(&sctx->stat_lock);
2130 sctx->stat.uncorrectable_errors++;
2131 spin_unlock(&sctx->stat_lock);
2132 btrfs_err_rl_in_rcu(fs_info,
2133 "failed to rebuild valid logical %llu for dev %s",
2134 logical, rcu_str_deref(dev->name));
2135 } else {
2136 scrub_write_block_to_dev_replace(sblock);
2137 }
2138
2139 scrub_block_put(sblock);
2140
2141 if (sctx->is_dev_replace && sctx->flush_all_writes) {
2142 mutex_lock(&sctx->wr_lock);
2143 scrub_wr_submit(sctx);
2144 mutex_unlock(&sctx->wr_lock);
2145 }
2146
2147 scrub_pending_bio_dec(sctx);
2148}
2149
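/*
 * The device of this block is missing: rebuild its contents from the other
 * stripes of the RAID56 full stripe by handing the pages to a "missing"
 * raid bio; checksum verification then finishes in the worker.
 */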
2150static void scrub_missing_raid56_pages(struct scrub_block *sblock)
2151{
2152 struct scrub_ctx *sctx = sblock->sctx;
2153 struct btrfs_fs_info *fs_info = sctx->fs_info;
2154 u64 length = sblock->page_count * PAGE_SIZE;
2155 u64 logical = sblock->pagev[0]->logical;
2156 struct btrfs_bio *bbio = NULL;
2157 struct bio *bio;
2158 struct btrfs_raid_bio *rbio;
2159 int ret;
2160 int i;
2161
2162 btrfs_bio_counter_inc_blocked(fs_info);
2163 ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
2164 &length, &bbio);
2165 if (ret || !bbio || !bbio->raid_map)
2166 goto bbio_out;
2167
2168 if (WARN_ON(!sctx->is_dev_replace ||
2169 !(bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK))) {
2170
2171
2172
2173
2174
2175
2176 goto bbio_out;
2177 }
2178
2179 bio = btrfs_io_bio_alloc(0);
2180 bio->bi_iter.bi_sector = logical >> 9;
2181 bio->bi_private = sblock;
2182 bio->bi_end_io = scrub_missing_raid56_end_io;
2183
2184 rbio = raid56_alloc_missing_rbio(fs_info, bio, bbio, length);
2185 if (!rbio)
2186 goto rbio_out;
2187
2188 for (i = 0; i < sblock->page_count; i++) {
2189 struct scrub_page *spage = sblock->pagev[i];
2190
2191 raid56_add_scrub_pages(rbio, spage->page, spage->logical);
2192 }
2193
2194 btrfs_init_work(&sblock->work, btrfs_scrub_helper,
2195 scrub_missing_raid56_worker, NULL, NULL);
2196 scrub_block_get(sblock);
2197 scrub_pending_bio_inc(sctx);
2198 raid56_submit_missing_rbio(rbio);
2199 return;
2200
2201rbio_out:
2202 bio_put(bio);
2203bbio_out:
2204 btrfs_bio_counter_dec(fs_info);
2205 btrfs_put_bbio(bbio);
2206 spin_lock(&sctx->stat_lock);
2207 sctx->stat.malloc_errors++;
2208 spin_unlock(&sctx->stat_lock);
2209}
2210
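/*
 * Split the extent described by @logical/@len into page-sized scrub_pages,
 * attach them to a freshly allocated scrub_block and queue them for
 * reading (or, for a missing device, for RAID56 rebuild).
 */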
2211static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
2212 u64 physical, struct btrfs_device *dev, u64 flags,
2213 u64 gen, int mirror_num, u8 *csum, int force,
2214 u64 physical_for_dev_replace)
2215{
2216 struct scrub_block *sblock;
2217 int index;
2218
2219 sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);
2220 if (!sblock) {
2221 spin_lock(&sctx->stat_lock);
2222 sctx->stat.malloc_errors++;
2223 spin_unlock(&sctx->stat_lock);
2224 return -ENOMEM;
2225 }
2226
2227
2228
2229 refcount_set(&sblock->refs, 1);
2230 sblock->sctx = sctx;
2231 sblock->no_io_error_seen = 1;
2232
2233 for (index = 0; len > 0; index++) {
2234 struct scrub_page *spage;
2235 u64 l = min_t(u64, len, PAGE_SIZE);
2236
2237 spage = kzalloc(sizeof(*spage), GFP_KERNEL);
2238 if (!spage) {
2239leave_nomem:
2240 spin_lock(&sctx->stat_lock);
2241 sctx->stat.malloc_errors++;
2242 spin_unlock(&sctx->stat_lock);
2243 scrub_block_put(sblock);
2244 return -ENOMEM;
2245 }
2246 BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
2247 scrub_page_get(spage);
2248 sblock->pagev[index] = spage;
2249 spage->sblock = sblock;
2250 spage->dev = dev;
2251 spage->flags = flags;
2252 spage->generation = gen;
2253 spage->logical = logical;
2254 spage->physical = physical;
2255 spage->physical_for_dev_replace = physical_for_dev_replace;
2256 spage->mirror_num = mirror_num;
2257 if (csum) {
2258 spage->have_csum = 1;
2259 memcpy(spage->csum, csum, sctx->csum_size);
2260 } else {
2261 spage->have_csum = 0;
2262 }
2263 sblock->page_count++;
2264 spage->page = alloc_page(GFP_KERNEL);
2265 if (!spage->page)
2266 goto leave_nomem;
2267 len -= l;
2268 logical += l;
2269 physical += l;
2270 physical_for_dev_replace += l;
2271 }
2272
2273 WARN_ON(sblock->page_count == 0);
2274 if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) {
2275
2276
2277
2278
2279 scrub_missing_raid56_pages(sblock);
2280 } else {
2281 for (index = 0; index < sblock->page_count; index++) {
2282 struct scrub_page *spage = sblock->pagev[index];
2283 int ret;
2284
2285 ret = scrub_add_page_to_rd_bio(sctx, spage);
2286 if (ret) {
2287 scrub_block_put(sblock);
2288 return ret;
2289 }
2290 }
2291
2292 if (force)
2293 scrub_submit(sctx);
2294 }
2295
2296
2297 scrub_block_put(sblock);
2298 return 0;
2299}
2300
2301static void scrub_bio_end_io(struct bio *bio)
2302{
2303 struct scrub_bio *sbio = bio->bi_private;
2304 struct btrfs_fs_info *fs_info = sbio->dev->fs_info;
2305
2306 sbio->status = bio->bi_status;
2307 sbio->bio = bio;
2308
2309 btrfs_queue_work(fs_info->scrub_workers, &sbio->work);
2310}
2311
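/*
 * Worker for completed read bios: propagate I/O errors to the affected
 * pages and blocks, complete every scrub_block whose last outstanding
 * page just finished, put the scrub_bio back on the free list and, in
 * the dev-replace flush case, push out pending write bios.
 */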
2312static void scrub_bio_end_io_worker(struct btrfs_work *work)
2313{
2314 struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
2315 struct scrub_ctx *sctx = sbio->sctx;
2316 int i;
2317
2318 BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO);
2319 if (sbio->status) {
2320 for (i = 0; i < sbio->page_count; i++) {
2321 struct scrub_page *spage = sbio->pagev[i];
2322
2323 spage->io_error = 1;
2324 spage->sblock->no_io_error_seen = 0;
2325 }
2326 }
2327
 /* now complete the scrub_block items that have all pages completed */
2329 for (i = 0; i < sbio->page_count; i++) {
2330 struct scrub_page *spage = sbio->pagev[i];
2331 struct scrub_block *sblock = spage->sblock;
2332
2333 if (atomic_dec_and_test(&sblock->outstanding_pages))
2334 scrub_block_complete(sblock);
2335 scrub_block_put(sblock);
2336 }
2337
2338 bio_put(sbio->bio);
2339 sbio->bio = NULL;
2340 spin_lock(&sctx->list_lock);
2341 sbio->next_free = sctx->first_free;
2342 sctx->first_free = sbio->index;
2343 spin_unlock(&sctx->list_lock);
2344
2345 if (sctx->is_dev_replace && sctx->flush_all_writes) {
2346 mutex_lock(&sctx->wr_lock);
2347 scrub_wr_submit(sctx);
2348 mutex_unlock(&sctx->wr_lock);
2349 }
2350
2351 scrub_pending_bio_dec(sctx);
2352}
2353
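/*
 * Mark the sectors covered by [start, start + len) in the given
 * per-stripe bitmap, wrapping around at the end of the stripe when
 * needed. A length of at least a full stripe marks all sectors.
 */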
2354static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
2355 unsigned long *bitmap,
2356 u64 start, u64 len)
2357{
2358 u64 offset;
2359 u64 nsectors64;
2360 u32 nsectors;
2361 int sectorsize = sparity->sctx->fs_info->sectorsize;
2362
2363 if (len >= sparity->stripe_len) {
2364 bitmap_set(bitmap, 0, sparity->nsectors);
2365 return;
2366 }
2367
2368 start -= sparity->logic_start;
2369 start = div64_u64_rem(start, sparity->stripe_len, &offset);
2370 offset = div_u64(offset, sectorsize);
2371 nsectors64 = div_u64(len, sectorsize);
2372
2373 ASSERT(nsectors64 < UINT_MAX);
2374 nsectors = (u32)nsectors64;
2375
2376 if (offset + nsectors <= sparity->nsectors) {
2377 bitmap_set(bitmap, offset, nsectors);
2378 return;
2379 }
2380
2381 bitmap_set(bitmap, offset, sparity->nsectors - offset);
2382 bitmap_set(bitmap, 0, nsectors - (sparity->nsectors - offset));
2383}
2384
2385static inline void scrub_parity_mark_sectors_error(struct scrub_parity *sparity,
2386 u64 start, u64 len)
2387{
2388 __scrub_mark_bitmap(sparity, sparity->ebitmap, start, len);
2389}
2390
2391static inline void scrub_parity_mark_sectors_data(struct scrub_parity *sparity,
2392 u64 start, u64 len)
2393{
2394 __scrub_mark_bitmap(sparity, sparity->dbitmap, start, len);
2395}
2396
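/*
 * Called once all pages of a scrub_block have completed their I/O:
 * run the repair path if an I/O error was seen, otherwise verify the
 * checksums and, for dev-replace, copy the good block to the target
 * device. For parity scrubs, sectors that could not be corrected are
 * recorded in the parity error bitmap.
 */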
2397static void scrub_block_complete(struct scrub_block *sblock)
2398{
2399 int corrupted = 0;
2400
2401 if (!sblock->no_io_error_seen) {
2402 corrupted = 1;
2403 scrub_handle_errored_block(sblock);
2404 } else {
 /*
  * If there is a checksum error, the repair path triggered from
  * scrub_checksum() writes the corrected data in the dev-replace
  * case; otherwise copy the good block to the target device here.
  */
2410 corrupted = scrub_checksum(sblock);
2411 if (!corrupted && sblock->sctx->is_dev_replace)
2412 scrub_write_block_to_dev_replace(sblock);
2413 }
2414
2415 if (sblock->sparity && corrupted && !sblock->data_corrected) {
2416 u64 start = sblock->pagev[0]->logical;
2417 u64 end = sblock->pagev[sblock->page_count - 1]->logical +
2418 PAGE_SIZE;
2419
2420 scrub_parity_mark_sectors_error(sblock->sparity,
2421 start, end - start);
2422 }
2423}
2424
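/*
 * Look up the checksum for @logical in the pre-fetched, sorted
 * sctx->csum_list, dropping entries that lie entirely below @logical
 * along the way. Returns 1 and copies the checksum into @csum on
 * success, 0 if no checksum covers @logical.
 */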
2425static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u8 *csum)
2426{
2427 struct btrfs_ordered_sum *sum = NULL;
2428 unsigned long index;
2429 unsigned long num_sectors;
2430
2431 while (!list_empty(&sctx->csum_list)) {
2432 sum = list_first_entry(&sctx->csum_list,
2433 struct btrfs_ordered_sum, list);
2434 if (sum->bytenr > logical)
2435 return 0;
2436 if (sum->bytenr + sum->len > logical)
2437 break;
2438
2439 ++sctx->stat.csum_discards;
2440 list_del(&sum->list);
2441 kfree(sum);
2442 sum = NULL;
2443 }
2444 if (!sum)
2445 return 0;
2446
2447 index = div_u64(logical - sum->bytenr, sctx->fs_info->sectorsize);
2448 ASSERT(index < UINT_MAX);
2449
2450 num_sectors = sum->len / sctx->fs_info->sectorsize;
2451 memcpy(csum, sum->sums + index, sctx->csum_size);
2452 if (index == num_sectors - 1) {
2453 list_del(&sum->list);
2454 kfree(sum);
2455 }
2456 return 1;
2457}
2458
/* scrub extent tries to collect up to 64 kB for each bio */
2460static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
2461 u64 logical, u64 len,
2462 u64 physical, struct btrfs_device *dev, u64 flags,
2463 u64 gen, int mirror_num, u64 physical_for_dev_replace)
2464{
2465 int ret;
2466 u8 csum[BTRFS_CSUM_SIZE];
2467 u32 blocksize;
2468
2469 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2470 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
2471 blocksize = map->stripe_len;
2472 else
2473 blocksize = sctx->fs_info->sectorsize;
2474 spin_lock(&sctx->stat_lock);
2475 sctx->stat.data_extents_scrubbed++;
2476 sctx->stat.data_bytes_scrubbed += len;
2477 spin_unlock(&sctx->stat_lock);
2478 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
2479 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
2480 blocksize = map->stripe_len;
2481 else
2482 blocksize = sctx->fs_info->nodesize;
2483 spin_lock(&sctx->stat_lock);
2484 sctx->stat.tree_extents_scrubbed++;
2485 sctx->stat.tree_bytes_scrubbed += len;
2486 spin_unlock(&sctx->stat_lock);
2487 } else {
2488 blocksize = sctx->fs_info->sectorsize;
2489 WARN_ON(1);
2490 }
2491
2492 while (len) {
2493 u64 l = min_t(u64, len, blocksize);
2494 int have_csum = 0;
2495
2496 if (flags & BTRFS_EXTENT_FLAG_DATA) {
 /* push csums to sbio */
2498 have_csum = scrub_find_csum(sctx, logical, csum);
2499 if (have_csum == 0)
2500 ++sctx->stat.no_csum;
2501 }
2502 ret = scrub_pages(sctx, logical, l, physical, dev, flags, gen,
2503 mirror_num, have_csum ? csum : NULL, 0,
2504 physical_for_dev_replace);
2505 if (ret)
2506 return ret;
2507 len -= l;
2508 logical += l;
2509 physical += l;
2510 physical_for_dev_replace += l;
2511 }
2512 return 0;
2513}
2514
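/*
 * Like scrub_pages(), but for the data blocks of a RAID5/6 parity
 * scrub: every page is additionally linked into sparity->spages and
 * the block takes a reference on the scrub_parity, so the parity check
 * only runs after all blocks have completed.
 */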
2515static int scrub_pages_for_parity(struct scrub_parity *sparity,
2516 u64 logical, u64 len,
2517 u64 physical, struct btrfs_device *dev,
2518 u64 flags, u64 gen, int mirror_num, u8 *csum)
2519{
2520 struct scrub_ctx *sctx = sparity->sctx;
2521 struct scrub_block *sblock;
2522 int index;
2523
2524 sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);
2525 if (!sblock) {
2526 spin_lock(&sctx->stat_lock);
2527 sctx->stat.malloc_errors++;
2528 spin_unlock(&sctx->stat_lock);
2529 return -ENOMEM;
2530 }
2531
 /* one ref inside this function, plus one for each page added to
  * a bio */
2534 refcount_set(&sblock->refs, 1);
2535 sblock->sctx = sctx;
2536 sblock->no_io_error_seen = 1;
2537 sblock->sparity = sparity;
2538 scrub_parity_get(sparity);
2539
2540 for (index = 0; len > 0; index++) {
2541 struct scrub_page *spage;
2542 u64 l = min_t(u64, len, PAGE_SIZE);
2543
2544 spage = kzalloc(sizeof(*spage), GFP_KERNEL);
2545 if (!spage) {
2546leave_nomem:
2547 spin_lock(&sctx->stat_lock);
2548 sctx->stat.malloc_errors++;
2549 spin_unlock(&sctx->stat_lock);
2550 scrub_block_put(sblock);
2551 return -ENOMEM;
2552 }
2553 BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
 /* For scrub block */
2555 scrub_page_get(spage);
2556 sblock->pagev[index] = spage;
 /* For scrub parity */
2558 scrub_page_get(spage);
2559 list_add_tail(&spage->list, &sparity->spages);
2560 spage->sblock = sblock;
2561 spage->dev = dev;
2562 spage->flags = flags;
2563 spage->generation = gen;
2564 spage->logical = logical;
2565 spage->physical = physical;
2566 spage->mirror_num = mirror_num;
2567 if (csum) {
2568 spage->have_csum = 1;
2569 memcpy(spage->csum, csum, sctx->csum_size);
2570 } else {
2571 spage->have_csum = 0;
2572 }
2573 sblock->page_count++;
2574 spage->page = alloc_page(GFP_KERNEL);
2575 if (!spage->page)
2576 goto leave_nomem;
2577 len -= l;
2578 logical += l;
2579 physical += l;
2580 }
2581
2582 WARN_ON(sblock->page_count == 0);
2583 for (index = 0; index < sblock->page_count; index++) {
2584 struct scrub_page *spage = sblock->pagev[index];
2585 int ret;
2586
2587 ret = scrub_add_page_to_rd_bio(sctx, spage);
2588 if (ret) {
2589 scrub_block_put(sblock);
2590 return ret;
2591 }
2592 }
2593
 /* last one frees, either here or in bio completion for last page */
2595 scrub_block_put(sblock);
2596 return 0;
2597}
2598
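/*
 * Scrub one extent that belongs to a RAID5/6 parity stripe: split it
 * into blocksize pieces and read them via scrub_pages_for_parity().
 * Data blocks without a checksum are skipped, and extents on a missing
 * device are simply marked as errored sectors.
 */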
2599static int scrub_extent_for_parity(struct scrub_parity *sparity,
2600 u64 logical, u64 len,
2601 u64 physical, struct btrfs_device *dev,
2602 u64 flags, u64 gen, int mirror_num)
2603{
2604 struct scrub_ctx *sctx = sparity->sctx;
2605 int ret;
2606 u8 csum[BTRFS_CSUM_SIZE];
2607 u32 blocksize;
2608
2609 if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) {
2610 scrub_parity_mark_sectors_error(sparity, logical, len);
2611 return 0;
2612 }
2613
2614 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2615 blocksize = sparity->stripe_len;
2616 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
2617 blocksize = sparity->stripe_len;
2618 } else {
2619 blocksize = sctx->fs_info->sectorsize;
2620 WARN_ON(1);
2621 }
2622
2623 while (len) {
2624 u64 l = min_t(u64, len, blocksize);
2625 int have_csum = 0;
2626
2627 if (flags & BTRFS_EXTENT_FLAG_DATA) {
 /* push csums to sbio */
2629 have_csum = scrub_find_csum(sctx, logical, csum);
2630 if (have_csum == 0)
2631 goto skip;
2632 }
2633 ret = scrub_pages_for_parity(sparity, logical, l, physical, dev,
2634 flags, gen, mirror_num,
2635 have_csum ? csum : NULL);
2636 if (ret)
2637 return ret;
2638skip:
2639 len -= l;
2640 logical += l;
2641 physical += l;
2642 }
2643 return 0;
2644}
2645
/*
 * Given a physical address, calculate its logical offset within the
 * chunk mapping. If the address falls into a parity stripe, return the
 * logical offset of the leftmost data stripe of that stripe set instead.
 *
 * Return 0 for a data stripe, 1 for a parity stripe.
 */
2653static int get_raid56_logic_offset(u64 physical, int num,
2654 struct map_lookup *map, u64 *offset,
2655 u64 *stripe_start)
2656{
2657 int i;
2658 int j = 0;
2659 u64 stripe_nr;
2660 u64 last_offset;
2661 u32 stripe_index;
2662 u32 rot;
2663
2664 last_offset = (physical - map->stripes[num].physical) *
2665 nr_data_stripes(map);
2666 if (stripe_start)
2667 *stripe_start = last_offset;
2668
2669 *offset = last_offset;
2670 for (i = 0; i < nr_data_stripes(map); i++) {
2671 *offset = last_offset + i * map->stripe_len;
2672
2673 stripe_nr = div64_u64(*offset, map->stripe_len);
2674 stripe_nr = div_u64(stripe_nr, nr_data_stripes(map));
2675
 /* Work out the disk rotation on this stripe-set */
2677 stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, &rot);
 /* Calculate which stripe this data belongs to */
2679 rot += i;
2680 stripe_index = rot % map->num_stripes;
2681 if (stripe_index == num)
2682 return 0;
2683 if (stripe_index < num)
2684 j++;
2685 }
2686 *offset = last_offset + j * map->stripe_len;
2687 return 1;
2688}
2689
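/*
 * Final teardown of a scrub_parity: account the sectors still marked
 * in the error bitmap as read/uncorrectable errors, drop the page
 * references collected on the spages list and free the structure.
 */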
2690static void scrub_free_parity(struct scrub_parity *sparity)
2691{
2692 struct scrub_ctx *sctx = sparity->sctx;
2693 struct scrub_page *curr, *next;
2694 int nbits;
2695
2696 nbits = bitmap_weight(sparity->ebitmap, sparity->nsectors);
2697 if (nbits) {
2698 spin_lock(&sctx->stat_lock);
2699 sctx->stat.read_errors += nbits;
2700 sctx->stat.uncorrectable_errors += nbits;
2701 spin_unlock(&sctx->stat_lock);
2702 }
2703
2704 list_for_each_entry_safe(curr, next, &sparity->spages, list) {
2705 list_del_init(&curr->list);
2706 scrub_page_put(curr);
2707 }
2708
2709 kfree(sparity);
2710}
2711
2712static void scrub_parity_bio_endio_worker(struct btrfs_work *work)
2713{
2714 struct scrub_parity *sparity = container_of(work, struct scrub_parity,
2715 work);
2716 struct scrub_ctx *sctx = sparity->sctx;
2717
2718 scrub_free_parity(sparity);
2719 scrub_pending_bio_dec(sctx);
2720}
2721
2722static void scrub_parity_bio_endio(struct bio *bio)
2723{
2724 struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private;
2725 struct btrfs_fs_info *fs_info = sparity->sctx->fs_info;
2726
2727 if (bio->bi_status)
2728 bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
2729 sparity->nsectors);
2730
2731 bio_put(bio);
2732
2733 btrfs_init_work(&sparity->work, btrfs_scrubparity_helper,
2734 scrub_parity_bio_endio_worker, NULL, NULL);
2735 btrfs_queue_work(fs_info->scrub_parity_workers, &sparity->work);
2736}
2737
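/*
 * Run the actual parity check for a full stripe once all data blocks
 * have been read: sectors that already failed are removed from the
 * data bitmap, the remaining ones are handed to the RAID56 layer as a
 * scrub rbio, which verifies and if necessary rewrites the parity.
 */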
2738static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
2739{
2740 struct scrub_ctx *sctx = sparity->sctx;
2741 struct btrfs_fs_info *fs_info = sctx->fs_info;
2742 struct bio *bio;
2743 struct btrfs_raid_bio *rbio;
2744 struct btrfs_bio *bbio = NULL;
2745 u64 length;
2746 int ret;
2747
2748 if (!bitmap_andnot(sparity->dbitmap, sparity->dbitmap, sparity->ebitmap,
2749 sparity->nsectors))
2750 goto out;
2751
2752 length = sparity->logic_end - sparity->logic_start;
2753
2754 btrfs_bio_counter_inc_blocked(fs_info);
2755 ret = btrfs_map_sblock(fs_info, BTRFS_MAP_WRITE, sparity->logic_start,
2756 &length, &bbio);
2757 if (ret || !bbio || !bbio->raid_map)
2758 goto bbio_out;
2759
2760 bio = btrfs_io_bio_alloc(0);
2761 bio->bi_iter.bi_sector = sparity->logic_start >> 9;
2762 bio->bi_private = sparity;
2763 bio->bi_end_io = scrub_parity_bio_endio;
2764
2765 rbio = raid56_parity_alloc_scrub_rbio(fs_info, bio, bbio,
2766 length, sparity->scrub_dev,
2767 sparity->dbitmap,
2768 sparity->nsectors);
2769 if (!rbio)
2770 goto rbio_out;
2771
2772 scrub_pending_bio_inc(sctx);
2773 raid56_parity_submit_scrub_rbio(rbio);
2774 return;
2775
2776rbio_out:
2777 bio_put(bio);
2778bbio_out:
2779 btrfs_bio_counter_dec(fs_info);
2780 btrfs_put_bbio(bbio);
2781 bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
2782 sparity->nsectors);
2783 spin_lock(&sctx->stat_lock);
2784 sctx->stat.malloc_errors++;
2785 spin_unlock(&sctx->stat_lock);
2786out:
2787 scrub_free_parity(sparity);
2788}
2789
2790static inline int scrub_calc_parity_bitmap_len(int nsectors)
2791{
2792 return DIV_ROUND_UP(nsectors, BITS_PER_LONG) * sizeof(long);
2793}
2794
2795static void scrub_parity_get(struct scrub_parity *sparity)
2796{
2797 refcount_inc(&sparity->refs);
2798}
2799
2800static void scrub_parity_put(struct scrub_parity *sparity)
2801{
2802 if (!refcount_dec_and_test(&sparity->refs))
2803 return;
2804
2805 scrub_parity_check_and_repair(sparity);
2806}
2807
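/*
 * Scrub the parity of one full RAID5/6 stripe in the range
 * [logic_start, logic_end): walk the extent tree for the data extents
 * inside the range, mark them in the data bitmap and read them, then
 * let scrub_parity_put() trigger the parity check and repair once the
 * last reference is dropped.
 */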
2808static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
2809 struct map_lookup *map,
2810 struct btrfs_device *sdev,
2811 struct btrfs_path *path,
2812 u64 logic_start,
2813 u64 logic_end)
2814{
2815 struct btrfs_fs_info *fs_info = sctx->fs_info;
2816 struct btrfs_root *root = fs_info->extent_root;
2817 struct btrfs_root *csum_root = fs_info->csum_root;
2818 struct btrfs_extent_item *extent;
2819 struct btrfs_bio *bbio = NULL;
2820 u64 flags;
2821 int ret;
2822 int slot;
2823 struct extent_buffer *l;
2824 struct btrfs_key key;
2825 u64 generation;
2826 u64 extent_logical;
2827 u64 extent_physical;
2828 u64 extent_len;
2829 u64 mapped_length;
2830 struct btrfs_device *extent_dev;
2831 struct scrub_parity *sparity;
2832 int nsectors;
2833 int bitmap_len;
2834 int extent_mirror_num;
2835 int stop_loop = 0;
2836
2837 nsectors = div_u64(map->stripe_len, fs_info->sectorsize);
2838 bitmap_len = scrub_calc_parity_bitmap_len(nsectors);
2839 sparity = kzalloc(sizeof(struct scrub_parity) + 2 * bitmap_len,
2840 GFP_NOFS);
2841 if (!sparity) {
2842 spin_lock(&sctx->stat_lock);
2843 sctx->stat.malloc_errors++;
2844 spin_unlock(&sctx->stat_lock);
2845 return -ENOMEM;
2846 }
2847
2848 sparity->stripe_len = map->stripe_len;
2849 sparity->nsectors = nsectors;
2850 sparity->sctx = sctx;
2851 sparity->scrub_dev = sdev;
2852 sparity->logic_start = logic_start;
2853 sparity->logic_end = logic_end;
2854 refcount_set(&sparity->refs, 1);
2855 INIT_LIST_HEAD(&sparity->spages);
2856 sparity->dbitmap = sparity->bitmap;
2857 sparity->ebitmap = (void *)sparity->bitmap + bitmap_len;
2858
2859 ret = 0;
2860 while (logic_start < logic_end) {
2861 if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
2862 key.type = BTRFS_METADATA_ITEM_KEY;
2863 else
2864 key.type = BTRFS_EXTENT_ITEM_KEY;
2865 key.objectid = logic_start;
2866 key.offset = (u64)-1;
2867
2868 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2869 if (ret < 0)
2870 goto out;
2871
2872 if (ret > 0) {
2873 ret = btrfs_previous_extent_item(root, path, 0);
2874 if (ret < 0)
2875 goto out;
2876 if (ret > 0) {
2877 btrfs_release_path(path);
2878 ret = btrfs_search_slot(NULL, root, &key,
2879 path, 0, 0);
2880 if (ret < 0)
2881 goto out;
2882 }
2883 }
2884
2885 stop_loop = 0;
2886 while (1) {
2887 u64 bytes;
2888
2889 l = path->nodes[0];
2890 slot = path->slots[0];
2891 if (slot >= btrfs_header_nritems(l)) {
2892 ret = btrfs_next_leaf(root, path);
2893 if (ret == 0)
2894 continue;
2895 if (ret < 0)
2896 goto out;
2897
2898 stop_loop = 1;
2899 break;
2900 }
2901 btrfs_item_key_to_cpu(l, &key, slot);
2902
2903 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
2904 key.type != BTRFS_METADATA_ITEM_KEY)
2905 goto next;
2906
2907 if (key.type == BTRFS_METADATA_ITEM_KEY)
2908 bytes = fs_info->nodesize;
2909 else
2910 bytes = key.offset;
2911
2912 if (key.objectid + bytes <= logic_start)
2913 goto next;
2914
2915 if (key.objectid >= logic_end) {
2916 stop_loop = 1;
2917 break;
2918 }
2919
2920 while (key.objectid >= logic_start + map->stripe_len)
2921 logic_start += map->stripe_len;
2922
2923 extent = btrfs_item_ptr(l, slot,
2924 struct btrfs_extent_item);
2925 flags = btrfs_extent_flags(l, extent);
2926 generation = btrfs_extent_generation(l, extent);
2927
2928 if ((flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) &&
2929 (key.objectid < logic_start ||
2930 key.objectid + bytes >
2931 logic_start + map->stripe_len)) {
2932 btrfs_err(fs_info,
2933 "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
2934 key.objectid, logic_start);
2935 spin_lock(&sctx->stat_lock);
2936 sctx->stat.uncorrectable_errors++;
2937 spin_unlock(&sctx->stat_lock);
2938 goto next;
2939 }
2940again:
2941 extent_logical = key.objectid;
2942 extent_len = bytes;
2943
2944 if (extent_logical < logic_start) {
2945 extent_len -= logic_start - extent_logical;
2946 extent_logical = logic_start;
2947 }
2948
2949 if (extent_logical + extent_len >
2950 logic_start + map->stripe_len)
2951 extent_len = logic_start + map->stripe_len -
2952 extent_logical;
2953
2954 scrub_parity_mark_sectors_data(sparity, extent_logical,
2955 extent_len);
2956
2957 mapped_length = extent_len;
2958 bbio = NULL;
2959 ret = btrfs_map_block(fs_info, BTRFS_MAP_READ,
2960 extent_logical, &mapped_length, &bbio,
2961 0);
2962 if (!ret) {
2963 if (!bbio || mapped_length < extent_len)
2964 ret = -EIO;
2965 }
2966 if (ret) {
2967 btrfs_put_bbio(bbio);
2968 goto out;
2969 }
2970 extent_physical = bbio->stripes[0].physical;
2971 extent_mirror_num = bbio->mirror_num;
2972 extent_dev = bbio->stripes[0].dev;
2973 btrfs_put_bbio(bbio);
2974
2975 ret = btrfs_lookup_csums_range(csum_root,
2976 extent_logical,
2977 extent_logical + extent_len - 1,
2978 &sctx->csum_list, 1);
2979 if (ret)
2980 goto out;
2981
2982 ret = scrub_extent_for_parity(sparity, extent_logical,
2983 extent_len,
2984 extent_physical,
2985 extent_dev, flags,
2986 generation,
2987 extent_mirror_num);
2988
2989 scrub_free_csums(sctx);
2990
2991 if (ret)
2992 goto out;
2993
2994 if (extent_logical + extent_len <
2995 key.objectid + bytes) {
2996 logic_start += map->stripe_len;
2997
2998 if (logic_start >= logic_end) {
2999 stop_loop = 1;
3000 break;
3001 }
3002
3003 if (logic_start < key.objectid + bytes) {
3004 cond_resched();
3005 goto again;
3006 }
3007 }
3008next:
3009 path->slots[0]++;
3010 }
3011
3012 btrfs_release_path(path);
3013
3014 if (stop_loop)
3015 break;
3016
3017 logic_start += map->stripe_len;
3018 }
3019out:
3020 if (ret < 0)
3021 scrub_parity_mark_sectors_error(sparity, logic_start,
3022 logic_end - logic_start);
3023 scrub_parity_put(sparity);
3024 scrub_submit(sctx);
3025 mutex_lock(&sctx->wr_lock);
3026 scrub_wr_submit(sctx);
3027 mutex_unlock(&sctx->wr_lock);
3028
3029 btrfs_release_path(path);
3030 return ret < 0 ? ret : 0;
3031}
3032
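/*
 * Scrub all extents that live in stripe @num of the chunk described by
 * @map on @scrub_dev: compute the logical range covered by this device
 * stripe for the chunk's RAID profile, read ahead the extent and csum
 * trees, then walk the extent tree stripe by stripe, honoring cancel
 * and pause requests. RAID5/6 parity stripes are dispatched to
 * scrub_raid56_parity().
 */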
3033static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
3034 struct map_lookup *map,
3035 struct btrfs_device *scrub_dev,
3036 int num, u64 base, u64 length)
3037{
3038 struct btrfs_path *path, *ppath;
3039 struct btrfs_fs_info *fs_info = sctx->fs_info;
3040 struct btrfs_root *root = fs_info->extent_root;
3041 struct btrfs_root *csum_root = fs_info->csum_root;
3042 struct btrfs_extent_item *extent;
3043 struct blk_plug plug;
3044 u64 flags;
3045 int ret;
3046 int slot;
3047 u64 nstripes;
3048 struct extent_buffer *l;
3049 u64 physical;
3050 u64 logical;
3051 u64 logic_end;
3052 u64 physical_end;
3053 u64 generation;
3054 int mirror_num;
3055 struct reada_control *reada1;
3056 struct reada_control *reada2;
3057 struct btrfs_key key;
3058 struct btrfs_key key_end;
3059 u64 increment = map->stripe_len;
3060 u64 offset;
3061 u64 extent_logical;
3062 u64 extent_physical;
3063 u64 extent_len;
3064 u64 stripe_logical;
3065 u64 stripe_end;
3066 struct btrfs_device *extent_dev;
3067 int extent_mirror_num;
3068 int stop_loop = 0;
3069
3070 physical = map->stripes[num].physical;
3071 offset = 0;
3072 nstripes = div64_u64(length, map->stripe_len);
3073 if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
3074 offset = map->stripe_len * num;
3075 increment = map->stripe_len * map->num_stripes;
3076 mirror_num = 1;
3077 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
3078 int factor = map->num_stripes / map->sub_stripes;
3079 offset = map->stripe_len * (num / map->sub_stripes);
3080 increment = map->stripe_len * factor;
3081 mirror_num = num % map->sub_stripes + 1;
3082 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
3083 increment = map->stripe_len;
3084 mirror_num = num % map->num_stripes + 1;
3085 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
3086 increment = map->stripe_len;
3087 mirror_num = num % map->num_stripes + 1;
3088 } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3089 get_raid56_logic_offset(physical, num, map, &offset, NULL);
3090 increment = map->stripe_len * nr_data_stripes(map);
3091 mirror_num = 1;
3092 } else {
3093 increment = map->stripe_len;
3094 mirror_num = 1;
3095 }
3096
3097 path = btrfs_alloc_path();
3098 if (!path)
3099 return -ENOMEM;
3100
3101 ppath = btrfs_alloc_path();
3102 if (!ppath) {
3103 btrfs_free_path(path);
3104 return -ENOMEM;
3105 }
3106
 /*
  * Work on the commit root. The related disk blocks are static as
  * long as COW is applied. This means it is safe to rewrite them
  * to repair disk errors without any race conditions.
  */
3112 path->search_commit_root = 1;
3113 path->skip_locking = 1;
3114
3115 ppath->search_commit_root = 1;
3116 ppath->skip_locking = 1;
3117
 /*
  * Trigger the readahead for the extent tree and csum tree and wait
  * for completion. During readahead, the scrub is officially paused
  * to not hold off transaction commits.
  */
3122 logical = base + offset;
3123 physical_end = physical + nstripes * map->stripe_len;
3124 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3125 get_raid56_logic_offset(physical_end, num,
3126 map, &logic_end, NULL);
3127 logic_end += base;
3128 } else {
3129 logic_end = logical + increment * nstripes;
3130 }
3131 wait_event(sctx->list_wait,
3132 atomic_read(&sctx->bios_in_flight) == 0);
3133 scrub_blocked_if_needed(fs_info);
3134
 /* FIXME it might be better to start readahead at commit root */
3136 key.objectid = logical;
3137 key.type = BTRFS_EXTENT_ITEM_KEY;
3138 key.offset = (u64)0;
3139 key_end.objectid = logic_end;
3140 key_end.type = BTRFS_METADATA_ITEM_KEY;
3141 key_end.offset = (u64)-1;
3142 reada1 = btrfs_reada_add(root, &key, &key_end);
3143
3144 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
3145 key.type = BTRFS_EXTENT_CSUM_KEY;
3146 key.offset = logical;
3147 key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
3148 key_end.type = BTRFS_EXTENT_CSUM_KEY;
3149 key_end.offset = logic_end;
3150 reada2 = btrfs_reada_add(csum_root, &key, &key_end);
3151
3152 if (!IS_ERR(reada1))
3153 btrfs_reada_wait(reada1);
3154 if (!IS_ERR(reada2))
3155 btrfs_reada_wait(reada2);
3156
3157
 /*
  * Collect all data csums for the stripe to avoid seeking during
  * the scrub. This might currently (crc32) end up to be about 1MB.
  */
3162 blk_start_plug(&plug);
3163
 /*
  * now find all extents for each stripe and scrub them
  */
3167 ret = 0;
3168 while (physical < physical_end) {
 /*
  * canceled?
  */
3172 if (atomic_read(&fs_info->scrub_cancel_req) ||
3173 atomic_read(&sctx->cancel_req)) {
3174 ret = -ECANCELED;
3175 goto out;
3176 }
 /*
  * check to see if we have to pause
  */
3180 if (atomic_read(&fs_info->scrub_pause_req)) {
 /* push queued extents */
3182 sctx->flush_all_writes = true;
3183 scrub_submit(sctx);
3184 mutex_lock(&sctx->wr_lock);
3185 scrub_wr_submit(sctx);
3186 mutex_unlock(&sctx->wr_lock);
3187 wait_event(sctx->list_wait,
3188 atomic_read(&sctx->bios_in_flight) == 0);
3189 sctx->flush_all_writes = false;
3190 scrub_blocked_if_needed(fs_info);
3191 }
3192
3193 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3194 ret = get_raid56_logic_offset(physical, num, map,
3195 &logical,
3196 &stripe_logical);
3197 logical += base;
3198 if (ret) {
 /* it is a parity stripe */
3200 stripe_logical += base;
3201 stripe_end = stripe_logical + increment;
3202 ret = scrub_raid56_parity(sctx, map, scrub_dev,
3203 ppath, stripe_logical,
3204 stripe_end);
3205 if (ret)
3206 goto out;
3207 goto skip;
3208 }
3209 }
3210
3211 if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
3212 key.type = BTRFS_METADATA_ITEM_KEY;
3213 else
3214 key.type = BTRFS_EXTENT_ITEM_KEY;
3215 key.objectid = logical;
3216 key.offset = (u64)-1;
3217
3218 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3219 if (ret < 0)
3220 goto out;
3221
3222 if (ret > 0) {
3223 ret = btrfs_previous_extent_item(root, path, 0);
3224 if (ret < 0)
3225 goto out;
3226 if (ret > 0) {
 /* there's no smaller item, so stick with the
  * larger one */
3229 btrfs_release_path(path);
3230 ret = btrfs_search_slot(NULL, root, &key,
3231 path, 0, 0);
3232 if (ret < 0)
3233 goto out;
3234 }
3235 }
3236
3237 stop_loop = 0;
3238 while (1) {
3239 u64 bytes;
3240
3241 l = path->nodes[0];
3242 slot = path->slots[0];
3243 if (slot >= btrfs_header_nritems(l)) {
3244 ret = btrfs_next_leaf(root, path);
3245 if (ret == 0)
3246 continue;
3247 if (ret < 0)
3248 goto out;
3249
3250 stop_loop = 1;
3251 break;
3252 }
3253 btrfs_item_key_to_cpu(l, &key, slot);
3254
3255 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
3256 key.type != BTRFS_METADATA_ITEM_KEY)
3257 goto next;
3258
3259 if (key.type == BTRFS_METADATA_ITEM_KEY)
3260 bytes = fs_info->nodesize;
3261 else
3262 bytes = key.offset;
3263
3264 if (key.objectid + bytes <= logical)
3265 goto next;
3266
3267 if (key.objectid >= logical + map->stripe_len) {
 /* out of this device extent */
3269 if (key.objectid >= logic_end)
3270 stop_loop = 1;
3271 break;
3272 }
3273
3274 extent = btrfs_item_ptr(l, slot,
3275 struct btrfs_extent_item);
3276 flags = btrfs_extent_flags(l, extent);
3277 generation = btrfs_extent_generation(l, extent);
3278
3279 if ((flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) &&
3280 (key.objectid < logical ||
3281 key.objectid + bytes >
3282 logical + map->stripe_len)) {
3283 btrfs_err(fs_info,
3284 "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
3285 key.objectid, logical);
3286 spin_lock(&sctx->stat_lock);
3287 sctx->stat.uncorrectable_errors++;
3288 spin_unlock(&sctx->stat_lock);
3289 goto next;
3290 }
3291
3292again:
3293 extent_logical = key.objectid;
3294 extent_len = bytes;
3295
 /*
  * trim extent to this stripe
  */
3299 if (extent_logical < logical) {
3300 extent_len -= logical - extent_logical;
3301 extent_logical = logical;
3302 }
3303 if (extent_logical + extent_len >
3304 logical + map->stripe_len) {
3305 extent_len = logical + map->stripe_len -
3306 extent_logical;
3307 }
3308
3309 extent_physical = extent_logical - logical + physical;
3310 extent_dev = scrub_dev;
3311 extent_mirror_num = mirror_num;
3312 if (sctx->is_dev_replace)
3313 scrub_remap_extent(fs_info, extent_logical,
3314 extent_len, &extent_physical,
3315 &extent_dev,
3316 &extent_mirror_num);
3317
3318 ret = btrfs_lookup_csums_range(csum_root,
3319 extent_logical,
3320 extent_logical +
3321 extent_len - 1,
3322 &sctx->csum_list, 1);
3323 if (ret)
3324 goto out;
3325
3326 ret = scrub_extent(sctx, map, extent_logical, extent_len,
3327 extent_physical, extent_dev, flags,
3328 generation, extent_mirror_num,
3329 extent_logical - logical + physical);
3330
3331 scrub_free_csums(sctx);
3332
3333 if (ret)
3334 goto out;
3335
3336 if (extent_logical + extent_len <
3337 key.objectid + bytes) {
3338 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
 /*
  * loop until we find the next data stripe
  * or we have finished all stripes.
  */
3343loop:
3344 physical += map->stripe_len;
3345 ret = get_raid56_logic_offset(physical,
3346 num, map, &logical,
3347 &stripe_logical);
3348 logical += base;
3349
3350 if (ret && physical < physical_end) {
3351 stripe_logical += base;
3352 stripe_end = stripe_logical +
3353 increment;
3354 ret = scrub_raid56_parity(sctx,
3355 map, scrub_dev, ppath,
3356 stripe_logical,
3357 stripe_end);
3358 if (ret)
3359 goto out;
3360 goto loop;
3361 }
3362 } else {
3363 physical += map->stripe_len;
3364 logical += increment;
3365 }
3366 if (logical < key.objectid + bytes) {
3367 cond_resched();
3368 goto again;
3369 }
3370
3371 if (physical >= physical_end) {
3372 stop_loop = 1;
3373 break;
3374 }
3375 }
3376next:
3377 path->slots[0]++;
3378 }
3379 btrfs_release_path(path);
3380skip:
3381 logical += increment;
3382 physical += map->stripe_len;
3383 spin_lock(&sctx->stat_lock);
3384 if (stop_loop)
3385 sctx->stat.last_physical = map->stripes[num].physical +
3386 length;
3387 else
3388 sctx->stat.last_physical = physical;
3389 spin_unlock(&sctx->stat_lock);
3390 if (stop_loop)
3391 break;
3392 }
3393out:
 /* push queued extents */
3395 scrub_submit(sctx);
3396 mutex_lock(&sctx->wr_lock);
3397 scrub_wr_submit(sctx);
3398 mutex_unlock(&sctx->wr_lock);
3399
3400 blk_finish_plug(&plug);
3401 btrfs_free_path(path);
3402 btrfs_free_path(ppath);
3403 return ret < 0 ? ret : 0;
3404}
3405
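/*
 * Scrub the portion of the chunk at @chunk_offset that is stored on
 * @scrub_dev at @dev_offset: look up the chunk mapping and run
 * scrub_stripe() for every stripe of the chunk that matches this
 * device and physical offset.
 */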
3406static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
3407 struct btrfs_device *scrub_dev,
3408 u64 chunk_offset, u64 length,
3409 u64 dev_offset,
3410 struct btrfs_block_group_cache *cache)
3411{
3412 struct btrfs_fs_info *fs_info = sctx->fs_info;
3413 struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
3414 struct map_lookup *map;
3415 struct extent_map *em;
3416 int i;
3417 int ret = 0;
3418
3419 read_lock(&map_tree->map_tree.lock);
3420 em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
3421 read_unlock(&map_tree->map_tree.lock);
3422
3423 if (!em) {
 /*
  * Might have been an unused block group deleted by the cleaner
  * kthread or relocation.
  */
3428 spin_lock(&cache->lock);
3429 if (!cache->removed)
3430 ret = -EINVAL;
3431 spin_unlock(&cache->lock);
3432
3433 return ret;
3434 }
3435
3436 map = em->map_lookup;
3437 if (em->start != chunk_offset)
3438 goto out;
3439
3440 if (em->len < length)
3441 goto out;
3442
3443 for (i = 0; i < map->num_stripes; ++i) {
3444 if (map->stripes[i].dev->bdev == scrub_dev->bdev &&
3445 map->stripes[i].physical == dev_offset) {
3446 ret = scrub_stripe(sctx, map, scrub_dev, i,
3447 chunk_offset, length);
3448 if (ret)
3449 goto out;
3450 }
3451 }
3452out:
3453 free_extent_map(em);
3454
3455 return ret;
3456}
3457
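/*
 * Walk all device extents of @scrub_dev in the range [start, end),
 * set the corresponding block group read-only, scrub it via
 * scrub_chunk(), keep the dev-replace cursor up to date and restore
 * the block group to read-write afterwards.
 */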
3458static noinline_for_stack
3459int scrub_enumerate_chunks(struct scrub_ctx *sctx,
3460 struct btrfs_device *scrub_dev, u64 start, u64 end)
3461{
3462 struct btrfs_dev_extent *dev_extent = NULL;
3463 struct btrfs_path *path;
3464 struct btrfs_fs_info *fs_info = sctx->fs_info;
3465 struct btrfs_root *root = fs_info->dev_root;
3466 u64 length;
3467 u64 chunk_offset;
3468 int ret = 0;
3469 int ro_set;
3470 int slot;
3471 struct extent_buffer *l;
3472 struct btrfs_key key;
3473 struct btrfs_key found_key;
3474 struct btrfs_block_group_cache *cache;
3475 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
3476
3477 path = btrfs_alloc_path();
3478 if (!path)
3479 return -ENOMEM;
3480
3481 path->reada = READA_FORWARD;
3482 path->search_commit_root = 1;
3483 path->skip_locking = 1;
3484
3485 key.objectid = scrub_dev->devid;
3486 key.offset = 0ull;
3487 key.type = BTRFS_DEV_EXTENT_KEY;
3488
3489 while (1) {
3490 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3491 if (ret < 0)
3492 break;
3493 if (ret > 0) {
3494 if (path->slots[0] >=
3495 btrfs_header_nritems(path->nodes[0])) {
3496 ret = btrfs_next_leaf(root, path);
3497 if (ret < 0)
3498 break;
3499 if (ret > 0) {
3500 ret = 0;
3501 break;
3502 }
3503 } else {
3504 ret = 0;
3505 }
3506 }
3507
3508 l = path->nodes[0];
3509 slot = path->slots[0];
3510
3511 btrfs_item_key_to_cpu(l, &found_key, slot);
3512
3513 if (found_key.objectid != scrub_dev->devid)
3514 break;
3515
3516 if (found_key.type != BTRFS_DEV_EXTENT_KEY)
3517 break;
3518
3519 if (found_key.offset >= end)
3520 break;
3521
3522 if (found_key.offset < key.offset)
3523 break;
3524
3525 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
3526 length = btrfs_dev_extent_length(l, dev_extent);
3527
3528 if (found_key.offset + length <= start)
3529 goto skip;
3530
3531 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
3532
 /*
  * Get a reference on the corresponding block group to prevent
  * the chunk from going away while we scrub it.
  */
3537 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
3538
 /* some chunks are removed but not committed to disk yet,
  * continue scrubbing */
3541 if (!cache)
3542 goto skip;
3543
 /*
  * We need to call btrfs_inc_block_group_ro() with scrubs_paused,
  * to avoid a deadlock caused by:
  * btrfs_inc_block_group_ro()
  * -> btrfs_wait_for_commit()
  * -> btrfs_commit_transaction()
  * -> btrfs_scrub_pause()
  */
3552 scrub_pause_on(fs_info);
3553 ret = btrfs_inc_block_group_ro(cache);
3554 if (!ret && sctx->is_dev_replace) {
 /*
  * If we are doing a device replace, wait for any tasks
  * that started delalloc right before we set the block
  * group to RO mode, as they might have just allocated
  * an extent from it or decided they could do a nocow
  * write. And if any such tasks did that, wait for their
  * ordered extents to complete and then commit the
  * current transaction, so that we can later see the new
  * extent items in the extent tree - the ordered extents
  * create delayed data references (for cow writes) when
  * they complete, which will be run and insert the
  * corresponding extent items into the extent tree when
  * we commit the transaction they used when running
  * inode.c:btrfs_finish_ordered_io(). We later use
  * the commit root of the extent tree to find extents
  * to copy from the srcdev into the tgtdev, and we don't
  * want to miss any new extents.
  */
3573 btrfs_wait_block_group_reservations(cache);
3574 btrfs_wait_nocow_writers(cache);
3575 ret = btrfs_wait_ordered_roots(fs_info, U64_MAX,
3576 cache->key.objectid,
3577 cache->key.offset);
3578 if (ret > 0) {
3579 struct btrfs_trans_handle *trans;
3580
3581 trans = btrfs_join_transaction(root);
3582 if (IS_ERR(trans))
3583 ret = PTR_ERR(trans);
3584 else
3585 ret = btrfs_commit_transaction(trans);
3586 if (ret) {
3587 scrub_pause_off(fs_info);
3588 btrfs_put_block_group(cache);
3589 break;
3590 }
3591 }
3592 }
3593 scrub_pause_off(fs_info);
3594
3595 if (ret == 0) {
3596 ro_set = 1;
3597 } else if (ret == -ENOSPC) {
 /*
  * btrfs_inc_block_group_ro() returns -ENOSPC when it
  * failed to create a new chunk for metadata.
  * It is not a problem for scrub/replace, because
  * metadata is always cowed, and our scrub paused
  * commit_transactions.
  */
3605 ro_set = 0;
3606 } else {
3607 btrfs_warn(fs_info,
3608 "failed setting block group ro: %d", ret);
3609 btrfs_put_block_group(cache);
3610 break;
3611 }
3612
3613 down_write(&fs_info->dev_replace.rwsem);
3614 dev_replace->cursor_right = found_key.offset + length;
3615 dev_replace->cursor_left = found_key.offset;
3616 dev_replace->item_needs_writeback = 1;
3617 up_write(&dev_replace->rwsem);
3618
3619 ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length,
3620 found_key.offset, cache);
3621
 /*
  * Flush and submit all pending read and write bios, afterwards
  * wait for them.
  * Note that in the dev replace case, a read request causes
  * write requests that are submitted in the read completion
  * worker. Therefore in the current situation, it is required
  * that all write requests are flushed, so that all read and
  * write requests are really completed when bios_in_flight
  * changes to 0.
  */
3632 sctx->flush_all_writes = true;
3633 scrub_submit(sctx);
3634 mutex_lock(&sctx->wr_lock);
3635 scrub_wr_submit(sctx);
3636 mutex_unlock(&sctx->wr_lock);
3637
3638 wait_event(sctx->list_wait,
3639 atomic_read(&sctx->bios_in_flight) == 0);
3640
3641 scrub_pause_on(fs_info);
3642
 /*
  * Must be called before we decrease @scrub_paused.
  * Make sure we don't block transaction commit while
  * we are waiting for pending workers to finish.
  */
3648 wait_event(sctx->list_wait,
3649 atomic_read(&sctx->workers_pending) == 0);
3650 sctx->flush_all_writes = false;
3651
3652 scrub_pause_off(fs_info);
3653
3654 down_write(&fs_info->dev_replace.rwsem);
3655 dev_replace->cursor_left = dev_replace->cursor_right;
3656 dev_replace->item_needs_writeback = 1;
3657 up_write(&fs_info->dev_replace.rwsem);
3658
3659 if (ro_set)
3660 btrfs_dec_block_group_ro(cache);
3661
 /*
  * We might have prevented the cleaner kthread from deleting
  * this block group if it was already unused because we raced
  * and set it to RO mode first. So add it back to the unused
  * list, otherwise it might not ever be deleted unless a manual
  * balance is triggered or it becomes used and unused again.
  */
3669 spin_lock(&cache->lock);
3670 if (!cache->removed && !cache->ro && cache->reserved == 0 &&
3671 btrfs_block_group_used(&cache->item) == 0) {
3672 spin_unlock(&cache->lock);
3673 btrfs_mark_bg_unused(cache);
3674 } else {
3675 spin_unlock(&cache->lock);
3676 }
3677
3678 btrfs_put_block_group(cache);
3679 if (ret)
3680 break;
3681 if (sctx->is_dev_replace &&
3682 atomic64_read(&dev_replace->num_write_errors) > 0) {
3683 ret = -EIO;
3684 break;
3685 }
3686 if (sctx->stat.malloc_errors > 0) {
3687 ret = -ENOMEM;
3688 break;
3689 }
3690skip:
3691 key.offset = found_key.offset + length;
3692 btrfs_release_path(path);
3693 }
3694
3695 btrfs_free_path(path);
3696
3697 return ret;
3698}
3699
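/*
 * Scrub all superblock copies of @scrub_dev that fit within the
 * committed device size, using the generation of the last committed
 * transaction (or the seed device's own generation).
 */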
3700static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
3701 struct btrfs_device *scrub_dev)
3702{
3703 int i;
3704 u64 bytenr;
3705 u64 gen;
3706 int ret;
3707 struct btrfs_fs_info *fs_info = sctx->fs_info;
3708
3709 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
3710 return -EIO;
3711
 /* Seed devices of a new filesystem have their own generation. */
3713 if (scrub_dev->fs_devices != fs_info->fs_devices)
3714 gen = scrub_dev->generation;
3715 else
3716 gen = fs_info->last_trans_committed;
3717
3718 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
3719 bytenr = btrfs_sb_offset(i);
3720 if (bytenr + BTRFS_SUPER_INFO_SIZE >
3721 scrub_dev->commit_total_bytes)
3722 break;
3723
3724 ret = scrub_pages(sctx, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr,
3725 scrub_dev, BTRFS_EXTENT_FLAG_SUPER, gen, i,
3726 NULL, 1, bytenr);
3727 if (ret)
3728 return ret;
3729 }
3730 wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
3731
3732 return 0;
3733}
3734
/*
 * Get a reference count on fs_info->scrub_workers. Start worker threads
 * if necessary.
 */
3738static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
3739 int is_dev_replace)
3740{
3741 unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND;
3742 int max_active = fs_info->thread_pool_size;
3743
3744 lockdep_assert_held(&fs_info->scrub_lock);
3745
3746 if (refcount_read(&fs_info->scrub_workers_refcnt) == 0) {
3747 ASSERT(fs_info->scrub_workers == NULL);
3748 fs_info->scrub_workers = btrfs_alloc_workqueue(fs_info, "scrub",
3749 flags, is_dev_replace ? 1 : max_active, 4);
3750 if (!fs_info->scrub_workers)
3751 goto fail_scrub_workers;
3752
3753 ASSERT(fs_info->scrub_wr_completion_workers == NULL);
3754 fs_info->scrub_wr_completion_workers =
3755 btrfs_alloc_workqueue(fs_info, "scrubwrc", flags,
3756 max_active, 2);
3757 if (!fs_info->scrub_wr_completion_workers)
3758 goto fail_scrub_wr_completion_workers;
3759
3760 ASSERT(fs_info->scrub_parity_workers == NULL);
3761 fs_info->scrub_parity_workers =
3762 btrfs_alloc_workqueue(fs_info, "scrubparity", flags,
3763 max_active, 2);
3764 if (!fs_info->scrub_parity_workers)
3765 goto fail_scrub_parity_workers;
3766
3767 refcount_set(&fs_info->scrub_workers_refcnt, 1);
3768 } else {
3769 refcount_inc(&fs_info->scrub_workers_refcnt);
3770 }
3771 return 0;
3772
3773fail_scrub_parity_workers:
3774 btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers);
3775fail_scrub_wr_completion_workers:
3776 btrfs_destroy_workqueue(fs_info->scrub_workers);
3777fail_scrub_workers:
3778 return -ENOMEM;
3779}
3780
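/*
 * Main entry point for scrub and device replace: validate the size
 * assumptions scrub relies on, set up the scrub context and worker
 * threads, scrub the superblocks and then every chunk allocated on the
 * device, and finally report progress and tear everything down again.
 */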
3781int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
3782 u64 end, struct btrfs_scrub_progress *progress,
3783 int readonly, int is_dev_replace)
3784{
3785 struct scrub_ctx *sctx;
3786 int ret;
3787 struct btrfs_device *dev;
3788 unsigned int nofs_flag;
3789 struct btrfs_workqueue *scrub_workers = NULL;
3790 struct btrfs_workqueue *scrub_wr_comp = NULL;
3791 struct btrfs_workqueue *scrub_parity = NULL;
3792
3793 if (btrfs_fs_closing(fs_info))
3794 return -EINVAL;
3795
3796 if (fs_info->nodesize > BTRFS_STRIPE_LEN) {
 /*
  * In this case scrub is unable to calculate the checksum
  * the way scrub is implemented. Do not handle this
  * situation at all because it won't ever happen.
  */
3802 btrfs_err(fs_info,
3803 "scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails",
3804 fs_info->nodesize,
3805 BTRFS_STRIPE_LEN);
3806 return -EINVAL;
3807 }
3808
3809 if (fs_info->sectorsize != PAGE_SIZE) {
 /* not supported for data w/o checksums */
3811 btrfs_err_rl(fs_info,
3812 "scrub: size assumption sectorsize != PAGE_SIZE (%d != %lu) fails",
3813 fs_info->sectorsize, PAGE_SIZE);
3814 return -EINVAL;
3815 }
3816
3817 if (fs_info->nodesize >
3818 PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK ||
3819 fs_info->sectorsize > PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK) {
 /*
  * would exhaust the array bounds of pagev member in
  * struct scrub_block
  */
3824 btrfs_err(fs_info,
3825 "scrub: size assumption nodesize and sectorsize <= SCRUB_MAX_PAGES_PER_BLOCK (%d <= %d && %d <= %d) fails",
3826 fs_info->nodesize,
3827 SCRUB_MAX_PAGES_PER_BLOCK,
3828 fs_info->sectorsize,
3829 SCRUB_MAX_PAGES_PER_BLOCK);
3830 return -EINVAL;
3831 }
3832
 /* Allocate outside of device_list_mutex */
3834 sctx = scrub_setup_ctx(fs_info, is_dev_replace);
3835 if (IS_ERR(sctx))
3836 return PTR_ERR(sctx);
3837
3838 mutex_lock(&fs_info->fs_devices->device_list_mutex);
3839 dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true);
3840 if (!dev || (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) &&
3841 !is_dev_replace)) {
3842 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3843 ret = -ENODEV;
3844 goto out_free_ctx;
3845 }
3846
3847 if (!is_dev_replace && !readonly &&
3848 !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
3849 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3850 btrfs_err_in_rcu(fs_info, "scrub: device %s is not writable",
3851 rcu_str_deref(dev->name));
3852 ret = -EROFS;
3853 goto out_free_ctx;
3854 }
3855
3856 mutex_lock(&fs_info->scrub_lock);
3857 if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
3858 test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &dev->dev_state)) {
3859 mutex_unlock(&fs_info->scrub_lock);
3860 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3861 ret = -EIO;
3862 goto out_free_ctx;
3863 }
3864
3865 down_read(&fs_info->dev_replace.rwsem);
3866 if (dev->scrub_ctx ||
3867 (!is_dev_replace &&
3868 btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) {
3869 up_read(&fs_info->dev_replace.rwsem);
3870 mutex_unlock(&fs_info->scrub_lock);
3871 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3872 ret = -EINPROGRESS;
3873 goto out_free_ctx;
3874 }
3875 up_read(&fs_info->dev_replace.rwsem);
3876
3877 ret = scrub_workers_get(fs_info, is_dev_replace);
3878 if (ret) {
3879 mutex_unlock(&fs_info->scrub_lock);
3880 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3881 goto out_free_ctx;
3882 }
3883
3884 sctx->readonly = readonly;
3885 dev->scrub_ctx = sctx;
3886 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3887
 /*
  * By checking @scrub_pause_req here, we can avoid
  * a race between committing the transaction and scrubbing.
  */
3892 __scrub_blocked_if_needed(fs_info);
3893 atomic_inc(&fs_info->scrubs_running);
3894 mutex_unlock(&fs_info->scrub_lock);
3895
 /*
  * In order to avoid deadlock with reclaim when there is a transaction
  * trying to pause scrub, make sure we use GFP_NOFS for all the
  * allocations done at scrub_pages() and scrub_pages_for_parity()
  * invoked by our callees. The pausing request is done when the
  * transaction commit starts, and it blocks the transaction until scrub
  * is paused (done at specific points at scrub_stripe() or right above
  * before incrementing fs_info->scrubs_running).
  */
3905 nofs_flag = memalloc_nofs_save();
3906 if (!is_dev_replace) {
3907 btrfs_info(fs_info, "scrub: started on devid %llu", devid);
 /*
  * By holding the device list mutex, we can
  * kick off writing super in log tree sync.
  */
3912 mutex_lock(&fs_info->fs_devices->device_list_mutex);
3913 ret = scrub_supers(sctx, dev);
3914 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3915 }
3916
3917 if (!ret)
3918 ret = scrub_enumerate_chunks(sctx, dev, start, end);
3919 memalloc_nofs_restore(nofs_flag);
3920
3921 wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
3922 atomic_dec(&fs_info->scrubs_running);
3923 wake_up(&fs_info->scrub_pause_wait);
3924
3925 wait_event(sctx->list_wait, atomic_read(&sctx->workers_pending) == 0);
3926
3927 if (progress)
3928 memcpy(progress, &sctx->stat, sizeof(*progress));
3929
3930 if (!is_dev_replace)
3931 btrfs_info(fs_info, "scrub: %s on devid %llu with status: %d",
3932 ret ? "not finished" : "finished", devid, ret);
3933
3934 mutex_lock(&fs_info->scrub_lock);
3935 dev->scrub_ctx = NULL;
3936 if (refcount_dec_and_test(&fs_info->scrub_workers_refcnt)) {
3937 scrub_workers = fs_info->scrub_workers;
3938 scrub_wr_comp = fs_info->scrub_wr_completion_workers;
3939 scrub_parity = fs_info->scrub_parity_workers;
3940
3941 fs_info->scrub_workers = NULL;
3942 fs_info->scrub_wr_completion_workers = NULL;
3943 fs_info->scrub_parity_workers = NULL;
3944 }
3945 mutex_unlock(&fs_info->scrub_lock);
3946
3947 btrfs_destroy_workqueue(scrub_workers);
3948 btrfs_destroy_workqueue(scrub_wr_comp);
3949 btrfs_destroy_workqueue(scrub_parity);
3950 scrub_put_ctx(sctx);
3951
3952 return ret;
3953
3954out_free_ctx:
3955 scrub_free_ctx(sctx);
3956
3957 return ret;
3958}
3959
3960void btrfs_scrub_pause(struct btrfs_fs_info *fs_info)
3961{
3962 mutex_lock(&fs_info->scrub_lock);
3963 atomic_inc(&fs_info->scrub_pause_req);
3964 while (atomic_read(&fs_info->scrubs_paused) !=
3965 atomic_read(&fs_info->scrubs_running)) {
3966 mutex_unlock(&fs_info->scrub_lock);
3967 wait_event(fs_info->scrub_pause_wait,
3968 atomic_read(&fs_info->scrubs_paused) ==
3969 atomic_read(&fs_info->scrubs_running));
3970 mutex_lock(&fs_info->scrub_lock);
3971 }
3972 mutex_unlock(&fs_info->scrub_lock);
3973}
3974
3975void btrfs_scrub_continue(struct btrfs_fs_info *fs_info)
3976{
3977 atomic_dec(&fs_info->scrub_pause_req);
3978 wake_up(&fs_info->scrub_pause_wait);
3979}
3980
3981int btrfs_scrub_cancel(struct btrfs_fs_info *fs_info)
3982{
3983 mutex_lock(&fs_info->scrub_lock);
3984 if (!atomic_read(&fs_info->scrubs_running)) {
3985 mutex_unlock(&fs_info->scrub_lock);
3986 return -ENOTCONN;
3987 }
3988
3989 atomic_inc(&fs_info->scrub_cancel_req);
3990 while (atomic_read(&fs_info->scrubs_running)) {
3991 mutex_unlock(&fs_info->scrub_lock);
3992 wait_event(fs_info->scrub_pause_wait,
3993 atomic_read(&fs_info->scrubs_running) == 0);
3994 mutex_lock(&fs_info->scrub_lock);
3995 }
3996 atomic_dec(&fs_info->scrub_cancel_req);
3997 mutex_unlock(&fs_info->scrub_lock);
3998
3999 return 0;
4000}
4001
4002int btrfs_scrub_cancel_dev(struct btrfs_fs_info *fs_info,
4003 struct btrfs_device *dev)
4004{
4005 struct scrub_ctx *sctx;
4006
4007 mutex_lock(&fs_info->scrub_lock);
4008 sctx = dev->scrub_ctx;
4009 if (!sctx) {
4010 mutex_unlock(&fs_info->scrub_lock);
4011 return -ENOTCONN;
4012 }
4013 atomic_inc(&sctx->cancel_req);
4014 while (dev->scrub_ctx) {
4015 mutex_unlock(&fs_info->scrub_lock);
4016 wait_event(fs_info->scrub_pause_wait,
4017 dev->scrub_ctx == NULL);
4018 mutex_lock(&fs_info->scrub_lock);
4019 }
4020 mutex_unlock(&fs_info->scrub_lock);
4021
4022 return 0;
4023}
4024
4025int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
4026 struct btrfs_scrub_progress *progress)
4027{
4028 struct btrfs_device *dev;
4029 struct scrub_ctx *sctx = NULL;
4030
4031 mutex_lock(&fs_info->fs_devices->device_list_mutex);
4032 dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true);
4033 if (dev)
4034 sctx = dev->scrub_ctx;
4035 if (sctx)
4036 memcpy(progress, &sctx->stat, sizeof(*progress));
4037 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
4038
4039 return dev ? (sctx ? 0 : -ENOTCONN) : -ENODEV;
4040}
4041
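/*
 * For dev-replace: map @extent_logical to the physical address, device
 * and mirror number it should be read from, i.e. the first stripe of
 * the READ mapping returned by btrfs_map_block().
 */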
4042static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
4043 u64 extent_logical, u64 extent_len,
4044 u64 *extent_physical,
4045 struct btrfs_device **extent_dev,
4046 int *extent_mirror_num)
4047{
4048 u64 mapped_length;
4049 struct btrfs_bio *bbio = NULL;
4050 int ret;
4051
4052 mapped_length = extent_len;
4053 ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, extent_logical,
4054 &mapped_length, &bbio, 0);
4055 if (ret || !bbio || mapped_length < extent_len ||
4056 !bbio->stripes[0].dev->bdev) {
4057 btrfs_put_bbio(bbio);
4058 return;
4059 }
4060
4061 *extent_physical = bbio->stripes[0].physical;
4062 *extent_mirror_num = bbio->mirror_num;
4063 *extent_dev = bbio->stripes[0].dev;
4064 btrfs_put_bbio(bbio);
4065}
4066