1
2
3
4
5
6#include <linux/blkdev.h>
7#include <linux/ratelimit.h>
8#include <linux/sched/mm.h>
9#include <crypto/hash.h>
10#include "ctree.h"
11#include "volumes.h"
12#include "disk-io.h"
13#include "ordered-data.h"
14#include "transaction.h"
15#include "backref.h"
16#include "extent_io.h"
17#include "dev-replace.h"
18#include "check-integrity.h"
19#include "rcu-string.h"
20#include "raid56.h"
21#include "block-group.h"
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36struct scrub_block;
37struct scrub_ctx;
38
39
40
41
42
43
44
45#define SCRUB_PAGES_PER_RD_BIO 32
46#define SCRUB_PAGES_PER_WR_BIO 32
47#define SCRUB_BIOS_PER_SCTX 64
48
49
50
51
52
53
54#define SCRUB_MAX_PAGES_PER_BLOCK 16
55
56struct scrub_recover {
57 refcount_t refs;
58 struct btrfs_bio *bbio;
59 u64 map_length;
60};
61
62struct scrub_page {
63 struct scrub_block *sblock;
64 struct page *page;
65 struct btrfs_device *dev;
66 struct list_head list;
67 u64 flags;
68 u64 generation;
69 u64 logical;
70 u64 physical;
71 u64 physical_for_dev_replace;
72 atomic_t refs;
73 struct {
74 unsigned int mirror_num:8;
75 unsigned int have_csum:1;
76 unsigned int io_error:1;
77 };
78 u8 csum[BTRFS_CSUM_SIZE];
79
80 struct scrub_recover *recover;
81};
82
83struct scrub_bio {
84 int index;
85 struct scrub_ctx *sctx;
86 struct btrfs_device *dev;
87 struct bio *bio;
88 blk_status_t status;
89 u64 logical;
90 u64 physical;
91#if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO
92 struct scrub_page *pagev[SCRUB_PAGES_PER_WR_BIO];
93#else
94 struct scrub_page *pagev[SCRUB_PAGES_PER_RD_BIO];
95#endif
96 int page_count;
97 int next_free;
98 struct btrfs_work work;
99};
100
101struct scrub_block {
102 struct scrub_page *pagev[SCRUB_MAX_PAGES_PER_BLOCK];
103 int page_count;
104 atomic_t outstanding_pages;
105 refcount_t refs;
106 struct scrub_ctx *sctx;
107 struct scrub_parity *sparity;
108 struct {
109 unsigned int header_error:1;
110 unsigned int checksum_error:1;
111 unsigned int no_io_error_seen:1;
112 unsigned int generation_error:1;
113
114
115
116 unsigned int data_corrected:1;
117 };
118 struct btrfs_work work;
119};
120
121
122struct scrub_parity {
123 struct scrub_ctx *sctx;
124
125 struct btrfs_device *scrub_dev;
126
127 u64 logic_start;
128
129 u64 logic_end;
130
131 int nsectors;
132
133 u64 stripe_len;
134
135 refcount_t refs;
136
137 struct list_head spages;
138
139
140 struct btrfs_work work;
141
142
143 unsigned long *dbitmap;
144
145
146
147
148
149 unsigned long *ebitmap;
150
151 unsigned long bitmap[0];
152};
153
154struct scrub_ctx {
155 struct scrub_bio *bios[SCRUB_BIOS_PER_SCTX];
156 struct btrfs_fs_info *fs_info;
157 int first_free;
158 int curr;
159 atomic_t bios_in_flight;
160 atomic_t workers_pending;
161 spinlock_t list_lock;
162 wait_queue_head_t list_wait;
163 u16 csum_size;
164 struct list_head csum_list;
165 atomic_t cancel_req;
166 int readonly;
167 int pages_per_rd_bio;
168
169 int is_dev_replace;
170
171 struct scrub_bio *wr_curr_bio;
172 struct mutex wr_lock;
173 int pages_per_wr_bio;
174 struct btrfs_device *wr_tgtdev;
175 bool flush_all_writes;
176
177
178
179
180 struct btrfs_scrub_progress stat;
181 spinlock_t stat_lock;
182
183
184
185
186
187
188
189
190 refcount_t refs;
191};
192
193struct scrub_warning {
194 struct btrfs_path *path;
195 u64 extent_item_size;
196 const char *errstr;
197 u64 physical;
198 u64 logical;
199 struct btrfs_device *dev;
200};
201
202struct full_stripe_lock {
203 struct rb_node node;
204 u64 logical;
205 u64 refs;
206 struct mutex mutex;
207};
208
209static void scrub_pending_bio_inc(struct scrub_ctx *sctx);
210static void scrub_pending_bio_dec(struct scrub_ctx *sctx);
211static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
212static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
213 struct scrub_block *sblocks_for_recheck);
214static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
215 struct scrub_block *sblock,
216 int retry_failed_mirror);
217static void scrub_recheck_block_checksum(struct scrub_block *sblock);
218static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
219 struct scrub_block *sblock_good);
220static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
221 struct scrub_block *sblock_good,
222 int page_num, int force_write);
223static void scrub_write_block_to_dev_replace(struct scrub_block *sblock);
224static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
225 int page_num);
226static int scrub_checksum_data(struct scrub_block *sblock);
227static int scrub_checksum_tree_block(struct scrub_block *sblock);
228static int scrub_checksum_super(struct scrub_block *sblock);
229static void scrub_block_get(struct scrub_block *sblock);
230static void scrub_block_put(struct scrub_block *sblock);
231static void scrub_page_get(struct scrub_page *spage);
232static void scrub_page_put(struct scrub_page *spage);
233static void scrub_parity_get(struct scrub_parity *sparity);
234static void scrub_parity_put(struct scrub_parity *sparity);
235static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
236 struct scrub_page *spage);
237static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
238 u64 physical, struct btrfs_device *dev, u64 flags,
239 u64 gen, int mirror_num, u8 *csum, int force,
240 u64 physical_for_dev_replace);
241static void scrub_bio_end_io(struct bio *bio);
242static void scrub_bio_end_io_worker(struct btrfs_work *work);
243static void scrub_block_complete(struct scrub_block *sblock);
244static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
245 u64 extent_logical, u64 extent_len,
246 u64 *extent_physical,
247 struct btrfs_device **extent_dev,
248 int *extent_mirror_num);
249static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
250 struct scrub_page *spage);
251static void scrub_wr_submit(struct scrub_ctx *sctx);
252static void scrub_wr_bio_end_io(struct bio *bio);
253static void scrub_wr_bio_end_io_worker(struct btrfs_work *work);
254static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
255static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
256static void scrub_put_ctx(struct scrub_ctx *sctx);
257
258static inline int scrub_is_page_on_raid56(struct scrub_page *page)
259{
260 return page->recover &&
261 (page->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK);
262}
263
264static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
265{
266 refcount_inc(&sctx->refs);
267 atomic_inc(&sctx->bios_in_flight);
268}
269
270static void scrub_pending_bio_dec(struct scrub_ctx *sctx)
271{
272 atomic_dec(&sctx->bios_in_flight);
273 wake_up(&sctx->list_wait);
274 scrub_put_ctx(sctx);
275}
276
277static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
278{
279 while (atomic_read(&fs_info->scrub_pause_req)) {
280 mutex_unlock(&fs_info->scrub_lock);
281 wait_event(fs_info->scrub_pause_wait,
282 atomic_read(&fs_info->scrub_pause_req) == 0);
283 mutex_lock(&fs_info->scrub_lock);
284 }
285}
286
287static void scrub_pause_on(struct btrfs_fs_info *fs_info)
288{
289 atomic_inc(&fs_info->scrubs_paused);
290 wake_up(&fs_info->scrub_pause_wait);
291}
292
293static void scrub_pause_off(struct btrfs_fs_info *fs_info)
294{
295 mutex_lock(&fs_info->scrub_lock);
296 __scrub_blocked_if_needed(fs_info);
297 atomic_dec(&fs_info->scrubs_paused);
298 mutex_unlock(&fs_info->scrub_lock);
299
300 wake_up(&fs_info->scrub_pause_wait);
301}
302
303static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
304{
305 scrub_pause_on(fs_info);
306 scrub_pause_off(fs_info);
307}
308
309
310
311
312
313
314
315
316
317
318
319static struct full_stripe_lock *insert_full_stripe_lock(
320 struct btrfs_full_stripe_locks_tree *locks_root,
321 u64 fstripe_logical)
322{
323 struct rb_node **p;
324 struct rb_node *parent = NULL;
325 struct full_stripe_lock *entry;
326 struct full_stripe_lock *ret;
327
328 lockdep_assert_held(&locks_root->lock);
329
330 p = &locks_root->root.rb_node;
331 while (*p) {
332 parent = *p;
333 entry = rb_entry(parent, struct full_stripe_lock, node);
334 if (fstripe_logical < entry->logical) {
335 p = &(*p)->rb_left;
336 } else if (fstripe_logical > entry->logical) {
337 p = &(*p)->rb_right;
338 } else {
339 entry->refs++;
340 return entry;
341 }
342 }
343
344
345
346
347 ret = kmalloc(sizeof(*ret), GFP_KERNEL);
348 if (!ret)
349 return ERR_PTR(-ENOMEM);
350 ret->logical = fstripe_logical;
351 ret->refs = 1;
352 mutex_init(&ret->mutex);
353
354 rb_link_node(&ret->node, parent, p);
355 rb_insert_color(&ret->node, &locks_root->root);
356 return ret;
357}
358
359
360
361
362
363
364
365static struct full_stripe_lock *search_full_stripe_lock(
366 struct btrfs_full_stripe_locks_tree *locks_root,
367 u64 fstripe_logical)
368{
369 struct rb_node *node;
370 struct full_stripe_lock *entry;
371
372 lockdep_assert_held(&locks_root->lock);
373
374 node = locks_root->root.rb_node;
375 while (node) {
376 entry = rb_entry(node, struct full_stripe_lock, node);
377 if (fstripe_logical < entry->logical)
378 node = node->rb_left;
379 else if (fstripe_logical > entry->logical)
380 node = node->rb_right;
381 else
382 return entry;
383 }
384 return NULL;
385}
386
387
388
389
390
391
392static u64 get_full_stripe_logical(struct btrfs_block_group_cache *cache,
393 u64 bytenr)
394{
395 u64 ret;
396
397
398
399
400
401 WARN_ON_ONCE(cache->full_stripe_len >= U32_MAX);
402
403
404
405
406
407 ret = div64_u64(bytenr - cache->key.objectid, cache->full_stripe_len) *
408 cache->full_stripe_len + cache->key.objectid;
409 return ret;
410}
411
412
413
414
415
416
417
418
419
420
421
422
423static int lock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr,
424 bool *locked_ret)
425{
426 struct btrfs_block_group_cache *bg_cache;
427 struct btrfs_full_stripe_locks_tree *locks_root;
428 struct full_stripe_lock *existing;
429 u64 fstripe_start;
430 int ret = 0;
431
432 *locked_ret = false;
433 bg_cache = btrfs_lookup_block_group(fs_info, bytenr);
434 if (!bg_cache) {
435 ASSERT(0);
436 return -ENOENT;
437 }
438
439
440 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK))
441 goto out;
442 locks_root = &bg_cache->full_stripe_locks_root;
443
444 fstripe_start = get_full_stripe_logical(bg_cache, bytenr);
445
446
447 mutex_lock(&locks_root->lock);
448 existing = insert_full_stripe_lock(locks_root, fstripe_start);
449 mutex_unlock(&locks_root->lock);
450 if (IS_ERR(existing)) {
451 ret = PTR_ERR(existing);
452 goto out;
453 }
454 mutex_lock(&existing->mutex);
455 *locked_ret = true;
456out:
457 btrfs_put_block_group(bg_cache);
458 return ret;
459}
460
461
462
463
464
465
466
467
468
469
470static int unlock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr,
471 bool locked)
472{
473 struct btrfs_block_group_cache *bg_cache;
474 struct btrfs_full_stripe_locks_tree *locks_root;
475 struct full_stripe_lock *fstripe_lock;
476 u64 fstripe_start;
477 bool freeit = false;
478 int ret = 0;
479
480
481 if (!locked)
482 return 0;
483
484 bg_cache = btrfs_lookup_block_group(fs_info, bytenr);
485 if (!bg_cache) {
486 ASSERT(0);
487 return -ENOENT;
488 }
489 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK))
490 goto out;
491
492 locks_root = &bg_cache->full_stripe_locks_root;
493 fstripe_start = get_full_stripe_logical(bg_cache, bytenr);
494
495 mutex_lock(&locks_root->lock);
496 fstripe_lock = search_full_stripe_lock(locks_root, fstripe_start);
497
498 if (!fstripe_lock) {
499 WARN_ON(1);
500 ret = -ENOENT;
501 mutex_unlock(&locks_root->lock);
502 goto out;
503 }
504
505 if (fstripe_lock->refs == 0) {
506 WARN_ON(1);
507 btrfs_warn(fs_info, "full stripe lock at %llu refcount underflow",
508 fstripe_lock->logical);
509 } else {
510 fstripe_lock->refs--;
511 }
512
513 if (fstripe_lock->refs == 0) {
514 rb_erase(&fstripe_lock->node, &locks_root->root);
515 freeit = true;
516 }
517 mutex_unlock(&locks_root->lock);
518
519 mutex_unlock(&fstripe_lock->mutex);
520 if (freeit)
521 kfree(fstripe_lock);
522out:
523 btrfs_put_block_group(bg_cache);
524 return ret;
525}
526
527static void scrub_free_csums(struct scrub_ctx *sctx)
528{
529 while (!list_empty(&sctx->csum_list)) {
530 struct btrfs_ordered_sum *sum;
531 sum = list_first_entry(&sctx->csum_list,
532 struct btrfs_ordered_sum, list);
533 list_del(&sum->list);
534 kfree(sum);
535 }
536}
537
538static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
539{
540 int i;
541
542 if (!sctx)
543 return;
544
545
546 if (sctx->curr != -1) {
547 struct scrub_bio *sbio = sctx->bios[sctx->curr];
548
549 for (i = 0; i < sbio->page_count; i++) {
550 WARN_ON(!sbio->pagev[i]->page);
551 scrub_block_put(sbio->pagev[i]->sblock);
552 }
553 bio_put(sbio->bio);
554 }
555
556 for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
557 struct scrub_bio *sbio = sctx->bios[i];
558
559 if (!sbio)
560 break;
561 kfree(sbio);
562 }
563
564 kfree(sctx->wr_curr_bio);
565 scrub_free_csums(sctx);
566 kfree(sctx);
567}
568
569static void scrub_put_ctx(struct scrub_ctx *sctx)
570{
571 if (refcount_dec_and_test(&sctx->refs))
572 scrub_free_ctx(sctx);
573}
574
575static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
576 struct btrfs_fs_info *fs_info, int is_dev_replace)
577{
578 struct scrub_ctx *sctx;
579 int i;
580
581 sctx = kzalloc(sizeof(*sctx), GFP_KERNEL);
582 if (!sctx)
583 goto nomem;
584 refcount_set(&sctx->refs, 1);
585 sctx->is_dev_replace = is_dev_replace;
586 sctx->pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO;
587 sctx->curr = -1;
588 sctx->fs_info = fs_info;
589 INIT_LIST_HEAD(&sctx->csum_list);
590 for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
591 struct scrub_bio *sbio;
592
593 sbio = kzalloc(sizeof(*sbio), GFP_KERNEL);
594 if (!sbio)
595 goto nomem;
596 sctx->bios[i] = sbio;
597
598 sbio->index = i;
599 sbio->sctx = sctx;
600 sbio->page_count = 0;
601 btrfs_init_work(&sbio->work, btrfs_scrub_helper,
602 scrub_bio_end_io_worker, NULL, NULL);
603
604 if (i != SCRUB_BIOS_PER_SCTX - 1)
605 sctx->bios[i]->next_free = i + 1;
606 else
607 sctx->bios[i]->next_free = -1;
608 }
609 sctx->first_free = 0;
610 atomic_set(&sctx->bios_in_flight, 0);
611 atomic_set(&sctx->workers_pending, 0);
612 atomic_set(&sctx->cancel_req, 0);
613 sctx->csum_size = btrfs_super_csum_size(fs_info->super_copy);
614
615 spin_lock_init(&sctx->list_lock);
616 spin_lock_init(&sctx->stat_lock);
617 init_waitqueue_head(&sctx->list_wait);
618
619 WARN_ON(sctx->wr_curr_bio != NULL);
620 mutex_init(&sctx->wr_lock);
621 sctx->wr_curr_bio = NULL;
622 if (is_dev_replace) {
623 WARN_ON(!fs_info->dev_replace.tgtdev);
624 sctx->pages_per_wr_bio = SCRUB_PAGES_PER_WR_BIO;
625 sctx->wr_tgtdev = fs_info->dev_replace.tgtdev;
626 sctx->flush_all_writes = false;
627 }
628
629 return sctx;
630
631nomem:
632 scrub_free_ctx(sctx);
633 return ERR_PTR(-ENOMEM);
634}
635
636static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
637 void *warn_ctx)
638{
639 u64 isize;
640 u32 nlink;
641 int ret;
642 int i;
643 unsigned nofs_flag;
644 struct extent_buffer *eb;
645 struct btrfs_inode_item *inode_item;
646 struct scrub_warning *swarn = warn_ctx;
647 struct btrfs_fs_info *fs_info = swarn->dev->fs_info;
648 struct inode_fs_paths *ipath = NULL;
649 struct btrfs_root *local_root;
650 struct btrfs_key root_key;
651 struct btrfs_key key;
652
653 root_key.objectid = root;
654 root_key.type = BTRFS_ROOT_ITEM_KEY;
655 root_key.offset = (u64)-1;
656 local_root = btrfs_read_fs_root_no_name(fs_info, &root_key);
657 if (IS_ERR(local_root)) {
658 ret = PTR_ERR(local_root);
659 goto err;
660 }
661
662
663
664
665 key.objectid = inum;
666 key.type = BTRFS_INODE_ITEM_KEY;
667 key.offset = 0;
668
669 ret = btrfs_search_slot(NULL, local_root, &key, swarn->path, 0, 0);
670 if (ret) {
671 btrfs_release_path(swarn->path);
672 goto err;
673 }
674
675 eb = swarn->path->nodes[0];
676 inode_item = btrfs_item_ptr(eb, swarn->path->slots[0],
677 struct btrfs_inode_item);
678 isize = btrfs_inode_size(eb, inode_item);
679 nlink = btrfs_inode_nlink(eb, inode_item);
680 btrfs_release_path(swarn->path);
681
682
683
684
685
686
687 nofs_flag = memalloc_nofs_save();
688 ipath = init_ipath(4096, local_root, swarn->path);
689 memalloc_nofs_restore(nofs_flag);
690 if (IS_ERR(ipath)) {
691 ret = PTR_ERR(ipath);
692 ipath = NULL;
693 goto err;
694 }
695 ret = paths_from_inode(inum, ipath);
696
697 if (ret < 0)
698 goto err;
699
700
701
702
703
704 for (i = 0; i < ipath->fspath->elem_cnt; ++i)
705 btrfs_warn_in_rcu(fs_info,
706"%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu, length %llu, links %u (path: %s)",
707 swarn->errstr, swarn->logical,
708 rcu_str_deref(swarn->dev->name),
709 swarn->physical,
710 root, inum, offset,
711 min(isize - offset, (u64)PAGE_SIZE), nlink,
712 (char *)(unsigned long)ipath->fspath->val[i]);
713
714 free_ipath(ipath);
715 return 0;
716
717err:
718 btrfs_warn_in_rcu(fs_info,
719 "%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu: path resolving failed with ret=%d",
720 swarn->errstr, swarn->logical,
721 rcu_str_deref(swarn->dev->name),
722 swarn->physical,
723 root, inum, offset, ret);
724
725 free_ipath(ipath);
726 return 0;
727}
728
729static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
730{
731 struct btrfs_device *dev;
732 struct btrfs_fs_info *fs_info;
733 struct btrfs_path *path;
734 struct btrfs_key found_key;
735 struct extent_buffer *eb;
736 struct btrfs_extent_item *ei;
737 struct scrub_warning swarn;
738 unsigned long ptr = 0;
739 u64 extent_item_pos;
740 u64 flags = 0;
741 u64 ref_root;
742 u32 item_size;
743 u8 ref_level = 0;
744 int ret;
745
746 WARN_ON(sblock->page_count < 1);
747 dev = sblock->pagev[0]->dev;
748 fs_info = sblock->sctx->fs_info;
749
750 path = btrfs_alloc_path();
751 if (!path)
752 return;
753
754 swarn.physical = sblock->pagev[0]->physical;
755 swarn.logical = sblock->pagev[0]->logical;
756 swarn.errstr = errstr;
757 swarn.dev = NULL;
758
759 ret = extent_from_logical(fs_info, swarn.logical, path, &found_key,
760 &flags);
761 if (ret < 0)
762 goto out;
763
764 extent_item_pos = swarn.logical - found_key.objectid;
765 swarn.extent_item_size = found_key.offset;
766
767 eb = path->nodes[0];
768 ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
769 item_size = btrfs_item_size_nr(eb, path->slots[0]);
770
771 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
772 do {
773 ret = tree_backref_for_extent(&ptr, eb, &found_key, ei,
774 item_size, &ref_root,
775 &ref_level);
776 btrfs_warn_in_rcu(fs_info,
777"%s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu",
778 errstr, swarn.logical,
779 rcu_str_deref(dev->name),
780 swarn.physical,
781 ref_level ? "node" : "leaf",
782 ret < 0 ? -1 : ref_level,
783 ret < 0 ? -1 : ref_root);
784 } while (ret != 1);
785 btrfs_release_path(path);
786 } else {
787 btrfs_release_path(path);
788 swarn.path = path;
789 swarn.dev = dev;
790 iterate_extent_inodes(fs_info, found_key.objectid,
791 extent_item_pos, 1,
792 scrub_print_warning_inode, &swarn, false);
793 }
794
795out:
796 btrfs_free_path(path);
797}
798
799static inline void scrub_get_recover(struct scrub_recover *recover)
800{
801 refcount_inc(&recover->refs);
802}
803
804static inline void scrub_put_recover(struct btrfs_fs_info *fs_info,
805 struct scrub_recover *recover)
806{
807 if (refcount_dec_and_test(&recover->refs)) {
808 btrfs_bio_counter_dec(fs_info);
809 btrfs_put_bbio(recover->bbio);
810 kfree(recover);
811 }
812}
813
814
815
816
817
818
819
820
821
822static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
823{
824 struct scrub_ctx *sctx = sblock_to_check->sctx;
825 struct btrfs_device *dev;
826 struct btrfs_fs_info *fs_info;
827 u64 logical;
828 unsigned int failed_mirror_index;
829 unsigned int is_metadata;
830 unsigned int have_csum;
831 struct scrub_block *sblocks_for_recheck;
832 struct scrub_block *sblock_bad;
833 int ret;
834 int mirror_index;
835 int page_num;
836 int success;
837 bool full_stripe_locked;
838 unsigned int nofs_flag;
839 static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
840 DEFAULT_RATELIMIT_BURST);
841
842 BUG_ON(sblock_to_check->page_count < 1);
843 fs_info = sctx->fs_info;
844 if (sblock_to_check->pagev[0]->flags & BTRFS_EXTENT_FLAG_SUPER) {
845
846
847
848
849
850 spin_lock(&sctx->stat_lock);
851 ++sctx->stat.super_errors;
852 spin_unlock(&sctx->stat_lock);
853 return 0;
854 }
855 logical = sblock_to_check->pagev[0]->logical;
856 BUG_ON(sblock_to_check->pagev[0]->mirror_num < 1);
857 failed_mirror_index = sblock_to_check->pagev[0]->mirror_num - 1;
858 is_metadata = !(sblock_to_check->pagev[0]->flags &
859 BTRFS_EXTENT_FLAG_DATA);
860 have_csum = sblock_to_check->pagev[0]->have_csum;
861 dev = sblock_to_check->pagev[0]->dev;
862
863
864
865
866
867
868
869
870
871
872 nofs_flag = memalloc_nofs_save();
873
874
875
876
877
878
879
880 ret = lock_full_stripe(fs_info, logical, &full_stripe_locked);
881 if (ret < 0) {
882 memalloc_nofs_restore(nofs_flag);
883 spin_lock(&sctx->stat_lock);
884 if (ret == -ENOMEM)
885 sctx->stat.malloc_errors++;
886 sctx->stat.read_errors++;
887 sctx->stat.uncorrectable_errors++;
888 spin_unlock(&sctx->stat_lock);
889 return ret;
890 }
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921 sblocks_for_recheck = kcalloc(BTRFS_MAX_MIRRORS,
922 sizeof(*sblocks_for_recheck), GFP_KERNEL);
923 if (!sblocks_for_recheck) {
924 spin_lock(&sctx->stat_lock);
925 sctx->stat.malloc_errors++;
926 sctx->stat.read_errors++;
927 sctx->stat.uncorrectable_errors++;
928 spin_unlock(&sctx->stat_lock);
929 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
930 goto out;
931 }
932
933
934 ret = scrub_setup_recheck_block(sblock_to_check, sblocks_for_recheck);
935 if (ret) {
936 spin_lock(&sctx->stat_lock);
937 sctx->stat.read_errors++;
938 sctx->stat.uncorrectable_errors++;
939 spin_unlock(&sctx->stat_lock);
940 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
941 goto out;
942 }
943 BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS);
944 sblock_bad = sblocks_for_recheck + failed_mirror_index;
945
946
947 scrub_recheck_block(fs_info, sblock_bad, 1);
948
949 if (!sblock_bad->header_error && !sblock_bad->checksum_error &&
950 sblock_bad->no_io_error_seen) {
951
952
953
954
955
956
957
958
959 spin_lock(&sctx->stat_lock);
960 sctx->stat.unverified_errors++;
961 sblock_to_check->data_corrected = 1;
962 spin_unlock(&sctx->stat_lock);
963
964 if (sctx->is_dev_replace)
965 scrub_write_block_to_dev_replace(sblock_bad);
966 goto out;
967 }
968
969 if (!sblock_bad->no_io_error_seen) {
970 spin_lock(&sctx->stat_lock);
971 sctx->stat.read_errors++;
972 spin_unlock(&sctx->stat_lock);
973 if (__ratelimit(&_rs))
974 scrub_print_warning("i/o error", sblock_to_check);
975 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
976 } else if (sblock_bad->checksum_error) {
977 spin_lock(&sctx->stat_lock);
978 sctx->stat.csum_errors++;
979 spin_unlock(&sctx->stat_lock);
980 if (__ratelimit(&_rs))
981 scrub_print_warning("checksum error", sblock_to_check);
982 btrfs_dev_stat_inc_and_print(dev,
983 BTRFS_DEV_STAT_CORRUPTION_ERRS);
984 } else if (sblock_bad->header_error) {
985 spin_lock(&sctx->stat_lock);
986 sctx->stat.verify_errors++;
987 spin_unlock(&sctx->stat_lock);
988 if (__ratelimit(&_rs))
989 scrub_print_warning("checksum/header error",
990 sblock_to_check);
991 if (sblock_bad->generation_error)
992 btrfs_dev_stat_inc_and_print(dev,
993 BTRFS_DEV_STAT_GENERATION_ERRS);
994 else
995 btrfs_dev_stat_inc_and_print(dev,
996 BTRFS_DEV_STAT_CORRUPTION_ERRS);
997 }
998
999 if (sctx->readonly) {
1000 ASSERT(!sctx->is_dev_replace);
1001 goto out;
1002 }
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019 for (mirror_index = 0; ;mirror_index++) {
1020 struct scrub_block *sblock_other;
1021
1022 if (mirror_index == failed_mirror_index)
1023 continue;
1024
1025
1026 if (!scrub_is_page_on_raid56(sblock_bad->pagev[0])) {
1027 if (mirror_index >= BTRFS_MAX_MIRRORS)
1028 break;
1029 if (!sblocks_for_recheck[mirror_index].page_count)
1030 break;
1031
1032 sblock_other = sblocks_for_recheck + mirror_index;
1033 } else {
1034 struct scrub_recover *r = sblock_bad->pagev[0]->recover;
1035 int max_allowed = r->bbio->num_stripes -
1036 r->bbio->num_tgtdevs;
1037
1038 if (mirror_index >= max_allowed)
1039 break;
1040 if (!sblocks_for_recheck[1].page_count)
1041 break;
1042
1043 ASSERT(failed_mirror_index == 0);
1044 sblock_other = sblocks_for_recheck + 1;
1045 sblock_other->pagev[0]->mirror_num = 1 + mirror_index;
1046 }
1047
1048
1049 scrub_recheck_block(fs_info, sblock_other, 0);
1050
1051 if (!sblock_other->header_error &&
1052 !sblock_other->checksum_error &&
1053 sblock_other->no_io_error_seen) {
1054 if (sctx->is_dev_replace) {
1055 scrub_write_block_to_dev_replace(sblock_other);
1056 goto corrected_error;
1057 } else {
1058 ret = scrub_repair_block_from_good_copy(
1059 sblock_bad, sblock_other);
1060 if (!ret)
1061 goto corrected_error;
1062 }
1063 }
1064 }
1065
1066 if (sblock_bad->no_io_error_seen && !sctx->is_dev_replace)
1067 goto did_not_correct_error;
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093 success = 1;
1094 for (page_num = 0; page_num < sblock_bad->page_count;
1095 page_num++) {
1096 struct scrub_page *page_bad = sblock_bad->pagev[page_num];
1097 struct scrub_block *sblock_other = NULL;
1098
1099
1100 if (!page_bad->io_error && !sctx->is_dev_replace)
1101 continue;
1102
1103 if (scrub_is_page_on_raid56(sblock_bad->pagev[0])) {
1104
1105
1106
1107
1108
1109
1110
1111 sblock_other = NULL;
1112 } else if (page_bad->io_error) {
1113
1114 for (mirror_index = 0;
1115 mirror_index < BTRFS_MAX_MIRRORS &&
1116 sblocks_for_recheck[mirror_index].page_count > 0;
1117 mirror_index++) {
1118 if (!sblocks_for_recheck[mirror_index].
1119 pagev[page_num]->io_error) {
1120 sblock_other = sblocks_for_recheck +
1121 mirror_index;
1122 break;
1123 }
1124 }
1125 if (!sblock_other)
1126 success = 0;
1127 }
1128
1129 if (sctx->is_dev_replace) {
1130
1131
1132
1133
1134
1135
1136
1137 if (!sblock_other)
1138 sblock_other = sblock_bad;
1139
1140 if (scrub_write_page_to_dev_replace(sblock_other,
1141 page_num) != 0) {
1142 atomic64_inc(
1143 &fs_info->dev_replace.num_write_errors);
1144 success = 0;
1145 }
1146 } else if (sblock_other) {
1147 ret = scrub_repair_page_from_good_copy(sblock_bad,
1148 sblock_other,
1149 page_num, 0);
1150 if (0 == ret)
1151 page_bad->io_error = 0;
1152 else
1153 success = 0;
1154 }
1155 }
1156
1157 if (success && !sctx->is_dev_replace) {
1158 if (is_metadata || have_csum) {
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168 scrub_recheck_block(fs_info, sblock_bad, 1);
1169 if (!sblock_bad->header_error &&
1170 !sblock_bad->checksum_error &&
1171 sblock_bad->no_io_error_seen)
1172 goto corrected_error;
1173 else
1174 goto did_not_correct_error;
1175 } else {
1176corrected_error:
1177 spin_lock(&sctx->stat_lock);
1178 sctx->stat.corrected_errors++;
1179 sblock_to_check->data_corrected = 1;
1180 spin_unlock(&sctx->stat_lock);
1181 btrfs_err_rl_in_rcu(fs_info,
1182 "fixed up error at logical %llu on dev %s",
1183 logical, rcu_str_deref(dev->name));
1184 }
1185 } else {
1186did_not_correct_error:
1187 spin_lock(&sctx->stat_lock);
1188 sctx->stat.uncorrectable_errors++;
1189 spin_unlock(&sctx->stat_lock);
1190 btrfs_err_rl_in_rcu(fs_info,
1191 "unable to fixup (regular) error at logical %llu on dev %s",
1192 logical, rcu_str_deref(dev->name));
1193 }
1194
1195out:
1196 if (sblocks_for_recheck) {
1197 for (mirror_index = 0; mirror_index < BTRFS_MAX_MIRRORS;
1198 mirror_index++) {
1199 struct scrub_block *sblock = sblocks_for_recheck +
1200 mirror_index;
1201 struct scrub_recover *recover;
1202 int page_index;
1203
1204 for (page_index = 0; page_index < sblock->page_count;
1205 page_index++) {
1206 sblock->pagev[page_index]->sblock = NULL;
1207 recover = sblock->pagev[page_index]->recover;
1208 if (recover) {
1209 scrub_put_recover(fs_info, recover);
1210 sblock->pagev[page_index]->recover =
1211 NULL;
1212 }
1213 scrub_page_put(sblock->pagev[page_index]);
1214 }
1215 }
1216 kfree(sblocks_for_recheck);
1217 }
1218
1219 ret = unlock_full_stripe(fs_info, logical, full_stripe_locked);
1220 memalloc_nofs_restore(nofs_flag);
1221 if (ret < 0)
1222 return ret;
1223 return 0;
1224}
1225
1226static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio)
1227{
1228 if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
1229 return 2;
1230 else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6)
1231 return 3;
1232 else
1233 return (int)bbio->num_stripes;
1234}
1235
1236static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type,
1237 u64 *raid_map,
1238 u64 mapped_length,
1239 int nstripes, int mirror,
1240 int *stripe_index,
1241 u64 *stripe_offset)
1242{
1243 int i;
1244
1245 if (map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
1246
1247 for (i = 0; i < nstripes; i++) {
1248 if (raid_map[i] == RAID6_Q_STRIPE ||
1249 raid_map[i] == RAID5_P_STRIPE)
1250 continue;
1251
1252 if (logical >= raid_map[i] &&
1253 logical < raid_map[i] + mapped_length)
1254 break;
1255 }
1256
1257 *stripe_index = i;
1258 *stripe_offset = logical - raid_map[i];
1259 } else {
1260
1261 *stripe_index = mirror;
1262 *stripe_offset = 0;
1263 }
1264}
1265
1266static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
1267 struct scrub_block *sblocks_for_recheck)
1268{
1269 struct scrub_ctx *sctx = original_sblock->sctx;
1270 struct btrfs_fs_info *fs_info = sctx->fs_info;
1271 u64 length = original_sblock->page_count * PAGE_SIZE;
1272 u64 logical = original_sblock->pagev[0]->logical;
1273 u64 generation = original_sblock->pagev[0]->generation;
1274 u64 flags = original_sblock->pagev[0]->flags;
1275 u64 have_csum = original_sblock->pagev[0]->have_csum;
1276 struct scrub_recover *recover;
1277 struct btrfs_bio *bbio;
1278 u64 sublen;
1279 u64 mapped_length;
1280 u64 stripe_offset;
1281 int stripe_index;
1282 int page_index = 0;
1283 int mirror_index;
1284 int nmirrors;
1285 int ret;
1286
1287
1288
1289
1290
1291
1292
1293 while (length > 0) {
1294 sublen = min_t(u64, length, PAGE_SIZE);
1295 mapped_length = sublen;
1296 bbio = NULL;
1297
1298
1299
1300
1301
1302 btrfs_bio_counter_inc_blocked(fs_info);
1303 ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
1304 logical, &mapped_length, &bbio);
1305 if (ret || !bbio || mapped_length < sublen) {
1306 btrfs_put_bbio(bbio);
1307 btrfs_bio_counter_dec(fs_info);
1308 return -EIO;
1309 }
1310
1311 recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS);
1312 if (!recover) {
1313 btrfs_put_bbio(bbio);
1314 btrfs_bio_counter_dec(fs_info);
1315 return -ENOMEM;
1316 }
1317
1318 refcount_set(&recover->refs, 1);
1319 recover->bbio = bbio;
1320 recover->map_length = mapped_length;
1321
1322 BUG_ON(page_index >= SCRUB_MAX_PAGES_PER_BLOCK);
1323
1324 nmirrors = min(scrub_nr_raid_mirrors(bbio), BTRFS_MAX_MIRRORS);
1325
1326 for (mirror_index = 0; mirror_index < nmirrors;
1327 mirror_index++) {
1328 struct scrub_block *sblock;
1329 struct scrub_page *page;
1330
1331 sblock = sblocks_for_recheck + mirror_index;
1332 sblock->sctx = sctx;
1333
1334 page = kzalloc(sizeof(*page), GFP_NOFS);
1335 if (!page) {
1336leave_nomem:
1337 spin_lock(&sctx->stat_lock);
1338 sctx->stat.malloc_errors++;
1339 spin_unlock(&sctx->stat_lock);
1340 scrub_put_recover(fs_info, recover);
1341 return -ENOMEM;
1342 }
1343 scrub_page_get(page);
1344 sblock->pagev[page_index] = page;
1345 page->sblock = sblock;
1346 page->flags = flags;
1347 page->generation = generation;
1348 page->logical = logical;
1349 page->have_csum = have_csum;
1350 if (have_csum)
1351 memcpy(page->csum,
1352 original_sblock->pagev[0]->csum,
1353 sctx->csum_size);
1354
1355 scrub_stripe_index_and_offset(logical,
1356 bbio->map_type,
1357 bbio->raid_map,
1358 mapped_length,
1359 bbio->num_stripes -
1360 bbio->num_tgtdevs,
1361 mirror_index,
1362 &stripe_index,
1363 &stripe_offset);
1364 page->physical = bbio->stripes[stripe_index].physical +
1365 stripe_offset;
1366 page->dev = bbio->stripes[stripe_index].dev;
1367
1368 BUG_ON(page_index >= original_sblock->page_count);
1369 page->physical_for_dev_replace =
1370 original_sblock->pagev[page_index]->
1371 physical_for_dev_replace;
1372
1373 page->mirror_num = mirror_index + 1;
1374 sblock->page_count++;
1375 page->page = alloc_page(GFP_NOFS);
1376 if (!page->page)
1377 goto leave_nomem;
1378
1379 scrub_get_recover(recover);
1380 page->recover = recover;
1381 }
1382 scrub_put_recover(fs_info, recover);
1383 length -= sublen;
1384 logical += sublen;
1385 page_index++;
1386 }
1387
1388 return 0;
1389}
1390
1391static void scrub_bio_wait_endio(struct bio *bio)
1392{
1393 complete(bio->bi_private);
1394}
1395
1396static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
1397 struct bio *bio,
1398 struct scrub_page *page)
1399{
1400 DECLARE_COMPLETION_ONSTACK(done);
1401 int ret;
1402 int mirror_num;
1403
1404 bio->bi_iter.bi_sector = page->logical >> 9;
1405 bio->bi_private = &done;
1406 bio->bi_end_io = scrub_bio_wait_endio;
1407
1408 mirror_num = page->sblock->pagev[0]->mirror_num;
1409 ret = raid56_parity_recover(fs_info, bio, page->recover->bbio,
1410 page->recover->map_length,
1411 mirror_num, 0);
1412 if (ret)
1413 return ret;
1414
1415 wait_for_completion_io(&done);
1416 return blk_status_to_errno(bio->bi_status);
1417}
1418
1419static void scrub_recheck_block_on_raid56(struct btrfs_fs_info *fs_info,
1420 struct scrub_block *sblock)
1421{
1422 struct scrub_page *first_page = sblock->pagev[0];
1423 struct bio *bio;
1424 int page_num;
1425
1426
1427 ASSERT(first_page->dev);
1428 if (!first_page->dev->bdev)
1429 goto out;
1430
1431 bio = btrfs_io_bio_alloc(BIO_MAX_PAGES);
1432 bio_set_dev(bio, first_page->dev->bdev);
1433
1434 for (page_num = 0; page_num < sblock->page_count; page_num++) {
1435 struct scrub_page *page = sblock->pagev[page_num];
1436
1437 WARN_ON(!page->page);
1438 bio_add_page(bio, page->page, PAGE_SIZE, 0);
1439 }
1440
1441 if (scrub_submit_raid56_bio_wait(fs_info, bio, first_page)) {
1442 bio_put(bio);
1443 goto out;
1444 }
1445
1446 bio_put(bio);
1447
1448 scrub_recheck_block_checksum(sblock);
1449
1450 return;
1451out:
1452 for (page_num = 0; page_num < sblock->page_count; page_num++)
1453 sblock->pagev[page_num]->io_error = 1;
1454
1455 sblock->no_io_error_seen = 0;
1456}
1457
1458
1459
1460
1461
1462
1463
1464
1465static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
1466 struct scrub_block *sblock,
1467 int retry_failed_mirror)
1468{
1469 int page_num;
1470
1471 sblock->no_io_error_seen = 1;
1472
1473
1474 if (!retry_failed_mirror && scrub_is_page_on_raid56(sblock->pagev[0]))
1475 return scrub_recheck_block_on_raid56(fs_info, sblock);
1476
1477 for (page_num = 0; page_num < sblock->page_count; page_num++) {
1478 struct bio *bio;
1479 struct scrub_page *page = sblock->pagev[page_num];
1480
1481 if (page->dev->bdev == NULL) {
1482 page->io_error = 1;
1483 sblock->no_io_error_seen = 0;
1484 continue;
1485 }
1486
1487 WARN_ON(!page->page);
1488 bio = btrfs_io_bio_alloc(1);
1489 bio_set_dev(bio, page->dev->bdev);
1490
1491 bio_add_page(bio, page->page, PAGE_SIZE, 0);
1492 bio->bi_iter.bi_sector = page->physical >> 9;
1493 bio->bi_opf = REQ_OP_READ;
1494
1495 if (btrfsic_submit_bio_wait(bio)) {
1496 page->io_error = 1;
1497 sblock->no_io_error_seen = 0;
1498 }
1499
1500 bio_put(bio);
1501 }
1502
1503 if (sblock->no_io_error_seen)
1504 scrub_recheck_block_checksum(sblock);
1505}
1506
1507static inline int scrub_check_fsid(u8 fsid[],
1508 struct scrub_page *spage)
1509{
1510 struct btrfs_fs_devices *fs_devices = spage->dev->fs_devices;
1511 int ret;
1512
1513 ret = memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
1514 return !ret;
1515}
1516
1517static void scrub_recheck_block_checksum(struct scrub_block *sblock)
1518{
1519 sblock->header_error = 0;
1520 sblock->checksum_error = 0;
1521 sblock->generation_error = 0;
1522
1523 if (sblock->pagev[0]->flags & BTRFS_EXTENT_FLAG_DATA)
1524 scrub_checksum_data(sblock);
1525 else
1526 scrub_checksum_tree_block(sblock);
1527}
1528
1529static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
1530 struct scrub_block *sblock_good)
1531{
1532 int page_num;
1533 int ret = 0;
1534
1535 for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
1536 int ret_sub;
1537
1538 ret_sub = scrub_repair_page_from_good_copy(sblock_bad,
1539 sblock_good,
1540 page_num, 1);
1541 if (ret_sub)
1542 ret = ret_sub;
1543 }
1544
1545 return ret;
1546}
1547
1548static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
1549 struct scrub_block *sblock_good,
1550 int page_num, int force_write)
1551{
1552 struct scrub_page *page_bad = sblock_bad->pagev[page_num];
1553 struct scrub_page *page_good = sblock_good->pagev[page_num];
1554 struct btrfs_fs_info *fs_info = sblock_bad->sctx->fs_info;
1555
1556 BUG_ON(page_bad->page == NULL);
1557 BUG_ON(page_good->page == NULL);
1558 if (force_write || sblock_bad->header_error ||
1559 sblock_bad->checksum_error || page_bad->io_error) {
1560 struct bio *bio;
1561 int ret;
1562
1563 if (!page_bad->dev->bdev) {
1564 btrfs_warn_rl(fs_info,
1565 "scrub_repair_page_from_good_copy(bdev == NULL) is unexpected");
1566 return -EIO;
1567 }
1568
1569 bio = btrfs_io_bio_alloc(1);
1570 bio_set_dev(bio, page_bad->dev->bdev);
1571 bio->bi_iter.bi_sector = page_bad->physical >> 9;
1572 bio->bi_opf = REQ_OP_WRITE;
1573
1574 ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
1575 if (PAGE_SIZE != ret) {
1576 bio_put(bio);
1577 return -EIO;
1578 }
1579
1580 if (btrfsic_submit_bio_wait(bio)) {
1581 btrfs_dev_stat_inc_and_print(page_bad->dev,
1582 BTRFS_DEV_STAT_WRITE_ERRS);
1583 atomic64_inc(&fs_info->dev_replace.num_write_errors);
1584 bio_put(bio);
1585 return -EIO;
1586 }
1587 bio_put(bio);
1588 }
1589
1590 return 0;
1591}
1592
1593static void scrub_write_block_to_dev_replace(struct scrub_block *sblock)
1594{
1595 struct btrfs_fs_info *fs_info = sblock->sctx->fs_info;
1596 int page_num;
1597
1598
1599
1600
1601
1602 if (sblock->sparity)
1603 return;
1604
1605 for (page_num = 0; page_num < sblock->page_count; page_num++) {
1606 int ret;
1607
1608 ret = scrub_write_page_to_dev_replace(sblock, page_num);
1609 if (ret)
1610 atomic64_inc(&fs_info->dev_replace.num_write_errors);
1611 }
1612}
1613
1614static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
1615 int page_num)
1616{
1617 struct scrub_page *spage = sblock->pagev[page_num];
1618
1619 BUG_ON(spage->page == NULL);
1620 if (spage->io_error) {
1621 void *mapped_buffer = kmap_atomic(spage->page);
1622
1623 clear_page(mapped_buffer);
1624 flush_dcache_page(spage->page);
1625 kunmap_atomic(mapped_buffer);
1626 }
1627 return scrub_add_page_to_wr_bio(sblock->sctx, spage);
1628}
1629
1630static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
1631 struct scrub_page *spage)
1632{
1633 struct scrub_bio *sbio;
1634 int ret;
1635
1636 mutex_lock(&sctx->wr_lock);
1637again:
1638 if (!sctx->wr_curr_bio) {
1639 sctx->wr_curr_bio = kzalloc(sizeof(*sctx->wr_curr_bio),
1640 GFP_KERNEL);
1641 if (!sctx->wr_curr_bio) {
1642 mutex_unlock(&sctx->wr_lock);
1643 return -ENOMEM;
1644 }
1645 sctx->wr_curr_bio->sctx = sctx;
1646 sctx->wr_curr_bio->page_count = 0;
1647 }
1648 sbio = sctx->wr_curr_bio;
1649 if (sbio->page_count == 0) {
1650 struct bio *bio;
1651
1652 sbio->physical = spage->physical_for_dev_replace;
1653 sbio->logical = spage->logical;
1654 sbio->dev = sctx->wr_tgtdev;
1655 bio = sbio->bio;
1656 if (!bio) {
1657 bio = btrfs_io_bio_alloc(sctx->pages_per_wr_bio);
1658 sbio->bio = bio;
1659 }
1660
1661 bio->bi_private = sbio;
1662 bio->bi_end_io = scrub_wr_bio_end_io;
1663 bio_set_dev(bio, sbio->dev->bdev);
1664 bio->bi_iter.bi_sector = sbio->physical >> 9;
1665 bio->bi_opf = REQ_OP_WRITE;
1666 sbio->status = 0;
1667 } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
1668 spage->physical_for_dev_replace ||
1669 sbio->logical + sbio->page_count * PAGE_SIZE !=
1670 spage->logical) {
1671 scrub_wr_submit(sctx);
1672 goto again;
1673 }
1674
1675 ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
1676 if (ret != PAGE_SIZE) {
1677 if (sbio->page_count < 1) {
1678 bio_put(sbio->bio);
1679 sbio->bio = NULL;
1680 mutex_unlock(&sctx->wr_lock);
1681 return -EIO;
1682 }
1683 scrub_wr_submit(sctx);
1684 goto again;
1685 }
1686
1687 sbio->pagev[sbio->page_count] = spage;
1688 scrub_page_get(spage);
1689 sbio->page_count++;
1690 if (sbio->page_count == sctx->pages_per_wr_bio)
1691 scrub_wr_submit(sctx);
1692 mutex_unlock(&sctx->wr_lock);
1693
1694 return 0;
1695}
1696
1697static void scrub_wr_submit(struct scrub_ctx *sctx)
1698{
1699 struct scrub_bio *sbio;
1700
1701 if (!sctx->wr_curr_bio)
1702 return;
1703
1704 sbio = sctx->wr_curr_bio;
1705 sctx->wr_curr_bio = NULL;
1706 WARN_ON(!sbio->bio->bi_disk);
1707 scrub_pending_bio_inc(sctx);
1708
1709
1710
1711
1712 btrfsic_submit_bio(sbio->bio);
1713}
1714
1715static void scrub_wr_bio_end_io(struct bio *bio)
1716{
1717 struct scrub_bio *sbio = bio->bi_private;
1718 struct btrfs_fs_info *fs_info = sbio->dev->fs_info;
1719
1720 sbio->status = bio->bi_status;
1721 sbio->bio = bio;
1722
1723 btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper,
1724 scrub_wr_bio_end_io_worker, NULL, NULL);
1725 btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work);
1726}
1727
1728static void scrub_wr_bio_end_io_worker(struct btrfs_work *work)
1729{
1730 struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
1731 struct scrub_ctx *sctx = sbio->sctx;
1732 int i;
1733
1734 WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO);
1735 if (sbio->status) {
1736 struct btrfs_dev_replace *dev_replace =
1737 &sbio->sctx->fs_info->dev_replace;
1738
1739 for (i = 0; i < sbio->page_count; i++) {
1740 struct scrub_page *spage = sbio->pagev[i];
1741
1742 spage->io_error = 1;
1743 atomic64_inc(&dev_replace->num_write_errors);
1744 }
1745 }
1746
1747 for (i = 0; i < sbio->page_count; i++)
1748 scrub_page_put(sbio->pagev[i]);
1749
1750 bio_put(sbio->bio);
1751 kfree(sbio);
1752 scrub_pending_bio_dec(sctx);
1753}
1754
1755static int scrub_checksum(struct scrub_block *sblock)
1756{
1757 u64 flags;
1758 int ret;
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768 sblock->header_error = 0;
1769 sblock->generation_error = 0;
1770 sblock->checksum_error = 0;
1771
1772 WARN_ON(sblock->page_count < 1);
1773 flags = sblock->pagev[0]->flags;
1774 ret = 0;
1775 if (flags & BTRFS_EXTENT_FLAG_DATA)
1776 ret = scrub_checksum_data(sblock);
1777 else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
1778 ret = scrub_checksum_tree_block(sblock);
1779 else if (flags & BTRFS_EXTENT_FLAG_SUPER)
1780 (void)scrub_checksum_super(sblock);
1781 else
1782 WARN_ON(1);
1783 if (ret)
1784 scrub_handle_errored_block(sblock);
1785
1786 return ret;
1787}
1788
1789static int scrub_checksum_data(struct scrub_block *sblock)
1790{
1791 struct scrub_ctx *sctx = sblock->sctx;
1792 struct btrfs_fs_info *fs_info = sctx->fs_info;
1793 SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
1794 u8 csum[BTRFS_CSUM_SIZE];
1795 u8 *on_disk_csum;
1796 struct page *page;
1797 void *buffer;
1798 u64 len;
1799 int index;
1800
1801 BUG_ON(sblock->page_count < 1);
1802 if (!sblock->pagev[0]->have_csum)
1803 return 0;
1804
1805 shash->tfm = fs_info->csum_shash;
1806 crypto_shash_init(shash);
1807
1808 on_disk_csum = sblock->pagev[0]->csum;
1809 page = sblock->pagev[0]->page;
1810 buffer = kmap_atomic(page);
1811
1812 len = sctx->fs_info->sectorsize;
1813 index = 0;
1814 for (;;) {
1815 u64 l = min_t(u64, len, PAGE_SIZE);
1816
1817 crypto_shash_update(shash, buffer, l);
1818 kunmap_atomic(buffer);
1819 len -= l;
1820 if (len == 0)
1821 break;
1822 index++;
1823 BUG_ON(index >= sblock->page_count);
1824 BUG_ON(!sblock->pagev[index]->page);
1825 page = sblock->pagev[index]->page;
1826 buffer = kmap_atomic(page);
1827 }
1828
1829 crypto_shash_final(shash, csum);
1830 if (memcmp(csum, on_disk_csum, sctx->csum_size))
1831 sblock->checksum_error = 1;
1832
1833 return sblock->checksum_error;
1834}
1835
1836static int scrub_checksum_tree_block(struct scrub_block *sblock)
1837{
1838 struct scrub_ctx *sctx = sblock->sctx;
1839 struct btrfs_header *h;
1840 struct btrfs_fs_info *fs_info = sctx->fs_info;
1841 SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
1842 u8 calculated_csum[BTRFS_CSUM_SIZE];
1843 u8 on_disk_csum[BTRFS_CSUM_SIZE];
1844 struct page *page;
1845 void *mapped_buffer;
1846 u64 mapped_size;
1847 void *p;
1848 u64 len;
1849 int index;
1850
1851 shash->tfm = fs_info->csum_shash;
1852 crypto_shash_init(shash);
1853
1854 BUG_ON(sblock->page_count < 1);
1855 page = sblock->pagev[0]->page;
1856 mapped_buffer = kmap_atomic(page);
1857 h = (struct btrfs_header *)mapped_buffer;
1858 memcpy(on_disk_csum, h->csum, sctx->csum_size);
1859
1860
1861
1862
1863
1864
1865 if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h))
1866 sblock->header_error = 1;
1867
1868 if (sblock->pagev[0]->generation != btrfs_stack_header_generation(h)) {
1869 sblock->header_error = 1;
1870 sblock->generation_error = 1;
1871 }
1872
1873 if (!scrub_check_fsid(h->fsid, sblock->pagev[0]))
1874 sblock->header_error = 1;
1875
1876 if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
1877 BTRFS_UUID_SIZE))
1878 sblock->header_error = 1;
1879
1880 len = sctx->fs_info->nodesize - BTRFS_CSUM_SIZE;
1881 mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
1882 p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
1883 index = 0;
1884 for (;;) {
1885 u64 l = min_t(u64, len, mapped_size);
1886
1887 crypto_shash_update(shash, p, l);
1888 kunmap_atomic(mapped_buffer);
1889 len -= l;
1890 if (len == 0)
1891 break;
1892 index++;
1893 BUG_ON(index >= sblock->page_count);
1894 BUG_ON(!sblock->pagev[index]->page);
1895 page = sblock->pagev[index]->page;
1896 mapped_buffer = kmap_atomic(page);
1897 mapped_size = PAGE_SIZE;
1898 p = mapped_buffer;
1899 }
1900
1901 crypto_shash_final(shash, calculated_csum);
1902 if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
1903 sblock->checksum_error = 1;
1904
1905 return sblock->header_error || sblock->checksum_error;
1906}
1907
1908static int scrub_checksum_super(struct scrub_block *sblock)
1909{
1910 struct btrfs_super_block *s;
1911 struct scrub_ctx *sctx = sblock->sctx;
1912 struct btrfs_fs_info *fs_info = sctx->fs_info;
1913 SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
1914 u8 calculated_csum[BTRFS_CSUM_SIZE];
1915 u8 on_disk_csum[BTRFS_CSUM_SIZE];
1916 struct page *page;
1917 void *mapped_buffer;
1918 u64 mapped_size;
1919 void *p;
1920 int fail_gen = 0;
1921 int fail_cor = 0;
1922 u64 len;
1923 int index;
1924
1925 shash->tfm = fs_info->csum_shash;
1926 crypto_shash_init(shash);
1927
1928 BUG_ON(sblock->page_count < 1);
1929 page = sblock->pagev[0]->page;
1930 mapped_buffer = kmap_atomic(page);
1931 s = (struct btrfs_super_block *)mapped_buffer;
1932 memcpy(on_disk_csum, s->csum, sctx->csum_size);
1933
1934 if (sblock->pagev[0]->logical != btrfs_super_bytenr(s))
1935 ++fail_cor;
1936
1937 if (sblock->pagev[0]->generation != btrfs_super_generation(s))
1938 ++fail_gen;
1939
1940 if (!scrub_check_fsid(s->fsid, sblock->pagev[0]))
1941 ++fail_cor;
1942
1943 len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE;
1944 mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
1945 p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
1946 index = 0;
1947 for (;;) {
1948 u64 l = min_t(u64, len, mapped_size);
1949
1950 crypto_shash_update(shash, p, l);
1951 kunmap_atomic(mapped_buffer);
1952 len -= l;
1953 if (len == 0)
1954 break;
1955 index++;
1956 BUG_ON(index >= sblock->page_count);
1957 BUG_ON(!sblock->pagev[index]->page);
1958 page = sblock->pagev[index]->page;
1959 mapped_buffer = kmap_atomic(page);
1960 mapped_size = PAGE_SIZE;
1961 p = mapped_buffer;
1962 }
1963
1964 crypto_shash_final(shash, calculated_csum);
1965 if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
1966 ++fail_cor;
1967
1968 if (fail_cor + fail_gen) {
1969
1970
1971
1972
1973
1974 spin_lock(&sctx->stat_lock);
1975 ++sctx->stat.super_errors;
1976 spin_unlock(&sctx->stat_lock);
1977 if (fail_cor)
1978 btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev,
1979 BTRFS_DEV_STAT_CORRUPTION_ERRS);
1980 else
1981 btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev,
1982 BTRFS_DEV_STAT_GENERATION_ERRS);
1983 }
1984
1985 return fail_cor + fail_gen;
1986}
1987
1988static void scrub_block_get(struct scrub_block *sblock)
1989{
1990 refcount_inc(&sblock->refs);
1991}
1992
1993static void scrub_block_put(struct scrub_block *sblock)
1994{
1995 if (refcount_dec_and_test(&sblock->refs)) {
1996 int i;
1997
1998 if (sblock->sparity)
1999 scrub_parity_put(sblock->sparity);
2000
2001 for (i = 0; i < sblock->page_count; i++)
2002 scrub_page_put(sblock->pagev[i]);
2003 kfree(sblock);
2004 }
2005}
2006
2007static void scrub_page_get(struct scrub_page *spage)
2008{
2009 atomic_inc(&spage->refs);
2010}
2011
2012static void scrub_page_put(struct scrub_page *spage)
2013{
2014 if (atomic_dec_and_test(&spage->refs)) {
2015 if (spage->page)
2016 __free_page(spage->page);
2017 kfree(spage);
2018 }
2019}
2020
2021static void scrub_submit(struct scrub_ctx *sctx)
2022{
2023 struct scrub_bio *sbio;
2024
2025 if (sctx->curr == -1)
2026 return;
2027
2028 sbio = sctx->bios[sctx->curr];
2029 sctx->curr = -1;
2030 scrub_pending_bio_inc(sctx);
2031 btrfsic_submit_bio(sbio->bio);
2032}
2033
2034static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
2035 struct scrub_page *spage)
2036{
2037 struct scrub_block *sblock = spage->sblock;
2038 struct scrub_bio *sbio;
2039 int ret;
2040
2041again:
2042
2043
2044
2045 while (sctx->curr == -1) {
2046 spin_lock(&sctx->list_lock);
2047 sctx->curr = sctx->first_free;
2048 if (sctx->curr != -1) {
2049 sctx->first_free = sctx->bios[sctx->curr]->next_free;
2050 sctx->bios[sctx->curr]->next_free = -1;
2051 sctx->bios[sctx->curr]->page_count = 0;
2052 spin_unlock(&sctx->list_lock);
2053 } else {
2054 spin_unlock(&sctx->list_lock);
2055 wait_event(sctx->list_wait, sctx->first_free != -1);
2056 }
2057 }
2058 sbio = sctx->bios[sctx->curr];
2059 if (sbio->page_count == 0) {
2060 struct bio *bio;
2061
2062 sbio->physical = spage->physical;
2063 sbio->logical = spage->logical;
2064 sbio->dev = spage->dev;
2065 bio = sbio->bio;
2066 if (!bio) {
2067 bio = btrfs_io_bio_alloc(sctx->pages_per_rd_bio);
2068 sbio->bio = bio;
2069 }
2070
2071 bio->bi_private = sbio;
2072 bio->bi_end_io = scrub_bio_end_io;
2073 bio_set_dev(bio, sbio->dev->bdev);
2074 bio->bi_iter.bi_sector = sbio->physical >> 9;
2075 bio->bi_opf = REQ_OP_READ;
2076 sbio->status = 0;
2077 } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
2078 spage->physical ||
2079 sbio->logical + sbio->page_count * PAGE_SIZE !=
2080 spage->logical ||
2081 sbio->dev != spage->dev) {
2082 scrub_submit(sctx);
2083 goto again;
2084 }
2085
2086 sbio->pagev[sbio->page_count] = spage;
2087 ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
2088 if (ret != PAGE_SIZE) {
2089 if (sbio->page_count < 1) {
2090 bio_put(sbio->bio);
2091 sbio->bio = NULL;
2092 return -EIO;
2093 }
2094 scrub_submit(sctx);
2095 goto again;
2096 }
2097
2098 scrub_block_get(sblock);
2099 atomic_inc(&sblock->outstanding_pages);
2100 sbio->page_count++;
2101 if (sbio->page_count == sctx->pages_per_rd_bio)
2102 scrub_submit(sctx);
2103
2104 return 0;
2105}
2106
2107static void scrub_missing_raid56_end_io(struct bio *bio)
2108{
2109 struct scrub_block *sblock = bio->bi_private;
2110 struct btrfs_fs_info *fs_info = sblock->sctx->fs_info;
2111
2112 if (bio->bi_status)
2113 sblock->no_io_error_seen = 0;
2114
2115 bio_put(bio);
2116
2117 btrfs_queue_work(fs_info->scrub_workers, &sblock->work);
2118}
2119
2120static void scrub_missing_raid56_worker(struct btrfs_work *work)
2121{
2122 struct scrub_block *sblock = container_of(work, struct scrub_block, work);
2123 struct scrub_ctx *sctx = sblock->sctx;
2124 struct btrfs_fs_info *fs_info = sctx->fs_info;
2125 u64 logical;
2126 struct btrfs_device *dev;
2127
2128 logical = sblock->pagev[0]->logical;
2129 dev = sblock->pagev[0]->dev;
2130
2131 if (sblock->no_io_error_seen)
2132 scrub_recheck_block_checksum(sblock);
2133
2134 if (!sblock->no_io_error_seen) {
2135 spin_lock(&sctx->stat_lock);
2136 sctx->stat.read_errors++;
2137 spin_unlock(&sctx->stat_lock);
2138 btrfs_err_rl_in_rcu(fs_info,
2139 "IO error rebuilding logical %llu for dev %s",
2140 logical, rcu_str_deref(dev->name));
2141 } else if (sblock->header_error || sblock->checksum_error) {
2142 spin_lock(&sctx->stat_lock);
2143 sctx->stat.uncorrectable_errors++;
2144 spin_unlock(&sctx->stat_lock);
2145 btrfs_err_rl_in_rcu(fs_info,
2146 "failed to rebuild valid logical %llu for dev %s",
2147 logical, rcu_str_deref(dev->name));
2148 } else {
2149 scrub_write_block_to_dev_replace(sblock);
2150 }
2151
2152 scrub_block_put(sblock);
2153
2154 if (sctx->is_dev_replace && sctx->flush_all_writes) {
2155 mutex_lock(&sctx->wr_lock);
2156 scrub_wr_submit(sctx);
2157 mutex_unlock(&sctx->wr_lock);
2158 }
2159
2160 scrub_pending_bio_dec(sctx);
2161}
2162
2163static void scrub_missing_raid56_pages(struct scrub_block *sblock)
2164{
2165 struct scrub_ctx *sctx = sblock->sctx;
2166 struct btrfs_fs_info *fs_info = sctx->fs_info;
2167 u64 length = sblock->page_count * PAGE_SIZE;
2168 u64 logical = sblock->pagev[0]->logical;
2169 struct btrfs_bio *bbio = NULL;
2170 struct bio *bio;
2171 struct btrfs_raid_bio *rbio;
2172 int ret;
2173 int i;
2174
2175 btrfs_bio_counter_inc_blocked(fs_info);
2176 ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
2177 &length, &bbio);
2178 if (ret || !bbio || !bbio->raid_map)
2179 goto bbio_out;
2180
2181 if (WARN_ON(!sctx->is_dev_replace ||
2182 !(bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK))) {
2183
2184
2185
2186
2187
2188
2189 goto bbio_out;
2190 }
2191
2192 bio = btrfs_io_bio_alloc(0);
2193 bio->bi_iter.bi_sector = logical >> 9;
2194 bio->bi_private = sblock;
2195 bio->bi_end_io = scrub_missing_raid56_end_io;
2196
2197 rbio = raid56_alloc_missing_rbio(fs_info, bio, bbio, length);
2198 if (!rbio)
2199 goto rbio_out;
2200
2201 for (i = 0; i < sblock->page_count; i++) {
2202 struct scrub_page *spage = sblock->pagev[i];
2203
2204 raid56_add_scrub_pages(rbio, spage->page, spage->logical);
2205 }
2206
2207 btrfs_init_work(&sblock->work, btrfs_scrub_helper,
2208 scrub_missing_raid56_worker, NULL, NULL);
2209 scrub_block_get(sblock);
2210 scrub_pending_bio_inc(sctx);
2211 raid56_submit_missing_rbio(rbio);
2212 return;
2213
2214rbio_out:
2215 bio_put(bio);
2216bbio_out:
2217 btrfs_bio_counter_dec(fs_info);
2218 btrfs_put_bbio(bbio);
2219 spin_lock(&sctx->stat_lock);
2220 sctx->stat.malloc_errors++;
2221 spin_unlock(&sctx->stat_lock);
2222}
2223
2224static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
2225 u64 physical, struct btrfs_device *dev, u64 flags,
2226 u64 gen, int mirror_num, u8 *csum, int force,
2227 u64 physical_for_dev_replace)
2228{
2229 struct scrub_block *sblock;
2230 int index;
2231
2232 sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);
2233 if (!sblock) {
2234 spin_lock(&sctx->stat_lock);
2235 sctx->stat.malloc_errors++;
2236 spin_unlock(&sctx->stat_lock);
2237 return -ENOMEM;
2238 }
2239
2240
2241
2242 refcount_set(&sblock->refs, 1);
2243 sblock->sctx = sctx;
2244 sblock->no_io_error_seen = 1;
2245
2246 for (index = 0; len > 0; index++) {
2247 struct scrub_page *spage;
2248 u64 l = min_t(u64, len, PAGE_SIZE);
2249
2250 spage = kzalloc(sizeof(*spage), GFP_KERNEL);
2251 if (!spage) {
2252leave_nomem:
2253 spin_lock(&sctx->stat_lock);
2254 sctx->stat.malloc_errors++;
2255 spin_unlock(&sctx->stat_lock);
2256 scrub_block_put(sblock);
2257 return -ENOMEM;
2258 }
2259 BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
2260 scrub_page_get(spage);
2261 sblock->pagev[index] = spage;
2262 spage->sblock = sblock;
2263 spage->dev = dev;
2264 spage->flags = flags;
2265 spage->generation = gen;
2266 spage->logical = logical;
2267 spage->physical = physical;
2268 spage->physical_for_dev_replace = physical_for_dev_replace;
2269 spage->mirror_num = mirror_num;
2270 if (csum) {
2271 spage->have_csum = 1;
2272 memcpy(spage->csum, csum, sctx->csum_size);
2273 } else {
2274 spage->have_csum = 0;
2275 }
2276 sblock->page_count++;
2277 spage->page = alloc_page(GFP_KERNEL);
2278 if (!spage->page)
2279 goto leave_nomem;
2280 len -= l;
2281 logical += l;
2282 physical += l;
2283 physical_for_dev_replace += l;
2284 }
2285
2286 WARN_ON(sblock->page_count == 0);
2287 if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) {
2288
2289
2290
2291
2292 scrub_missing_raid56_pages(sblock);
2293 } else {
2294 for (index = 0; index < sblock->page_count; index++) {
2295 struct scrub_page *spage = sblock->pagev[index];
2296 int ret;
2297
2298 ret = scrub_add_page_to_rd_bio(sctx, spage);
2299 if (ret) {
2300 scrub_block_put(sblock);
2301 return ret;
2302 }
2303 }
2304
2305 if (force)
2306 scrub_submit(sctx);
2307 }
2308
2309
2310 scrub_block_put(sblock);
2311 return 0;
2312}
2313
2314static void scrub_bio_end_io(struct bio *bio)
2315{
2316 struct scrub_bio *sbio = bio->bi_private;
2317 struct btrfs_fs_info *fs_info = sbio->dev->fs_info;
2318
2319 sbio->status = bio->bi_status;
2320 sbio->bio = bio;
2321
2322 btrfs_queue_work(fs_info->scrub_workers, &sbio->work);
2323}
2324
2325static void scrub_bio_end_io_worker(struct btrfs_work *work)
2326{
2327 struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
2328 struct scrub_ctx *sctx = sbio->sctx;
2329 int i;
2330
2331 BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO);
2332 if (sbio->status) {
2333 for (i = 0; i < sbio->page_count; i++) {
2334 struct scrub_page *spage = sbio->pagev[i];
2335
2336 spage->io_error = 1;
2337 spage->sblock->no_io_error_seen = 0;
2338 }
2339 }
2340
2341
2342 for (i = 0; i < sbio->page_count; i++) {
2343 struct scrub_page *spage = sbio->pagev[i];
2344 struct scrub_block *sblock = spage->sblock;
2345
2346 if (atomic_dec_and_test(&sblock->outstanding_pages))
2347 scrub_block_complete(sblock);
2348 scrub_block_put(sblock);
2349 }
2350
2351 bio_put(sbio->bio);
2352 sbio->bio = NULL;
2353 spin_lock(&sctx->list_lock);
2354 sbio->next_free = sctx->first_free;
2355 sctx->first_free = sbio->index;
2356 spin_unlock(&sctx->list_lock);
2357
2358 if (sctx->is_dev_replace && sctx->flush_all_writes) {
2359 mutex_lock(&sctx->wr_lock);
2360 scrub_wr_submit(sctx);
2361 mutex_unlock(&sctx->wr_lock);
2362 }
2363
2364 scrub_pending_bio_dec(sctx);
2365}
2366
2367static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
2368 unsigned long *bitmap,
2369 u64 start, u64 len)
2370{
2371 u64 offset;
2372 u64 nsectors64;
2373 u32 nsectors;
2374 int sectorsize = sparity->sctx->fs_info->sectorsize;
2375
2376 if (len >= sparity->stripe_len) {
2377 bitmap_set(bitmap, 0, sparity->nsectors);
2378 return;
2379 }
2380
2381 start -= sparity->logic_start;
2382 start = div64_u64_rem(start, sparity->stripe_len, &offset);
2383 offset = div_u64(offset, sectorsize);
2384 nsectors64 = div_u64(len, sectorsize);
2385
2386 ASSERT(nsectors64 < UINT_MAX);
2387 nsectors = (u32)nsectors64;
2388
2389 if (offset + nsectors <= sparity->nsectors) {
2390 bitmap_set(bitmap, offset, nsectors);
2391 return;
2392 }
2393
2394 bitmap_set(bitmap, offset, sparity->nsectors - offset);
2395 bitmap_set(bitmap, 0, nsectors - (sparity->nsectors - offset));
2396}
2397
2398static inline void scrub_parity_mark_sectors_error(struct scrub_parity *sparity,
2399 u64 start, u64 len)
2400{
2401 __scrub_mark_bitmap(sparity, sparity->ebitmap, start, len);
2402}
2403
2404static inline void scrub_parity_mark_sectors_data(struct scrub_parity *sparity,
2405 u64 start, u64 len)
2406{
2407 __scrub_mark_bitmap(sparity, sparity->dbitmap, start, len);
2408}
2409
2410static void scrub_block_complete(struct scrub_block *sblock)
2411{
2412 int corrupted = 0;
2413
2414 if (!sblock->no_io_error_seen) {
2415 corrupted = 1;
2416 scrub_handle_errored_block(sblock);
2417 } else {
2418
2419
2420
2421
2422
2423 corrupted = scrub_checksum(sblock);
2424 if (!corrupted && sblock->sctx->is_dev_replace)
2425 scrub_write_block_to_dev_replace(sblock);
2426 }
2427
2428 if (sblock->sparity && corrupted && !sblock->data_corrected) {
2429 u64 start = sblock->pagev[0]->logical;
2430 u64 end = sblock->pagev[sblock->page_count - 1]->logical +
2431 PAGE_SIZE;
2432
2433 scrub_parity_mark_sectors_error(sblock->sparity,
2434 start, end - start);
2435 }
2436}
2437
2438static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u8 *csum)
2439{
2440 struct btrfs_ordered_sum *sum = NULL;
2441 unsigned long index;
2442 unsigned long num_sectors;
2443
2444 while (!list_empty(&sctx->csum_list)) {
2445 sum = list_first_entry(&sctx->csum_list,
2446 struct btrfs_ordered_sum, list);
2447 if (sum->bytenr > logical)
2448 return 0;
2449 if (sum->bytenr + sum->len > logical)
2450 break;
2451
2452 ++sctx->stat.csum_discards;
2453 list_del(&sum->list);
2454 kfree(sum);
2455 sum = NULL;
2456 }
2457 if (!sum)
2458 return 0;
2459
2460 index = div_u64(logical - sum->bytenr, sctx->fs_info->sectorsize);
2461 ASSERT(index < UINT_MAX);
2462
2463 num_sectors = sum->len / sctx->fs_info->sectorsize;
2464 memcpy(csum, sum->sums + index * sctx->csum_size, sctx->csum_size);
2465 if (index == num_sectors - 1) {
2466 list_del(&sum->list);
2467 kfree(sum);
2468 }
2469 return 1;
2470}
2471
2472
2473static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
2474 u64 logical, u64 len,
2475 u64 physical, struct btrfs_device *dev, u64 flags,
2476 u64 gen, int mirror_num, u64 physical_for_dev_replace)
2477{
2478 int ret;
2479 u8 csum[BTRFS_CSUM_SIZE];
2480 u32 blocksize;
2481
2482 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2483 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
2484 blocksize = map->stripe_len;
2485 else
2486 blocksize = sctx->fs_info->sectorsize;
2487 spin_lock(&sctx->stat_lock);
2488 sctx->stat.data_extents_scrubbed++;
2489 sctx->stat.data_bytes_scrubbed += len;
2490 spin_unlock(&sctx->stat_lock);
2491 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
2492 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
2493 blocksize = map->stripe_len;
2494 else
2495 blocksize = sctx->fs_info->nodesize;
2496 spin_lock(&sctx->stat_lock);
2497 sctx->stat.tree_extents_scrubbed++;
2498 sctx->stat.tree_bytes_scrubbed += len;
2499 spin_unlock(&sctx->stat_lock);
2500 } else {
2501 blocksize = sctx->fs_info->sectorsize;
2502 WARN_ON(1);
2503 }
2504
2505 while (len) {
2506 u64 l = min_t(u64, len, blocksize);
2507 int have_csum = 0;
2508
2509 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2510
2511 have_csum = scrub_find_csum(sctx, logical, csum);
2512 if (have_csum == 0)
2513 ++sctx->stat.no_csum;
2514 }
2515 ret = scrub_pages(sctx, logical, l, physical, dev, flags, gen,
2516 mirror_num, have_csum ? csum : NULL, 0,
2517 physical_for_dev_replace);
2518 if (ret)
2519 return ret;
2520 len -= l;
2521 logical += l;
2522 physical += l;
2523 physical_for_dev_replace += l;
2524 }
2525 return 0;
2526}
2527
2528static int scrub_pages_for_parity(struct scrub_parity *sparity,
2529 u64 logical, u64 len,
2530 u64 physical, struct btrfs_device *dev,
2531 u64 flags, u64 gen, int mirror_num, u8 *csum)
2532{
2533 struct scrub_ctx *sctx = sparity->sctx;
2534 struct scrub_block *sblock;
2535 int index;
2536
2537 sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);
2538 if (!sblock) {
2539 spin_lock(&sctx->stat_lock);
2540 sctx->stat.malloc_errors++;
2541 spin_unlock(&sctx->stat_lock);
2542 return -ENOMEM;
2543 }
2544
2545
2546
2547 refcount_set(&sblock->refs, 1);
2548 sblock->sctx = sctx;
2549 sblock->no_io_error_seen = 1;
2550 sblock->sparity = sparity;
2551 scrub_parity_get(sparity);
2552
2553 for (index = 0; len > 0; index++) {
2554 struct scrub_page *spage;
2555 u64 l = min_t(u64, len, PAGE_SIZE);
2556
2557 spage = kzalloc(sizeof(*spage), GFP_KERNEL);
2558 if (!spage) {
2559leave_nomem:
2560 spin_lock(&sctx->stat_lock);
2561 sctx->stat.malloc_errors++;
2562 spin_unlock(&sctx->stat_lock);
2563 scrub_block_put(sblock);
2564 return -ENOMEM;
2565 }
2566 BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
2567
2568 scrub_page_get(spage);
2569 sblock->pagev[index] = spage;
2570
2571 scrub_page_get(spage);
2572 list_add_tail(&spage->list, &sparity->spages);
2573 spage->sblock = sblock;
2574 spage->dev = dev;
2575 spage->flags = flags;
2576 spage->generation = gen;
2577 spage->logical = logical;
2578 spage->physical = physical;
2579 spage->mirror_num = mirror_num;
2580 if (csum) {
2581 spage->have_csum = 1;
2582 memcpy(spage->csum, csum, sctx->csum_size);
2583 } else {
2584 spage->have_csum = 0;
2585 }
2586 sblock->page_count++;
2587 spage->page = alloc_page(GFP_KERNEL);
2588 if (!spage->page)
2589 goto leave_nomem;
2590 len -= l;
2591 logical += l;
2592 physical += l;
2593 }
2594
2595 WARN_ON(sblock->page_count == 0);
2596 for (index = 0; index < sblock->page_count; index++) {
2597 struct scrub_page *spage = sblock->pagev[index];
2598 int ret;
2599
2600 ret = scrub_add_page_to_rd_bio(sctx, spage);
2601 if (ret) {
2602 scrub_block_put(sblock);
2603 return ret;
2604 }
2605 }
2606
2607
2608 scrub_block_put(sblock);
2609 return 0;
2610}
2611
2612static int scrub_extent_for_parity(struct scrub_parity *sparity,
2613 u64 logical, u64 len,
2614 u64 physical, struct btrfs_device *dev,
2615 u64 flags, u64 gen, int mirror_num)
2616{
2617 struct scrub_ctx *sctx = sparity->sctx;
2618 int ret;
2619 u8 csum[BTRFS_CSUM_SIZE];
2620 u32 blocksize;
2621
2622 if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) {
2623 scrub_parity_mark_sectors_error(sparity, logical, len);
2624 return 0;
2625 }
2626
2627 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2628 blocksize = sparity->stripe_len;
2629 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
2630 blocksize = sparity->stripe_len;
2631 } else {
2632 blocksize = sctx->fs_info->sectorsize;
2633 WARN_ON(1);
2634 }
2635
2636 while (len) {
2637 u64 l = min_t(u64, len, blocksize);
2638 int have_csum = 0;
2639
2640 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2641
2642 have_csum = scrub_find_csum(sctx, logical, csum);
2643 if (have_csum == 0)
2644 goto skip;
2645 }
2646 ret = scrub_pages_for_parity(sparity, logical, l, physical, dev,
2647 flags, gen, mirror_num,
2648 have_csum ? csum : NULL);
2649 if (ret)
2650 return ret;
2651skip:
2652 len -= l;
2653 logical += l;
2654 physical += l;
2655 }
2656 return 0;
2657}
2658
2659
2660
2661
2662
2663
2664
2665
2666static int get_raid56_logic_offset(u64 physical, int num,
2667 struct map_lookup *map, u64 *offset,
2668 u64 *stripe_start)
2669{
2670 int i;
2671 int j = 0;
2672 u64 stripe_nr;
2673 u64 last_offset;
2674 u32 stripe_index;
2675 u32 rot;
2676 const int data_stripes = nr_data_stripes(map);
2677
2678 last_offset = (physical - map->stripes[num].physical) * data_stripes;
2679 if (stripe_start)
2680 *stripe_start = last_offset;
2681
2682 *offset = last_offset;
2683 for (i = 0; i < data_stripes; i++) {
2684 *offset = last_offset + i * map->stripe_len;
2685
2686 stripe_nr = div64_u64(*offset, map->stripe_len);
2687 stripe_nr = div_u64(stripe_nr, data_stripes);
2688
2689
2690 stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, &rot);
2691
2692 rot += i;
2693 stripe_index = rot % map->num_stripes;
2694 if (stripe_index == num)
2695 return 0;
2696 if (stripe_index < num)
2697 j++;
2698 }
2699 *offset = last_offset + j * map->stripe_len;
2700 return 1;
2701}
2702
2703static void scrub_free_parity(struct scrub_parity *sparity)
2704{
2705 struct scrub_ctx *sctx = sparity->sctx;
2706 struct scrub_page *curr, *next;
2707 int nbits;
2708
2709 nbits = bitmap_weight(sparity->ebitmap, sparity->nsectors);
2710 if (nbits) {
2711 spin_lock(&sctx->stat_lock);
2712 sctx->stat.read_errors += nbits;
2713 sctx->stat.uncorrectable_errors += nbits;
2714 spin_unlock(&sctx->stat_lock);
2715 }
2716
2717 list_for_each_entry_safe(curr, next, &sparity->spages, list) {
2718 list_del_init(&curr->list);
2719 scrub_page_put(curr);
2720 }
2721
2722 kfree(sparity);
2723}
2724
2725static void scrub_parity_bio_endio_worker(struct btrfs_work *work)
2726{
2727 struct scrub_parity *sparity = container_of(work, struct scrub_parity,
2728 work);
2729 struct scrub_ctx *sctx = sparity->sctx;
2730
2731 scrub_free_parity(sparity);
2732 scrub_pending_bio_dec(sctx);
2733}
2734
2735static void scrub_parity_bio_endio(struct bio *bio)
2736{
2737 struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private;
2738 struct btrfs_fs_info *fs_info = sparity->sctx->fs_info;
2739
2740 if (bio->bi_status)
2741 bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
2742 sparity->nsectors);
2743
2744 bio_put(bio);
2745
2746 btrfs_init_work(&sparity->work, btrfs_scrubparity_helper,
2747 scrub_parity_bio_endio_worker, NULL, NULL);
2748 btrfs_queue_work(fs_info->scrub_parity_workers, &sparity->work);
2749}
2750
2751static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
2752{
2753 struct scrub_ctx *sctx = sparity->sctx;
2754 struct btrfs_fs_info *fs_info = sctx->fs_info;
2755 struct bio *bio;
2756 struct btrfs_raid_bio *rbio;
2757 struct btrfs_bio *bbio = NULL;
2758 u64 length;
2759 int ret;
2760
2761 if (!bitmap_andnot(sparity->dbitmap, sparity->dbitmap, sparity->ebitmap,
2762 sparity->nsectors))
2763 goto out;
2764
2765 length = sparity->logic_end - sparity->logic_start;
2766
2767 btrfs_bio_counter_inc_blocked(fs_info);
2768 ret = btrfs_map_sblock(fs_info, BTRFS_MAP_WRITE, sparity->logic_start,
2769 &length, &bbio);
2770 if (ret || !bbio || !bbio->raid_map)
2771 goto bbio_out;
2772
2773 bio = btrfs_io_bio_alloc(0);
2774 bio->bi_iter.bi_sector = sparity->logic_start >> 9;
2775 bio->bi_private = sparity;
2776 bio->bi_end_io = scrub_parity_bio_endio;
2777
2778 rbio = raid56_parity_alloc_scrub_rbio(fs_info, bio, bbio,
2779 length, sparity->scrub_dev,
2780 sparity->dbitmap,
2781 sparity->nsectors);
2782 if (!rbio)
2783 goto rbio_out;
2784
2785 scrub_pending_bio_inc(sctx);
2786 raid56_parity_submit_scrub_rbio(rbio);
2787 return;
2788
2789rbio_out:
2790 bio_put(bio);
2791bbio_out:
2792 btrfs_bio_counter_dec(fs_info);
2793 btrfs_put_bbio(bbio);
2794 bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
2795 sparity->nsectors);
2796 spin_lock(&sctx->stat_lock);
2797 sctx->stat.malloc_errors++;
2798 spin_unlock(&sctx->stat_lock);
2799out:
2800 scrub_free_parity(sparity);
2801}
2802
2803static inline int scrub_calc_parity_bitmap_len(int nsectors)
2804{
2805 return DIV_ROUND_UP(nsectors, BITS_PER_LONG) * sizeof(long);
2806}
2807
2808static void scrub_parity_get(struct scrub_parity *sparity)
2809{
2810 refcount_inc(&sparity->refs);
2811}
2812
2813static void scrub_parity_put(struct scrub_parity *sparity)
2814{
2815 if (!refcount_dec_and_test(&sparity->refs))
2816 return;
2817
2818 scrub_parity_check_and_repair(sparity);
2819}
2820
2821static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
2822 struct map_lookup *map,
2823 struct btrfs_device *sdev,
2824 struct btrfs_path *path,
2825 u64 logic_start,
2826 u64 logic_end)
2827{
2828 struct btrfs_fs_info *fs_info = sctx->fs_info;
2829 struct btrfs_root *root = fs_info->extent_root;
2830 struct btrfs_root *csum_root = fs_info->csum_root;
2831 struct btrfs_extent_item *extent;
2832 struct btrfs_bio *bbio = NULL;
2833 u64 flags;
2834 int ret;
2835 int slot;
2836 struct extent_buffer *l;
2837 struct btrfs_key key;
2838 u64 generation;
2839 u64 extent_logical;
2840 u64 extent_physical;
2841 u64 extent_len;
2842 u64 mapped_length;
2843 struct btrfs_device *extent_dev;
2844 struct scrub_parity *sparity;
2845 int nsectors;
2846 int bitmap_len;
2847 int extent_mirror_num;
2848 int stop_loop = 0;
2849
2850 nsectors = div_u64(map->stripe_len, fs_info->sectorsize);
2851 bitmap_len = scrub_calc_parity_bitmap_len(nsectors);
2852 sparity = kzalloc(sizeof(struct scrub_parity) + 2 * bitmap_len,
2853 GFP_NOFS);
2854 if (!sparity) {
2855 spin_lock(&sctx->stat_lock);
2856 sctx->stat.malloc_errors++;
2857 spin_unlock(&sctx->stat_lock);
2858 return -ENOMEM;
2859 }
2860
2861 sparity->stripe_len = map->stripe_len;
2862 sparity->nsectors = nsectors;
2863 sparity->sctx = sctx;
2864 sparity->scrub_dev = sdev;
2865 sparity->logic_start = logic_start;
2866 sparity->logic_end = logic_end;
2867 refcount_set(&sparity->refs, 1);
2868 INIT_LIST_HEAD(&sparity->spages);
2869 sparity->dbitmap = sparity->bitmap;
2870 sparity->ebitmap = (void *)sparity->bitmap + bitmap_len;
2871
2872 ret = 0;
2873 while (logic_start < logic_end) {
2874 if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
2875 key.type = BTRFS_METADATA_ITEM_KEY;
2876 else
2877 key.type = BTRFS_EXTENT_ITEM_KEY;
2878 key.objectid = logic_start;
2879 key.offset = (u64)-1;
2880
2881 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2882 if (ret < 0)
2883 goto out;
2884
2885 if (ret > 0) {
2886 ret = btrfs_previous_extent_item(root, path, 0);
2887 if (ret < 0)
2888 goto out;
2889 if (ret > 0) {
2890 btrfs_release_path(path);
2891 ret = btrfs_search_slot(NULL, root, &key,
2892 path, 0, 0);
2893 if (ret < 0)
2894 goto out;
2895 }
2896 }
2897
2898 stop_loop = 0;
2899 while (1) {
2900 u64 bytes;
2901
2902 l = path->nodes[0];
2903 slot = path->slots[0];
2904 if (slot >= btrfs_header_nritems(l)) {
2905 ret = btrfs_next_leaf(root, path);
2906 if (ret == 0)
2907 continue;
2908 if (ret < 0)
2909 goto out;
2910
2911 stop_loop = 1;
2912 break;
2913 }
2914 btrfs_item_key_to_cpu(l, &key, slot);
2915
2916 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
2917 key.type != BTRFS_METADATA_ITEM_KEY)
2918 goto next;
2919
2920 if (key.type == BTRFS_METADATA_ITEM_KEY)
2921 bytes = fs_info->nodesize;
2922 else
2923 bytes = key.offset;
2924
2925 if (key.objectid + bytes <= logic_start)
2926 goto next;
2927
2928 if (key.objectid >= logic_end) {
2929 stop_loop = 1;
2930 break;
2931 }
2932
2933 while (key.objectid >= logic_start + map->stripe_len)
2934 logic_start += map->stripe_len;
2935
2936 extent = btrfs_item_ptr(l, slot,
2937 struct btrfs_extent_item);
2938 flags = btrfs_extent_flags(l, extent);
2939 generation = btrfs_extent_generation(l, extent);
2940
2941 if ((flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) &&
2942 (key.objectid < logic_start ||
2943 key.objectid + bytes >
2944 logic_start + map->stripe_len)) {
2945 btrfs_err(fs_info,
2946 "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
2947 key.objectid, logic_start);
2948 spin_lock(&sctx->stat_lock);
2949 sctx->stat.uncorrectable_errors++;
2950 spin_unlock(&sctx->stat_lock);
2951 goto next;
2952 }
2953again:
2954 extent_logical = key.objectid;
2955 extent_len = bytes;
2956
2957 if (extent_logical < logic_start) {
2958 extent_len -= logic_start - extent_logical;
2959 extent_logical = logic_start;
2960 }
2961
2962 if (extent_logical + extent_len >
2963 logic_start + map->stripe_len)
2964 extent_len = logic_start + map->stripe_len -
2965 extent_logical;
2966
2967 scrub_parity_mark_sectors_data(sparity, extent_logical,
2968 extent_len);
2969
2970 mapped_length = extent_len;
2971 bbio = NULL;
2972 ret = btrfs_map_block(fs_info, BTRFS_MAP_READ,
2973 extent_logical, &mapped_length, &bbio,
2974 0);
2975 if (!ret) {
2976 if (!bbio || mapped_length < extent_len)
2977 ret = -EIO;
2978 }
2979 if (ret) {
2980 btrfs_put_bbio(bbio);
2981 goto out;
2982 }
2983 extent_physical = bbio->stripes[0].physical;
2984 extent_mirror_num = bbio->mirror_num;
2985 extent_dev = bbio->stripes[0].dev;
2986 btrfs_put_bbio(bbio);
2987
2988 ret = btrfs_lookup_csums_range(csum_root,
2989 extent_logical,
2990 extent_logical + extent_len - 1,
2991 &sctx->csum_list, 1);
2992 if (ret)
2993 goto out;
2994
2995 ret = scrub_extent_for_parity(sparity, extent_logical,
2996 extent_len,
2997 extent_physical,
2998 extent_dev, flags,
2999 generation,
3000 extent_mirror_num);
3001
3002 scrub_free_csums(sctx);
3003
3004 if (ret)
3005 goto out;
3006
3007 if (extent_logical + extent_len <
3008 key.objectid + bytes) {
3009 logic_start += map->stripe_len;
3010
3011 if (logic_start >= logic_end) {
3012 stop_loop = 1;
3013 break;
3014 }
3015
3016 if (logic_start < key.objectid + bytes) {
3017 cond_resched();
3018 goto again;
3019 }
3020 }
3021next:
3022 path->slots[0]++;
3023 }
3024
3025 btrfs_release_path(path);
3026
3027 if (stop_loop)
3028 break;
3029
3030 logic_start += map->stripe_len;
3031 }
3032out:
3033 if (ret < 0)
3034 scrub_parity_mark_sectors_error(sparity, logic_start,
3035 logic_end - logic_start);
3036 scrub_parity_put(sparity);
3037 scrub_submit(sctx);
3038 mutex_lock(&sctx->wr_lock);
3039 scrub_wr_submit(sctx);
3040 mutex_unlock(&sctx->wr_lock);
3041
3042 btrfs_release_path(path);
3043 return ret < 0 ? ret : 0;
3044}
3045
3046static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
3047 struct map_lookup *map,
3048 struct btrfs_device *scrub_dev,
3049 int num, u64 base, u64 length)
3050{
3051 struct btrfs_path *path, *ppath;
3052 struct btrfs_fs_info *fs_info = sctx->fs_info;
3053 struct btrfs_root *root = fs_info->extent_root;
3054 struct btrfs_root *csum_root = fs_info->csum_root;
3055 struct btrfs_extent_item *extent;
3056 struct blk_plug plug;
3057 u64 flags;
3058 int ret;
3059 int slot;
3060 u64 nstripes;
3061 struct extent_buffer *l;
3062 u64 physical;
3063 u64 logical;
3064 u64 logic_end;
3065 u64 physical_end;
3066 u64 generation;
3067 int mirror_num;
3068 struct reada_control *reada1;
3069 struct reada_control *reada2;
3070 struct btrfs_key key;
3071 struct btrfs_key key_end;
3072 u64 increment = map->stripe_len;
3073 u64 offset;
3074 u64 extent_logical;
3075 u64 extent_physical;
3076 u64 extent_len;
3077 u64 stripe_logical;
3078 u64 stripe_end;
3079 struct btrfs_device *extent_dev;
3080 int extent_mirror_num;
3081 int stop_loop = 0;
3082
3083 physical = map->stripes[num].physical;
3084 offset = 0;
3085 nstripes = div64_u64(length, map->stripe_len);
3086 if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
3087 offset = map->stripe_len * num;
3088 increment = map->stripe_len * map->num_stripes;
3089 mirror_num = 1;
3090 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
3091 int factor = map->num_stripes / map->sub_stripes;
3092 offset = map->stripe_len * (num / map->sub_stripes);
3093 increment = map->stripe_len * factor;
3094 mirror_num = num % map->sub_stripes + 1;
3095 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) {
3096 increment = map->stripe_len;
3097 mirror_num = num % map->num_stripes + 1;
3098 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
3099 increment = map->stripe_len;
3100 mirror_num = num % map->num_stripes + 1;
3101 } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3102 get_raid56_logic_offset(physical, num, map, &offset, NULL);
3103 increment = map->stripe_len * nr_data_stripes(map);
3104 mirror_num = 1;
3105 } else {
3106 increment = map->stripe_len;
3107 mirror_num = 1;
3108 }
3109
3110 path = btrfs_alloc_path();
3111 if (!path)
3112 return -ENOMEM;
3113
3114 ppath = btrfs_alloc_path();
3115 if (!ppath) {
3116 btrfs_free_path(path);
3117 return -ENOMEM;
3118 }
3119
3120
3121
3122
3123
3124
3125 path->search_commit_root = 1;
3126 path->skip_locking = 1;
3127
3128 ppath->search_commit_root = 1;
3129 ppath->skip_locking = 1;
3130
3131
3132
3133
3134
3135 logical = base + offset;
3136 physical_end = physical + nstripes * map->stripe_len;
3137 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3138 get_raid56_logic_offset(physical_end, num,
3139 map, &logic_end, NULL);
3140 logic_end += base;
3141 } else {
3142 logic_end = logical + increment * nstripes;
3143 }
3144 wait_event(sctx->list_wait,
3145 atomic_read(&sctx->bios_in_flight) == 0);
3146 scrub_blocked_if_needed(fs_info);
3147
3148
3149 key.objectid = logical;
3150 key.type = BTRFS_EXTENT_ITEM_KEY;
3151 key.offset = (u64)0;
3152 key_end.objectid = logic_end;
3153 key_end.type = BTRFS_METADATA_ITEM_KEY;
3154 key_end.offset = (u64)-1;
3155 reada1 = btrfs_reada_add(root, &key, &key_end);
3156
3157 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
3158 key.type = BTRFS_EXTENT_CSUM_KEY;
3159 key.offset = logical;
3160 key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
3161 key_end.type = BTRFS_EXTENT_CSUM_KEY;
3162 key_end.offset = logic_end;
3163 reada2 = btrfs_reada_add(csum_root, &key, &key_end);
3164
3165 if (!IS_ERR(reada1))
3166 btrfs_reada_wait(reada1);
3167 if (!IS_ERR(reada2))
3168 btrfs_reada_wait(reada2);
3169
3170
3171
3172
3173
3174
3175 blk_start_plug(&plug);
3176
3177
3178
3179
3180 ret = 0;
3181 while (physical < physical_end) {
3182
3183
3184
3185 if (atomic_read(&fs_info->scrub_cancel_req) ||
3186 atomic_read(&sctx->cancel_req)) {
3187 ret = -ECANCELED;
3188 goto out;
3189 }
3190
3191
3192
3193 if (atomic_read(&fs_info->scrub_pause_req)) {
3194
3195 sctx->flush_all_writes = true;
3196 scrub_submit(sctx);
3197 mutex_lock(&sctx->wr_lock);
3198 scrub_wr_submit(sctx);
3199 mutex_unlock(&sctx->wr_lock);
3200 wait_event(sctx->list_wait,
3201 atomic_read(&sctx->bios_in_flight) == 0);
3202 sctx->flush_all_writes = false;
3203 scrub_blocked_if_needed(fs_info);
3204 }
3205
3206 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3207 ret = get_raid56_logic_offset(physical, num, map,
3208 &logical,
3209 &stripe_logical);
3210 logical += base;
3211 if (ret) {
3212
3213 stripe_logical += base;
3214 stripe_end = stripe_logical + increment;
3215 ret = scrub_raid56_parity(sctx, map, scrub_dev,
3216 ppath, stripe_logical,
3217 stripe_end);
3218 if (ret)
3219 goto out;
3220 goto skip;
3221 }
3222 }
3223
3224 if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
3225 key.type = BTRFS_METADATA_ITEM_KEY;
3226 else
3227 key.type = BTRFS_EXTENT_ITEM_KEY;
3228 key.objectid = logical;
3229 key.offset = (u64)-1;
3230
3231 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3232 if (ret < 0)
3233 goto out;
3234
3235 if (ret > 0) {
3236 ret = btrfs_previous_extent_item(root, path, 0);
3237 if (ret < 0)
3238 goto out;
3239 if (ret > 0) {
3240
3241
3242 btrfs_release_path(path);
3243 ret = btrfs_search_slot(NULL, root, &key,
3244 path, 0, 0);
3245 if (ret < 0)
3246 goto out;
3247 }
3248 }
3249
3250 stop_loop = 0;
3251 while (1) {
3252 u64 bytes;
3253
3254 l = path->nodes[0];
3255 slot = path->slots[0];
3256 if (slot >= btrfs_header_nritems(l)) {
3257 ret = btrfs_next_leaf(root, path);
3258 if (ret == 0)
3259 continue;
3260 if (ret < 0)
3261 goto out;
3262
3263 stop_loop = 1;
3264 break;
3265 }
3266 btrfs_item_key_to_cpu(l, &key, slot);
3267
3268 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
3269 key.type != BTRFS_METADATA_ITEM_KEY)
3270 goto next;
3271
3272 if (key.type == BTRFS_METADATA_ITEM_KEY)
3273 bytes = fs_info->nodesize;
3274 else
3275 bytes = key.offset;
3276
3277 if (key.objectid + bytes <= logical)
3278 goto next;
3279
3280 if (key.objectid >= logical + map->stripe_len) {
3281
3282 if (key.objectid >= logic_end)
3283 stop_loop = 1;
3284 break;
3285 }
3286
3287 extent = btrfs_item_ptr(l, slot,
3288 struct btrfs_extent_item);
3289 flags = btrfs_extent_flags(l, extent);
3290 generation = btrfs_extent_generation(l, extent);
3291
3292 if ((flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) &&
3293 (key.objectid < logical ||
3294 key.objectid + bytes >
3295 logical + map->stripe_len)) {
3296 btrfs_err(fs_info,
3297 "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
3298 key.objectid, logical);
3299 spin_lock(&sctx->stat_lock);
3300 sctx->stat.uncorrectable_errors++;
3301 spin_unlock(&sctx->stat_lock);
3302 goto next;
3303 }
3304
3305again:
3306 extent_logical = key.objectid;
3307 extent_len = bytes;
3308
3309
3310
3311
3312 if (extent_logical < logical) {
3313 extent_len -= logical - extent_logical;
3314 extent_logical = logical;
3315 }
3316 if (extent_logical + extent_len >
3317 logical + map->stripe_len) {
3318 extent_len = logical + map->stripe_len -
3319 extent_logical;
3320 }
3321
3322 extent_physical = extent_logical - logical + physical;
3323 extent_dev = scrub_dev;
3324 extent_mirror_num = mirror_num;
3325 if (sctx->is_dev_replace)
3326 scrub_remap_extent(fs_info, extent_logical,
3327 extent_len, &extent_physical,
3328 &extent_dev,
3329 &extent_mirror_num);
3330
3331 ret = btrfs_lookup_csums_range(csum_root,
3332 extent_logical,
3333 extent_logical +
3334 extent_len - 1,
3335 &sctx->csum_list, 1);
3336 if (ret)
3337 goto out;
3338
3339 ret = scrub_extent(sctx, map, extent_logical, extent_len,
3340 extent_physical, extent_dev, flags,
3341 generation, extent_mirror_num,
3342 extent_logical - logical + physical);
3343
3344 scrub_free_csums(sctx);
3345
3346 if (ret)
3347 goto out;
3348
3349 if (extent_logical + extent_len <
3350 key.objectid + bytes) {
3351 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3352
3353
3354
3355
3356loop:
3357 physical += map->stripe_len;
3358 ret = get_raid56_logic_offset(physical,
3359 num, map, &logical,
3360 &stripe_logical);
3361 logical += base;
3362
3363 if (ret && physical < physical_end) {
3364 stripe_logical += base;
3365 stripe_end = stripe_logical +
3366 increment;
3367 ret = scrub_raid56_parity(sctx,
3368 map, scrub_dev, ppath,
3369 stripe_logical,
3370 stripe_end);
3371 if (ret)
3372 goto out;
3373 goto loop;
3374 }
3375 } else {
3376 physical += map->stripe_len;
3377 logical += increment;
3378 }
3379 if (logical < key.objectid + bytes) {
3380 cond_resched();
3381 goto again;
3382 }
3383
3384 if (physical >= physical_end) {
3385 stop_loop = 1;
3386 break;
3387 }
3388 }
3389next:
3390 path->slots[0]++;
3391 }
3392 btrfs_release_path(path);
3393skip:
3394 logical += increment;
3395 physical += map->stripe_len;
3396 spin_lock(&sctx->stat_lock);
3397 if (stop_loop)
3398 sctx->stat.last_physical = map->stripes[num].physical +
3399 length;
3400 else
3401 sctx->stat.last_physical = physical;
3402 spin_unlock(&sctx->stat_lock);
3403 if (stop_loop)
3404 break;
3405 }
3406out:
3407
3408 scrub_submit(sctx);
3409 mutex_lock(&sctx->wr_lock);
3410 scrub_wr_submit(sctx);
3411 mutex_unlock(&sctx->wr_lock);
3412
3413 blk_finish_plug(&plug);
3414 btrfs_free_path(path);
3415 btrfs_free_path(ppath);
3416 return ret < 0 ? ret : 0;
3417}
3418
3419static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
3420 struct btrfs_device *scrub_dev,
3421 u64 chunk_offset, u64 length,
3422 u64 dev_offset,
3423 struct btrfs_block_group_cache *cache)
3424{
3425 struct btrfs_fs_info *fs_info = sctx->fs_info;
3426 struct extent_map_tree *map_tree = &fs_info->mapping_tree;
3427 struct map_lookup *map;
3428 struct extent_map *em;
3429 int i;
3430 int ret = 0;
3431
3432 read_lock(&map_tree->lock);
3433 em = lookup_extent_mapping(map_tree, chunk_offset, 1);
3434 read_unlock(&map_tree->lock);
3435
3436 if (!em) {
3437
3438
3439
3440
3441 spin_lock(&cache->lock);
3442 if (!cache->removed)
3443 ret = -EINVAL;
3444 spin_unlock(&cache->lock);
3445
3446 return ret;
3447 }
3448
3449 map = em->map_lookup;
3450 if (em->start != chunk_offset)
3451 goto out;
3452
3453 if (em->len < length)
3454 goto out;
3455
3456 for (i = 0; i < map->num_stripes; ++i) {
3457 if (map->stripes[i].dev->bdev == scrub_dev->bdev &&
3458 map->stripes[i].physical == dev_offset) {
3459 ret = scrub_stripe(sctx, map, scrub_dev, i,
3460 chunk_offset, length);
3461 if (ret)
3462 goto out;
3463 }
3464 }
3465out:
3466 free_extent_map(em);
3467
3468 return ret;
3469}
3470
3471static noinline_for_stack
3472int scrub_enumerate_chunks(struct scrub_ctx *sctx,
3473 struct btrfs_device *scrub_dev, u64 start, u64 end)
3474{
3475 struct btrfs_dev_extent *dev_extent = NULL;
3476 struct btrfs_path *path;
3477 struct btrfs_fs_info *fs_info = sctx->fs_info;
3478 struct btrfs_root *root = fs_info->dev_root;
3479 u64 length;
3480 u64 chunk_offset;
3481 int ret = 0;
3482 int ro_set;
3483 int slot;
3484 struct extent_buffer *l;
3485 struct btrfs_key key;
3486 struct btrfs_key found_key;
3487 struct btrfs_block_group_cache *cache;
3488 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
3489
3490 path = btrfs_alloc_path();
3491 if (!path)
3492 return -ENOMEM;
3493
3494 path->reada = READA_FORWARD;
3495 path->search_commit_root = 1;
3496 path->skip_locking = 1;
3497
3498 key.objectid = scrub_dev->devid;
3499 key.offset = 0ull;
3500 key.type = BTRFS_DEV_EXTENT_KEY;
3501
3502 while (1) {
3503 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3504 if (ret < 0)
3505 break;
3506 if (ret > 0) {
3507 if (path->slots[0] >=
3508 btrfs_header_nritems(path->nodes[0])) {
3509 ret = btrfs_next_leaf(root, path);
3510 if (ret < 0)
3511 break;
3512 if (ret > 0) {
3513 ret = 0;
3514 break;
3515 }
3516 } else {
3517 ret = 0;
3518 }
3519 }
3520
3521 l = path->nodes[0];
3522 slot = path->slots[0];
3523
3524 btrfs_item_key_to_cpu(l, &found_key, slot);
3525
3526 if (found_key.objectid != scrub_dev->devid)
3527 break;
3528
3529 if (found_key.type != BTRFS_DEV_EXTENT_KEY)
3530 break;
3531
3532 if (found_key.offset >= end)
3533 break;
3534
3535 if (found_key.offset < key.offset)
3536 break;
3537
3538 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
3539 length = btrfs_dev_extent_length(l, dev_extent);
3540
3541 if (found_key.offset + length <= start)
3542 goto skip;
3543
3544 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
3545
3546
3547
3548
3549
3550 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
3551
3552
3553
3554 if (!cache)
3555 goto skip;
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565 scrub_pause_on(fs_info);
3566 ret = btrfs_inc_block_group_ro(cache);
3567 if (!ret && sctx->is_dev_replace) {
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
3586 btrfs_wait_block_group_reservations(cache);
3587 btrfs_wait_nocow_writers(cache);
3588 ret = btrfs_wait_ordered_roots(fs_info, U64_MAX,
3589 cache->key.objectid,
3590 cache->key.offset);
3591 if (ret > 0) {
3592 struct btrfs_trans_handle *trans;
3593
3594 trans = btrfs_join_transaction(root);
3595 if (IS_ERR(trans))
3596 ret = PTR_ERR(trans);
3597 else
3598 ret = btrfs_commit_transaction(trans);
3599 if (ret) {
3600 scrub_pause_off(fs_info);
3601 btrfs_put_block_group(cache);
3602 break;
3603 }
3604 }
3605 }
3606 scrub_pause_off(fs_info);
3607
3608 if (ret == 0) {
3609 ro_set = 1;
3610 } else if (ret == -ENOSPC) {
3611
3612
3613
3614
3615
3616
3617
3618 ro_set = 0;
3619 } else {
3620 btrfs_warn(fs_info,
3621 "failed setting block group ro: %d", ret);
3622 btrfs_put_block_group(cache);
3623 break;
3624 }
3625
3626 down_write(&fs_info->dev_replace.rwsem);
3627 dev_replace->cursor_right = found_key.offset + length;
3628 dev_replace->cursor_left = found_key.offset;
3629 dev_replace->item_needs_writeback = 1;
3630 up_write(&dev_replace->rwsem);
3631
3632 ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length,
3633 found_key.offset, cache);
3634
3635
3636
3637
3638
3639
3640
3641
3642
3643
3644
3645 sctx->flush_all_writes = true;
3646 scrub_submit(sctx);
3647 mutex_lock(&sctx->wr_lock);
3648 scrub_wr_submit(sctx);
3649 mutex_unlock(&sctx->wr_lock);
3650
3651 wait_event(sctx->list_wait,
3652 atomic_read(&sctx->bios_in_flight) == 0);
3653
3654 scrub_pause_on(fs_info);
3655
3656
3657
3658
3659
3660
3661 wait_event(sctx->list_wait,
3662 atomic_read(&sctx->workers_pending) == 0);
3663 sctx->flush_all_writes = false;
3664
3665 scrub_pause_off(fs_info);
3666
3667 down_write(&fs_info->dev_replace.rwsem);
3668 dev_replace->cursor_left = dev_replace->cursor_right;
3669 dev_replace->item_needs_writeback = 1;
3670 up_write(&fs_info->dev_replace.rwsem);
3671
3672 if (ro_set)
3673 btrfs_dec_block_group_ro(cache);
3674
3675
3676
3677
3678
3679
3680
3681
3682 spin_lock(&cache->lock);
3683 if (!cache->removed && !cache->ro && cache->reserved == 0 &&
3684 btrfs_block_group_used(&cache->item) == 0) {
3685 spin_unlock(&cache->lock);
3686 btrfs_mark_bg_unused(cache);
3687 } else {
3688 spin_unlock(&cache->lock);
3689 }
3690
3691 btrfs_put_block_group(cache);
3692 if (ret)
3693 break;
3694 if (sctx->is_dev_replace &&
3695 atomic64_read(&dev_replace->num_write_errors) > 0) {
3696 ret = -EIO;
3697 break;
3698 }
3699 if (sctx->stat.malloc_errors > 0) {
3700 ret = -ENOMEM;
3701 break;
3702 }
3703skip:
3704 key.offset = found_key.offset + length;
3705 btrfs_release_path(path);
3706 }
3707
3708 btrfs_free_path(path);
3709
3710 return ret;
3711}
3712
3713static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
3714 struct btrfs_device *scrub_dev)
3715{
3716 int i;
3717 u64 bytenr;
3718 u64 gen;
3719 int ret;
3720 struct btrfs_fs_info *fs_info = sctx->fs_info;
3721
3722 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
3723 return -EIO;
3724
3725
3726 if (scrub_dev->fs_devices != fs_info->fs_devices)
3727 gen = scrub_dev->generation;
3728 else
3729 gen = fs_info->last_trans_committed;
3730
3731 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
3732 bytenr = btrfs_sb_offset(i);
3733 if (bytenr + BTRFS_SUPER_INFO_SIZE >
3734 scrub_dev->commit_total_bytes)
3735 break;
3736
3737 ret = scrub_pages(sctx, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr,
3738 scrub_dev, BTRFS_EXTENT_FLAG_SUPER, gen, i,
3739 NULL, 1, bytenr);
3740 if (ret)
3741 return ret;
3742 }
3743 wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
3744
3745 return 0;
3746}
3747
3748
3749
3750
3751static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
3752 int is_dev_replace)
3753{
3754 unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND;
3755 int max_active = fs_info->thread_pool_size;
3756
3757 lockdep_assert_held(&fs_info->scrub_lock);
3758
3759 if (refcount_read(&fs_info->scrub_workers_refcnt) == 0) {
3760 ASSERT(fs_info->scrub_workers == NULL);
3761 fs_info->scrub_workers = btrfs_alloc_workqueue(fs_info, "scrub",
3762 flags, is_dev_replace ? 1 : max_active, 4);
3763 if (!fs_info->scrub_workers)
3764 goto fail_scrub_workers;
3765
3766 ASSERT(fs_info->scrub_wr_completion_workers == NULL);
3767 fs_info->scrub_wr_completion_workers =
3768 btrfs_alloc_workqueue(fs_info, "scrubwrc", flags,
3769 max_active, 2);
3770 if (!fs_info->scrub_wr_completion_workers)
3771 goto fail_scrub_wr_completion_workers;
3772
3773 ASSERT(fs_info->scrub_parity_workers == NULL);
3774 fs_info->scrub_parity_workers =
3775 btrfs_alloc_workqueue(fs_info, "scrubparity", flags,
3776 max_active, 2);
3777 if (!fs_info->scrub_parity_workers)
3778 goto fail_scrub_parity_workers;
3779
3780 refcount_set(&fs_info->scrub_workers_refcnt, 1);
3781 } else {
3782 refcount_inc(&fs_info->scrub_workers_refcnt);
3783 }
3784 return 0;
3785
3786fail_scrub_parity_workers:
3787 btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers);
3788fail_scrub_wr_completion_workers:
3789 btrfs_destroy_workqueue(fs_info->scrub_workers);
3790fail_scrub_workers:
3791 return -ENOMEM;
3792}
3793
3794int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
3795 u64 end, struct btrfs_scrub_progress *progress,
3796 int readonly, int is_dev_replace)
3797{
3798 struct scrub_ctx *sctx;
3799 int ret;
3800 struct btrfs_device *dev;
3801 unsigned int nofs_flag;
3802 struct btrfs_workqueue *scrub_workers = NULL;
3803 struct btrfs_workqueue *scrub_wr_comp = NULL;
3804 struct btrfs_workqueue *scrub_parity = NULL;
3805
3806 if (btrfs_fs_closing(fs_info))
3807 return -EAGAIN;
3808
3809 if (fs_info->nodesize > BTRFS_STRIPE_LEN) {
3810
3811
3812
3813
3814
3815 btrfs_err(fs_info,
3816 "scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails",
3817 fs_info->nodesize,
3818 BTRFS_STRIPE_LEN);
3819 return -EINVAL;
3820 }
3821
3822 if (fs_info->sectorsize != PAGE_SIZE) {
3823
3824 btrfs_err_rl(fs_info,
3825 "scrub: size assumption sectorsize != PAGE_SIZE (%d != %lu) fails",
3826 fs_info->sectorsize, PAGE_SIZE);
3827 return -EINVAL;
3828 }
3829
3830 if (fs_info->nodesize >
3831 PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK ||
3832 fs_info->sectorsize > PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK) {
3833
3834
3835
3836
3837 btrfs_err(fs_info,
3838 "scrub: size assumption nodesize and sectorsize <= SCRUB_MAX_PAGES_PER_BLOCK (%d <= %d && %d <= %d) fails",
3839 fs_info->nodesize,
3840 SCRUB_MAX_PAGES_PER_BLOCK,
3841 fs_info->sectorsize,
3842 SCRUB_MAX_PAGES_PER_BLOCK);
3843 return -EINVAL;
3844 }
3845
3846
3847 sctx = scrub_setup_ctx(fs_info, is_dev_replace);
3848 if (IS_ERR(sctx))
3849 return PTR_ERR(sctx);
3850
3851 mutex_lock(&fs_info->fs_devices->device_list_mutex);
3852 dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true);
3853 if (!dev || (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) &&
3854 !is_dev_replace)) {
3855 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3856 ret = -ENODEV;
3857 goto out_free_ctx;
3858 }
3859
3860 if (!is_dev_replace && !readonly &&
3861 !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
3862 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3863 btrfs_err_in_rcu(fs_info, "scrub: device %s is not writable",
3864 rcu_str_deref(dev->name));
3865 ret = -EROFS;
3866 goto out_free_ctx;
3867 }
3868
3869 mutex_lock(&fs_info->scrub_lock);
3870 if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
3871 test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &dev->dev_state)) {
3872 mutex_unlock(&fs_info->scrub_lock);
3873 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3874 ret = -EIO;
3875 goto out_free_ctx;
3876 }
3877
3878 down_read(&fs_info->dev_replace.rwsem);
3879 if (dev->scrub_ctx ||
3880 (!is_dev_replace &&
3881 btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) {
3882 up_read(&fs_info->dev_replace.rwsem);
3883 mutex_unlock(&fs_info->scrub_lock);
3884 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3885 ret = -EINPROGRESS;
3886 goto out_free_ctx;
3887 }
3888 up_read(&fs_info->dev_replace.rwsem);
3889
3890 ret = scrub_workers_get(fs_info, is_dev_replace);
3891 if (ret) {
3892 mutex_unlock(&fs_info->scrub_lock);
3893 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3894 goto out_free_ctx;
3895 }
3896
3897 sctx->readonly = readonly;
3898 dev->scrub_ctx = sctx;
3899 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3900
3901
3902
3903
3904
3905 __scrub_blocked_if_needed(fs_info);
3906 atomic_inc(&fs_info->scrubs_running);
3907 mutex_unlock(&fs_info->scrub_lock);
3908
3909
3910
3911
3912
3913
3914
3915
3916
3917
3918 nofs_flag = memalloc_nofs_save();
3919 if (!is_dev_replace) {
3920 btrfs_info(fs_info, "scrub: started on devid %llu", devid);
3921
3922
3923
3924
3925 mutex_lock(&fs_info->fs_devices->device_list_mutex);
3926 ret = scrub_supers(sctx, dev);
3927 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3928 }
3929
3930 if (!ret)
3931 ret = scrub_enumerate_chunks(sctx, dev, start, end);
3932 memalloc_nofs_restore(nofs_flag);
3933
3934 wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
3935 atomic_dec(&fs_info->scrubs_running);
3936 wake_up(&fs_info->scrub_pause_wait);
3937
3938 wait_event(sctx->list_wait, atomic_read(&sctx->workers_pending) == 0);
3939
3940 if (progress)
3941 memcpy(progress, &sctx->stat, sizeof(*progress));
3942
3943 if (!is_dev_replace)
3944 btrfs_info(fs_info, "scrub: %s on devid %llu with status: %d",
3945 ret ? "not finished" : "finished", devid, ret);
3946
3947 mutex_lock(&fs_info->scrub_lock);
3948 dev->scrub_ctx = NULL;
3949 if (refcount_dec_and_test(&fs_info->scrub_workers_refcnt)) {
3950 scrub_workers = fs_info->scrub_workers;
3951 scrub_wr_comp = fs_info->scrub_wr_completion_workers;
3952 scrub_parity = fs_info->scrub_parity_workers;
3953
3954 fs_info->scrub_workers = NULL;
3955 fs_info->scrub_wr_completion_workers = NULL;
3956 fs_info->scrub_parity_workers = NULL;
3957 }
3958 mutex_unlock(&fs_info->scrub_lock);
3959
3960 btrfs_destroy_workqueue(scrub_workers);
3961 btrfs_destroy_workqueue(scrub_wr_comp);
3962 btrfs_destroy_workqueue(scrub_parity);
3963 scrub_put_ctx(sctx);
3964
3965 return ret;
3966
3967out_free_ctx:
3968 scrub_free_ctx(sctx);
3969
3970 return ret;
3971}
3972
3973void btrfs_scrub_pause(struct btrfs_fs_info *fs_info)
3974{
3975 mutex_lock(&fs_info->scrub_lock);
3976 atomic_inc(&fs_info->scrub_pause_req);
3977 while (atomic_read(&fs_info->scrubs_paused) !=
3978 atomic_read(&fs_info->scrubs_running)) {
3979 mutex_unlock(&fs_info->scrub_lock);
3980 wait_event(fs_info->scrub_pause_wait,
3981 atomic_read(&fs_info->scrubs_paused) ==
3982 atomic_read(&fs_info->scrubs_running));
3983 mutex_lock(&fs_info->scrub_lock);
3984 }
3985 mutex_unlock(&fs_info->scrub_lock);
3986}
3987
3988void btrfs_scrub_continue(struct btrfs_fs_info *fs_info)
3989{
3990 atomic_dec(&fs_info->scrub_pause_req);
3991 wake_up(&fs_info->scrub_pause_wait);
3992}
3993
3994int btrfs_scrub_cancel(struct btrfs_fs_info *fs_info)
3995{
3996 mutex_lock(&fs_info->scrub_lock);
3997 if (!atomic_read(&fs_info->scrubs_running)) {
3998 mutex_unlock(&fs_info->scrub_lock);
3999 return -ENOTCONN;
4000 }
4001
4002 atomic_inc(&fs_info->scrub_cancel_req);
4003 while (atomic_read(&fs_info->scrubs_running)) {
4004 mutex_unlock(&fs_info->scrub_lock);
4005 wait_event(fs_info->scrub_pause_wait,
4006 atomic_read(&fs_info->scrubs_running) == 0);
4007 mutex_lock(&fs_info->scrub_lock);
4008 }
4009 atomic_dec(&fs_info->scrub_cancel_req);
4010 mutex_unlock(&fs_info->scrub_lock);
4011
4012 return 0;
4013}
4014
4015int btrfs_scrub_cancel_dev(struct btrfs_device *dev)
4016{
4017 struct btrfs_fs_info *fs_info = dev->fs_info;
4018 struct scrub_ctx *sctx;
4019
4020 mutex_lock(&fs_info->scrub_lock);
4021 sctx = dev->scrub_ctx;
4022 if (!sctx) {
4023 mutex_unlock(&fs_info->scrub_lock);
4024 return -ENOTCONN;
4025 }
4026 atomic_inc(&sctx->cancel_req);
4027 while (dev->scrub_ctx) {
4028 mutex_unlock(&fs_info->scrub_lock);
4029 wait_event(fs_info->scrub_pause_wait,
4030 dev->scrub_ctx == NULL);
4031 mutex_lock(&fs_info->scrub_lock);
4032 }
4033 mutex_unlock(&fs_info->scrub_lock);
4034
4035 return 0;
4036}
4037
4038int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
4039 struct btrfs_scrub_progress *progress)
4040{
4041 struct btrfs_device *dev;
4042 struct scrub_ctx *sctx = NULL;
4043
4044 mutex_lock(&fs_info->fs_devices->device_list_mutex);
4045 dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true);
4046 if (dev)
4047 sctx = dev->scrub_ctx;
4048 if (sctx)
4049 memcpy(progress, &sctx->stat, sizeof(*progress));
4050 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
4051
4052 return dev ? (sctx ? 0 : -ENOTCONN) : -ENODEV;
4053}
4054
4055static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
4056 u64 extent_logical, u64 extent_len,
4057 u64 *extent_physical,
4058 struct btrfs_device **extent_dev,
4059 int *extent_mirror_num)
4060{
4061 u64 mapped_length;
4062 struct btrfs_bio *bbio = NULL;
4063 int ret;
4064
4065 mapped_length = extent_len;
4066 ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, extent_logical,
4067 &mapped_length, &bbio, 0);
4068 if (ret || !bbio || mapped_length < extent_len ||
4069 !bbio->stripes[0].dev->bdev) {
4070 btrfs_put_bbio(bbio);
4071 return;
4072 }
4073
4074 *extent_physical = bbio->stripes[0].physical;
4075 *extent_mirror_num = bbio->mirror_num;
4076 *extent_dev = bbio->stripes[0].dev;
4077 btrfs_put_bbio(bbio);
4078}
4079