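// SPDX-License-Identifier: GPL-2.0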

#include <linux/blkdev.h>
#include <linux/ratelimit.h>
#include <linux/sched/mm.h>
#include <crypto/hash.h>
#include "ctree.h"
#include "volumes.h"
#include "disk-io.h"
#include "ordered-data.h"
#include "transaction.h"
#include "backref.h"
#include "extent_io.h"
#include "dev-replace.h"
#include "check-integrity.h"
#include "rcu-string.h"
#include "raid56.h"
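
/*
 * Scrub walks all allocated extents and super blocks of a device, verifies
 * checksums and tree block headers and, when a good copy exists on another
 * mirror, repairs blocks that fail verification. In dev-replace mode the
 * verified (or repaired) data is also written to the replacement target
 * device.
 */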

struct scrub_block;
struct scrub_ctx;
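
/*
 * The following values only influence performance: they bound the number of
 * pages collected into a single read/write bio and the number of bios kept
 * per scrub context.
 */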
#define SCRUB_PAGES_PER_RD_BIO	32
#define SCRUB_PAGES_PER_WR_BIO	32
#define SCRUB_BIOS_PER_SCTX	64
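
/*
 * SCRUB_MAX_PAGES_PER_BLOCK * PAGE_SIZE must be at least as large as the
 * largest supported node/leaf/sector size, since one scrub_block holds all
 * pages of a single block.
 */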
#define SCRUB_MAX_PAGES_PER_BLOCK	16

struct scrub_recover {
	refcount_t		refs;
	struct btrfs_bio	*bbio;
	u64			map_length;
};

struct scrub_page {
	struct scrub_block	*sblock;
	struct page		*page;
	struct btrfs_device	*dev;
	struct list_head	list;
	u64			flags;  /* extent flags */
	u64			generation;
	u64			logical;
	u64			physical;
	u64			physical_for_dev_replace;
	atomic_t		refs;
	struct {
		unsigned int	mirror_num:8;
		unsigned int	have_csum:1;
		unsigned int	io_error:1;
	};
	u8			csum[BTRFS_CSUM_SIZE];

	struct scrub_recover	*recover;
};

struct scrub_bio {
	int			index;
	struct scrub_ctx	*sctx;
	struct btrfs_device	*dev;
	struct bio		*bio;
	blk_status_t		status;
	u64			logical;
	u64			physical;
#if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO
	struct scrub_page	*pagev[SCRUB_PAGES_PER_WR_BIO];
#else
	struct scrub_page	*pagev[SCRUB_PAGES_PER_RD_BIO];
#endif
	int			page_count;
	int			next_free;
	struct btrfs_work	work;
};

struct scrub_block {
	struct scrub_page	*pagev[SCRUB_MAX_PAGES_PER_BLOCK];
	int			page_count;
	atomic_t		outstanding_pages;
	refcount_t		refs; /* free mem on transition to zero */
	struct scrub_ctx	*sctx;
	struct scrub_parity	*sparity;
	struct {
		unsigned int	header_error:1;
		unsigned int	checksum_error:1;
		unsigned int	no_io_error_seen:1;
		unsigned int	generation_error:1; /* also sets header_error */

		/*
		 * Set when the block's content was successfully corrected,
		 * or when the initial read error did not reproduce.
		 */
		unsigned int	data_corrected:1;
	};
	struct btrfs_work	work;
};
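
/* Used for the chunks with parity stripe such as RAID5/6 */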
struct scrub_parity {
	struct scrub_ctx	*sctx;

	struct btrfs_device	*scrub_dev;

	u64			logic_start;

	u64			logic_end;

	int			nsectors;

	u64			stripe_len;

	refcount_t		refs;

	struct list_head	spages;

	/* Work of parity check and repair */
	struct btrfs_work	work;

	/* Mark the parity blocks which have data */
	unsigned long		*dbitmap;

	/*
	 * Mark the parity blocks which have data, but errors happened when
	 * reading or checking that data
	 */
	unsigned long		*ebitmap;

	unsigned long		bitmap[0];
};

struct scrub_ctx {
	struct scrub_bio	*bios[SCRUB_BIOS_PER_SCTX];
	struct btrfs_fs_info	*fs_info;
	int			first_free;
	int			curr;
	atomic_t		bios_in_flight;
	atomic_t		workers_pending;
	spinlock_t		list_lock;
	wait_queue_head_t	list_wait;
	u16			csum_size;
	struct list_head	csum_list;
	atomic_t		cancel_req;
	int			readonly;
	int			pages_per_rd_bio;

	int			is_dev_replace;

	struct scrub_bio	*wr_curr_bio;
	struct mutex		wr_lock;
	int			pages_per_wr_bio; /* <= SCRUB_PAGES_PER_WR_BIO */
	struct btrfs_device	*wr_tgtdev;
	bool			flush_all_writes;

	/*
	 * statistics
	 */
	struct btrfs_scrub_progress stat;
	spinlock_t		stat_lock;

	/*
	 * Use a ref counter to avoid use-after-free issues. Scrub workers
	 * decrement bios_in_flight and workers_pending and then do a wakeup
	 * on the list_wait wait queue. We must ensure the main scrub task
	 * does not free the scrub context before or while the workers are
	 * finishing the wakeup.
	 */
	refcount_t		refs;
};

struct scrub_warning {
	struct btrfs_path	*path;
	u64			extent_item_size;
	const char		*errstr;
	u64			physical;
	u64			logical;
	struct btrfs_device	*dev;
};

struct full_stripe_lock {
	struct rb_node		node;
	u64			logical;
	u64			refs;
	struct mutex		mutex;
};

static void scrub_pending_bio_inc(struct scrub_ctx *sctx);
static void scrub_pending_bio_dec(struct scrub_ctx *sctx);
static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
				     struct scrub_block *sblocks_for_recheck);
static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
				struct scrub_block *sblock,
				int retry_failed_mirror);
static void scrub_recheck_block_checksum(struct scrub_block *sblock);
static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
					     struct scrub_block *sblock_good);
static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
					    struct scrub_block *sblock_good,
					    int page_num, int force_write);
static void scrub_write_block_to_dev_replace(struct scrub_block *sblock);
static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
					   int page_num);
static int scrub_checksum_data(struct scrub_block *sblock);
static int scrub_checksum_tree_block(struct scrub_block *sblock);
static int scrub_checksum_super(struct scrub_block *sblock);
static void scrub_block_get(struct scrub_block *sblock);
static void scrub_block_put(struct scrub_block *sblock);
static void scrub_page_get(struct scrub_page *spage);
static void scrub_page_put(struct scrub_page *spage);
static void scrub_parity_get(struct scrub_parity *sparity);
static void scrub_parity_put(struct scrub_parity *sparity);
static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
				    struct scrub_page *spage);
static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
		       u64 physical, struct btrfs_device *dev, u64 flags,
		       u64 gen, int mirror_num, u8 *csum, int force,
		       u64 physical_for_dev_replace);
static void scrub_bio_end_io(struct bio *bio);
static void scrub_bio_end_io_worker(struct btrfs_work *work);
static void scrub_block_complete(struct scrub_block *sblock);
static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
			       u64 extent_logical, u64 extent_len,
			       u64 *extent_physical,
			       struct btrfs_device **extent_dev,
			       int *extent_mirror_num);
static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
				    struct scrub_page *spage);
static void scrub_wr_submit(struct scrub_ctx *sctx);
static void scrub_wr_bio_end_io(struct bio *bio);
static void scrub_wr_bio_end_io_worker(struct btrfs_work *work);
static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
static void scrub_put_ctx(struct scrub_ctx *sctx);

static inline int scrub_is_page_on_raid56(struct scrub_page *page)
{
	return page->recover &&
	       (page->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK);
}
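
/*
 * Keep the scrub context alive while a bio is in flight: each in-flight bio
 * holds one reference on sctx->refs, taken in scrub_pending_bio_inc() and
 * dropped by the paired scrub_pending_bio_dec().
 */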
static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
{
	refcount_inc(&sctx->refs);
	atomic_inc(&sctx->bios_in_flight);
}

static void scrub_pending_bio_dec(struct scrub_ctx *sctx)
{
	atomic_dec(&sctx->bios_in_flight);
	wake_up(&sctx->list_wait);
	scrub_put_ctx(sctx);
}

static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
{
	while (atomic_read(&fs_info->scrub_pause_req)) {
		mutex_unlock(&fs_info->scrub_lock);
		wait_event(fs_info->scrub_pause_wait,
			   atomic_read(&fs_info->scrub_pause_req) == 0);
		mutex_lock(&fs_info->scrub_lock);
	}
}

static void scrub_pause_on(struct btrfs_fs_info *fs_info)
{
	atomic_inc(&fs_info->scrubs_paused);
	wake_up(&fs_info->scrub_pause_wait);
}

static void scrub_pause_off(struct btrfs_fs_info *fs_info)
{
	mutex_lock(&fs_info->scrub_lock);
	__scrub_blocked_if_needed(fs_info);
	atomic_dec(&fs_info->scrubs_paused);
	mutex_unlock(&fs_info->scrub_lock);

	wake_up(&fs_info->scrub_pause_wait);
}

static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
{
	scrub_pause_on(fs_info);
	scrub_pause_off(fs_info);
}
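
/*
 * Insert a new full stripe lock into the full stripe locks tree.
 *
 * Return a pointer to the existing or the newly inserted full_stripe_lock
 * structure on success (with refs increased for an existing entry).
 * Return ERR_PTR(-ENOMEM) if the allocation failed.
 *
 * NOTE: the caller must hold locks_root->lock before calling this function.
 */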
static struct full_stripe_lock *insert_full_stripe_lock(
		struct btrfs_full_stripe_locks_tree *locks_root,
		u64 fstripe_logical)
{
	struct rb_node **p;
	struct rb_node *parent = NULL;
	struct full_stripe_lock *entry;
	struct full_stripe_lock *ret;

	lockdep_assert_held(&locks_root->lock);

	p = &locks_root->root.rb_node;
	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct full_stripe_lock, node);
		if (fstripe_logical < entry->logical) {
			p = &(*p)->rb_left;
		} else if (fstripe_logical > entry->logical) {
			p = &(*p)->rb_right;
		} else {
			entry->refs++;
			return entry;
		}
	}

	/*
	 * Insert new lock.
	 */
	ret = kmalloc(sizeof(*ret), GFP_KERNEL);
	if (!ret)
		return ERR_PTR(-ENOMEM);
	ret->logical = fstripe_logical;
	ret->refs = 1;
	mutex_init(&ret->mutex);

	rb_link_node(&ret->node, parent, p);
	rb_insert_color(&ret->node, &locks_root->root);
	return ret;
}
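
/*
 * Search for a full stripe lock of a block group.
 *
 * Return a pointer to the existing full stripe lock if found, NULL if not.
 *
 * NOTE: the caller must hold locks_root->lock before calling this function.
 */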
static struct full_stripe_lock *search_full_stripe_lock(
		struct btrfs_full_stripe_locks_tree *locks_root,
		u64 fstripe_logical)
{
	struct rb_node *node;
	struct full_stripe_lock *entry;

	lockdep_assert_held(&locks_root->lock);

	node = locks_root->root.rb_node;
	while (node) {
		entry = rb_entry(node, struct full_stripe_lock, node);
		if (fstripe_logical < entry->logical)
			node = node->rb_left;
		else if (fstripe_logical > entry->logical)
			node = node->rb_right;
		else
			return entry;
	}
	return NULL;
}
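
/*
 * Helper to get the logical start address of the full stripe that contains
 * @bytenr, for RAID5/6 block groups.
 */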
static u64 get_full_stripe_logical(struct btrfs_block_group_cache *cache,
				   u64 bytenr)
{
	u64 ret;

	/*
	 * The full stripe length is expected to fit into 32 bits; warn
	 * loudly if it ever does not.
	 */
	WARN_ON_ONCE(cache->full_stripe_len >= U32_MAX);

	/*
	 * round_down() can only handle power-of-two lengths, while a RAID56
	 * full stripe length can be 64KiB * n, so round down manually.
	 */
	ret = div64_u64(bytenr - cache->key.objectid, cache->full_stripe_len) *
			cache->full_stripe_len + cache->key.objectid;
	return ret;
}
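
/*
 * Lock a full stripe to avoid concurrency of recovery and read.
 *
 * It's only used for profiles with parities (RAID5/6); for other profiles it
 * does nothing.
 *
 * Return 0 if we locked the full stripe covering @bytenr, with the mutex
 * held, in which case *locked_ret is set and the caller must later call
 * unlock_full_stripe() from the same context.
 * Return <0 on error.
 */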
static int lock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr,
			    bool *locked_ret)
{
	struct btrfs_block_group_cache *bg_cache;
	struct btrfs_full_stripe_locks_tree *locks_root;
	struct full_stripe_lock *existing;
	u64 fstripe_start;
	int ret = 0;

	*locked_ret = false;
	bg_cache = btrfs_lookup_block_group(fs_info, bytenr);
	if (!bg_cache) {
		ASSERT(0);
		return -ENOENT;
	}

	/* Profiles not based on parity don't need a full stripe lock */
	if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK))
		goto out;
	locks_root = &bg_cache->full_stripe_locks_root;

	fstripe_start = get_full_stripe_logical(bg_cache, bytenr);

	/* Now insert the full stripe lock */
	mutex_lock(&locks_root->lock);
	existing = insert_full_stripe_lock(locks_root, fstripe_start);
	mutex_unlock(&locks_root->lock);
	if (IS_ERR(existing)) {
		ret = PTR_ERR(existing);
		goto out;
	}
	mutex_lock(&existing->mutex);
	*locked_ret = true;
out:
	btrfs_put_block_group(bg_cache);
	return ret;
}
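
/*
 * Unlock a full stripe.
 *
 * NOTE: the caller must ensure this is called from the same context as the
 * corresponding lock_full_stripe(), with @locked carrying its *locked_ret.
 *
 * Return 0 if we unlocked the full stripe without problem.
 * Return <0 on error.
 */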
static int unlock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr,
			      bool locked)
{
	struct btrfs_block_group_cache *bg_cache;
	struct btrfs_full_stripe_locks_tree *locks_root;
	struct full_stripe_lock *fstripe_lock;
	u64 fstripe_start;
	bool freeit = false;
	int ret = 0;

	/* If we didn't acquire the full stripe lock, no need to continue */
	if (!locked)
		return 0;

	bg_cache = btrfs_lookup_block_group(fs_info, bytenr);
	if (!bg_cache) {
		ASSERT(0);
		return -ENOENT;
	}
	if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK))
		goto out;

	locks_root = &bg_cache->full_stripe_locks_root;
	fstripe_start = get_full_stripe_logical(bg_cache, bytenr);

	mutex_lock(&locks_root->lock);
	fstripe_lock = search_full_stripe_lock(locks_root, fstripe_start);
	/* Unpaired unlock_full_stripe() detected */
	if (!fstripe_lock) {
		WARN_ON(1);
		ret = -ENOENT;
		mutex_unlock(&locks_root->lock);
		goto out;
	}

	if (fstripe_lock->refs == 0) {
		WARN_ON(1);
		btrfs_warn(fs_info, "full stripe lock at %llu refcount underflow",
			   fstripe_lock->logical);
	} else {
		fstripe_lock->refs--;
	}

	if (fstripe_lock->refs == 0) {
		rb_erase(&fstripe_lock->node, &locks_root->root);
		freeit = true;
	}
	mutex_unlock(&locks_root->lock);

	mutex_unlock(&fstripe_lock->mutex);
	if (freeit)
		kfree(fstripe_lock);
out:
	btrfs_put_block_group(bg_cache);
	return ret;
}

static void scrub_free_csums(struct scrub_ctx *sctx)
{
	while (!list_empty(&sctx->csum_list)) {
		struct btrfs_ordered_sum *sum;
		sum = list_first_entry(&sctx->csum_list,
				       struct btrfs_ordered_sum, list);
		list_del(&sum->list);
		kfree(sum);
	}
}

static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
{
	int i;

	if (!sctx)
		return;

	/* A partially filled bio can be left over when scrub is cancelled */
	if (sctx->curr != -1) {
		struct scrub_bio *sbio = sctx->bios[sctx->curr];

		for (i = 0; i < sbio->page_count; i++) {
			WARN_ON(!sbio->pagev[i]->page);
			scrub_block_put(sbio->pagev[i]->sblock);
		}
		bio_put(sbio->bio);
	}

	for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
		struct scrub_bio *sbio = sctx->bios[i];

		if (!sbio)
			break;
		kfree(sbio);
	}

	kfree(sctx->wr_curr_bio);
	scrub_free_csums(sctx);
	kfree(sctx);
}

static void scrub_put_ctx(struct scrub_ctx *sctx)
{
	if (refcount_dec_and_test(&sctx->refs))
		scrub_free_ctx(sctx);
}

static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
		struct btrfs_fs_info *fs_info, int is_dev_replace)
{
	struct scrub_ctx *sctx;
	int i;

	sctx = kzalloc(sizeof(*sctx), GFP_KERNEL);
	if (!sctx)
		goto nomem;
	refcount_set(&sctx->refs, 1);
	sctx->is_dev_replace = is_dev_replace;
	sctx->pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO;
	sctx->curr = -1;
	sctx->fs_info = fs_info;
	INIT_LIST_HEAD(&sctx->csum_list);
	for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
		struct scrub_bio *sbio;

		sbio = kzalloc(sizeof(*sbio), GFP_KERNEL);
		if (!sbio)
			goto nomem;
		sctx->bios[i] = sbio;

		sbio->index = i;
		sbio->sctx = sctx;
		sbio->page_count = 0;
		btrfs_init_work(&sbio->work, btrfs_scrub_helper,
				scrub_bio_end_io_worker, NULL, NULL);

		if (i != SCRUB_BIOS_PER_SCTX - 1)
			sctx->bios[i]->next_free = i + 1;
		else
			sctx->bios[i]->next_free = -1;
	}
	sctx->first_free = 0;
	atomic_set(&sctx->bios_in_flight, 0);
	atomic_set(&sctx->workers_pending, 0);
	atomic_set(&sctx->cancel_req, 0);
	sctx->csum_size = btrfs_super_csum_size(fs_info->super_copy);

	spin_lock_init(&sctx->list_lock);
	spin_lock_init(&sctx->stat_lock);
	init_waitqueue_head(&sctx->list_wait);

	WARN_ON(sctx->wr_curr_bio != NULL);
	mutex_init(&sctx->wr_lock);
	sctx->wr_curr_bio = NULL;
	if (is_dev_replace) {
		WARN_ON(!fs_info->dev_replace.tgtdev);
		sctx->pages_per_wr_bio = SCRUB_PAGES_PER_WR_BIO;
		sctx->wr_tgtdev = fs_info->dev_replace.tgtdev;
		sctx->flush_all_writes = false;
	}

	return sctx;

nomem:
	scrub_free_ctx(sctx);
	return ERR_PTR(-ENOMEM);
}

static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
				     void *warn_ctx)
{
	u64 isize;
	u32 nlink;
	int ret;
	int i;
	unsigned nofs_flag;
	struct extent_buffer *eb;
	struct btrfs_inode_item *inode_item;
	struct scrub_warning *swarn = warn_ctx;
	struct btrfs_fs_info *fs_info = swarn->dev->fs_info;
	struct inode_fs_paths *ipath = NULL;
	struct btrfs_root *local_root;
	struct btrfs_key root_key;
	struct btrfs_key key;

	root_key.objectid = root;
	root_key.type = BTRFS_ROOT_ITEM_KEY;
	root_key.offset = (u64)-1;
	local_root = btrfs_read_fs_root_no_name(fs_info, &root_key);
	if (IS_ERR(local_root)) {
		ret = PTR_ERR(local_root);
		goto err;
	}

	/*
	 * This makes the path point to (inum INODE_ITEM ioff)
	 */
	key.objectid = inum;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, local_root, &key, swarn->path, 0, 0);
	if (ret) {
		btrfs_release_path(swarn->path);
		goto err;
	}

	eb = swarn->path->nodes[0];
	inode_item = btrfs_item_ptr(eb, swarn->path->slots[0],
				    struct btrfs_inode_item);
	isize = btrfs_inode_size(eb, inode_item);
	nlink = btrfs_inode_nlink(eb, inode_item);
	btrfs_release_path(swarn->path);

	/*
	 * init_ipath() allocates with GFP_KERNEL, so switch to NOFS context
	 * here to avoid recursing back into the filesystem while a
	 * transaction commit or scrub pause may be pending.
	 */
	nofs_flag = memalloc_nofs_save();
	ipath = init_ipath(4096, local_root, swarn->path);
	memalloc_nofs_restore(nofs_flag);
	if (IS_ERR(ipath)) {
		ret = PTR_ERR(ipath);
		ipath = NULL;
		goto err;
	}
	ret = paths_from_inode(inum, ipath);

	if (ret < 0)
		goto err;

	/*
	 * We deliberately ignore the fact that ipath might have been too
	 * small to get all paths; whatever fits is printed.
	 */
	for (i = 0; i < ipath->fspath->elem_cnt; ++i)
		btrfs_warn_in_rcu(fs_info,
"%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu, length %llu, links %u (path: %s)",
				  swarn->errstr, swarn->logical,
				  rcu_str_deref(swarn->dev->name),
				  swarn->physical,
				  root, inum, offset,
				  min(isize - offset, (u64)PAGE_SIZE), nlink,
				  (char *)(unsigned long)ipath->fspath->val[i]);

	free_ipath(ipath);
	return 0;

err:
	btrfs_warn_in_rcu(fs_info,
			  "%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu: path resolving failed with ret=%d",
			  swarn->errstr, swarn->logical,
			  rcu_str_deref(swarn->dev->name),
			  swarn->physical,
			  root, inum, offset, ret);

	free_ipath(ipath);
	return 0;
}

static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
{
	struct btrfs_device *dev;
	struct btrfs_fs_info *fs_info;
	struct btrfs_path *path;
	struct btrfs_key found_key;
	struct extent_buffer *eb;
	struct btrfs_extent_item *ei;
	struct scrub_warning swarn;
	unsigned long ptr = 0;
	u64 extent_item_pos;
	u64 flags = 0;
	u64 ref_root;
	u32 item_size;
	u8 ref_level = 0;
	int ret;

	WARN_ON(sblock->page_count < 1);
	dev = sblock->pagev[0]->dev;
	fs_info = sblock->sctx->fs_info;

	path = btrfs_alloc_path();
	if (!path)
		return;

	swarn.physical = sblock->pagev[0]->physical;
	swarn.logical = sblock->pagev[0]->logical;
	swarn.errstr = errstr;
	swarn.dev = NULL;

	ret = extent_from_logical(fs_info, swarn.logical, path, &found_key,
				  &flags);
	if (ret < 0)
		goto out;

	extent_item_pos = swarn.logical - found_key.objectid;
	swarn.extent_item_size = found_key.offset;

	eb = path->nodes[0];
	ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
	item_size = btrfs_item_size_nr(eb, path->slots[0]);

	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		do {
			ret = tree_backref_for_extent(&ptr, eb, &found_key, ei,
						      item_size, &ref_root,
						      &ref_level);
			btrfs_warn_in_rcu(fs_info,
"%s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu",
				errstr, swarn.logical,
				rcu_str_deref(dev->name),
				swarn.physical,
				ref_level ? "node" : "leaf",
				ret < 0 ? -1 : ref_level,
				ret < 0 ? -1 : ref_root);
		} while (ret != 1);
		btrfs_release_path(path);
	} else {
		btrfs_release_path(path);
		swarn.path = path;
		swarn.dev = dev;
		iterate_extent_inodes(fs_info, found_key.objectid,
				      extent_item_pos, 1,
				      scrub_print_warning_inode, &swarn, false);
	}

out:
	btrfs_free_path(path);
}

static inline void scrub_get_recover(struct scrub_recover *recover)
{
	refcount_inc(&recover->refs);
}

static inline void scrub_put_recover(struct btrfs_fs_info *fs_info,
				     struct scrub_recover *recover)
{
	if (refcount_dec_and_test(&recover->refs)) {
		btrfs_bio_counter_dec(fs_info);
		btrfs_put_bbio(recover->bbio);
		kfree(recover);
	}
}
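
/*
 * scrub_handle_errored_block gets called when either verification of the
 * pages failed or the bio failed to read, e.g. with EIO. In the latter case,
 * this function handles all pages in the bio, even though only one may be
 * bad. The goal of this function is to repair the errored block by using
 * the contents of one of the mirrors.
 */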
static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
{
	struct scrub_ctx *sctx = sblock_to_check->sctx;
	struct btrfs_device *dev;
	struct btrfs_fs_info *fs_info;
	u64 logical;
	unsigned int failed_mirror_index;
	unsigned int is_metadata;
	unsigned int have_csum;
	struct scrub_block *sblocks_for_recheck; /* holds one block per mirror */
	struct scrub_block *sblock_bad;
	int ret;
	int mirror_index;
	int page_num;
	int success;
	bool full_stripe_locked;
	unsigned int nofs_flag;
	static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);

	BUG_ON(sblock_to_check->page_count < 1);
	fs_info = sctx->fs_info;
	if (sblock_to_check->pagev[0]->flags & BTRFS_EXTENT_FLAG_SUPER) {
		/*
		 * An error in a super block is only reported; super blocks
		 * get written again with the next transaction commit anyway.
		 */
		spin_lock(&sctx->stat_lock);
		++sctx->stat.super_errors;
		spin_unlock(&sctx->stat_lock);
		return 0;
	}
	logical = sblock_to_check->pagev[0]->logical;
	BUG_ON(sblock_to_check->pagev[0]->mirror_num < 1);
	failed_mirror_index = sblock_to_check->pagev[0]->mirror_num - 1;
	is_metadata = !(sblock_to_check->pagev[0]->flags &
			BTRFS_EXTENT_FLAG_DATA);
	have_csum = sblock_to_check->pagev[0]->have_csum;
	dev = sblock_to_check->pagev[0]->dev;

	/*
	 * Allocations below happen from a worker context that a paused scrub
	 * may be waiting for, so they must not recurse into the filesystem:
	 * enter NOFS allocation context for the whole repair.
	 */
	nofs_flag = memalloc_nofs_save();

	/*
	 * For RAID5/6, take the full stripe lock so that scrub threads of
	 * different devices do not try to repair (and account) the same full
	 * stripe concurrently, which could lead to doubly counted csum
	 * errors or even unrecoverable errors.
	 */
	ret = lock_full_stripe(fs_info, logical, &full_stripe_locked);
	if (ret < 0) {
		memalloc_nofs_restore(nofs_flag);
		spin_lock(&sctx->stat_lock);
		if (ret == -ENOMEM)
			sctx->stat.malloc_errors++;
		sctx->stat.read_errors++;
		sctx->stat.uncorrectable_errors++;
		spin_unlock(&sctx->stat_lock);
		return ret;
	}

	/*
	 * Read all mirrors one after the other. This includes the failed
	 * mirror, re-read page by page this time, so that it is known which
	 * pages of which mirror are good and which saw I/O errors. That way
	 * a block can be repaired even when the errors of several mirrors do
	 * not overlap (e.g. mirror #1 is bad in page 0, mirror #2 in page 1).
	 */
	sblocks_for_recheck = kcalloc(BTRFS_MAX_MIRRORS,
				      sizeof(*sblocks_for_recheck), GFP_KERNEL);
	if (!sblocks_for_recheck) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.malloc_errors++;
		sctx->stat.read_errors++;
		sctx->stat.uncorrectable_errors++;
		spin_unlock(&sctx->stat_lock);
		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
		goto out;
	}

	/* Setup the context, map the logical blocks and alloc the pages */
	ret = scrub_setup_recheck_block(sblock_to_check, sblocks_for_recheck);
	if (ret) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.read_errors++;
		sctx->stat.uncorrectable_errors++;
		spin_unlock(&sctx->stat_lock);
		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
		goto out;
	}
	BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS);
	sblock_bad = sblocks_for_recheck + failed_mirror_index;

	/* Build and submit the bios for the failed mirror, check checksums */
	scrub_recheck_block(fs_info, sblock_bad, 1);

	if (!sblock_bad->header_error && !sblock_bad->checksum_error &&
	    sblock_bad->no_io_error_seen) {
		/*
		 * The error disappeared after reading page by page: either
		 * the area was part of a huge bio and other parts of the bio
		 * caused I/O errors, or the block layer merged several read
		 * requests into one and the error came from a different bio.
		 */
		spin_lock(&sctx->stat_lock);
		sctx->stat.unverified_errors++;
		sblock_to_check->data_corrected = 1;
		spin_unlock(&sctx->stat_lock);

		if (sctx->is_dev_replace)
			scrub_write_block_to_dev_replace(sblock_bad);
		goto out;
	}

	if (!sblock_bad->no_io_error_seen) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.read_errors++;
		spin_unlock(&sctx->stat_lock);
		if (__ratelimit(&_rs))
			scrub_print_warning("i/o error", sblock_to_check);
		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
	} else if (sblock_bad->checksum_error) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.csum_errors++;
		spin_unlock(&sctx->stat_lock);
		if (__ratelimit(&_rs))
			scrub_print_warning("checksum error", sblock_to_check);
		btrfs_dev_stat_inc_and_print(dev,
					     BTRFS_DEV_STAT_CORRUPTION_ERRS);
	} else if (sblock_bad->header_error) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.verify_errors++;
		spin_unlock(&sctx->stat_lock);
		if (__ratelimit(&_rs))
			scrub_print_warning("checksum/header error",
					    sblock_to_check);
		if (sblock_bad->generation_error)
			btrfs_dev_stat_inc_and_print(dev,
				BTRFS_DEV_STAT_GENERATION_ERRS);
		else
			btrfs_dev_stat_inc_and_print(dev,
				BTRFS_DEV_STAT_CORRUPTION_ERRS);
	}

	if (sctx->readonly) {
		ASSERT(!sctx->is_dev_replace);
		goto out;
	}

	/*
	 * Now build and submit the bios for the other mirrors, check their
	 * checksums. First try to pick a mirror that is completely free of
	 * I/O errors and has no checksum error: a whole block from such a
	 * mirror can repair the bad mirror in one go.
	 */
	for (mirror_index = 0; ; mirror_index++) {
		struct scrub_block *sblock_other;

		if (mirror_index == failed_mirror_index)
			continue;

		/* raid56's mirror can be more than BTRFS_MAX_MIRRORS */
		if (!scrub_is_page_on_raid56(sblock_bad->pagev[0])) {
			if (mirror_index >= BTRFS_MAX_MIRRORS)
				break;
			if (!sblocks_for_recheck[mirror_index].page_count)
				break;

			sblock_other = sblocks_for_recheck + mirror_index;
		} else {
			struct scrub_recover *r = sblock_bad->pagev[0]->recover;
			int max_allowed = r->bbio->num_stripes -
						r->bbio->num_tgtdevs;

			if (mirror_index >= max_allowed)
				break;
			if (!sblocks_for_recheck[1].page_count)
				break;

			ASSERT(failed_mirror_index == 0);
			sblock_other = sblocks_for_recheck + 1;
			sblock_other->pagev[0]->mirror_num = 1 + mirror_index;
		}

		/* Build and submit the bios, check checksums */
		scrub_recheck_block(fs_info, sblock_other, 0);

		if (!sblock_other->header_error &&
		    !sblock_other->checksum_error &&
		    sblock_other->no_io_error_seen) {
			if (sctx->is_dev_replace) {
				scrub_write_block_to_dev_replace(sblock_other);
				goto corrected_error;
			} else {
				ret = scrub_repair_block_from_good_copy(
						sblock_bad, sblock_other);
				if (!ret)
					goto corrected_error;
			}
		}
	}

	if (sblock_bad->no_io_error_seen && !sctx->is_dev_replace)
		goto did_not_correct_error;

	/*
	 * No complete mirror was good, so repair page by page instead: for
	 * each bad page pick any mirror that has a good copy of that page
	 * and rewrite only that page. Afterwards the checksum of the
	 * repaired block is verified again for the final report.
	 */
	success = 1;
	for (page_num = 0; page_num < sblock_bad->page_count;
	     page_num++) {
		struct scrub_page *page_bad = sblock_bad->pagev[page_num];
		struct scrub_block *sblock_other = NULL;

		/* Pages without I/O errors need no repair in regular scrub */
		if (!page_bad->io_error && !sctx->is_dev_replace)
			continue;

		if (scrub_is_page_on_raid56(sblock_bad->pagev[0])) {
			/*
			 * In case of dev replace, if the raid56 rebuild did
			 * not produce correct data, copy the content of
			 * sblock_bad so the target device matches the source
			 * device, instead of writing the garbage held in the
			 * recheck blocks.
			 */
			sblock_other = NULL;
		} else if (page_bad->io_error) {
			/* Try to find a page without I/O error in a mirror */
			for (mirror_index = 0;
			     mirror_index < BTRFS_MAX_MIRRORS &&
			     sblocks_for_recheck[mirror_index].page_count > 0;
			     mirror_index++) {
				if (!sblocks_for_recheck[mirror_index].
				    pagev[page_num]->io_error) {
					sblock_other = sblocks_for_recheck +
						       mirror_index;
					break;
				}
			}
			if (!sblock_other)
				success = 0;
		}

		if (sctx->is_dev_replace) {
			/*
			 * Did not find a mirror to fetch the page from.
			 * scrub_write_page_to_dev_replace() handles this
			 * case (page_bad->io_error) by filling the block
			 * with zeros before submitting the write request.
			 */
			if (!sblock_other)
				sblock_other = sblock_bad;

			if (scrub_write_page_to_dev_replace(sblock_other,
							    page_num) != 0) {
				atomic64_inc(
					&fs_info->dev_replace.num_write_errors);
				success = 0;
			}
		} else if (sblock_other) {
			ret = scrub_repair_page_from_good_copy(sblock_bad,
							       sblock_other,
							       page_num, 0);
			if (!ret)
				page_bad->io_error = 0;
			else
				success = 0;
		}
	}

	if (success && !sctx->is_dev_replace) {
		if (is_metadata || have_csum) {
			/*
			 * Need to verify the checksum now that all sectors
			 * on disk are repaired (the write request for the
			 * data to be repaired is on its way). Just be lazy
			 * and use scrub_recheck_block(), which re-reads the
			 * data before the checksum is verified; most likely
			 * the data comes out of the page cache.
			 */
			scrub_recheck_block(fs_info, sblock_bad, 1);
			if (!sblock_bad->header_error &&
			    !sblock_bad->checksum_error &&
			    sblock_bad->no_io_error_seen)
				goto corrected_error;
			else
				goto did_not_correct_error;
		} else {
corrected_error:
			spin_lock(&sctx->stat_lock);
			sctx->stat.corrected_errors++;
			sblock_to_check->data_corrected = 1;
			spin_unlock(&sctx->stat_lock);
			btrfs_err_rl_in_rcu(fs_info,
				"fixed up error at logical %llu on dev %s",
				logical, rcu_str_deref(dev->name));
		}
	} else {
did_not_correct_error:
		spin_lock(&sctx->stat_lock);
		sctx->stat.uncorrectable_errors++;
		spin_unlock(&sctx->stat_lock);
		btrfs_err_rl_in_rcu(fs_info,
			"unable to fixup (regular) error at logical %llu on dev %s",
			logical, rcu_str_deref(dev->name));
	}

out:
	if (sblocks_for_recheck) {
		for (mirror_index = 0; mirror_index < BTRFS_MAX_MIRRORS;
		     mirror_index++) {
			struct scrub_block *sblock = sblocks_for_recheck +
						     mirror_index;
			struct scrub_recover *recover;
			int page_index;

			for (page_index = 0; page_index < sblock->page_count;
			     page_index++) {
				sblock->pagev[page_index]->sblock = NULL;
				recover = sblock->pagev[page_index]->recover;
				if (recover) {
					scrub_put_recover(fs_info, recover);
					sblock->pagev[page_index]->recover =
									NULL;
				}
				scrub_page_put(sblock->pagev[page_index]);
			}
		}
		kfree(sblocks_for_recheck);
	}

	ret = unlock_full_stripe(fs_info, logical, full_stripe_locked);
	memalloc_nofs_restore(nofs_flag);
	if (ret < 0)
		return ret;
	return 0;
}

static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio)
{
	if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
		return 2;
	else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6)
		return 3;
	else
		return (int)bbio->num_stripes;
}

static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type,
						 u64 *raid_map,
						 u64 mapped_length,
						 int nstripes, int mirror,
						 int *stripe_index,
						 u64 *stripe_offset)
{
	int i;

	if (map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
		/* RAID5/6: find the data stripe that covers @logical */
		for (i = 0; i < nstripes; i++) {
			if (raid_map[i] == RAID6_Q_STRIPE ||
			    raid_map[i] == RAID5_P_STRIPE)
				continue;

			if (logical >= raid_map[i] &&
			    logical < raid_map[i] + mapped_length)
				break;
		}

		*stripe_index = i;
		*stripe_offset = logical - raid_map[i];
	} else {
		/* The other RAID types: the mirror number is the stripe index */
		*stripe_index = mirror;
		*stripe_offset = 0;
	}
}

static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
				     struct scrub_block *sblocks_for_recheck)
{
	struct scrub_ctx *sctx = original_sblock->sctx;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	u64 length = original_sblock->page_count * PAGE_SIZE;
	u64 logical = original_sblock->pagev[0]->logical;
	u64 generation = original_sblock->pagev[0]->generation;
	u64 flags = original_sblock->pagev[0]->flags;
	u64 have_csum = original_sblock->pagev[0]->have_csum;
	struct scrub_recover *recover;
	struct btrfs_bio *bbio;
	u64 sublen;
	u64 mapped_length;
	u64 stripe_offset;
	int stripe_index;
	int page_index = 0;
	int mirror_index;
	int nmirrors;
	int ret;

	/*
	 * Map the logical range page by page and build one scrub_block per
	 * mirror. Note that the refs and outstanding_pages members are not
	 * used in the blocks that are built for the recheck procedure.
	 */
	while (length > 0) {
		sublen = min_t(u64, length, PAGE_SIZE);
		mapped_length = sublen;
		bbio = NULL;

		/*
		 * With a length of PAGE_SIZE, each returned stripe represents
		 * one mirror
		 */
		btrfs_bio_counter_inc_blocked(fs_info);
		ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
				       logical, &mapped_length, &bbio);
		if (ret || !bbio || mapped_length < sublen) {
			btrfs_put_bbio(bbio);
			btrfs_bio_counter_dec(fs_info);
			return -EIO;
		}

		recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS);
		if (!recover) {
			btrfs_put_bbio(bbio);
			btrfs_bio_counter_dec(fs_info);
			return -ENOMEM;
		}

		refcount_set(&recover->refs, 1);
		recover->bbio = bbio;
		recover->map_length = mapped_length;

		BUG_ON(page_index >= SCRUB_MAX_PAGES_PER_BLOCK);

		nmirrors = min(scrub_nr_raid_mirrors(bbio), BTRFS_MAX_MIRRORS);

		for (mirror_index = 0; mirror_index < nmirrors;
		     mirror_index++) {
			struct scrub_block *sblock;
			struct scrub_page *page;

			sblock = sblocks_for_recheck + mirror_index;
			sblock->sctx = sctx;

			page = kzalloc(sizeof(*page), GFP_NOFS);
			if (!page) {
leave_nomem:
				spin_lock(&sctx->stat_lock);
				sctx->stat.malloc_errors++;
				spin_unlock(&sctx->stat_lock);
				scrub_put_recover(fs_info, recover);
				return -ENOMEM;
			}
			scrub_page_get(page);
			sblock->pagev[page_index] = page;
			page->sblock = sblock;
			page->flags = flags;
			page->generation = generation;
			page->logical = logical;
			page->have_csum = have_csum;
			if (have_csum)
				memcpy(page->csum,
				       original_sblock->pagev[0]->csum,
				       sctx->csum_size);

			scrub_stripe_index_and_offset(logical,
						      bbio->map_type,
						      bbio->raid_map,
						      mapped_length,
						      bbio->num_stripes -
						      bbio->num_tgtdevs,
						      mirror_index,
						      &stripe_index,
						      &stripe_offset);
			page->physical = bbio->stripes[stripe_index].physical +
					 stripe_offset;
			page->dev = bbio->stripes[stripe_index].dev;

			BUG_ON(page_index >= original_sblock->page_count);
			page->physical_for_dev_replace =
				original_sblock->pagev[page_index]->
				physical_for_dev_replace;
			/* For missing devices, dev->bdev is NULL */
			page->mirror_num = mirror_index + 1;
			sblock->page_count++;
			page->page = alloc_page(GFP_NOFS);
			if (!page->page)
				goto leave_nomem;

			scrub_get_recover(recover);
			page->recover = recover;
		}
		scrub_put_recover(fs_info, recover);
		length -= sublen;
		logical += sublen;
		page_index++;
	}

	return 0;
}

static void scrub_bio_wait_endio(struct bio *bio)
{
	complete(bio->bi_private);
}
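
/*
 * Synchronously read the pages of a block through the RAID5/6 recovery path
 * and wait for completion. Returns 0 on success or a negative errno derived
 * from the bio status.
 */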
static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
					struct bio *bio,
					struct scrub_page *page)
{
	DECLARE_COMPLETION_ONSTACK(done);
	int ret;
	int mirror_num;

	bio->bi_iter.bi_sector = page->logical >> 9;
	bio->bi_private = &done;
	bio->bi_end_io = scrub_bio_wait_endio;

	mirror_num = page->sblock->pagev[0]->mirror_num;
	ret = raid56_parity_recover(fs_info, bio, page->recover->bbio,
				    page->recover->map_length,
				    mirror_num, 0);
	if (ret)
		return ret;

	wait_for_completion_io(&done);
	return blk_status_to_errno(bio->bi_status);
}

static void scrub_recheck_block_on_raid56(struct btrfs_fs_info *fs_info,
					  struct scrub_block *sblock)
{
	struct scrub_page *first_page = sblock->pagev[0];
	struct bio *bio;
	int page_num;

	/* All pages in sblock belong to the same stripe on the same device */
	ASSERT(first_page->dev);
	if (!first_page->dev->bdev)
		goto out;

	bio = btrfs_io_bio_alloc(BIO_MAX_PAGES);
	bio_set_dev(bio, first_page->dev->bdev);

	for (page_num = 0; page_num < sblock->page_count; page_num++) {
		struct scrub_page *page = sblock->pagev[page_num];

		WARN_ON(!page->page);
		bio_add_page(bio, page->page, PAGE_SIZE, 0);
	}

	if (scrub_submit_raid56_bio_wait(fs_info, bio, first_page)) {
		bio_put(bio);
		goto out;
	}

	bio_put(bio);

	scrub_recheck_block_checksum(sblock);

	return;
out:
	for (page_num = 0; page_num < sblock->page_count; page_num++)
		sblock->pagev[page_num]->io_error = 1;

	sblock->no_io_error_seen = 0;
}
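
/*
 * This function will check the on disk data for checksum errors, header
 * errors and read I/O errors. If any I/O error happens, the exact pages
 * which are errored are marked as being bad. The goal of this function is
 * only to know which pages are bad; no repair happens here.
 */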
static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
				struct scrub_block *sblock,
				int retry_failed_mirror)
{
	int page_num;

	sblock->no_io_error_seen = 1;

	/* Short cut for raid56 */
	if (!retry_failed_mirror && scrub_is_page_on_raid56(sblock->pagev[0]))
		return scrub_recheck_block_on_raid56(fs_info, sblock);

	for (page_num = 0; page_num < sblock->page_count; page_num++) {
		struct bio *bio;
		struct scrub_page *page = sblock->pagev[page_num];

		if (page->dev->bdev == NULL) {
			page->io_error = 1;
			sblock->no_io_error_seen = 0;
			continue;
		}

		WARN_ON(!page->page);
		bio = btrfs_io_bio_alloc(1);
		bio_set_dev(bio, page->dev->bdev);

		bio_add_page(bio, page->page, PAGE_SIZE, 0);
		bio->bi_iter.bi_sector = page->physical >> 9;
		bio->bi_opf = REQ_OP_READ;

		if (btrfsic_submit_bio_wait(bio)) {
			page->io_error = 1;
			sblock->no_io_error_seen = 0;
		}

		bio_put(bio);
	}

	if (sblock->no_io_error_seen)
		scrub_recheck_block_checksum(sblock);
}

static inline int scrub_check_fsid(u8 fsid[],
				   struct scrub_page *spage)
{
	struct btrfs_fs_devices *fs_devices = spage->dev->fs_devices;
	int ret;

	ret = memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
	return !ret;
}

static void scrub_recheck_block_checksum(struct scrub_block *sblock)
{
	sblock->header_error = 0;
	sblock->checksum_error = 0;
	sblock->generation_error = 0;

	if (sblock->pagev[0]->flags & BTRFS_EXTENT_FLAG_DATA)
		scrub_checksum_data(sblock);
	else
		scrub_checksum_tree_block(sblock);
}

static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
					     struct scrub_block *sblock_good)
{
	int page_num;
	int ret = 0;

	for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
		int ret_sub;

		ret_sub = scrub_repair_page_from_good_copy(sblock_bad,
							   sblock_good,
							   page_num, 1);
		if (ret_sub)
			ret = ret_sub;
	}

	return ret;
}

static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
					    struct scrub_block *sblock_good,
					    int page_num, int force_write)
{
	struct scrub_page *page_bad = sblock_bad->pagev[page_num];
	struct scrub_page *page_good = sblock_good->pagev[page_num];
	struct btrfs_fs_info *fs_info = sblock_bad->sctx->fs_info;

	BUG_ON(page_bad->page == NULL);
	BUG_ON(page_good->page == NULL);
	if (force_write || sblock_bad->header_error ||
	    sblock_bad->checksum_error || page_bad->io_error) {
		struct bio *bio;
		int ret;

		if (!page_bad->dev->bdev) {
			btrfs_warn_rl(fs_info,
				"scrub_repair_page_from_good_copy(bdev == NULL) is unexpected");
			return -EIO;
		}

		bio = btrfs_io_bio_alloc(1);
		bio_set_dev(bio, page_bad->dev->bdev);
		bio->bi_iter.bi_sector = page_bad->physical >> 9;
		bio->bi_opf = REQ_OP_WRITE;

		ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
		if (PAGE_SIZE != ret) {
			bio_put(bio);
			return -EIO;
		}

		if (btrfsic_submit_bio_wait(bio)) {
			btrfs_dev_stat_inc_and_print(page_bad->dev,
				BTRFS_DEV_STAT_WRITE_ERRS);
			atomic64_inc(&fs_info->dev_replace.num_write_errors);
			bio_put(bio);
			return -EIO;
		}
		bio_put(bio);
	}

	return 0;
}

static void scrub_write_block_to_dev_replace(struct scrub_block *sblock)
{
	struct btrfs_fs_info *fs_info = sblock->sctx->fs_info;
	int page_num;

	/*
	 * This block is used for the check of the parity on the source
	 * device, so the data need not be written to the destination device.
	 */
	if (sblock->sparity)
		return;

	for (page_num = 0; page_num < sblock->page_count; page_num++) {
		int ret;

		ret = scrub_write_page_to_dev_replace(sblock, page_num);
		if (ret)
			atomic64_inc(&fs_info->dev_replace.num_write_errors);
	}
}

static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
					   int page_num)
{
	struct scrub_page *spage = sblock->pagev[page_num];

	BUG_ON(spage->page == NULL);
	if (spage->io_error) {
		/* The read of this page failed, write zeros instead */
		void *mapped_buffer = kmap_atomic(spage->page);

		clear_page(mapped_buffer);
		flush_dcache_page(spage->page);
		kunmap_atomic(mapped_buffer);
	}
	return scrub_add_page_to_wr_bio(sblock->sctx, spage);
}
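
/*
 * Queue a page for writing to the dev-replace target. Pages are collected
 * into the current write bio as long as they are physically and logically
 * contiguous; the bio is submitted once it is full or the next page does
 * not continue the sequence.
 */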
static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
				    struct scrub_page *spage)
{
	struct scrub_bio *sbio;
	int ret;

	mutex_lock(&sctx->wr_lock);
again:
	if (!sctx->wr_curr_bio) {
		sctx->wr_curr_bio = kzalloc(sizeof(*sctx->wr_curr_bio),
					    GFP_KERNEL);
		if (!sctx->wr_curr_bio) {
			mutex_unlock(&sctx->wr_lock);
			return -ENOMEM;
		}
		sctx->wr_curr_bio->sctx = sctx;
		sctx->wr_curr_bio->page_count = 0;
	}
	sbio = sctx->wr_curr_bio;
	if (sbio->page_count == 0) {
		struct bio *bio;

		sbio->physical = spage->physical_for_dev_replace;
		sbio->logical = spage->logical;
		sbio->dev = sctx->wr_tgtdev;
		bio = sbio->bio;
		if (!bio) {
			bio = btrfs_io_bio_alloc(sctx->pages_per_wr_bio);
			sbio->bio = bio;
		}

		bio->bi_private = sbio;
		bio->bi_end_io = scrub_wr_bio_end_io;
		bio_set_dev(bio, sbio->dev->bdev);
		bio->bi_iter.bi_sector = sbio->physical >> 9;
		bio->bi_opf = REQ_OP_WRITE;
		sbio->status = 0;
	} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
		   spage->physical_for_dev_replace ||
		   sbio->logical + sbio->page_count * PAGE_SIZE !=
		   spage->logical) {
		scrub_wr_submit(sctx);
		goto again;
	}

	ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
	if (ret != PAGE_SIZE) {
		if (sbio->page_count < 1) {
			bio_put(sbio->bio);
			sbio->bio = NULL;
			mutex_unlock(&sctx->wr_lock);
			return -EIO;
		}
		scrub_wr_submit(sctx);
		goto again;
	}

	sbio->pagev[sbio->page_count] = spage;
	scrub_page_get(spage);
	sbio->page_count++;
	if (sbio->page_count == sctx->pages_per_wr_bio)
		scrub_wr_submit(sctx);
	mutex_unlock(&sctx->wr_lock);

	return 0;
}

static void scrub_wr_submit(struct scrub_ctx *sctx)
{
	struct scrub_bio *sbio;

	if (!sctx->wr_curr_bio)
		return;

	sbio = sctx->wr_curr_bio;
	sctx->wr_curr_bio = NULL;
	WARN_ON(!sbio->bio->bi_disk);
	scrub_pending_bio_inc(sctx);
	/*
	 * All writes are funneled through this single submission point (under
	 * wr_lock), so the block layer can order the requests before they
	 * reach the driver.
	 */
	btrfsic_submit_bio(sbio->bio);
}

static void scrub_wr_bio_end_io(struct bio *bio)
{
	struct scrub_bio *sbio = bio->bi_private;
	struct btrfs_fs_info *fs_info = sbio->dev->fs_info;

	sbio->status = bio->bi_status;
	sbio->bio = bio;

	btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper,
			scrub_wr_bio_end_io_worker, NULL, NULL);
	btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work);
}

static void scrub_wr_bio_end_io_worker(struct btrfs_work *work)
{
	struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
	struct scrub_ctx *sctx = sbio->sctx;
	int i;

	WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO);
	if (sbio->status) {
		struct btrfs_dev_replace *dev_replace =
			&sbio->sctx->fs_info->dev_replace;

		for (i = 0; i < sbio->page_count; i++) {
			struct scrub_page *spage = sbio->pagev[i];

			spage->io_error = 1;
			atomic64_inc(&dev_replace->num_write_errors);
		}
	}

	for (i = 0; i < sbio->page_count; i++)
		scrub_page_put(sbio->pagev[i]);

	bio_put(sbio->bio);
	kfree(sbio);
	scrub_pending_bio_dec(sctx);
}

static int scrub_checksum(struct scrub_block *sblock)
{
	u64 flags;
	int ret;

	/*
	 * Reset the per-block error state before checking; callers of this
	 * function only look at the return value.
	 */
	sblock->header_error = 0;
	sblock->generation_error = 0;
	sblock->checksum_error = 0;

	WARN_ON(sblock->page_count < 1);
	flags = sblock->pagev[0]->flags;
	ret = 0;
	if (flags & BTRFS_EXTENT_FLAG_DATA)
		ret = scrub_checksum_data(sblock);
	else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
		ret = scrub_checksum_tree_block(sblock);
	else if (flags & BTRFS_EXTENT_FLAG_SUPER)
		(void)scrub_checksum_super(sblock);
	else
		WARN_ON(1);
	if (ret)
		scrub_handle_errored_block(sblock);

	return ret;
}

static int scrub_checksum_data(struct scrub_block *sblock)
{
	struct scrub_ctx *sctx = sblock->sctx;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
	u8 csum[BTRFS_CSUM_SIZE];
	u8 *on_disk_csum;
	struct page *page;
	void *buffer;
	u64 len;
	int index;

	BUG_ON(sblock->page_count < 1);
	if (!sblock->pagev[0]->have_csum)
		return 0;

	shash->tfm = fs_info->csum_shash;
	crypto_shash_init(shash);

	on_disk_csum = sblock->pagev[0]->csum;
	page = sblock->pagev[0]->page;
	buffer = kmap_atomic(page);

	len = sctx->fs_info->sectorsize;
	index = 0;
	for (;;) {
		u64 l = min_t(u64, len, PAGE_SIZE);

		crypto_shash_update(shash, buffer, l);
		kunmap_atomic(buffer);
		len -= l;
		if (len == 0)
			break;
		index++;
		BUG_ON(index >= sblock->page_count);
		BUG_ON(!sblock->pagev[index]->page);
		page = sblock->pagev[index]->page;
		buffer = kmap_atomic(page);
	}

	crypto_shash_final(shash, csum);
	if (memcmp(csum, on_disk_csum, sctx->csum_size))
		sblock->checksum_error = 1;

	return sblock->checksum_error;
}

static int scrub_checksum_tree_block(struct scrub_block *sblock)
{
	struct scrub_ctx *sctx = sblock->sctx;
	struct btrfs_header *h;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
	u8 calculated_csum[BTRFS_CSUM_SIZE];
	u8 on_disk_csum[BTRFS_CSUM_SIZE];
	struct page *page;
	void *mapped_buffer;
	u64 mapped_size;
	void *p;
	u64 len;
	int index;

	shash->tfm = fs_info->csum_shash;
	crypto_shash_init(shash);

	BUG_ON(sblock->page_count < 1);
	page = sblock->pagev[0]->page;
	mapped_buffer = kmap_atomic(page);
	h = (struct btrfs_header *)mapped_buffer;
	memcpy(on_disk_csum, h->csum, sctx->csum_size);

	/*
	 * we don't use the getter functions here, as we
	 * a) don't have an extent buffer and
	 * b) the page is already kmapped
	 */
	if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h))
		sblock->header_error = 1;

	if (sblock->pagev[0]->generation != btrfs_stack_header_generation(h)) {
		sblock->header_error = 1;
		sblock->generation_error = 1;
	}

	if (!scrub_check_fsid(h->fsid, sblock->pagev[0]))
		sblock->header_error = 1;

	if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
		   BTRFS_UUID_SIZE))
		sblock->header_error = 1;

	len = sctx->fs_info->nodesize - BTRFS_CSUM_SIZE;
	mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
	p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
	index = 0;
	for (;;) {
		u64 l = min_t(u64, len, mapped_size);

		crypto_shash_update(shash, p, l);
		kunmap_atomic(mapped_buffer);
		len -= l;
		if (len == 0)
			break;
		index++;
		BUG_ON(index >= sblock->page_count);
		BUG_ON(!sblock->pagev[index]->page);
		page = sblock->pagev[index]->page;
		mapped_buffer = kmap_atomic(page);
		mapped_size = PAGE_SIZE;
		p = mapped_buffer;
	}

	crypto_shash_final(shash, calculated_csum);
	if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
		sblock->checksum_error = 1;

	return sblock->header_error || sblock->checksum_error;
}

static int scrub_checksum_super(struct scrub_block *sblock)
{
	struct btrfs_super_block *s;
	struct scrub_ctx *sctx = sblock->sctx;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
	u8 calculated_csum[BTRFS_CSUM_SIZE];
	u8 on_disk_csum[BTRFS_CSUM_SIZE];
	struct page *page;
	void *mapped_buffer;
	u64 mapped_size;
	void *p;
	int fail_gen = 0;
	int fail_cor = 0;
	u64 len;
	int index;

	shash->tfm = fs_info->csum_shash;
	crypto_shash_init(shash);

	BUG_ON(sblock->page_count < 1);
	page = sblock->pagev[0]->page;
	mapped_buffer = kmap_atomic(page);
	s = (struct btrfs_super_block *)mapped_buffer;
	memcpy(on_disk_csum, s->csum, sctx->csum_size);

	if (sblock->pagev[0]->logical != btrfs_super_bytenr(s))
		++fail_cor;

	if (sblock->pagev[0]->generation != btrfs_super_generation(s))
		++fail_gen;

	if (!scrub_check_fsid(s->fsid, sblock->pagev[0]))
		++fail_cor;

	len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE;
	mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
	p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
	index = 0;
	for (;;) {
		u64 l = min_t(u64, len, mapped_size);

		crypto_shash_update(shash, p, l);
		kunmap_atomic(mapped_buffer);
		len -= l;
		if (len == 0)
			break;
		index++;
		BUG_ON(index >= sblock->page_count);
		BUG_ON(!sblock->pagev[index]->page);
		page = sblock->pagev[index]->page;
		mapped_buffer = kmap_atomic(page);
		mapped_size = PAGE_SIZE;
		p = mapped_buffer;
	}

	crypto_shash_final(shash, calculated_csum);
	if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
		++fail_cor;

	if (fail_cor + fail_gen) {
		/*
		 * If we find an error in a super block, we just report it.
		 * They will get written with the next transaction commit
		 * anyway
		 */
		spin_lock(&sctx->stat_lock);
		++sctx->stat.super_errors;
		spin_unlock(&sctx->stat_lock);
		if (fail_cor)
			btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev,
				BTRFS_DEV_STAT_CORRUPTION_ERRS);
		else
			btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev,
				BTRFS_DEV_STAT_GENERATION_ERRS);
	}

	return fail_cor + fail_gen;
}

static void scrub_block_get(struct scrub_block *sblock)
{
	refcount_inc(&sblock->refs);
}

static void scrub_block_put(struct scrub_block *sblock)
{
	if (refcount_dec_and_test(&sblock->refs)) {
		int i;

		if (sblock->sparity)
			scrub_parity_put(sblock->sparity);

		for (i = 0; i < sblock->page_count; i++)
			scrub_page_put(sblock->pagev[i]);
		kfree(sblock);
	}
}

static void scrub_page_get(struct scrub_page *spage)
{
	atomic_inc(&spage->refs);
}

static void scrub_page_put(struct scrub_page *spage)
{
	if (atomic_dec_and_test(&spage->refs)) {
		if (spage->page)
			__free_page(spage->page);
		kfree(spage);
	}
}

static void scrub_submit(struct scrub_ctx *sctx)
{
	struct scrub_bio *sbio;

	if (sctx->curr == -1)
		return;

	sbio = sctx->bios[sctx->curr];
	sctx->curr = -1;
	scrub_pending_bio_inc(sctx);
	btrfsic_submit_bio(sbio->bio);
}

static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
				    struct scrub_page *spage)
{
	struct scrub_block *sblock = spage->sblock;
	struct scrub_bio *sbio;
	int ret;

again:
	/*
	 * Grab a fresh bio or wait for one to become available
	 */
	while (sctx->curr == -1) {
		spin_lock(&sctx->list_lock);
		sctx->curr = sctx->first_free;
		if (sctx->curr != -1) {
			sctx->first_free = sctx->bios[sctx->curr]->next_free;
			sctx->bios[sctx->curr]->next_free = -1;
			sctx->bios[sctx->curr]->page_count = 0;
			spin_unlock(&sctx->list_lock);
		} else {
			spin_unlock(&sctx->list_lock);
			wait_event(sctx->list_wait, sctx->first_free != -1);
		}
	}
	sbio = sctx->bios[sctx->curr];
	if (sbio->page_count == 0) {
		struct bio *bio;

		sbio->physical = spage->physical;
		sbio->logical = spage->logical;
		sbio->dev = spage->dev;
		bio = sbio->bio;
		if (!bio) {
			bio = btrfs_io_bio_alloc(sctx->pages_per_rd_bio);
			sbio->bio = bio;
		}

		bio->bi_private = sbio;
		bio->bi_end_io = scrub_bio_end_io;
		bio_set_dev(bio, sbio->dev->bdev);
		bio->bi_iter.bi_sector = sbio->physical >> 9;
		bio->bi_opf = REQ_OP_READ;
		sbio->status = 0;
	} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
		   spage->physical ||
		   sbio->logical + sbio->page_count * PAGE_SIZE !=
		   spage->logical ||
		   sbio->dev != spage->dev) {
		scrub_submit(sctx);
		goto again;
	}

	sbio->pagev[sbio->page_count] = spage;
	ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
	if (ret != PAGE_SIZE) {
		if (sbio->page_count < 1) {
			bio_put(sbio->bio);
			sbio->bio = NULL;
			return -EIO;
		}
		scrub_submit(sctx);
		goto again;
	}

	scrub_block_get(sblock); /* one for the page added to the bio */
	atomic_inc(&sblock->outstanding_pages);
	sbio->page_count++;
	if (sbio->page_count == sctx->pages_per_rd_bio)
		scrub_submit(sctx);

	return 0;
}

static void scrub_missing_raid56_end_io(struct bio *bio)
{
	struct scrub_block *sblock = bio->bi_private;
	struct btrfs_fs_info *fs_info = sblock->sctx->fs_info;

	if (bio->bi_status)
		sblock->no_io_error_seen = 0;

	bio_put(bio);

	btrfs_queue_work(fs_info->scrub_workers, &sblock->work);
}

static void scrub_missing_raid56_worker(struct btrfs_work *work)
{
	struct scrub_block *sblock = container_of(work, struct scrub_block, work);
	struct scrub_ctx *sctx = sblock->sctx;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	u64 logical;
	struct btrfs_device *dev;

	logical = sblock->pagev[0]->logical;
	dev = sblock->pagev[0]->dev;

	if (sblock->no_io_error_seen)
		scrub_recheck_block_checksum(sblock);

	if (!sblock->no_io_error_seen) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.read_errors++;
		spin_unlock(&sctx->stat_lock);
		btrfs_err_rl_in_rcu(fs_info,
			"IO error rebuilding logical %llu for dev %s",
			logical, rcu_str_deref(dev->name));
	} else if (sblock->header_error || sblock->checksum_error) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.uncorrectable_errors++;
		spin_unlock(&sctx->stat_lock);
		btrfs_err_rl_in_rcu(fs_info,
			"failed to rebuild valid logical %llu for dev %s",
			logical, rcu_str_deref(dev->name));
	} else {
		scrub_write_block_to_dev_replace(sblock);
	}

	scrub_block_put(sblock);

	if (sctx->is_dev_replace && sctx->flush_all_writes) {
		mutex_lock(&sctx->wr_lock);
		scrub_wr_submit(sctx);
		mutex_unlock(&sctx->wr_lock);
	}

	scrub_pending_bio_dec(sctx);
}

static void scrub_missing_raid56_pages(struct scrub_block *sblock)
{
	struct scrub_ctx *sctx = sblock->sctx;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	u64 length = sblock->page_count * PAGE_SIZE;
	u64 logical = sblock->pagev[0]->logical;
	struct btrfs_bio *bbio = NULL;
	struct bio *bio;
	struct btrfs_raid_bio *rbio;
	int ret;
	int i;

	btrfs_bio_counter_inc_blocked(fs_info);
	ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
			       &length, &bbio);
	if (ret || !bbio || !bbio->raid_map)
		goto bbio_out;

	if (WARN_ON(!sctx->is_dev_replace ||
		    !(bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK))) {
		/*
		 * We shouldn't be scrubbing a missing device. Even for dev
		 * replace, we should only get here for RAID 5/6. We either
		 * managed to mount something with no mirrors remaining or
		 * there's a bug in scrub_remap_extent()/btrfs_map_block().
		 */
		goto bbio_out;
	}

	bio = btrfs_io_bio_alloc(0);
	bio->bi_iter.bi_sector = logical >> 9;
	bio->bi_private = sblock;
	bio->bi_end_io = scrub_missing_raid56_end_io;

	rbio = raid56_alloc_missing_rbio(fs_info, bio, bbio, length);
	if (!rbio)
		goto rbio_out;

	for (i = 0; i < sblock->page_count; i++) {
		struct scrub_page *spage = sblock->pagev[i];

		raid56_add_scrub_pages(rbio, spage->page, spage->logical);
	}

	btrfs_init_work(&sblock->work, btrfs_scrub_helper,
			scrub_missing_raid56_worker, NULL, NULL);
	scrub_block_get(sblock);
	scrub_pending_bio_inc(sctx);
	raid56_submit_missing_rbio(rbio);
	return;

rbio_out:
	bio_put(bio);
bbio_out:
	btrfs_bio_counter_dec(fs_info);
	btrfs_put_bbio(bbio);
	spin_lock(&sctx->stat_lock);
	sctx->stat.malloc_errors++;
	spin_unlock(&sctx->stat_lock);
}
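
/*
 * Split a logical range into scrub_pages, attach them to a new scrub_block
 * and queue that block for reading. If the device is missing, the pages are
 * rebuilt through the RAID5/6 code instead of being read directly.
 */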
static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
		       u64 physical, struct btrfs_device *dev, u64 flags,
		       u64 gen, int mirror_num, u8 *csum, int force,
		       u64 physical_for_dev_replace)
{
	struct scrub_block *sblock;
	int index;

	sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);
	if (!sblock) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.malloc_errors++;
		spin_unlock(&sctx->stat_lock);
		return -ENOMEM;
	}

	/*
	 * One ref inside this function, plus one for each page added to
	 * a bio later on.
	 */
	refcount_set(&sblock->refs, 1);
	sblock->sctx = sctx;
	sblock->no_io_error_seen = 1;

	for (index = 0; len > 0; index++) {
		struct scrub_page *spage;
		u64 l = min_t(u64, len, PAGE_SIZE);

		spage = kzalloc(sizeof(*spage), GFP_KERNEL);
		if (!spage) {
leave_nomem:
			spin_lock(&sctx->stat_lock);
			sctx->stat.malloc_errors++;
			spin_unlock(&sctx->stat_lock);
			scrub_block_put(sblock);
			return -ENOMEM;
		}
		BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
		scrub_page_get(spage);
		sblock->pagev[index] = spage;
		spage->sblock = sblock;
		spage->dev = dev;
		spage->flags = flags;
		spage->generation = gen;
		spage->logical = logical;
		spage->physical = physical;
		spage->physical_for_dev_replace = physical_for_dev_replace;
		spage->mirror_num = mirror_num;
		if (csum) {
			spage->have_csum = 1;
			memcpy(spage->csum, csum, sctx->csum_size);
		} else {
			spage->have_csum = 0;
		}
		sblock->page_count++;
		spage->page = alloc_page(GFP_KERNEL);
		if (!spage->page)
			goto leave_nomem;
		len -= l;
		logical += l;
		physical += l;
		physical_for_dev_replace += l;
	}

	WARN_ON(sblock->page_count == 0);
	if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) {
		/*
		 * This case should only be hit for RAID 5/6 device replace.
		 * See the comment in scrub_missing_raid56_pages() for details.
		 */
		scrub_missing_raid56_pages(sblock);
	} else {
		for (index = 0; index < sblock->page_count; index++) {
			struct scrub_page *spage = sblock->pagev[index];
			int ret;

			ret = scrub_add_page_to_rd_bio(sctx, spage);
			if (ret) {
				scrub_block_put(sblock);
				return ret;
			}
		}

		if (force)
			scrub_submit(sctx);
	}

	/* last one frees, either here or in bio completion for last page */
	scrub_block_put(sblock);
	return 0;
}
2312
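/*
 * Completion callback for scrub read bios: stash the status and defer all
 * processing to a worker, as this may be called from interrupt context.
 */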
static void scrub_bio_end_io(struct bio *bio)
{
	struct scrub_bio *sbio = bio->bi_private;
	struct btrfs_fs_info *fs_info = sbio->dev->fs_info;

	sbio->status = bio->bi_status;
	sbio->bio = bio;

	btrfs_queue_work(fs_info->scrub_workers, &sbio->work);
}

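/*
 * Worker half of scrub_bio_end_io(): propagate an I/O error to all pages of
 * the bio, complete every scrub_block whose last outstanding page finished,
 * put the scrub_bio back on the free list and, in the dev-replace case, flush
 * queued write bios when a flush was requested.
 */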
static void scrub_bio_end_io_worker(struct btrfs_work *work)
{
	struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
	struct scrub_ctx *sctx = sbio->sctx;
	int i;

	BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO);
	if (sbio->status) {
		for (i = 0; i < sbio->page_count; i++) {
			struct scrub_page *spage = sbio->pagev[i];

			spage->io_error = 1;
			spage->sblock->no_io_error_seen = 0;
		}
	}

	/* now complete the scrub_block items that have all pages completed */
	for (i = 0; i < sbio->page_count; i++) {
		struct scrub_page *spage = sbio->pagev[i];
		struct scrub_block *sblock = spage->sblock;

		if (atomic_dec_and_test(&sblock->outstanding_pages))
			scrub_block_complete(sblock);
		scrub_block_put(sblock);
	}

	bio_put(sbio->bio);
	sbio->bio = NULL;
	spin_lock(&sctx->list_lock);
	sbio->next_free = sctx->first_free;
	sctx->first_free = sbio->index;
	spin_unlock(&sctx->list_lock);

	if (sctx->is_dev_replace && sctx->flush_all_writes) {
		mutex_lock(&sctx->wr_lock);
		scrub_wr_submit(sctx);
		mutex_unlock(&sctx->wr_lock);
	}

	scrub_pending_bio_dec(sctx);
}

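/*
 * Mark the sectors covered by [start, start + len) in the given per-stripe
 * bitmap. @start is a logical address inside the parity stripe; if the range
 * runs past the end of the stripe, the marking wraps around to the front.
 */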
static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
				       unsigned long *bitmap,
				       u64 start, u64 len)
{
	u64 offset;
	u64 nsectors64;
	u32 nsectors;
	int sectorsize = sparity->sctx->fs_info->sectorsize;

	if (len >= sparity->stripe_len) {
		bitmap_set(bitmap, 0, sparity->nsectors);
		return;
	}

	start -= sparity->logic_start;
	start = div64_u64_rem(start, sparity->stripe_len, &offset);
	offset = div_u64(offset, sectorsize);
	nsectors64 = div_u64(len, sectorsize);

	ASSERT(nsectors64 < UINT_MAX);
	nsectors = (u32)nsectors64;

	if (offset + nsectors <= sparity->nsectors) {
		bitmap_set(bitmap, offset, nsectors);
		return;
	}

	bitmap_set(bitmap, offset, sparity->nsectors - offset);
	bitmap_set(bitmap, 0, nsectors - (sparity->nsectors - offset));
}

static inline void scrub_parity_mark_sectors_error(struct scrub_parity *sparity,
						   u64 start, u64 len)
{
	__scrub_mark_bitmap(sparity, sparity->ebitmap, start, len);
}

static inline void scrub_parity_mark_sectors_data(struct scrub_parity *sparity,
						  u64 start, u64 len)
{
	__scrub_mark_bitmap(sparity, sparity->dbitmap, start, len);
}

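/*
 * Called once the last page of a block has completed I/O: kick off repair for
 * I/O errors, verify checksums otherwise, and for parity scrubs record any
 * uncorrected corruption in the parity error bitmap.
 */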
static void scrub_block_complete(struct scrub_block *sblock)
{
	int corrupted = 0;

	if (!sblock->no_io_error_seen) {
		corrupted = 1;
		scrub_handle_errored_block(sblock);
	} else {
		/*
		 * If the block has a checksum error, the write to the
		 * dev-replace target happens through the repair machinery;
		 * otherwise copy it to the target device right here.
		 */
		corrupted = scrub_checksum(sblock);
		if (!corrupted && sblock->sctx->is_dev_replace)
			scrub_write_block_to_dev_replace(sblock);
	}

	if (sblock->sparity && corrupted && !sblock->data_corrected) {
		u64 start = sblock->pagev[0]->logical;
		u64 end = sblock->pagev[sblock->page_count - 1]->logical +
			  PAGE_SIZE;

		scrub_parity_mark_sectors_error(sblock->sparity,
						start, end - start);
	}
}

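/*
 * Find the checksum for the sector at @logical in the pre-loaded csum list,
 * dropping list entries that end before @logical on the way. Returns 1 and
 * copies the checksum into @csum on success, 0 if no checksum was found.
 */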
static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u8 *csum)
{
	struct btrfs_ordered_sum *sum = NULL;
	unsigned long index;
	unsigned long num_sectors;

	while (!list_empty(&sctx->csum_list)) {
		sum = list_first_entry(&sctx->csum_list,
				       struct btrfs_ordered_sum, list);
		if (sum->bytenr > logical)
			return 0;
		if (sum->bytenr + sum->len > logical)
			break;

		++sctx->stat.csum_discards;
		list_del(&sum->list);
		kfree(sum);
		sum = NULL;
	}
	if (!sum)
		return 0;

	index = div_u64(logical - sum->bytenr, sctx->fs_info->sectorsize);
	ASSERT(index < UINT_MAX);

	num_sectors = sum->len / sctx->fs_info->sectorsize;
	memcpy(csum, sum->sums + index * sctx->csum_size, sctx->csum_size);
	if (index == num_sectors - 1) {
		list_del(&sum->list);
		kfree(sum);
	}
	return 1;
}

/* scrub extent tries to collect up to 64 kB for each bio */
static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
			u64 logical, u64 len,
			u64 physical, struct btrfs_device *dev, u64 flags,
			u64 gen, int mirror_num, u64 physical_for_dev_replace)
{
	int ret;
	u8 csum[BTRFS_CSUM_SIZE];
	u32 blocksize;

	if (flags & BTRFS_EXTENT_FLAG_DATA) {
		if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
			blocksize = map->stripe_len;
		else
			blocksize = sctx->fs_info->sectorsize;
		spin_lock(&sctx->stat_lock);
		sctx->stat.data_extents_scrubbed++;
		sctx->stat.data_bytes_scrubbed += len;
		spin_unlock(&sctx->stat_lock);
	} else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
			blocksize = map->stripe_len;
		else
			blocksize = sctx->fs_info->nodesize;
		spin_lock(&sctx->stat_lock);
		sctx->stat.tree_extents_scrubbed++;
		sctx->stat.tree_bytes_scrubbed += len;
		spin_unlock(&sctx->stat_lock);
	} else {
		blocksize = sctx->fs_info->sectorsize;
		WARN_ON(1);
	}

	while (len) {
		u64 l = min_t(u64, len, blocksize);
		int have_csum = 0;

		if (flags & BTRFS_EXTENT_FLAG_DATA) {
			/* push csums to sbio */
			have_csum = scrub_find_csum(sctx, logical, csum);
			if (have_csum == 0)
				++sctx->stat.no_csum;
		}
		ret = scrub_pages(sctx, logical, l, physical, dev, flags, gen,
				  mirror_num, have_csum ? csum : NULL, 0,
				  physical_for_dev_replace);
		if (ret)
			return ret;
		len -= l;
		logical += l;
		physical += l;
		physical_for_dev_replace += l;
	}
	return 0;
}

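/*
 * Like scrub_pages(), but for a block that is part of a RAID5/6 parity
 * stripe: each page additionally joins sparity->spages and the block keeps a
 * reference on the scrub_parity context.
 */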
static int scrub_pages_for_parity(struct scrub_parity *sparity,
				  u64 logical, u64 len,
				  u64 physical, struct btrfs_device *dev,
				  u64 flags, u64 gen, int mirror_num, u8 *csum)
{
	struct scrub_ctx *sctx = sparity->sctx;
	struct scrub_block *sblock;
	int index;

	sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);
	if (!sblock) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.malloc_errors++;
		spin_unlock(&sctx->stat_lock);
		return -ENOMEM;
	}

	/* one ref inside this function, plus one for each page added to
	 * a bio later on */
	refcount_set(&sblock->refs, 1);
	sblock->sctx = sctx;
	sblock->no_io_error_seen = 1;
	sblock->sparity = sparity;
	scrub_parity_get(sparity);

	for (index = 0; len > 0; index++) {
		struct scrub_page *spage;
		u64 l = min_t(u64, len, PAGE_SIZE);

		spage = kzalloc(sizeof(*spage), GFP_KERNEL);
		if (!spage) {
leave_nomem:
			spin_lock(&sctx->stat_lock);
			sctx->stat.malloc_errors++;
			spin_unlock(&sctx->stat_lock);
			scrub_block_put(sblock);
			return -ENOMEM;
		}
		BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
		/* For scrub block */
		scrub_page_get(spage);
		sblock->pagev[index] = spage;
		/* For scrub parity */
		scrub_page_get(spage);
		list_add_tail(&spage->list, &sparity->spages);
		spage->sblock = sblock;
		spage->dev = dev;
		spage->flags = flags;
		spage->generation = gen;
		spage->logical = logical;
		spage->physical = physical;
		spage->mirror_num = mirror_num;
		if (csum) {
			spage->have_csum = 1;
			memcpy(spage->csum, csum, sctx->csum_size);
		} else {
			spage->have_csum = 0;
		}
		sblock->page_count++;
		spage->page = alloc_page(GFP_KERNEL);
		if (!spage->page)
			goto leave_nomem;
		len -= l;
		logical += l;
		physical += l;
	}

	WARN_ON(sblock->page_count == 0);
	for (index = 0; index < sblock->page_count; index++) {
		struct scrub_page *spage = sblock->pagev[index];
		int ret;

		ret = scrub_add_page_to_rd_bio(sctx, spage);
		if (ret) {
			scrub_block_put(sblock);
			return ret;
		}
	}

	/* last one frees, either here or in bio completion for last page */
	scrub_block_put(sblock);
	return 0;
}

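/*
 * Scrub an extent that belongs to a parity stripe, splitting it into
 * blocksize'd chunks. Sectors on a missing device are only marked in the
 * error bitmap so the RAID56 rebuild can take care of them; data chunks
 * without a checksum are skipped.
 */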
static int scrub_extent_for_parity(struct scrub_parity *sparity,
				   u64 logical, u64 len,
				   u64 physical, struct btrfs_device *dev,
				   u64 flags, u64 gen, int mirror_num)
{
	struct scrub_ctx *sctx = sparity->sctx;
	int ret;
	u8 csum[BTRFS_CSUM_SIZE];
	u32 blocksize;

	if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) {
		scrub_parity_mark_sectors_error(sparity, logical, len);
		return 0;
	}

	if (flags & BTRFS_EXTENT_FLAG_DATA) {
		blocksize = sparity->stripe_len;
	} else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		blocksize = sparity->stripe_len;
	} else {
		blocksize = sctx->fs_info->sectorsize;
		WARN_ON(1);
	}

	while (len) {
		u64 l = min_t(u64, len, blocksize);
		int have_csum = 0;

		if (flags & BTRFS_EXTENT_FLAG_DATA) {
			/* push csums to sbio */
			have_csum = scrub_find_csum(sctx, logical, csum);
			if (have_csum == 0)
				goto skip;
		}
		ret = scrub_pages_for_parity(sparity, logical, l, physical, dev,
					     flags, gen, mirror_num,
					     have_csum ? csum : NULL);
		if (ret)
			return ret;
skip:
		len -= l;
		logical += l;
		physical += l;
	}
	return 0;
}

/*
 * Given a physical address, this will calculate its logical offset.
 * If this is a parity stripe, it will return the leftmost data
 * stripe's logical offset.
 *
 * Returns 0 if it is a data stripe, 1 if it is a parity stripe.
 */
static int get_raid56_logic_offset(u64 physical, int num,
				   struct map_lookup *map, u64 *offset,
				   u64 *stripe_start)
{
	int i;
	int j = 0;
	u64 stripe_nr;
	u64 last_offset;
	u32 stripe_index;
	u32 rot;
	const int data_stripes = nr_data_stripes(map);

	last_offset = (physical - map->stripes[num].physical) * data_stripes;
	if (stripe_start)
		*stripe_start = last_offset;

	*offset = last_offset;
	for (i = 0; i < data_stripes; i++) {
		*offset = last_offset + i * map->stripe_len;

		stripe_nr = div64_u64(*offset, map->stripe_len);
		stripe_nr = div_u64(stripe_nr, data_stripes);

		/* Work out the disk rotation on this stripe-set */
		stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, &rot);
		/* calculate which stripe this data is located on */
		rot += i;
		stripe_index = rot % map->num_stripes;
		if (stripe_index == num)
			return 0;
		if (stripe_index < num)
			j++;
	}
	*offset = last_offset + j * map->stripe_len;
	return 1;
}

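/*
 * Free a scrub_parity context: account every sector still set in the error
 * bitmap as an uncorrectable read error and drop the page references taken
 * for the parity stripe.
 */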
static void scrub_free_parity(struct scrub_parity *sparity)
{
	struct scrub_ctx *sctx = sparity->sctx;
	struct scrub_page *curr, *next;
	int nbits;

	nbits = bitmap_weight(sparity->ebitmap, sparity->nsectors);
	if (nbits) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.read_errors += nbits;
		sctx->stat.uncorrectable_errors += nbits;
		spin_unlock(&sctx->stat_lock);
	}

	list_for_each_entry_safe(curr, next, &sparity->spages, list) {
		list_del_init(&curr->list);
		scrub_page_put(curr);
	}

	kfree(sparity);
}

static void scrub_parity_bio_endio_worker(struct btrfs_work *work)
{
	struct scrub_parity *sparity = container_of(work, struct scrub_parity,
						    work);
	struct scrub_ctx *sctx = sparity->sctx;

	scrub_free_parity(sparity);
	scrub_pending_bio_dec(sctx);
}

static void scrub_parity_bio_endio(struct bio *bio)
{
	struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private;
	struct btrfs_fs_info *fs_info = sparity->sctx->fs_info;

	if (bio->bi_status)
		bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
			  sparity->nsectors);

	bio_put(bio);

	btrfs_init_work(&sparity->work, btrfs_scrubparity_helper,
			scrub_parity_bio_endio_worker, NULL, NULL);
	btrfs_queue_work(fs_info->scrub_parity_workers, &sparity->work);
}

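/*
 * Verify and repair the parity of a full stripe once all its pages have been
 * read: sectors that had errors are removed from the data bitmap and the
 * remainder is handed to the RAID56 layer as a scrub rbio. Called via the
 * last scrub_parity_put().
 */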
static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
{
	struct scrub_ctx *sctx = sparity->sctx;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	struct bio *bio;
	struct btrfs_raid_bio *rbio;
	struct btrfs_bio *bbio = NULL;
	u64 length;
	int ret;

	if (!bitmap_andnot(sparity->dbitmap, sparity->dbitmap, sparity->ebitmap,
			   sparity->nsectors))
		goto out;

	length = sparity->logic_end - sparity->logic_start;

	btrfs_bio_counter_inc_blocked(fs_info);
	ret = btrfs_map_sblock(fs_info, BTRFS_MAP_WRITE, sparity->logic_start,
			       &length, &bbio);
	if (ret || !bbio || !bbio->raid_map)
		goto bbio_out;

	bio = btrfs_io_bio_alloc(0);
	bio->bi_iter.bi_sector = sparity->logic_start >> 9;
	bio->bi_private = sparity;
	bio->bi_end_io = scrub_parity_bio_endio;

	rbio = raid56_parity_alloc_scrub_rbio(fs_info, bio, bbio,
					      length, sparity->scrub_dev,
					      sparity->dbitmap,
					      sparity->nsectors);
	if (!rbio)
		goto rbio_out;

	scrub_pending_bio_inc(sctx);
	raid56_parity_submit_scrub_rbio(rbio);
	return;

rbio_out:
	bio_put(bio);
bbio_out:
	btrfs_bio_counter_dec(fs_info);
	btrfs_put_bbio(bbio);
	bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
		  sparity->nsectors);
	spin_lock(&sctx->stat_lock);
	sctx->stat.malloc_errors++;
	spin_unlock(&sctx->stat_lock);
out:
	scrub_free_parity(sparity);
}

static inline int scrub_calc_parity_bitmap_len(int nsectors)
{
	return DIV_ROUND_UP(nsectors, BITS_PER_LONG) * sizeof(long);
}

static void scrub_parity_get(struct scrub_parity *sparity)
{
	refcount_inc(&sparity->refs);
}

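/*
 * Drop a reference on the scrub_parity context; the final put triggers the
 * parity check and repair.
 */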
static void scrub_parity_put(struct scrub_parity *sparity)
{
	if (!refcount_dec_and_test(&sparity->refs))
		return;

	scrub_parity_check_and_repair(sparity);
}

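/*
 * Scrub the parity of one full stripe: walk all extent items between
 * @logic_start and @logic_end, mark their sectors in the data bitmap, read
 * and verify the data, and finally let the RAID56 layer recompute and check
 * the parity through scrub_parity_put().
 */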
static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
						  struct map_lookup *map,
						  struct btrfs_device *sdev,
						  struct btrfs_path *path,
						  u64 logic_start,
						  u64 logic_end)
{
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	struct btrfs_root *root = fs_info->extent_root;
	struct btrfs_root *csum_root = fs_info->csum_root;
	struct btrfs_extent_item *extent;
	struct btrfs_bio *bbio = NULL;
	u64 flags;
	int ret;
	int slot;
	struct extent_buffer *l;
	struct btrfs_key key;
	u64 generation;
	u64 extent_logical;
	u64 extent_physical;
	u64 extent_len;
	u64 mapped_length;
	struct btrfs_device *extent_dev;
	struct scrub_parity *sparity;
	int nsectors;
	int bitmap_len;
	int extent_mirror_num;
	int stop_loop = 0;

	nsectors = div_u64(map->stripe_len, fs_info->sectorsize);
	bitmap_len = scrub_calc_parity_bitmap_len(nsectors);
	sparity = kzalloc(sizeof(struct scrub_parity) + 2 * bitmap_len,
			  GFP_NOFS);
	if (!sparity) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.malloc_errors++;
		spin_unlock(&sctx->stat_lock);
		return -ENOMEM;
	}

	sparity->stripe_len = map->stripe_len;
	sparity->nsectors = nsectors;
	sparity->sctx = sctx;
	sparity->scrub_dev = sdev;
	sparity->logic_start = logic_start;
	sparity->logic_end = logic_end;
	refcount_set(&sparity->refs, 1);
	INIT_LIST_HEAD(&sparity->spages);
	sparity->dbitmap = sparity->bitmap;
	sparity->ebitmap = (void *)sparity->bitmap + bitmap_len;

	ret = 0;
	while (logic_start < logic_end) {
		if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
			key.type = BTRFS_METADATA_ITEM_KEY;
		else
			key.type = BTRFS_EXTENT_ITEM_KEY;
		key.objectid = logic_start;
		key.offset = (u64)-1;

		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0)
			goto out;

		if (ret > 0) {
			ret = btrfs_previous_extent_item(root, path, 0);
			if (ret < 0)
				goto out;
			if (ret > 0) {
				btrfs_release_path(path);
				ret = btrfs_search_slot(NULL, root, &key,
							path, 0, 0);
				if (ret < 0)
					goto out;
			}
		}

		stop_loop = 0;
		while (1) {
			u64 bytes;

			l = path->nodes[0];
			slot = path->slots[0];
			if (slot >= btrfs_header_nritems(l)) {
				ret = btrfs_next_leaf(root, path);
				if (ret == 0)
					continue;
				if (ret < 0)
					goto out;

				stop_loop = 1;
				break;
			}
			btrfs_item_key_to_cpu(l, &key, slot);

			if (key.type != BTRFS_EXTENT_ITEM_KEY &&
			    key.type != BTRFS_METADATA_ITEM_KEY)
				goto next;

			if (key.type == BTRFS_METADATA_ITEM_KEY)
				bytes = fs_info->nodesize;
			else
				bytes = key.offset;

			if (key.objectid + bytes <= logic_start)
				goto next;

			if (key.objectid >= logic_end) {
				stop_loop = 1;
				break;
			}

			while (key.objectid >= logic_start + map->stripe_len)
				logic_start += map->stripe_len;

			extent = btrfs_item_ptr(l, slot,
						struct btrfs_extent_item);
			flags = btrfs_extent_flags(l, extent);
			generation = btrfs_extent_generation(l, extent);

			if ((flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) &&
			    (key.objectid < logic_start ||
			     key.objectid + bytes >
			     logic_start + map->stripe_len)) {
				btrfs_err(fs_info,
					  "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
					  key.objectid, logic_start);
				spin_lock(&sctx->stat_lock);
				sctx->stat.uncorrectable_errors++;
				spin_unlock(&sctx->stat_lock);
				goto next;
			}
again:
			extent_logical = key.objectid;
			extent_len = bytes;

			if (extent_logical < logic_start) {
				extent_len -= logic_start - extent_logical;
				extent_logical = logic_start;
			}

			if (extent_logical + extent_len >
			    logic_start + map->stripe_len)
				extent_len = logic_start + map->stripe_len -
					     extent_logical;

			scrub_parity_mark_sectors_data(sparity, extent_logical,
						       extent_len);

			mapped_length = extent_len;
			bbio = NULL;
			ret = btrfs_map_block(fs_info, BTRFS_MAP_READ,
					      extent_logical, &mapped_length, &bbio,
					      0);
			if (!ret) {
				if (!bbio || mapped_length < extent_len)
					ret = -EIO;
			}
			if (ret) {
				btrfs_put_bbio(bbio);
				goto out;
			}
			extent_physical = bbio->stripes[0].physical;
			extent_mirror_num = bbio->mirror_num;
			extent_dev = bbio->stripes[0].dev;
			btrfs_put_bbio(bbio);

			ret = btrfs_lookup_csums_range(csum_root,
						       extent_logical,
						       extent_logical + extent_len - 1,
						       &sctx->csum_list, 1);
			if (ret)
				goto out;

			ret = scrub_extent_for_parity(sparity, extent_logical,
						      extent_len,
						      extent_physical,
						      extent_dev, flags,
						      generation,
						      extent_mirror_num);

			scrub_free_csums(sctx);

			if (ret)
				goto out;

			if (extent_logical + extent_len <
			    key.objectid + bytes) {
				logic_start += map->stripe_len;

				if (logic_start >= logic_end) {
					stop_loop = 1;
					break;
				}

				if (logic_start < key.objectid + bytes) {
					cond_resched();
					goto again;
				}
			}
next:
			path->slots[0]++;
		}

		btrfs_release_path(path);

		if (stop_loop)
			break;

		logic_start += map->stripe_len;
	}
out:
	if (ret < 0)
		scrub_parity_mark_sectors_error(sparity, logic_start,
						logic_end - logic_start);
	scrub_parity_put(sparity);
	scrub_submit(sctx);
	mutex_lock(&sctx->wr_lock);
	scrub_wr_submit(sctx);
	mutex_unlock(&sctx->wr_lock);

	btrfs_release_path(path);
	return ret < 0 ? ret : 0;
}

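/*
 * Scrub one device stripe of a chunk: compute the logical offset, the
 * per-stripe increment and the mirror number from the RAID layout, then walk
 * all extent items that fall into this stripe and scrub them. RAID5/6 parity
 * stripes are diverted to scrub_raid56_parity().
 */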
static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
					   struct map_lookup *map,
					   struct btrfs_device *scrub_dev,
					   int num, u64 base, u64 length)
{
	struct btrfs_path *path, *ppath;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	struct btrfs_root *root = fs_info->extent_root;
	struct btrfs_root *csum_root = fs_info->csum_root;
	struct btrfs_extent_item *extent;
	struct blk_plug plug;
	u64 flags;
	int ret;
	int slot;
	u64 nstripes;
	struct extent_buffer *l;
	u64 physical;
	u64 logical;
	u64 logic_end;
	u64 physical_end;
	u64 generation;
	int mirror_num;
	struct reada_control *reada1;
	struct reada_control *reada2;
	struct btrfs_key key;
	struct btrfs_key key_end;
	u64 increment = map->stripe_len;
	u64 offset;
	u64 extent_logical;
	u64 extent_physical;
	u64 extent_len;
	u64 stripe_logical;
	u64 stripe_end;
	struct btrfs_device *extent_dev;
	int extent_mirror_num;
	int stop_loop = 0;

	physical = map->stripes[num].physical;
	offset = 0;
	nstripes = div64_u64(length, map->stripe_len);
	if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
		offset = map->stripe_len * num;
		increment = map->stripe_len * map->num_stripes;
		mirror_num = 1;
	} else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
		int factor = map->num_stripes / map->sub_stripes;
		offset = map->stripe_len * (num / map->sub_stripes);
		increment = map->stripe_len * factor;
		mirror_num = num % map->sub_stripes + 1;
	} else if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) {
		increment = map->stripe_len;
		mirror_num = num % map->num_stripes + 1;
	} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
		increment = map->stripe_len;
		mirror_num = num % map->num_stripes + 1;
	} else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
		get_raid56_logic_offset(physical, num, map, &offset, NULL);
		increment = map->stripe_len * nr_data_stripes(map);
		mirror_num = 1;
	} else {
		increment = map->stripe_len;
		mirror_num = 1;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ppath = btrfs_alloc_path();
	if (!ppath) {
		btrfs_free_path(path);
		return -ENOMEM;
	}

	/*
	 * work on commit root. The related disk blocks are static as
	 * long as COW is applied. This means, it is safe to rewrite
	 * them to repair disk errors without any race conditions
	 */
	path->search_commit_root = 1;
	path->skip_locking = 1;

	ppath->search_commit_root = 1;
	ppath->skip_locking = 1;

	/*
	 * trigger the readahead for extent tree and csum tree and wait for
	 * completion. During readahead, the scrub is officially paused
	 * to not hold off transaction commits
	 */
	logical = base + offset;
	physical_end = physical + nstripes * map->stripe_len;
	if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
		get_raid56_logic_offset(physical_end, num,
					map, &logic_end, NULL);
		logic_end += base;
	} else {
		logic_end = logical + increment * nstripes;
	}
	wait_event(sctx->list_wait,
		   atomic_read(&sctx->bios_in_flight) == 0);
	scrub_blocked_if_needed(fs_info);

	/* FIXME it might be better to start readahead at commit root */
	key.objectid = logical;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = (u64)0;
	key_end.objectid = logic_end;
	key_end.type = BTRFS_METADATA_ITEM_KEY;
	key_end.offset = (u64)-1;
	reada1 = btrfs_reada_add(root, &key, &key_end);

	key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
	key.type = BTRFS_EXTENT_CSUM_KEY;
	key.offset = logical;
	key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
	key_end.type = BTRFS_EXTENT_CSUM_KEY;
	key_end.offset = logic_end;
	reada2 = btrfs_reada_add(csum_root, &key, &key_end);

	if (!IS_ERR(reada1))
		btrfs_reada_wait(reada1);
	if (!IS_ERR(reada2))
		btrfs_reada_wait(reada2);

	/*
	 * collect all data csums for the stripe to avoid seeking during
	 * the scrub. This might currently (crc32) end up to be about 1MB
	 */
	blk_start_plug(&plug);

	/*
	 * now find all extents for each stripe and scrub them
	 */
	ret = 0;
	while (physical < physical_end) {
		/*
		 * canceled?
		 */
		if (atomic_read(&fs_info->scrub_cancel_req) ||
		    atomic_read(&sctx->cancel_req)) {
			ret = -ECANCELED;
			goto out;
		}

		/*
		 * check to see if we have to pause
		 */
		if (atomic_read(&fs_info->scrub_pause_req)) {
			/* push queued extents */
			sctx->flush_all_writes = true;
			scrub_submit(sctx);
			mutex_lock(&sctx->wr_lock);
			scrub_wr_submit(sctx);
			mutex_unlock(&sctx->wr_lock);
			wait_event(sctx->list_wait,
				   atomic_read(&sctx->bios_in_flight) == 0);
			sctx->flush_all_writes = false;
			scrub_blocked_if_needed(fs_info);
		}

		if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
			ret = get_raid56_logic_offset(physical, num, map,
						      &logical,
						      &stripe_logical);
			logical += base;
			if (ret) {
				/* it is a parity stripe */
				stripe_logical += base;
				stripe_end = stripe_logical + increment;
				ret = scrub_raid56_parity(sctx, map, scrub_dev,
							  ppath, stripe_logical,
							  stripe_end);
				if (ret)
					goto out;
				goto skip;
			}
		}

		if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
			key.type = BTRFS_METADATA_ITEM_KEY;
		else
			key.type = BTRFS_EXTENT_ITEM_KEY;
		key.objectid = logical;
		key.offset = (u64)-1;

		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0)
			goto out;

		if (ret > 0) {
			ret = btrfs_previous_extent_item(root, path, 0);
			if (ret < 0)
				goto out;
			if (ret > 0) {
				/* there's no extent item left of this
				 * stripe, redo the initial search */
				btrfs_release_path(path);
				ret = btrfs_search_slot(NULL, root, &key,
							path, 0, 0);
				if (ret < 0)
					goto out;
			}
		}

		stop_loop = 0;
		while (1) {
			u64 bytes;

			l = path->nodes[0];
			slot = path->slots[0];
			if (slot >= btrfs_header_nritems(l)) {
				ret = btrfs_next_leaf(root, path);
				if (ret == 0)
					continue;
				if (ret < 0)
					goto out;

				stop_loop = 1;
				break;
			}
			btrfs_item_key_to_cpu(l, &key, slot);

			if (key.type != BTRFS_EXTENT_ITEM_KEY &&
			    key.type != BTRFS_METADATA_ITEM_KEY)
				goto next;

			if (key.type == BTRFS_METADATA_ITEM_KEY)
				bytes = fs_info->nodesize;
			else
				bytes = key.offset;

			if (key.objectid + bytes <= logical)
				goto next;

			if (key.objectid >= logical + map->stripe_len) {
				/* out of this device extent */
				if (key.objectid >= logic_end)
					stop_loop = 1;
				break;
			}

			extent = btrfs_item_ptr(l, slot,
						struct btrfs_extent_item);
			flags = btrfs_extent_flags(l, extent);
			generation = btrfs_extent_generation(l, extent);

			if ((flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) &&
			    (key.objectid < logical ||
			     key.objectid + bytes >
			     logical + map->stripe_len)) {
				btrfs_err(fs_info,
					  "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
					  key.objectid, logical);
				spin_lock(&sctx->stat_lock);
				sctx->stat.uncorrectable_errors++;
				spin_unlock(&sctx->stat_lock);
				goto next;
			}

again:
			extent_logical = key.objectid;
			extent_len = bytes;

			/*
			 * trim extent to this stripe
			 */
			if (extent_logical < logical) {
				extent_len -= logical - extent_logical;
				extent_logical = logical;
			}
			if (extent_logical + extent_len >
			    logical + map->stripe_len) {
				extent_len = logical + map->stripe_len -
					     extent_logical;
			}

			extent_physical = extent_logical - logical + physical;
			extent_dev = scrub_dev;
			extent_mirror_num = mirror_num;
			if (sctx->is_dev_replace)
				scrub_remap_extent(fs_info, extent_logical,
						   extent_len, &extent_physical,
						   &extent_dev,
						   &extent_mirror_num);

			ret = btrfs_lookup_csums_range(csum_root,
						       extent_logical,
						       extent_logical +
						       extent_len - 1,
						       &sctx->csum_list, 1);
			if (ret)
				goto out;

			ret = scrub_extent(sctx, map, extent_logical, extent_len,
					   extent_physical, extent_dev, flags,
					   generation, extent_mirror_num,
					   extent_logical - logical + physical);

			scrub_free_csums(sctx);

			if (ret)
				goto out;

			if (extent_logical + extent_len <
			    key.objectid + bytes) {
				if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
					/*
					 * loop until we find next data stripe
					 * or we have finished all stripes.
					 */
loop:
					physical += map->stripe_len;
					ret = get_raid56_logic_offset(physical,
							num, map, &logical,
							&stripe_logical);
					logical += base;

					if (ret && physical < physical_end) {
						stripe_logical += base;
						stripe_end = stripe_logical +
								increment;
						ret = scrub_raid56_parity(sctx,
							map, scrub_dev, ppath,
							stripe_logical,
							stripe_end);
						if (ret)
							goto out;
						goto loop;
					}
				} else {
					physical += map->stripe_len;
					logical += increment;
				}
				if (logical < key.objectid + bytes) {
					cond_resched();
					goto again;
				}

				if (physical >= physical_end) {
					stop_loop = 1;
					break;
				}
			}
next:
			path->slots[0]++;
		}
		btrfs_release_path(path);
skip:
		logical += increment;
		physical += map->stripe_len;
		spin_lock(&sctx->stat_lock);
		if (stop_loop)
			sctx->stat.last_physical = map->stripes[num].physical +
						   length;
		else
			sctx->stat.last_physical = physical;
		spin_unlock(&sctx->stat_lock);
		if (stop_loop)
			break;
	}
out:
	/* push queued extents */
	scrub_submit(sctx);
	mutex_lock(&sctx->wr_lock);
	scrub_wr_submit(sctx);
	mutex_unlock(&sctx->wr_lock);

	blk_finish_plug(&plug);
	btrfs_free_path(path);
	btrfs_free_path(ppath);
	return ret < 0 ? ret : 0;
}

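/*
 * Scrub the part of the chunk at @chunk_offset that lives on @scrub_dev:
 * look up the chunk mapping and run scrub_stripe() for every stripe of the
 * chunk located at @dev_offset on this device.
 */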
static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
					  struct btrfs_device *scrub_dev,
					  u64 chunk_offset, u64 length,
					  u64 dev_offset,
					  struct btrfs_block_group_cache *cache)
{
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	struct extent_map_tree *map_tree = &fs_info->mapping_tree;
	struct map_lookup *map;
	struct extent_map *em;
	int i;
	int ret = 0;

	read_lock(&map_tree->lock);
	em = lookup_extent_mapping(map_tree, chunk_offset, 1);
	read_unlock(&map_tree->lock);

	if (!em) {
		/*
		 * Might have been an unused block group deleted by the
		 * cleaner kthread or relocation.
		 */
		spin_lock(&cache->lock);
		if (!cache->removed)
			ret = -EINVAL;
		spin_unlock(&cache->lock);

		return ret;
	}

	map = em->map_lookup;
	if (em->start != chunk_offset)
		goto out;

	if (em->len < length)
		goto out;

	for (i = 0; i < map->num_stripes; ++i) {
		if (map->stripes[i].dev->bdev == scrub_dev->bdev &&
		    map->stripes[i].physical == dev_offset) {
			ret = scrub_stripe(sctx, map, scrub_dev, i,
					   chunk_offset, length);
			if (ret)
				goto out;
		}
	}
out:
	free_extent_map(em);

	return ret;
}

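/*
 * Walk all device extents of @scrub_dev in [start, end) and scrub the
 * corresponding block groups one by one, setting each to RO mode for the
 * duration of the scrub and keeping the dev-replace cursor up to date.
 */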
static noinline_for_stack
int scrub_enumerate_chunks(struct scrub_ctx *sctx,
			   struct btrfs_device *scrub_dev, u64 start, u64 end)
{
	struct btrfs_dev_extent *dev_extent = NULL;
	struct btrfs_path *path;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	struct btrfs_root *root = fs_info->dev_root;
	u64 length;
	u64 chunk_offset;
	int ret = 0;
	int ro_set;
	int slot;
	struct extent_buffer *l;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_block_group_cache *cache;
	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = READA_FORWARD;
	path->search_commit_root = 1;
	path->skip_locking = 1;

	key.objectid = scrub_dev->devid;
	key.offset = 0ull;
	key.type = BTRFS_DEV_EXTENT_KEY;

	while (1) {
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0)
			break;
		if (ret > 0) {
			if (path->slots[0] >=
			    btrfs_header_nritems(path->nodes[0])) {
				ret = btrfs_next_leaf(root, path);
				if (ret < 0)
					break;
				if (ret > 0) {
					ret = 0;
					break;
				}
			} else {
				ret = 0;
			}
		}

		l = path->nodes[0];
		slot = path->slots[0];

		btrfs_item_key_to_cpu(l, &found_key, slot);

		if (found_key.objectid != scrub_dev->devid)
			break;

		if (found_key.type != BTRFS_DEV_EXTENT_KEY)
			break;

		if (found_key.offset >= end)
			break;

		if (found_key.offset < key.offset)
			break;

		dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
		length = btrfs_dev_extent_length(l, dev_extent);

		if (found_key.offset + length <= start)
			goto skip;

		chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);

		/*
		 * get a reference on the corresponding block group to prevent
		 * the chunk from going away while we scrub it
		 */
		cache = btrfs_lookup_block_group(fs_info, chunk_offset);

		/* some chunks are removed but not committed to disk yet,
		 * continue scrubbing */
		if (!cache)
			goto skip;

		/*
		 * we need to call btrfs_inc_block_group_ro() with scrubs_paused,
		 * to avoid deadlock caused by:
		 * btrfs_inc_block_group_ro()
		 * -> btrfs_wait_for_commit()
		 * -> commit_transaction()
		 * -> scrub_pause_off() -> scrub_blocked_if_needed()
		 */
		scrub_pause_on(fs_info);
		ret = btrfs_inc_block_group_ro(cache);
		if (!ret && sctx->is_dev_replace) {
			/*
			 * If we are doing a device replace, wait for any tasks
			 * that started delalloc right before we set the block
			 * group to RO mode, as they might have just allocated
			 * an extent from it or decided they could do a nocow
			 * write. And if any such tasks did that, wait for their
			 * ordered extents to complete and then commit the
			 * current transaction, so that we can later see the new
			 * extent items in the extent tree - the ordered extents
			 * create delayed data references (for cow writes) when
			 * they complete, which will be run and insert the
			 * corresponding extent items into the extent tree when
			 * we are still holding the RO mode of the block group.
			 */
			btrfs_wait_block_group_reservations(cache);
			btrfs_wait_nocow_writers(cache);
			ret = btrfs_wait_ordered_roots(fs_info, U64_MAX,
						       cache->key.objectid,
						       cache->key.offset);
			if (ret > 0) {
				struct btrfs_trans_handle *trans;

				trans = btrfs_join_transaction(root);
				if (IS_ERR(trans))
					ret = PTR_ERR(trans);
				else
					ret = btrfs_commit_transaction(trans);
				if (ret) {
					scrub_pause_off(fs_info);
					btrfs_put_block_group(cache);
					break;
				}
			}
		}
		scrub_pause_off(fs_info);

		if (ret == 0) {
			ro_set = 1;
		} else if (ret == -ENOSPC) {
			/*
			 * btrfs_inc_block_group_ro() returns -ENOSPC when it
			 * failed to create a new chunk for metadata.
			 * This is not a problem for scrub/replace, because
			 * metadata is always COWed, and our scrub pauses
			 * transaction commits.
			 */
			ro_set = 0;
		} else {
			btrfs_warn(fs_info,
				   "failed setting block group ro: %d", ret);
			btrfs_put_block_group(cache);
			break;
		}

		down_write(&fs_info->dev_replace.rwsem);
		dev_replace->cursor_right = found_key.offset + length;
		dev_replace->cursor_left = found_key.offset;
		dev_replace->item_needs_writeback = 1;
		up_write(&dev_replace->rwsem);

		ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length,
				  found_key.offset, cache);

		/*
		 * flush, submit all pending read and write bios, afterwards
		 * wait for them.
		 * Note that in the dev replace case, a read request causes
		 * write requests that are submitted in the read completion
		 * worker. Therefore in the current situation, it is required
		 * that all write requests are flushed, so that all read and
		 * write requests are really completed when bios_in_flight
		 * changes to 0.
		 */
		sctx->flush_all_writes = true;
		scrub_submit(sctx);
		mutex_lock(&sctx->wr_lock);
		scrub_wr_submit(sctx);
		mutex_unlock(&sctx->wr_lock);

		wait_event(sctx->list_wait,
			   atomic_read(&sctx->bios_in_flight) == 0);

		scrub_pause_on(fs_info);

		/*
		 * must be called before we decrease @scrub_ctx->workers_pending,
		 * otherwise we can deadlock on fs_info->scrub_lock
		 */
		wait_event(sctx->list_wait,
			   atomic_read(&sctx->workers_pending) == 0);
		sctx->flush_all_writes = false;

		scrub_pause_off(fs_info);

		down_write(&fs_info->dev_replace.rwsem);
		dev_replace->cursor_left = dev_replace->cursor_right;
		dev_replace->item_needs_writeback = 1;
		up_write(&fs_info->dev_replace.rwsem);

		if (ro_set)
			btrfs_dec_block_group_ro(cache);

		/*
		 * We might have prevented the cleaner kthread from deleting
		 * this block group if it was already unused because we raced
		 * and set it to RO mode first. So add it back to the unused
		 * list, otherwise it might not ever be deleted unless a manual
		 * balance is triggered or it becomes used and unused again.
		 */
		spin_lock(&cache->lock);
		if (!cache->removed && !cache->ro && cache->reserved == 0 &&
		    btrfs_block_group_used(&cache->item) == 0) {
			spin_unlock(&cache->lock);
			btrfs_mark_bg_unused(cache);
		} else {
			spin_unlock(&cache->lock);
		}

		btrfs_put_block_group(cache);
		if (ret)
			break;
		if (sctx->is_dev_replace &&
		    atomic64_read(&dev_replace->num_write_errors) > 0) {
			ret = -EIO;
			break;
		}
		if (sctx->stat.malloc_errors > 0) {
			ret = -ENOMEM;
			break;
		}
skip:
		key.offset = found_key.offset + length;
		btrfs_release_path(path);
	}

	btrfs_free_path(path);

	return ret;
}

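/*
 * Scrub all superblock copies of the device that fit within its committed
 * size.
 */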
static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
					   struct btrfs_device *scrub_dev)
{
	int i;
	u64 bytenr;
	u64 gen;
	int ret;
	struct btrfs_fs_info *fs_info = sctx->fs_info;

	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
		return -EIO;

	/* Seed devices of a new filesystem have their own generation. */
	if (scrub_dev->fs_devices != fs_info->fs_devices)
		gen = scrub_dev->generation;
	else
		gen = fs_info->last_trans_committed;

	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
		bytenr = btrfs_sb_offset(i);
		if (bytenr + BTRFS_SUPER_INFO_SIZE >
		    scrub_dev->commit_total_bytes)
			break;

		ret = scrub_pages(sctx, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr,
				  scrub_dev, BTRFS_EXTENT_FLAG_SUPER, gen, i,
				  NULL, 1, bytenr);
		if (ret)
			return ret;
	}
	wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);

	return 0;
}

/*
 * get a reference count on fs_info->scrub_workers. start workers if necessary
 */
static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
						int is_dev_replace)
{
	unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND;
	int max_active = fs_info->thread_pool_size;

	lockdep_assert_held(&fs_info->scrub_lock);

	if (refcount_read(&fs_info->scrub_workers_refcnt) == 0) {
		ASSERT(fs_info->scrub_workers == NULL);
		fs_info->scrub_workers = btrfs_alloc_workqueue(fs_info, "scrub",
				flags, is_dev_replace ? 1 : max_active, 4);
		if (!fs_info->scrub_workers)
			goto fail_scrub_workers;

		ASSERT(fs_info->scrub_wr_completion_workers == NULL);
		fs_info->scrub_wr_completion_workers =
			btrfs_alloc_workqueue(fs_info, "scrubwrc", flags,
					      max_active, 2);
		if (!fs_info->scrub_wr_completion_workers)
			goto fail_scrub_wr_completion_workers;

		ASSERT(fs_info->scrub_parity_workers == NULL);
		fs_info->scrub_parity_workers =
			btrfs_alloc_workqueue(fs_info, "scrubparity", flags,
					      max_active, 2);
		if (!fs_info->scrub_parity_workers)
			goto fail_scrub_parity_workers;

		refcount_set(&fs_info->scrub_workers_refcnt, 1);
	} else {
		refcount_inc(&fs_info->scrub_workers_refcnt);
	}
	return 0;

fail_scrub_parity_workers:
	btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers);
fail_scrub_wr_completion_workers:
	btrfs_destroy_workqueue(fs_info->scrub_workers);
fail_scrub_workers:
	return -ENOMEM;
}

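/*
 * Common entry point for scrub and device replace: validates the size
 * assumptions scrub relies on, sets up the scrub context and worker queues,
 * scrubs the superblocks and all chunks on the device, and reports progress
 * through @progress.
 */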
int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
		    u64 end, struct btrfs_scrub_progress *progress,
		    int readonly, int is_dev_replace)
{
	struct scrub_ctx *sctx;
	int ret;
	struct btrfs_device *dev;
	unsigned int nofs_flag;
	struct btrfs_workqueue *scrub_workers = NULL;
	struct btrfs_workqueue *scrub_wr_comp = NULL;
	struct btrfs_workqueue *scrub_parity = NULL;

	if (btrfs_fs_closing(fs_info))
		return -EAGAIN;

	if (fs_info->nodesize > BTRFS_STRIPE_LEN) {
		/*
		 * in this case scrub is unable to calculate the checksum
		 * the way scrub is implemented. Do not handle this
		 * situation at all because it won't ever happen.
		 */
		btrfs_err(fs_info,
			  "scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails",
			  fs_info->nodesize,
			  BTRFS_STRIPE_LEN);
		return -EINVAL;
	}

	if (fs_info->sectorsize != PAGE_SIZE) {
		/* not supported for data w/o checksums */
		btrfs_err_rl(fs_info,
			     "scrub: size assumption sectorsize != PAGE_SIZE (%d != %lu) fails",
			     fs_info->sectorsize, PAGE_SIZE);
		return -EINVAL;
	}

	if (fs_info->nodesize >
	    PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK ||
	    fs_info->sectorsize > PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK) {
		/*
		 * would exhaust the array bounds of pagev member in
		 * struct scrub_block
		 */
		btrfs_err(fs_info,
			  "scrub: size assumption nodesize and sectorsize <= SCRUB_MAX_PAGES_PER_BLOCK (%d <= %d && %d <= %d) fails",
			  fs_info->nodesize,
			  SCRUB_MAX_PAGES_PER_BLOCK,
			  fs_info->sectorsize,
			  SCRUB_MAX_PAGES_PER_BLOCK);
		return -EINVAL;
	}

	/* Allocate outside of device_list_mutex */
	sctx = scrub_setup_ctx(fs_info, is_dev_replace);
	if (IS_ERR(sctx))
		return PTR_ERR(sctx);

	mutex_lock(&fs_info->fs_devices->device_list_mutex);
	dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true);
	if (!dev || (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) &&
		     !is_dev_replace)) {
		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
		ret = -ENODEV;
		goto out_free_ctx;
	}

	if (!is_dev_replace && !readonly &&
	    !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
		btrfs_err_in_rcu(fs_info, "scrub: device %s is not writable",
				 rcu_str_deref(dev->name));
		ret = -EROFS;
		goto out_free_ctx;
	}

	mutex_lock(&fs_info->scrub_lock);
	if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
	    test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &dev->dev_state)) {
		mutex_unlock(&fs_info->scrub_lock);
		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
		ret = -EIO;
		goto out_free_ctx;
	}

	down_read(&fs_info->dev_replace.rwsem);
	if (dev->scrub_ctx ||
	    (!is_dev_replace &&
	     btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) {
		up_read(&fs_info->dev_replace.rwsem);
		mutex_unlock(&fs_info->scrub_lock);
		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
		ret = -EINPROGRESS;
		goto out_free_ctx;
	}
	up_read(&fs_info->dev_replace.rwsem);

	ret = scrub_workers_get(fs_info, is_dev_replace);
	if (ret) {
		mutex_unlock(&fs_info->scrub_lock);
		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
		goto out_free_ctx;
	}

	sctx->readonly = readonly;
	dev->scrub_ctx = sctx;
	mutex_unlock(&fs_info->fs_devices->device_list_mutex);

	/*
	 * by checking @scrub_pause_req here, we can avoid
	 * races between committing a transaction and scrubbing.
	 */
	__scrub_blocked_if_needed(fs_info);
	atomic_inc(&fs_info->scrubs_running);
	mutex_unlock(&fs_info->scrub_lock);

	/*
	 * In order to avoid deadlock with reclaim when there is a transaction
	 * trying to pause scrub, make sure we use GFP_NOFS for all the
	 * allocations done at scrub_pages() and scrub_pages_for_parity()
	 * invoked by our callees. The pausing request is done when the
	 * transaction commit starts, and it blocks the transaction until scrub
	 * is paused (done at specific points at scrub_stripe() or right above
	 * before incrementing fs_info->scrubs_running).
	 */
	nofs_flag = memalloc_nofs_save();
	if (!is_dev_replace) {
		btrfs_info(fs_info, "scrub: started on devid %llu", devid);
		/*
		 * by holding device list mutex, we can
		 * kick off writing super in log tree sync.
		 */
		mutex_lock(&fs_info->fs_devices->device_list_mutex);
		ret = scrub_supers(sctx, dev);
		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
	}

	if (!ret)
		ret = scrub_enumerate_chunks(sctx, dev, start, end);
	memalloc_nofs_restore(nofs_flag);

	wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
	atomic_dec(&fs_info->scrubs_running);
	wake_up(&fs_info->scrub_pause_wait);

	wait_event(sctx->list_wait, atomic_read(&sctx->workers_pending) == 0);

	if (progress)
		memcpy(progress, &sctx->stat, sizeof(*progress));

	if (!is_dev_replace)
		btrfs_info(fs_info, "scrub: %s on devid %llu with status: %d",
			   ret ? "not finished" : "finished", devid, ret);

	mutex_lock(&fs_info->scrub_lock);
	dev->scrub_ctx = NULL;
	if (refcount_dec_and_test(&fs_info->scrub_workers_refcnt)) {
		scrub_workers = fs_info->scrub_workers;
		scrub_wr_comp = fs_info->scrub_wr_completion_workers;
		scrub_parity = fs_info->scrub_parity_workers;

		fs_info->scrub_workers = NULL;
		fs_info->scrub_wr_completion_workers = NULL;
		fs_info->scrub_parity_workers = NULL;
	}
	mutex_unlock(&fs_info->scrub_lock);

	btrfs_destroy_workqueue(scrub_workers);
	btrfs_destroy_workqueue(scrub_wr_comp);
	btrfs_destroy_workqueue(scrub_parity);
	scrub_put_ctx(sctx);

	return ret;

out_free_ctx:
	scrub_free_ctx(sctx);

	return ret;
}

void btrfs_scrub_pause(struct btrfs_fs_info *fs_info)
{
	mutex_lock(&fs_info->scrub_lock);
	atomic_inc(&fs_info->scrub_pause_req);
	while (atomic_read(&fs_info->scrubs_paused) !=
	       atomic_read(&fs_info->scrubs_running)) {
		mutex_unlock(&fs_info->scrub_lock);
		wait_event(fs_info->scrub_pause_wait,
			   atomic_read(&fs_info->scrubs_paused) ==
			   atomic_read(&fs_info->scrubs_running));
		mutex_lock(&fs_info->scrub_lock);
	}
	mutex_unlock(&fs_info->scrub_lock);
}

void btrfs_scrub_continue(struct btrfs_fs_info *fs_info)
{
	atomic_dec(&fs_info->scrub_pause_req);
	wake_up(&fs_info->scrub_pause_wait);
}

int btrfs_scrub_cancel(struct btrfs_fs_info *fs_info)
{
	mutex_lock(&fs_info->scrub_lock);
	if (!atomic_read(&fs_info->scrubs_running)) {
		mutex_unlock(&fs_info->scrub_lock);
		return -ENOTCONN;
	}

	atomic_inc(&fs_info->scrub_cancel_req);
	while (atomic_read(&fs_info->scrubs_running)) {
		mutex_unlock(&fs_info->scrub_lock);
		wait_event(fs_info->scrub_pause_wait,
			   atomic_read(&fs_info->scrubs_running) == 0);
		mutex_lock(&fs_info->scrub_lock);
	}
	atomic_dec(&fs_info->scrub_cancel_req);
	mutex_unlock(&fs_info->scrub_lock);

	return 0;
}

int btrfs_scrub_cancel_dev(struct btrfs_device *dev)
{
	struct btrfs_fs_info *fs_info = dev->fs_info;
	struct scrub_ctx *sctx;

	mutex_lock(&fs_info->scrub_lock);
	sctx = dev->scrub_ctx;
	if (!sctx) {
		mutex_unlock(&fs_info->scrub_lock);
		return -ENOTCONN;
	}
	atomic_inc(&sctx->cancel_req);
	while (dev->scrub_ctx) {
		mutex_unlock(&fs_info->scrub_lock);
		wait_event(fs_info->scrub_pause_wait,
			   dev->scrub_ctx == NULL);
		mutex_lock(&fs_info->scrub_lock);
	}
	mutex_unlock(&fs_info->scrub_lock);

	return 0;
}

int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
			 struct btrfs_scrub_progress *progress)
{
	struct btrfs_device *dev;
	struct scrub_ctx *sctx = NULL;

	mutex_lock(&fs_info->fs_devices->device_list_mutex);
	dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true);
	if (dev)
		sctx = dev->scrub_ctx;
	if (sctx)
		memcpy(progress, &sctx->stat, sizeof(*progress));
	mutex_unlock(&fs_info->fs_devices->device_list_mutex);

	return dev ? (sctx ? 0 : -ENOTCONN) : -ENODEV;
}

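/*
 * For dev-replace reads: map @extent_logical through btrfs_map_block() and
 * return the physical location, device and mirror of the first stripe. On
 * mapping failure the caller's values are left untouched.
 */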
static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
			       u64 extent_logical, u64 extent_len,
			       u64 *extent_physical,
			       struct btrfs_device **extent_dev,
			       int *extent_mirror_num)
{
	u64 mapped_length;
	struct btrfs_bio *bbio = NULL;
	int ret;

	mapped_length = extent_len;
	ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, extent_logical,
			      &mapped_length, &bbio, 0);
	if (ret || !bbio || mapped_length < extent_len ||
	    !bbio->stripes[0].dev->bdev) {
		btrfs_put_bbio(bbio);
		return;
	}

	*extent_physical = bbio->stripes[0].physical;
	*extent_mirror_num = bbio->mirror_num;
	*extent_dev = bbio->stripes[0].dev;
	btrfs_put_bbio(bbio);
}